0 i|F>dZddlZddlZddlmZdgZej dZej dZej dZ ej dZ ej d Z ej d Z ej d Z ej d Zej d Zej dZej dej"Zej d Zej dZGddej*Zy)zA parser for HTML and XHTML.N)unescape HTMLParserz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]z z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) \s* # possibly followed by a space )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#ceZdZdZdZddfd ZfdZdZdZd Z d Z d Z d Z d Z dZddZdZdZdZdZdZdZdZdZdZdZdZdZdZdZxZS)raEFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). If convert_charrefs is True the character references are converted automatically to the corresponding Unicode character (and self.handle_data() is no longer split in chunks), otherwise they are passed by calling self.handle_entityref() or self.handle_charref() with the string containing respectively the named or numeric reference as the argument. )scriptstyleT)convert_charrefscPt|||_|jy)zInitialize and reset this instance. If convert_charrefs is True (the default), all character references are automatically converted to the corresponding Unicode characters. N)super__init__r reset)selfr __class__s "/usr/lib/python3.12/html/parser.pyr zHTMLParser.__init__Ws!  0 cbd|_d|_t|_d|_t |y)z1Reset this instance. Loses all unprocessed data.z???N)rawdatalasttaginteresting_normal interesting cdata_elemr r )rrs rr zHTMLParser.resetas)  -  rcN|j|z|_|jdy)zFeed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). rN)rgoaheadrdatas rfeedzHTMLParser.feedis ||d*  Qrc&|jdy)zHandle any buffered data.N)rrs rclosezHTMLParser.closers  QrNc|jS)z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr s rget_starttag_textzHTMLParser.get_starttag_textxs###rc|j|_tjd|jztj|_y)Nz )lowerrrecompileIr)relems rset_cdata_modezHTMLParser.set_cdata_mode|s/**,::nt&FMrc(t|_d|_yN)rrrr s rclear_cdata_modezHTMLParser.clear_cdata_modes-rc |j}d}t|}||kr-|jrq|jse|j d|}|dkr|j dt ||dz }|dk\r'tjdj||sn|}n?|jj||}|r|j}n|jrns|}||krJ|jr*|js|jt|||n|j||||j||}||k(rn |j}|d|r1t j#||r|j%|} n|d|r|j'|} nt|d|r|j)|} nY|d|r|j+|} n>|d |r|j-|} n#|d z|ks|r|jd|d z} nn<| dkr`|sn2t j#||rnB|d|rK|d z|k(r|jdnt.j#||rn|j1||d zdn|d|rF|}d D]'} |j3| |d zs|t| z}n|j1||d z|n|d|r|j5||dzdn~|||dzj7dk(r|j9||d zdnM|d |r|j1||d zdn,|d|r|j;||d zdn t=d|} |j|| }n|d|rt>j#||}|rY|jAd d} |jC| |jE} |d| d z s| d z } |j|| }d||dvr,|j|||d z|j||d z}n|d|rtFj#||}|rW|jAd } |jI| |jE} |d| d z s| d z } |j|| }tJj#||}|rE|rB|jA||dk(r,|jE} | |kr|} |j||d z}n>|d z|kr'|jd|j||d z}nnJd||kr-|rm||krh|js\|jr*|js|jt|||n|j||||j||}||d|_y)Nr<&"z[\s;]#FA.&8&F&//!< !S[ 0 %&++GAaCN;#K3))'!A#$-8 1Q3--/;>((17#D!,++GAaCDM:#D!,wqst}5,-FGGANN1a(D!$ gq1 ;;=2.D''- A%c1Q3/Eq!,Agabk)((1Q38 NN1ac2C#!3 ;;q>D))$/ A%c1Q3/Eq!,A"((!4u{{} ;!IIK6 !A NN1a!e4!eq[$$S)q!a%0A555qw!ez 1q5$$T__  '!A,!78  1.q!$Aqr{ rcp|j}|||dzdk(sJd|||dzdk(r|j|S|||dzdk(r|j|S|||dzjd k(r7|j d |dz}|d k(ry |j ||dz||d zS|j |S) Nr7r6z+unexpected call to parse_html_declaration()r9r4r:zr)rrMparse_marked_sectionr&rArTparse_bogus_comment)rr^rgtposs rrOz!HTMLParser.parse_html_declarations,,q1~% D)C D% 1QqS>V #%%a( ( Qqs^u $,,Q/ / Qqs^ ! ! #{ 2LLac*E{   WQqS/ 07N++A. .rc|j}|||dzdvsJd|jd|dz}|dk(ry|r|j||dz||dzS)Nr7)r6r3z"unexpected call to parse_comment()rr>r)rrArQ)rr^reportrposs rrgzHTMLParser.parse_bogus_comment(su,,q1~- C1B C-ll3!$ "9    !C 0 1Qwrc|j}|||dzdk(sJdtj||dz}|sy|j}|j ||dz||j }|S)Nr7r5zunexpected call to parse_pi()r>)rpicloserDrErUrZ)rr^rrJr`s rrNzHTMLParser.parse_pi4st,,q1~%F'FF%w!, KKM wqsA' IIKrc~d|_|j|}|dkr|S|j}||||_g}tj ||dz}|sJd|j }|j djx|_}||krtj ||}|sn|j ddd\} } } | sd} n,| dddcxk(r| ddk(sn| dddcxk(r| ddk(rnn| dd} | r t| } |j| j| f|j }||kr|||j} | d vr|j||||S| jd r|j|||S|j!||||j"vr|j%||S) Nrrz#unexpected call to parse_starttag()r7r:'r>")r/>rq)r#check_for_whole_start_tagrtagfind_tolerantrJrZrXr&rattrfind_tolerantrappendstriprFrRhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr+) rr^endposrattrsrJrbtagmattrnamerest attrvaluerZs rrKzHTMLParser.parse_starttag@s#//2 A:M,,&q0 &&w!4;;;u IIK"[[^1133 s&j!''3A()1a(8 %HdI 2A$8)BC.82A#7237%aO $Y/ LL(..*I6 7A&ja%%' k !   WQv. /M <<   # #C /    e ,d111##C( rcH|j}tj||}|rt|j}|||dz}|dk(r|dzS|dk(r6|j d|r|dzS|j d|ry||kDr|S|dzS|dk(ry|dvry||kDr|S|dzSt d ) Nrr/rqr7r>rz6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZr=)rlocatestarttagend_tolerantrJrZrHrV)rr^rr}r`nexts rrrz$HTMLParser.check_for_whole_start_tagls,, & , ,Wa 8 A1QqS>Ds{1u s{%%dA.q5L%%c1-q5Hq5Lrz561u1u 677rc|j}|||dzdk(sJdtj||dz}|sy|j}tj ||}|s|j |j||||Stj ||dz}|s!|||dzdk(r|dzS|j|S|jdj}|jd|j}|j||dzS|jdj}|j %||j k7r|j||||S|j||j|S) Nr7r3zunexpected call to parse_endtagrr>r:zr)r endendtagrDrZ endtagfindrJrrFrsrgrXr&rA handle_endtagr.)rr^rrJrh namematchtagnamer*s rrLzHTMLParser.parse_endtagsy,,q1~%H'HH%  !A#.   !,*  5!12 (..w!U*Q3J33A66ooa(..0G LLimmo6E   w '7N{{1~##% ?? &t&  5!12  4   rcJ|j|||j|yr-)rxrrr|r{s rrwzHTMLParser.handle_startendtags  S%( 3rcyr-rs rrxzHTMLParser.handle_starttag rcyr-r)rr|s rrzHTMLParser.handle_endtagrrcyr-rrrds rrYzHTMLParser.handle_charrefrrcyr-rrs rr\zHTMLParser.handle_entityrefrrcyr-rrs rrFzHTMLParser.handle_datarrcyr-rrs rrQzHTMLParser.handle_commentrrcyr-r)rdecls rrTzHTMLParser.handle_declrrcyr-rrs rrUzHTMLParser.handle_pirrcyr-rrs rrSzHTMLParser.unknown_declrr)r)__name__ __module__ __qualname____doc__ryr r rr!r#r$r+r.rrOrgrNrKrrrLrwrxrrYr\rFrQrTrUrS __classcell__)rs@rrr?s*1+/O$NG#X/*  (X8D%P          r)rr' _markupbasehtmlr__all__r(rr]r[rWrIrPrm commentclosersrtVERBOSErrr ParserBaserrrrrs#"  . RZZ' RZZ % BJJ> ? "**@ Arzz+& RZZ % "**S/rzz)$ 2::LMBJJ=>(RZZ)ZZ BJJsO RZZ> ? \ ''\ r