|eXdZddlZddlZddlZddlmZmZddlmZddl m Z m Z m Z m Z mZddlmZmZmZmZmZmZddlmZgdZej2d Zgd Zed ZGd d eZGddeZ Gdde Z!Gdde"Z#GddZ$e$Z%Gdde&Z'GddZ(dZ)GddZ*e*Z+dZ,Gdd Z-Gd!d"eZ.Gd#d$eZ/Gd%d&e e/Z0Gd'd(Z1Gd)d*e0Z2d+Z3Gd,d-e/Z4Gd.d/e0e4Z5y)0z pygments.lexer ~~~~~~~~~~~~~~ Base lexer classes. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. N) apply_filtersFilter)get_filter_by_name)ErrorTextOther Whitespace _TokenType) get_bool_opt get_int_opt get_list_optmake_analysatorFuture guess_decode) regex_opt) Lexer RegexLexerExtendedRegexLexerDelegatingLexer LexerContextincludeinheritbygroupsusingthisdefaultwordsline_rez.*? ))sutf-8)szutf-32)szutf-32be)szutf-16)szutf-16becy)N)xs 0/usr/lib/python3/dist-packages/pygments/lexer.pyr%"ceZdZdZdZy) LexerMetaz This metaclass automagically converts ``analyse_text`` methods into static methods which always return float values. c\d|vrt|d|d<tj||||S)N analyse_text)rtype__new__)mcsnamebasesds r$r-zLexerMeta.__new__+s3 Q  /.0A BAn ||Cua00r'N)__name__ __module__ __qualname____doc__r-r"r'r$r)r)%s  1r'r)c\eZdZdZdZgZgZgZgZdZ dZ dZ dZ dZ dZdZdZd d Zd Zy) ra Lexer for a specific language. See also :doc:`lexerdevelopment`, a high-level guide to writing lexers. Lexer classes have attributes used for choosing the most appropriate lexer based on various criteria. .. autoattribute:: name :no-value: .. autoattribute:: aliases :no-value: .. autoattribute:: filenames :no-value: .. autoattribute:: alias_filenames .. autoattribute:: mimetypes :no-value: .. autoattribute:: priority Lexers included in Pygments should have an additional attribute: .. autoattribute:: url :no-value: Lexers included in Pygments may have additional attributes: .. autoattribute:: _example :no-value: You can pass options to the constructor. The basic options recognized by all lexers and processed by the base `Lexer` class are: ``stripnl`` Strip leading and trailing newlines from the input (default: True). ``stripall`` Strip all leading and trailing whitespace from the input (default: False). ``ensurenl`` Make sure that the input ends with a newline (default: True). This is required for some lexers that consume input linewise. .. versionadded:: 1.3 ``tabsize`` If given and greater than 0, expand tabs in the input (default: 0). ``encoding`` If given, must be an encoding name. This encoding will be used to convert the input string to Unicode, if it is not already a Unicode string (default: ``'guess'``, which uses a simple UTF-8 / Locale / Latin1 detection. Can also be ``'chardet'`` to use the chardet library, if it is installed. ``inencoding`` Overrides the ``encoding`` if given. Nrc l||_t|dd|_t|dd|_t|dd|_t |dd|_|jdd |_|jd xs |j|_g|_ t|d d D]}|j|y )a This constructor takes arbitrary options as keyword arguments. Every subclass must first process its own options and then call the `Lexer` constructor, since it processes the basic options like `stripnl`. An example looks like this: .. sourcecode:: python def __init__(self, **options): self.compress = options.get('compress', '') Lexer.__init__(self, **options) As these options must all be specifiable as strings (due to the command line usage), there are various utility functions available to help with that, see `Utilities`_. stripnlTstripallFensurenltabsizerencodingguess inencodingfiltersr"N) optionsr r8r9r:r r;getr<r?r add_filter)selfr@filter_s r$__init__zLexer.__init__s& #GY= $Wj%@ $Wj$? "7Iq9  J8  L1BT]]  #GY; %G OOG $ %r'c|jr'd|jjd|jdSd|jjzS)Nzz)r@ __class__r2rCs r$__repr__zLexer.__repr__s@ <<59^^5L5L59\\C C*DNN,C,CC Cr'c rt|ts t|fi|}|jj |y)z8 Add a new stream filter to this lexer. N) isinstancerrr?append)rCrDr@s r$rBzLexer.add_filters/'6*(.streamer s066t< 1ad  s"%)rdrr?)rCrO unfilteredrkstreams`` r$ get_tokenszLexer.get_tokenss=++D1 "64<<>F r'ct)aS This method should process the text and return an iterable of ``(index, tokentype, value)`` tuples where ``index`` is the starting position of the token within the input text. It must be overridden by subclasses. It is recommended to implement it as a generator to maximize effectiveness. )NotImplementedError)rCrOs r$rhzLexer.get_tokens_unprocesseds "!r')F)r2r3r4r5r/aliases filenamesalias_filenames mimetypespriorityurl_examplerErJrBr+rdrnrhr"r'r$rr1se6r DG IOIH CH%<D% "-^0 "r'r) metaclassc eZdZdZefdZdZy)ra  This lexer takes two lexer as arguments. A root lexer and a language lexer. First everything is scanned using the language lexer, afterwards all ``Other`` tokens are lexed using the root lexer. The lexers from the ``template`` lexer package use this base lexer. c r|di||_|di||_||_tj|fi|yNr") root_lexerlanguage_lexerneedlerrE)rC _root_lexer_language_lexer_needler@s r$rEzDelegatingLexer.__init__'s9%00-88  t'w'r'cld}g}g}|jj|D]N\}}}||jur&|r|jt ||fg}||z };|j|||fP|r|jt ||ft ||j j|S)N)r}rhr~rMrY do_insertionsr|)rCrObuffered insertions lng_bufferirirjs r$rhz&DelegatingLexer.get_tokens_unprocessed-s  **AA$G -GAq!DKK%%s8}j&AB!#JA !!1a), -    s8}j9 :Z!__CCHMO Or'N)r2r3r4r5rrErhr"r'r$rrs>C( Or'rceZdZdZy)rzI Indicates that a state should include rules from another state. Nr2r3r4r5r"r'r$rrDs  r'rceZdZdZdZy)_inheritzC Indicates the a state should inherit from its superclass. cy)Nrr"rIs r$rJz_inherit.__repr__Osr'N)r2r3r4r5rJr"r'r$rrKs r'rceZdZdZdZdZy)combinedz: Indicates a state combined from multiple states. c.tj||Srg)tupler-)clsargss r$r-zcombined.__new__Zs}}S$''r'cyrgr")rCrs r$rEzcombined.__init__]s r'N)r2r3r4r5r-rEr"r'r$rrUs( r'rc:eZdZdZdZd dZd dZd dZdZdZ y) _PseudoMatchz: A pseudo match object constructed from a string. c ||_||_yrg)_text_start)rCstartrOs r$rEz_PseudoMatch.__init__gs  r'Nc|jSrg)rrCargs r$rz_PseudoMatch.startks {{r'cF|jt|jzSrg)rrYrrs r$endz_PseudoMatch.endns{{S_,,r'c4|r td|jS)Nz No such group) IndexErrorrrs r$groupz_PseudoMatch.groupqs _- -zzr'c|jfSrg)rrIs r$groupsz_PseudoMatch.groupsvs }r'ciSrgr"rIs r$ groupdictz_PseudoMatch.groupdictys r'rg) r2r3r4r5rErrrrrr"r'r$rrbs%- r'rcdfd }|S)zL Callback that yields multiple actions for each group in the match. c 3KtD]\}}| t|tur1|j|dz}|s1|j |dz||fK|j|dz}|b|r|j |dz|_||t |j |dz||D] }|s| |r|j|_yyw)N) enumerater,r rrposrr)lexermatchctxractiondataitemrs r$callbackzbygroups..callbacks"4 'IAv~f+{{1q5)++a!e,fd::{{1q5)#"'++a!e"4 &u'3EKKA4F'Ms!T'"&J' ' iikCG s.callbacks emm,$U__.v. A4244U[[]PiP "1a!eQk! "))+sBBc3 K j|jdi }|j}|j|j fi D]\}}}||z||f|r|j |_yywr{)rr@rrhrrr) rrrrrrrirj_otherrrs r$rzusing..callbacks MM%-- (!&!B A4244U[[]PiP "1a!eQk! "))+sBBrg)poprLlistrr)rrrrrs`` @r$rrseI& JJw  a$ '!"Ig "(!Ig  ~ &2 O & Or'ceZdZdZdZy)rz Indicates a state or state action (e.g. #pop) to apply. For example default('#pop') is equivalent to ('', Token, '#pop') Note that state tuples may be used as well. .. versionadded:: 2.0 c||_yrg)r)rCrs r$rEzdefault.__init__s  r'N)r2r3r4r5rEr"r'r$rrs r'rceZdZdZddZdZy)rz Indicates a list of literal words that is transformed into an optimized regex that matches any of the words. .. versionadded:: 2.0 c.||_||_||_yrg)rprefixsuffix)rCrrrs r$rEzwords.__init__s   r'cZt|j|j|jS)Nrr)rrrrrIs r$rAz words.getsDKK LLr'N)rr)r2r3r4r5rErAr"r'r$rrs  Mr'rc<eZdZdZdZdZdZdZd dZdZ d Z y) RegexLexerMetazw Metaclass for RegexLexer, creates the self._tokens attribute from self.tokens on the first instantiation. ct|tr|j}tj||j S)zBPreprocess the regular expression component of a token definition.)rLrrArecompiler)rregexrflagsrs r$_process_regexzRegexLexerMeta._process_regexs. eV $IIKEzz%(...r'cRt|tust|s Jd||S)z5Preprocess the token component of a token definition.z0token type must be simple type or callable, not )r,r callable)rtokens r$_process_tokenzRegexLexerMeta._process_tokens.E{j(HUO LDI K L; r'ct|tr5|dk(ry||vr|fS|dk(r|S|dddk(rt|dd SJd|zt|trfd|jz}|xjd z c_g}|D]3}||k7s Jd |z|j |j |||5|||<|fSt|tr|D]}||vr|d vr Jd |z|SJd |z)z=Preprocess the state transition action of a token definition.#pop#pushNz#pop:zunknown new state %rz_tmp_%drzcircular state ref %r)rrzunknown new state zunknown new state def %r)rLrUintr_tmpnameextend_process_stater)r new_state unprocessed processed tmp_stateitokensistates r$_process_new_statez!RegexLexerMeta._process_new_statesK i %F"k)!|#g%  2A')IabM***@4y@@u  8 ,!CLL0I LLA LG# F*L,Cf,LL*s11+2;V EF F$+Ii <   5 )# 2+-"332(6124 2  @4y@ @5r'c t|tus Jd|z|ddk7s Jd|z||vr||Sgx}||<|j}||D]?}t|tr;||k7s Jd|z|j |j ||t|Ot|tr`t|trO|j|j||}|jtjdjd|ft|tus Jd|z |j!|d||}|j'|d } t)|dk(rd}n|j|d||}|j|| |fB|S#t"$r } t%d |dd |d |d | | d} ~ wwxYw)z%Preprocess a single state definition.zwrong state name %rr#zinvalid state name %rzcircular state reference %rrNzwrong rule def %rzuncompilable regex z in state z of z: r)r,rUflagsrLrrrrrrrrMrrrrr Exception ValueErrorrrY) rrrrtokensrtdefrrexerrrs r$rzRegexLexerMeta._process_state#sE{c!@#85#@@!Qx3? 7% ?? I U# #$&&5!&! 3D$(u}K&Ce&KK} c00i14T<=$)$(224::{IV  rzz"~33T9EF:& B(;d(B B& F((a&%@ &&tAw/E4yA~ 22473> K  MM3y1 2C! 3D  F "&q'5#s"<=BEF Fs*F F=F88F=Ncix}|j|<|xs|j|}t|D]}|j||||S)z-Preprocess a dictionary of token definitions.) _all_tokensrrr)rr/ tokendefsrrs r$process_tokendefzRegexLexerMeta.process_tokendefOsS,.. COOD)1D!1 )_ >(B/D $  C u!::e,# %*F5M!&+kk'&: *5K&)ooeT: &7<[]3C#(++g"6K*5{)BK&9 C CB )&! !"s$B;C ; CC CCcd|jvrLi|_d|_t|dr |jrn%|j d|j |_tj|g|i|S)z:Instantiate cls after preprocessing its token definitions._tokensrtoken_variantsr) rrrhasattrrrrrr,__call__)rrkwdss r$rzRegexLexerMeta.__call__sh CLL ( COCLs,-#2D2D!222s7H7H7JK }}S040400r'rg) r2r3r4r5rrrrrrrr"r'r$rrs. /  !AF*X/b 1r'rc4eZdZdZej ZiZddZy)rz Base for simple stateful regular expression-based lexers. Simplifies the lexing process so that you need only provide a list of states and regular expressions. c#:Kd}|j}t|}||d} |D]&\}}} |||} | s|8t|tur||| j fn||| Ed{| j }| t | trX| D]R} | dk(r t|dkDs|j(| dk(r|j|dB|j| TnWt | tr#t| t|k\r|dd=n*|| d=n$| dk(r|j|dn Jd| z||d}n7 ||dk(rd g}|d }|tdf|dz }P|t||f|dz }d7#t$rYywxYww) z~ Split ``text`` into (tokentype, text) pairs. ``stack`` is the initial stack (default: ``['root']``) rrrNrrwrong state def: %rrSr)rrr,r rrrLrrYrrMrabsr rr) rCrOrrr statestack statetokensrexmatchrrmrs r$rhz!RegexLexer.get_tokens_unprocesseds LL %[  2/ /:0 +&)T3')<:5"%vqwwy"88'-dA66%%'C ,%i7)2=#(F?'*:':(2(8%*g%5$.$5$5jn$E$.$5$5e$<=( 37 #9~Z@$.qrN$.yz$:&'1&--jn=K*?)*KK5&/ 2&? C0 J CyD(&,X &/&7 !:t33q ud3i//1HC_7P"sM8F5F0F 1>F0B!F F 2F4F F FFFFN)r) r2r3r4r5r MULTILINErrrhr"r'r$rrs LLE0F;r'rceZdZdZddZdZy)rz9 A helper object that holds lexer position data. Nc`||_||_|xs t||_|xsdg|_y)Nr)rOrrYrr)rCrOrrrs r$rEzLexerContext.__init__s. ##d)&vh r'cVd|jd|jd|jdS)Nz LexerContext(z, ))rOrrrIs r$rJzLexerContext.__repr__s IItxx- -r'NN)r2r3r4r5rErJr"r'r$rrs' -r'rceZdZdZddZy)rzE A RegexLexer that uses a context object to store its state. Nc#K|j}|st|d}|d}n |}||jd}|j} |D]\}}}|||j|j } | s)|lt |tur5|j|| jf| j |_n&||| |Ed{|s||jd}|5t|tr|D]} | dk(r4t|jdkDs!|jj<| dk(r)|jj|jdj|jj| nt|trAt|t|jk\r|jdd=nH|j|d=n8|dk(r)|jj|jdn Jd|z||jd}n |j|j k\ry||jd k(r9dg|_|d}|jt d f|xjdz c_;|jt"||jf|xjdz c_s7#t$$rYywxYww) z Split ``text`` into (tokentype, text) pairs. If ``context`` is given, use this lexer context instead. rrrrNrrr rS)rrrrOrrr,r rrLrrYrrMrr rrr) rCrOcontextrrr r rrrrs r$rhz)ExtendedRegexLexer.get_tokens_unprocessed su LL tQ'C#F+KC#CIIbM2K88D/:2 +&)T377CGG4)<:5"%''61779"<<&'eegCG'-dAs';;;#,.7 " .F  ,%i7)2<#(F?'*399~'9(+ %*g%5$'II$4$4SYYr]$C$'II$4$4U$;<( 37"9~SYY?$'IIabM$'IIij$9&'1II,,SYYr];K*?)*KK5&/ " &> G2 J ww#'')CGG},%+H &/&7 !ggtT111  ''5$sww-77GGqLGc7J:5K : KK KK r)r2r3r4r5rhr"r'r$rrs @r'rc#Kt|} t|\}}d}d}|D]\}}}||}d} |rx|t|z|k\rg|| ||z } | r||| f|t| z }|D]\} } } || | f|t| z }||z } t|\}}|r|t|z|k\rg| t|ks|||| df|t|| z z }|r9|xsd}|D]\}}}|||f|t|z } t|\}}|r8yy#t$r|Ed{7YywxYw#t$rd}YwxYw#t$rd}YywxYww)ag Helper for lexers which must combine the results of several sublexers. ``insertions`` is a list of ``(index, itokens)`` pairs. Each ``itokens`` iterable should be inserted at position ``index`` into the token stream given by the ``tokens`` argument. The result is a combined token stream. TODO: clean up the code here. NTrF)iternext StopIterationrY)rrrrrealposinsleftrrirjolditmpvalit_indexit_tokenit_valueps r$rrOsj!Jj)w GG%1a ?G!c!f*-tEAI&Fq&((3v;&07 ),(Hx113x=( )19D !%j!1w!c!f*- #a&=1ah& & s1v} $G+%0 ,Q GAq!1a-  s1v G  !*-NE7 E 4!    G  s EDA*ED,E*E9AE?D= EED) D#!D)&E(D))E, D:7E9D::E= E E E  EceZdZdZdZy)ProfilingRegexLexerMetaz>Metaclass for ProfilingRegexLexer, collects regex timing info.ct|tr-t|j|j|jn|t j |tjffd }|S)Nrcjdj fddg}tj}j|||}tj}|dxxdz cc<|dxx||z z cc<|S)Nrrr!r) _prof_data setdefaulttimer) rOrendposinfot0rest1rcompiledrrs r$ match_funcz:ProfilingRegexLexerMeta._process_regex..match_funcsr>>"%00%3xHDB..sF3CB GqLG GrBw GJr') rLrrrrrrsysmaxsize)rrrrr2r1rs` ` @@r$rz&ProfilingRegexLexerMeta._process_regexsZ eU #EKK #(<<1CC::c6*),  r'N)r2r3r4r5rr"r'r$r&r&s Hr'r&c eZdZdZgZdZddZy)ProfilingRegexLexerzFDrop-in replacement for RegexLexer that does profiling of its regexes.c#JKjjjitj ||Ed{jjj }t d|jDfdd}td|D}ttdjjt||fztdtdd ztd |D]}td |ztdy7ݭw) Nc3K|]H\\}}\}}|t|jdjdddd|d|zd|z|z fJyw)zu'z\\\NAi)reprr\rR).0rrnris r$ z=ProfilingRegexLexer.get_tokens_unprocessed..sa@+FQFQ47==/77EcrJ4!8TAX\3@sAAc"|jSrg)_prof_sort_index)r#rCs r$r%z.sAd&;&;$<r'T)keyreversec3&K|] }|d yw)Nr")r=r#s r$r@z=ProfilingRegexLexer.get_tokens_unprocessed..s+!+sz2Profiling result for %s lexing %d chars in %.3f mszn==============================================================================================================z$%-20s %-64s ncalls tottime percall)rrzn--------------------------------------------------------------------------------------------------------------z%-20s %-65s %5d %8.4f %8.4f) rHr)rMrrhrsortedrsumprintr2rY)rCrOrrawdatar sum_totalr1s` r$rhz*ProfilingRegexLexer.get_tokens_unprocesseds !!((,44T4GGG..++//1@/6}}@=" $ +d++   B~~&&D 9=> ? i 47IIJ i 5A /!3 4 5 i# HsAD#D!CD#Nr)r2r3r4r5r)rBrhr"r'r$r6r6sPJr'r6)6r5rr3r+pygments.filterrrpygments.filtersrpygments.tokenrrrr r pygments.utilr r r rrrpygments.regexoptr__all__rrrW staticmethod_default_analyser,r)rrrUrrrrrrrrrrrrrrrrrr&r6r"r'r$rTsJ 1/EE**' * "**W ,  . 1 1i"ii"XOeON c  *  u  64  w/d   MF M e1Ye1P^.^B - - EEP=@n,*0Gr'