B
    t\Y              	   @   s4  d Z ddlmZ ddlZddlZddlZddlmZ ddlZ	ddl
mZ ddlmZ ddlmZ ddlmZ ejZejZejZejZejZejZejZejZejZejZejZejZejZed	d
Ze dZ!i Z"edkre#j$Z%nej&d Z'dd Z%dd Z(dd Z)d8ddZ*dd Z+dd Z,e+dZ-e+dZ.dd Z/G dd  d ed d!d"d#d$gZ0G d%d& d&e0Z1G d'd( d(e2Z3d)d* Z4d+d, Z5d9d.d/Z6d0d1 Z7d:d2d3Z8e9d4kr0e:ej;d5krej;d6 Z<e=e<Z>e>? Z@W dQ R X n
ejA? Z@dd7lmBZBmCZC eDe@eEreBe@Z@xe6e@eC D ]ZFeGeF qW dS );a  
This tokenizer has been copied from the ``tokenize.py`` standard library
tokenizer. The reason was simple: The standard library tokenizer fails
if the indentation is not right. To make it possible to do error recovery the
    tokenizer needed to be rewritten.

Basically this is a stripped down version of the standard library module, so
you can read the documentation there. Additionally we included some speed and
memory optimizations here.
    )absolute_importN)
namedtuple)BOM_UTF8)PythonTokenTypes)
py_version)split_linesTokenCollectionzcpseudo_token single_quoted triple_quoted endpats whitespace fstring_pattern_map always_break_tokenszutf-8   _c             C   s   | t kS )N)	namechars)s r   4lib/python3.7/site-packages/parso/python/tokenize.py<lambda>7   s    r   c              O   s6   | dd}|rtd}|s$|d7 }|d|  d S )NcaptureF(z?:|))popAssertionErrorjoin)choiceskwargsr   startr   r   r   group:   s    r   c              G   s   t |  d S )N?)r   )r   r   r   r   maybeD   s    r   Fc       	      C   s   dd }dddg}| dkr$| d tdg}| d	kr\|r\d
dg}|rR|}t }qf||7 }n
|rft S x.|D ]&}x t|D ]}||| q|W qlW | dkr||d ||d |S )Nc             s   s.   x(t jdd | D  D ]}d|V  qW d S )Nc             S   s   g | ]}||  fqS r   )upper).0cr   r   r   
<listcomp>K   s    zI_all_string_prefixes.<locals>.different_case_versions.<locals>.<listcomp> )
_itertoolsproductr   )prefixr   r   r   r   different_case_versionsJ   s    z5_all_string_prefixes.<locals>.different_case_versionsbru)   r   brr!   )r)      ffr)      Zur)appendsetr"   permutationsupdate)	version_infoinclude_fstringonly_fstringr%   Zvalid_string_prefixesresultr,   r$   tr   r   r   _all_string_prefixesI   s(    




r9   c             C   s   t | t jS )N)recompileUNICODE)exprr   r   r   _compilel   s    r>   c             C   s:   yt t|  S  tk
r4   t|  t t| < }|S X d S )N)_token_collection_cachetupleKeyError_create_token_collection)r4   r7   r   r   r   _get_token_collectionp   s
    rC   z(?:[^{}\r\n]+|\{\{|\}\})+z(?:[^{}]+|\{\{|\}\})+c       -   	   C   s  d}t |}d}d}| dkrrd}d}d}d}t||||}	d	}
td
dt|
 }d|
 }t||}td|d }nbd}d}| dkrd}nd}d}t||||}	d}
tddt|
 }d|
 }t||}td|d }t|||	}t| }t| }tt| dd }t| ddd}t| }d}d}d}d }t|d! |d" }td#d$d%d&d'd(d)}d*}d+d,d-g}| dkrp|d.d/ t| }t|||}t|d0 td1d2 |d3 td4d2 }||g} d5}!|r| |t|!   td;|  }"t|dd7t|"||||dd7 }#i }$xN|D ]F}%t ||$|%d1 < t ||$|%d4 < t ||$|%d! < t ||$|%d" < qW t }&t }'i }(xJ|D ]B})xd8D ]}*|&|)|*  qnW xd9D ]}*|'|)|*  qW qdW x*|D ]"})x|!D ]}*|*|(|)|* < qW qW d:}+t |#},t|,|&|'|$||(|+S )<Nz[ \f\t]*z	#[^\r\n]*z\w+)r)   r+   z0[xX](?:_?[0-9a-fA-F])+z0[bB](?:_?[01])+z0[oO](?:_?[0-7])+z(?:0(?:_?0)*|[1-9](?:_?[0-9])*)z[eE][-+]?[0-9](?:_?[0-9])*z)[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?z\.[0-9](?:_?[0-9])*z[0-9](?:_?[0-9])*z[0-9](?:_?[0-9])*[jJ]z[jJ]z0[xX][0-9a-fA-F]+z
0[bB][01]+)r)   r   z0[oO][0-7]+z0[oO]?[0-7]+z(?:0+|[1-9][0-9]*)z[eE][-+]?[0-9]+z[0-9]+\.[0-9]*z\.[0-9]+z[0-9]+z
[0-9]+[jJ]T)r5   )r5   r6   z(?:\\.|[^'\\])*'z(?:\\.|[^"\\])*"z(?:\\.|'(?!'')|[^'\\])*'''z(?:\\.|"(?!"")|[^"\\])*"""z'''z"""z\*\*=?z>>=?z<<=?z//=?z->z[+\-*/%&@`|^!=<>]=?~z[][(){}]z\r\n?z\nz[:;.,@]r   z\.\.\.z'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*'z\\(?:\r\n?|\n)z"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*")rF   rE   z"""z'''\\(?:\r\n?|\n)|\Z)r   )rF   rE   )z"""z''')
;importclassdeftryexceptfinallywhilewithreturn)rG   )	r>   r   r   r9   insertr0   r1   addr   )-r4   
Whitespace
whitespaceCommentName	Hexnumber	Binnumber	Octnumber	Decnumber	IntnumberExponent
PointfloatExpfloatFloatnumber
ImagnumberNumberZpossible_prefixesStringPrefixZStringPrefixWithFZfstring_prefixesZFStringStartSingleDoubleSingle3Double3TripleOperatorBracketZspecial_argsSpecialFunnyContStrZpseudo_extra_poolZ
all_quotesPseudoExtrasPseudoTokenendpats_prefixsingle_quotedtriple_quotedfstring_pattern_mapr8   quoteZALWAYS_BREAK_TOKENSZpseudo_token_compiledr   r   r   rB   }   s    











rB   c               @   s   e Zd Zedd ZdS )Tokenc             C   sR   t | j}t|dkr0| jd t| d dfS | jd | jd t| j fS d S )N   r   )r   stringlen	start_pos)selflinesr   r   r   end_pos   s    
zToken.end_posN)__name__
__module____qualname__propertyr}   r   r   r   r   rv      s   rv   typerx   rz   r$   c               @   s   e Zd Zdd ZdS )PythonTokenc             C   s   d| j | jjd S )Nz6TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r))r   )_replacer   name)r{   r   r   r   __repr__   s    zPythonToken.__repr__N)r~   r   r   r   r   r   r   r   r      s   r   c               @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )FStringNodec             C   s"   || _ d| _d| _d | _d| _d S )Nr   r!   )ru   parentheses_countprevious_lineslast_string_start_posformat_spec_count)r{   ru   r   r   r   __init__  s
    zFStringNode.__init__c             C   s   |  j d7  _ d S )Nrw   )r   )r{   	characterr   r   r   open_parentheses  s    zFStringNode.open_parenthesesc             C   s"   |  j d8  _ | j dkrd| _d S )Nrw   r   )r   r   )r{   r   r   r   r   close_parentheses  s    
zFStringNode.close_parenthesesc             C   s   t | jdkS )Nr)   )ry   ru   )r{   r   r   r   allow_multiline  s    zFStringNode.allow_multilinec             C   s   | j | j dkS )Nr   )r   r   )r{   r   r   r   
is_in_expr  s    zFStringNode.is_in_exprN)r~   r   r   r   r   r   r   r   r   r   r   r   r     s
   	r   c             C   sf   xZt | D ]N\}}||jr
tt|j||d}d}|jr>t| |d = |dt|jfS q
W d |dfS )N)r$   r!   r   )	enumerate
startswithru   r   FSTRING_ENDr   r   ry   )fstring_stackrx   rz   additional_prefixZfstring_stack_indexZnodetokenr   r   r   _close_fstring_if_necessary  s    

r   c             C   s   |d }|  }|r"t||}nt||}|d kr@|j|fS |jsP||f|_|d}x>|D ]6}	| |	j |}
|
d k	r`|
dd t|	j  }q`W |}|t|7 }|r|	ds|	dr| j|7  _d}n
|j| }||fS )Nr   
r!   )
r   fstring_string_multi_linematchfstring_string_single_liner   r   r   ru   ry   endswith)rp   r   linelnumpostosr   r   rx   Zfstring_stack_nodeZ	end_matchZnew_posr   r   r   _find_fstring_string/  s*    




r   rw   r   c             C   s   t | dd}t|||dS )z0Generate tokens from a the source code (string).T)keepends)rz   )r   tokenize_lines)coder4   rz   r|   r   r   r   tokenizeM  s    r   c                s    fdd}|S )zL
    A small helper function to help debug the tokenize_lines function.
    c              ?   s   x | |D ]
}|V  qW d S )Nr   )argsr   r   )funcr   r   wrapperW  s    z_print_tokens.<locals>.wrapperr   )r   r   r   )r   r   _print_tokensS  s    r   c       )   	   #   s   fdd}t |\}}}}}}	}
d}dg d}d}d}d}d}d}d}d}|d d g }x,| D ]"}d7 d}t|}|r|trt}|dd }t|}d	|d  | }|d }||d 7 }d
}|r(||}|r|d}tt||d|  ||V  d}d}n|| }|| }qhx`||k r|r|d }| st	||||\}}|rtt
||jddV  d|_q,||krP ||d }t|||f|\}}}||7 }|dk	r|V  q,|||}|sV|||} |dkrx||  D ]}!|!V  qW |  }d
}tt|| |f|| d V  d}|d7 }q,||d }d}|d\}"}|"f|d}#|#dkr|st|}P |#d }$|r>|$dkr>d
}|dkr>|s>d}%|"}&x$||% dkr|%d7 }%|&d8 }&qW |& d kr$ttddV   |& x||&D ]}!|!V  q.W |$|ksf|$dkrz|#dkrz|#dkrztt|#|V  q,|$dkrtdd |D rg |dd< |s|dkr|stt|#|V  n||# }d}q,|$dkr|#drt||# }q,|#|krl||# }|||}|rN|d}||"| }#tt|#|V  n|"f}||"d }|}P q,|$|ks|#dd |ks|#dd |kr|#d dkr|"f}||$p||#d p||#d }||"d }|}P ntt|#|V  q,|#|	kr2|t|	|#  tt|#|V  q,t|$r|#|
krg |dd< d}td|d|" rx6  }'|'|"krttddV  n |' P qnW tt|#|V  n|$dkr||"d dkr||||"d  7 }P n|#dkr|r
|d  |# n|d7 }nb|#dkrD|r4|d !|# n|rv|d8 }n2|#dkrv|rv|d j"dkrv|d  j#d7  _#tt$|#|V  q,W qhW |rtt|||V  |ds|drd}|f}(x& dd D ]}'ttd|(dV  qW tt%d|(|V  dS )a)  
    A heavily modified Python standard library tokenizer.

    Additionally to the default information, yields also the prefix of each
    token. This idea comes from lib2to3. The prefix contains all information
    that is irrelevant for the parser like newlines in parentheses or comments.
    c             3   sP   xJ|  d k rJ|  d kr0t tddfdV  P t tddV     qW d S )Nr   r!   r   )r   ERROR_DEDENTDEDENTr   )r   )indentsr   sposr   r   dedent_if_necessaryg  s    z+tokenize_lines.<locals>.dedent_if_necessaryr   
0123456789r!   NTrw   ^Fr   )r$   r.   z
\#.z...z
c             s   s   | ]}|   V  qd S )N)r   )r   r,   r   r   r   	<genexpr>  s    z!tokenize_lines.<locals>.<genexpr>#r   r)   z	[ \f\t]*$\)z\
z\
z\z([{z)]}:r   )&rC   ry   r   BOM_UTF8_STRINGr   endr   STRINGr   r   FSTRING_STRINGr   r   r   
ERRORTOKENr   spanr   INDENTr0   NUMBERanyNEWLINEr   getr   FSTRING_STARTis_identifierr:   r   r   NAMEr   r   r   r   OP	ENDMARKER))r|   r4   rz   r   Zpseudo_tokenrr   rs   rp   rU   rt   Zalways_break_tokensZparen_levelmaxnumcharscontstrcontlineZnew_liner$   r   firstr   r   r   endprogendmatchZcontstr_startr   rx   restZfstring_end_tokenZquote_lengthpseudomatchr   r8   r   r   initialiZindent_startindentr}   r   )r   r   r   r   r   _  s\   




























r   __main__r.   rw   )python_bytes_to_unicodeparse_version_string)FF)r   )r   )H__doc__Z
__future__r   sysrx   r:   collectionsr   	itertoolsr"   codecsr   Zparso.python.tokenr   Zparso._compatibilityr   Zparso.utilsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   decoder   r?   strisidentifierr   Zascii_lettersr   r   r   r9   r>   rC   r   r   rB   rv   r   objectr   r   r   r   r   r   r~   ry   argvpathopenr,   readr   stdinr   r   
isinstancebytesr   printr   r   r   r   <module>   sx   



#	x


 v



