B
    \P?                 @   s  d dl mZ d dlZyd dlmZ W n  ek
rD   d dlmZ Y nX ddlmZ dZ	dZ
d	Zed
Zdd Zdd Zdd Zdd Zdd ZG dd deZdd ZG dd deZG dd deZe ZG dd deZG dd  d eZG d!d" d"eZG d#d$ d$eZG d%d& d&eZe Zd'e_d(e_ d)d* Z!d+d, Z"d-d. Z#d/d0 Z$e$d1Z%d2e%_d3e%_ dDd4d5Z&d6d7 Z'd8d9 Z(d:d; Z)d<d= Z*ee	Z+d>e+_d?e+_ ee
Z,d@e,_dAe,_ eeZ-dBe-_dCe-_ dS )E    )absolute_importN)maxsize)maxint   )ErrorsZbolZeoleof
c             C   s   t | }|  d}t|}g }xj||k rt|| }|d }|d7 }x,||k rr|t|| krr|d7 }|d7 }qHW || || q"W |S )z
    Return a list of character codes consisting of pairs
    [code1a, code1b, code2a, code2b,...] which cover all
    the characters in |s|.
    r   r   )listsortlenordappend)sZ	char_listinresultcode1code2 r   2lib/python3.7/site-packages/Cython/Plex/Regexps.pychars_to_ranges"   s    

r   c             C   sP   t | td}t|tdd }||k rHtdtd }|| || fS dS dS )z
    If the range of characters from code1 to code2-1 includes any
    lower case letters, return the corresponding upper case range.
    azr   AN)maxr   min)r   r   code3code4dr   r   r   uppercase_range9   s    r   c             C   sP   t | td}t|tdd }||k rHtdtd }|| || fS dS dS )z
    If the range of characters from code1 to code2-1 includes any
    upper case letters, return the corresponding lower case range.
    r   Zr   r   N)r   r   r   )r   r   r   r   r   r   r   r   lowercase_rangeG   s    r!   c                s&    fddt dt dD }t| S )z
    Given a list of codes as returned by chars_to_ranges, return
    an RE which will match a character in any of the ranges.
    c                s"   g | ]}t  |  |d   qS )r   )	CodeRange).0r   )	code_listr   r   
<listcomp>Z   s    zCodeRanges.<locals>.<listcomp>r      )ranger   Alt)r$   re_listr   )r$   r   
CodeRangesU   s    r*   c             C   sB   | t   kr|k r4n ntt| t ttt d |S t| |S dS )z
    CodeRange(code1, code2) is an RE which matches any character
    with a code |c| in the range |code1| <= |c| < |code2|.
    r   N)nl_coder(   RawCodeRange
RawNewline)r   r   r   r   r   r"   ^   s
    
r"   c               @   sd   e Zd ZdZdZdZdZdd Zdd Zdd	 Z	d
d Z
dd Zdd Zdd Zdd Zdd ZdS )REa  RE is the base class for regular expression constructors.
    The following operators are defined on REs:

         re1 + re2         is an RE which matches |re1| followed by |re2|
         re1 | re2         is an RE which matches either |re1| or |re2|
    r   Nc             C   s   t d| jj dS )aM  
        This method should add states to |machine| to implement this
        RE, starting at |initial_state| and ending at |final_state|.
        If |match_bol| is true, the RE must be able to match at the
        beginning of a line. If nocase is true, upper and lower case
        letters should be treated as equivalent.
        z %s.build_machine not implementedN)NotImplementedError	__class____name__)selfmachineinitial_statefinal_state	match_bolnocaser   r   r   build_machine{   s    	zRE.build_machinec             C   s"   |  }|| ||| |S )z~
        Given a state |s| of machine |m|, return a new state
        reachable from |s| on character |c| or epsilon.
        )	new_statelink_toadd_transition)r2   mr4   cr   r   r   r   	build_opt   s    
zRE.build_optc             C   s
   t | |S )N)Seq)r2   otherr   r   r   __add__   s    z
RE.__add__c             C   s
   t | |S )N)r(   )r2   r@   r   r   r   __or__   s    z	RE.__or__c             C   s   | j r| j S |  S d S )N)strcalc_str)r2   r   r   r   __str__   s    z
RE.__str__c             C   s   t |ts| ||d d S )NzPlex.RE instance)
isinstancer.   
wrong_type)r2   numvaluer   r   r   check_re   s    
zRE.check_rec             C   s"   t |t dkr| ||d d S )N string)typerG   )r2   rH   rI   r   r   r   check_string   s    zRE.check_stringc             C   s8   |  || t|dkr4td|| jjt|f d S )Nr   zOInvalid value for argument %d of Plex.%s.Expected a string of length 1, got: %s)rN   r   r   ZPlexValueErrorr0   r1   repr)r2   rH   rI   r   r   r   
check_char   s    zRE.check_charc             C   sL   t |tjkr$d|jj|jjf }n
t |j}td|| jj||f d S )Nz%s.%s instancez<Invalid type for argument %d of Plex.%s (expected %s, got %s)rM   typesZInstanceTyper0   
__module__r1   r   ZPlexTypeError)r2   rH   rI   ZexpectedZgotr   r   r   rG      s    
zRE.wrong_type)r1   rR   __qualname____doc__nullablematch_nlrC   r8   r>   rA   rB   rE   rJ   rN   rP   rG   r   r   r   r   r.   o   s   
r.   c             C   s>   t | dkr$tt| t| d }nt| }dt|  |_|S )z;
    Char(c) is an RE which matches the character |c|.
    r   zChar(%s))r   r"   r   SpecialSymbolrO   rC   )r=   r   r   r   r   Char   s
    rX   c               @   s<   e Zd ZdZdZdZdZdZdZdd Z	dd Z
dd	 ZdS )
r,   z
    RawCodeRange(code1, code2) is a low-level RE which matches any character
    with a code |c| in the range |code1| <= |c| < |code2|, where the range
    does not include newline. For internal use only.
    r   Nc             C   s&   ||f| _ t||| _t||| _d S )N)r'   r   r!   )r2   r   r   r   r   r   __init__   s    
zRawCodeRange.__init__c             C   sP   |r|  ||t}|| j| |rL| jr8|| j| | jrL|| j| d S )N)r>   BOLr;   r'   r   r!   )r2   r<   r4   r5   r6   r7   r   r   r   r8      s    zRawCodeRange.build_machinec             C   s   d| j | jf S )NzCodeRange(%d,%d))r   r   )r2   r   r   r   rD     s    zRawCodeRange.calc_str)r1   rR   rS   rT   rU   rV   r'   r   r!   rY   r8   rD   r   r   r   r   r,      s   
r,   c               @   s    e Zd ZdZdZdZdd ZdS )_RawNewlinezd
    RawNewline is a low-level RE which matches a newline character.
    For internal use only.
    r   r   c             C   s8   |r|  ||t}|  ||t}|ttd f| d S )Nr   )r>   rZ   EOLr;   r+   )r2   r<   r4   r5   r6   r7   r   r   r   r   r8     s    z_RawNewline.build_machineN)r1   rR   rS   rT   rU   rV   r8   r   r   r   r   r[     s   r[   c               @   s,   e Zd ZdZdZdZdZdd Zdd ZdS )rW   zx
    SpecialSymbol(sym) is an RE which matches the special input
    symbol |sym|, which is one of BOL, EOL or EOF.
    r   Nc             C   s
   || _ d S )N)sym)r2   r]   r   r   r   rY   #  s    zSpecialSymbol.__init__c             C   s.   |r| j tkr| ||t}|| j | d S )N)r]   r\   r>   rZ   r;   )r2   r<   r4   r5   r6   r7   r   r   r   r8   &  s    zSpecialSymbol.build_machine)	r1   rR   rS   rT   rU   rV   r]   rY   r8   r   r   r   r   rW     s   rW   c               @   s(   e Zd ZdZdd Zdd Zdd ZdS )	r?   z]Seq(re1, re2, re3...) is an RE which matches |re1| followed by
    |re2| followed by |re3|...c             G   s   d}x*t |D ]\}}| || |o*|j}qW || _|| _t|}d}x,|rt|d8 }|| }|jrjd}P |jsJP qJW || _d S )Nr   r   )	enumeraterJ   rU   r)   r   rV   )r2   r)   rU   r   rerV   r   r   r   rY   2  s"    zSeq.__init__c             C   s   | j }t|dkr|| nb|}t|}xTt|D ]H\}	}
|	|d k rR| }n|}|
||||| |}|
jpz|oz|
j}q4W d S )Nr   r   )r)   r   r:   r^   r9   r8   rV   rU   )r2   r<   r4   r5   r6   r7   r)   s1r   r   r_   s2r   r   r   r8   E  s    
zSeq.build_machinec             C   s   dd tt| j S )NzSeq(%s),)joinmaprC   r)   )r2   r   r   r   rD   U  s    zSeq.calc_strN)r1   rR   rS   rT   rY   r8   rD   r   r   r   r   r?   .  s   r?   c               @   s(   e Zd ZdZdd Zdd Zdd ZdS )	r(   zRAlt(re1, re2, re3...) is an RE which matches either |re1| or
    |re2| or |re3|...c             G   s   || _ d}d}g }g }d}xJ|D ]B}| || |jrF|| d}n
|| |jrZd}|d7 }q W || _|| _|| _|| _d S )Nr   r   )r)   rJ   rU   r   rV   nullable_resnon_nullable_res)r2   r)   rU   rV   re   rf   r   r_   r   r   r   rY   ]  s&    


zAlt.__init__c             C   s`   x | j D ]}|||||| qW | jr\|r:| ||t}x | jD ]}||||d| qBW d S )Nr   )re   r8   rf   r>   rZ   )r2   r<   r4   r5   r6   r7   r_   r   r   r   r8   s  s    zAlt.build_machinec             C   s   dd tt| j S )NzAlt(%s)rb   )rc   rd   rC   r)   )r2   r   r   r   rD   |  s    zAlt.calc_strN)r1   rR   rS   rT   rY   r8   rD   r   r   r   r   r(   Y  s   	r(   c               @   s(   e Zd ZdZdd Zdd Zdd ZdS )	Rep1z@Rep1(re) is an RE which matches one or more repetitions of |re|.c             C   s&   |  d| || _|j| _|j| _d S )Nr   )rJ   r_   rU   rV   )r2   r_   r   r   r   rY     s    zRep1.__init__c             C   sN   |  }|  }|| | j||||p0| jj| || || d S )N)r9   r:   r_   r8   rV   )r2   r<   r4   r5   r6   r7   r`   ra   r   r   r   r8     s    

zRep1.build_machinec             C   s
   d| j  S )NzRep1(%s))r_   )r2   r   r   r   rD     s    zRep1.calc_strN)r1   rR   rS   rT   rY   r8   rD   r   r   r   r   rg     s   rg   c               @   s0   e Zd ZdZdZdZdd Zdd Zdd ZdS )	
SwitchCasez
    SwitchCase(re, nocase) is an RE which matches the same strings as RE,
    but treating upper and lower case letters according to |nocase|. If
    |nocase| is true, case is ignored, otherwise it is not.
    Nc             C   s    || _ || _|j| _|j| _d S )N)r_   r7   rU   rV   )r2   r_   r7   r   r   r   rY     s    zSwitchCase.__init__c             C   s   | j ||||| j d S )N)r_   r8   r7   )r2   r<   r4   r5   r6   r7   r   r   r   r8     s    zSwitchCase.build_machinec             C   s   | j rd}nd}d|| jf S )NNoCaseCasez%s(%s))r7   r_   )r2   namer   r   r   rD     s    zSwitchCase.calc_str)	r1   rR   rS   rT   r_   r7   rY   r8   rD   r   r   r   r   rh     s   rh   z8
    Empty is an RE which matches the empty string.
    Emptyc             C   s$   t ttt|  }dt|  |_|S )z@
    Str1(s) is an RE which matches the literal string |s|.
    zStr(%s))r?   tuplerd   rX   rO   rC   )r   r   r   r   r   Str1  s    rn   c              G   sH   t | dkrt| d S tttt|  }ddtt|  |_|S dS )z
    Str(s) is an RE which matches the literal string |s|.
    Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
    r   r   zStr(%s)rb   N)r   rn   r(   rm   rd   rc   rO   rC   )Zstrsr   r   r   r   Str  s
    ro   c             C   s   t t| }dt|  |_|S )zH
    Any(s) is an RE which matches any character in the string |s|.
    zAny(%s))r*   r   rO   rC   )r   r   r   r   r   Any  s    rp   c             C   s:   t | }|dt  |t t|}dt|  |_|S )zp
    AnyBut(s) is an RE which matches any character (including
    newline) which is not in the string |s|.
    r   z
AnyBut(%s))r   insertr   r   r*   rO   rC   )r   rangesr   r   r   r   AnyBut  s    
rs   rK   zT
    AnyChar is an RE which matches any single character (including a newline).
    AnyCharc          	   C   s   |r*t t| t|d }d| |f |_n\g }x@tdt| dD ],}|t t| | t| |d  d  q@W t| }dt|  |_|S )a  
    Range(c1, c2) is an RE which matches any single character in the range
    |c1| to |c2| inclusive.
    Range(s) where |s| is a string of even length is an RE which matches
    any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
    r   zRange(%s,%s)r   r&   z	Range(%s))r"   r   rC   r'   r   r   r(   rO   )r`   ra   r   rr   r   r   r   r   Range  s    ,ru   c             C   s   t | t}d|  |_|S )zI
    Opt(re) is an RE which matches either |re| or the empty string.
    zOpt(%s))r(   rl   rC   )r_   r   r   r   r   Opt  s    

rv   c             C   s   t t| }d|  |_|S )zJ
    Rep(re) is an RE which matches zero or more repetitions of |re|.
    zRep(%s))rv   rg   rC   )r_   r   r   r   r   Rep  s    
rw   c             C   s   t | ddS )z
    NoCase(re) is an RE which matches the same strings as RE, but treating
    upper and lower case letters as equivalent.
    r   )r7   )rh   )r_   r   r   r   ri     s    ri   c             C   s   t | ddS )z
    Case(re) is an RE which matches the same strings as RE, but treating
    upper and lower case letters as distinct, i.e. it cancels the effect
    of any enclosing NoCase().
    r   )r7   )rh   )r_   r   r   r   rj      s    rj   z=
    Bol is an RE which matches the beginning of a line.
    Bolz7
    Eol is an RE which matches the end of a line.
    Eolz9
    Eof is an RE which matches the end of the file.
    Eof)N).Z
__future__r   rQ   sysr   r   ImportErrorrK   r   rZ   r\   ZEOFr   r+   r   r   r!   r*   r"   objectr.   rX   r,   r[   r-   rW   r?   r(   rg   rh   rl   rT   rC   rn   ro   rp   rs   rt   ru   rv   rw   ri   rj   rx   ry   rz   r   r   r   r   <module>	   sb   	n+'!	

		