B
     \W                 @   s  d dl Z d dlZd dlmZmZ d dlmZmZm	Z	m
Z
mZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZ d dl m!Z! d dl"m#Z# eej$G dd dej%Z&e	ej$dd e	ej$dd e	ej$dd e	ej$dd dd Z'dd Z(eej)ej*dd Z+eej*dd Z,e
ej$dd  Z-eej$d!d" Z.d#d$ Z/ed%d& Z0ed'd( Z1ed)d* Z2ed+d, Z3ed-d. Z4ed/d0d1d2 Z5d3d4 Z6ed5d6 Z7ed7d8 Z8ed9d: Z9ed/d0d;d< Z:ed=d> Z;ed?d@ Z<ed/d0dAdB Z=ed/d0dCdD Z>edEdF Z?ee@dGdH ZAee jBdIdJ ZCee jDdKdL ZEee jFdMdN ZGee jHdOdP ZIee jJdQdR ZKee jLdSdT ZMee jNdUdV ZOeej$dWdXdY ZPeej$dZd[d\ ZQeej$d]d^d_ ZReej$d`dvdbdcZSeddde ZTeej$dfdgdh ZUedwdjdkZVedldm ZWedndo ZXed/d0dpdq ZYee jZdrds Z[ee j\ee j]dtdu Z^dS )x    N)IntTypeConstant)	modelsregister_modelmake_attribute_wrapperunboxboxNativeValueoverloadoverload_method	intrinsic)lower_constant
lower_cast)cgutils)types)njit)PY_UNICODE_1BYTE_KINDPY_UNICODE_2BYTE_KINDPY_UNICODE_4BYTE_KINDPY_UNICODE_WCHAR_KIND)slicing)	c_helpers)
_Py_hash_t)memcpy_regionc               @   s   e Zd Zdd ZdS )UnicodeModelc             C   sN   dt jfdt jfdt jfdtfdt t jfdt jfg}tj	| ||| d S )Ndatalengthkindhashmeminfoparent)
r   voidptrintpint32r   ZMemInfoPointerZpyobjectr   StructModel__init__)selfZdmmZfe_typemembers r(   ,lib/python3.7/site-packages/numba/unicode.pyr%   %   s    zUnicodeModel.__init__N)__name__
__module____qualname__r%   r(   r(   r(   r)   r   #   s   r   r   _datar   _lengthr   _kindr   _hashc             C   s   ddl m}m}m}m}m}m}m}m} t	d }	|||||||||}
|
|	}| }| }| }|| ||||||}|dkrt
d|j}|j}|d t| }|| |}t||||jfS )ztGet string data from a python string for use at compile-time to embed
    the string data into the LLVM module.
    r   )	CFUNCTYPEc_void_pc_int	c_ssize_tc_ubyte	py_objectPOINTERbyrefZextract_unicodeNz1cannot extract unicode data from the given string   )Zctypesr1   r2   r3   r4   r5   r6   r7   r8   r   
ValueErrorvalue_kind_to_byte_widthZfrom_addressbytes)objr1   r2   r3   r4   r5   r6   r7   r8   Zextract_unicode_fnprotofnr   r   hashvr   nbytesoutr(   r(   r)   compile_time_get_string_data;   s     (
rD   c             C   sj   t |\}}}}|j}| ||}	t|| |}
|	|
_|
j||
_|
j||
_|
j	||
_	|

 S )zf
    Get string data by `compile_time_get_string_data()` and return a
    unicode_type LLVM value
    )rD   moduleZinsert_const_bytesr   create_struct_proxyr   r   typer   r   	_getvalue)contextbuildertypZliteral_stringZ	databytesr   r   rA   modZgvuni_strr(   r(   r)   make_string_from_constantU   s    rN   c             C   s   t | |||jS )N)rN   Zliteral_value)rI   rJ   ZfromtyZtotyvalr(   r(   r)   cast_from_literalf   s    rP   c             C   s   t | |||S )N)rN   )rI   rJ   rK   Zpyvalr(   r(   r)   constant_unicodeo   s    rQ   c       
      C   s|   |j |\}}}}}t| |j|j}||_||_||_||_	|j 
|||_||_t|j|j  }	t| |	dS )zE
    Convert a unicode str object to a native unicode structure.
    )is_error)pyapiZstring_as_string_size_and_kindr   rF   rI   rJ   r   r   r   r   Znrt_meminfo_new_from_pyobjectr   r    Zis_not_nullZerr_occurredr	   rH   )
rK   r>   cokr   r   r   rA   rM   rR   r(   r(   r)   unbox_unicode_strw   s    rV   c             C   sR   t | |j|j|d}|j|j|j|j}|j	| |jj
|j| | |S )z@
    Convert a native unicode structure to a unicode string
    )r;   )r   rF   rI   rJ   rS   Zstring_from_kind_and_datar   r   r   Zobject_hashnrtZdecref)rK   rO   rT   rM   Zresr(   r(   r)   box_unicode_str   s
    rX   c                s    fdd}|S )Nc                s@   |\}}| |t  }||||g}||tdS )N    )bitcastr   
as_pointerloadgepZzext)rI   rJ   	signatureargsr   idxptrch)bitsizer(   r)   codegen   s    z#make_deref_codegen.<locals>.codegenr(   )rc   rd   r(   )rc   r)   make_deref_codegen   s    re   c             C   s   t t jt j}|tdfS )N   )r   uint32r!   r"   re   )	typingctxr   offsetsigr(   r(   r)   deref_uint8   s    rk   c             C   s   t t jt j}|tdfS )N   )r   rg   r!   r"   re   )rh   r   ri   rj   r(   r(   r)   deref_uint16   s    rm   c             C   s   t t jt j}|tdfS )NrY   )r   rg   r!   r"   re   )rh   r   ri   rj   r(   r(   r)   deref_uint32   s    rn   c             C   s$   dd }t t jt jt j}||fS )zmake empty string with data buffer of size alloc_bytes.

    Must set length and kind values for string after it is returned
    c       
   	   S   s   |\}}}t tj}|| |}||||t|jd}	| j	||	|_
||_||_| td|_| j||j
|_t |jj|_| S )Nr9   )r   rF   r   unicode_typemuladdr   rG   rW   Zmeminfo_allocr   r   r   Zget_constantr   r   Zmeminfo_datar   Zget_null_valuer    rH   )
rI   rJ   r^   r_   Zkind_valZchar_bytes_valZ
length_valZuni_str_ctorrM   Z
nbytes_valr(   r(   r)   details   s    

z_malloc_string.<locals>.details)r   rp   r#   r"   )rh   r   Z
char_bytesr   rs   rj   r(   r(   r)   _malloc_string   s    rt   c             C   s*   t | }t| ||}t||td |S )Nr   )r<   rt   _set_code_pointnprg   )r   r   Z
char_widthsr(   r(   r)   _empty_string   s    rx   F)Z_nrtc             C   sJ   | j tkrt| j|S | j tkr,t| j|S | j tkrBt| j|S dS d S )Nr   )r/   r   rk   r-   r   rm   r   rn   )air(   r(   r)   _get_code_point   s    


r{   c                s    fdd}|S )Nc                sT   |\}}} dk r"| |t }||t  }|||||g |  S )NrY   )Ztruncr   rZ   r[   Zstorer]   Zget_dummy_value)rI   rJ   r^   r_   r   r`   rb   ra   )rc   r(   r)   rd      s    
z!make_set_codegen.<locals>.codegenr(   )rc   rd   r(   )rc   r)   make_set_codegen   s    r|   c             C   s    t t jt jt j}|tdfS )Nrf   )r   voidr!   int64rg   r|   )rh   r   r`   rb   rj   r(   r(   r)   	set_uint8   s    r   c             C   s    t t jt jt j}|tdfS )Nrl   )r   r}   r!   r~   rg   r|   )rh   r   r`   rb   rj   r(   r(   r)   
set_uint16  s    r   c             C   s    t t jt jt j}|tdfS )NrY   )r   r}   r!   r~   rg   r|   )rh   r   r`   rb   rj   r(   r(   r)   
set_uint32	  s    r   c             C   sZ   | j tkrt| j|| n<| j tkr4t| j|| n"| j tkrNt| j|| ntdd S )Nz4Unexpected unicode representation in _set_code_point)	r/   r   r   r-   r   r   r   r   AssertionError)ry   rz   rb   r(   r(   r)   ru     s    


ru   c             C   sV   | t ks|t krtd| tkr$|S | tkr>|tkr8|S | S n| tkrJ| S tdd S )Nz!PY_UNICODE_WCHAR_KIND unsupportedz/Unexpected unicode representation in _pick_kind)r   r   r   r   r   )Zkind1Zkind2r(   r(   r)   
_pick_kind!  s    r   c             C   sB   | t krdS | tkrdS | tkr$dS | tkr6tdntdd S )Nr9         z!PY_UNICODE_WCHAR_KIND unsupportedz'Unexpected unicode encoding encountered)r   r   r   r   r   )r   r(   r(   r)   r<   3  s    
r<   c             C   sz   |dkrdS || | j krdS || |j kr0dS xDt|D ]8}t| || }t||| }||k rfdS ||kr:dS q:W dS )Nr   ro   r9   )r.   ranger{   )ry   a_offsetbZb_offsetnrz   Za_chrZb_chrr(   r(   r)   _cmp_regionA  s    r   c             C   sB   x<t t|t|  d D ] }t||| dt| dkr|S qW dS )Nr9   r   ro   )r   lenr   )substrrw   rz   r(   r(   r)   _findU  s    r   c             C   s   | dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| d	kp| d
kp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkp| dkS )N	   
                        rY         i  i    i   i   i   i   i   i   i   i   i	   i
   i(   i)   i/   i_   i 0  r(   )
code_pointr(   r(   r)   _is_whitespace^  s:    r   c             C   s   t | tjrdd }|S d S )Nc             S   s   | j S )N)r.   )rw   r(   r(   r)   len_impl  s    zunicode_len.<locals>.len_impl)
isinstancer   UnicodeType)rw   r   r(   r(   r)   unicode_len  s    r   c             C   s(   t | tjr$t |tjr$dd }|S d S )Nc             S   s,   t | t |krdS t| d|dt | dkS )NFr   )r   r   )ry   r   r(   r(   r)   eq_impl  s    zunicode_eq.<locals>.eq_impl)r   r   r   )ry   r   r   r(   r(   r)   
unicode_eq  s    r   c             C   s(   t | tjr$t |tjr$dd }|S d S )Nc             S   s
   | |k S )Nr(   )ry   r   r(   r(   r)   ne_impl  s    zunicode_ne.<locals>.ne_impl)r   r   r   )ry   r   r   r(   r(   r)   
unicode_ne  s    r   c             C   s(   t | tjr$t |tjr$dd }|S d S )Nc             S   sJ   t t| t|}t| d|d|}|dkr.dS |dkrFt| t|k S dS )Nr   ro   TF)minr   r   )ry   r   minleneqcoder(   r(   r)   lt_impl  s    zunicode_lt.<locals>.lt_impl)r   r   r   )ry   r   r   r(   r(   r)   
unicode_lt  s    r   c             C   s(   t | tjr$t |tjr$dd }|S d S )Nc             S   sJ   t t| t|}t| d|d|}|dkr.dS |dkrFt| t|kS dS )Nr   r9   TF)r   r   r   )ry   r   r   r   r(   r(   r)   gt_impl  s    zunicode_gt.<locals>.gt_impl)r   r   r   )ry   r   r   r(   r(   r)   
unicode_gt  s    r   c             C   s(   t | tjr$t |tjr$dd }|S d S )Nc             S   s
   | |k S )Nr(   )ry   r   r(   r(   r)   le_impl  s    zunicode_le.<locals>.le_impl)r   r   r   )ry   r   r   r(   r(   r)   
unicode_le  s    r   c             C   s(   t | tjr$t |tjr$dd }|S d S )Nc             S   s
   | |k  S )Nr(   )ry   r   r(   r(   r)   ge_impl  s    zunicode_ge.<locals>.ge_impl)r   r   r   )ry   r   r   r(   r(   r)   
unicode_ge  s    r   c             C   s(   t | tjr$t |tjr$dd }|S d S )Nc             S   s   t || ddkS )N)r   rw   ro   )r   )ry   r   r(   r(   r)   contains_impl  s    z'unicode_contains.<locals>.contains_impl)r   r   r   )ry   r   r   r(   r(   r)   unicode_contains  s    r   findc             C   s   t |tjrdd }|S d S )Nc             S   s   t || dS )N)r   rw   )r   )ry   r   r(   r(   r)   	find_impl  s    zunicode_find.<locals>.find_impl)r   r   r   )ry   r   r   r(   r(   r)   unicode_find  s    r   
startswithc             C   s   t |tjrdd }|S d S )Nc             S   s   t | d|dt|dkS )Nr   )r   r   )ry   r   r(   r(   r)   startswith_impl  s    z+unicode_startswith.<locals>.startswith_impl)r   r   r   )ry   r   r   r(   r(   r)   unicode_startswith  s    r   endswithc             C   s   t |tjrdd }|S d S )Nc             S   s4   t | t | }|dk rdS t| ||dt |dkS )Nr   F)r   r   )ry   r   r   r(   r(   r)   endswith_impl  s    z'unicode_endswith.<locals>.endswith_impl)r   r   r   )ry   r   r   r(   r(   r)   unicode_endswith  s    r   splitro   c             C   sr   |dks"t |tjtjtjfs"d S t |tjr<ddd}|S |d ks`t |tjs`t|ddd krnd	dd}|S d S )
Nro   c       	      S   s   t | }t |}|dkr tdg }d}d}d}x`||k r|dksJ||k rt| ||d|dkr|| ||  ||7 }|}|d7 }q2|d7 }q2W ||kr|| |d   |S )Nr   zempty separatorro   r9   )r   r:   r   append)	ry   sepmaxsplita_lensep_lenpartslastr`   split_countr(   r(   r)   
split_impl  s$    
z!unicode_split.<locals>.split_implr;   Fc             S   s   t | }g }d}d}d}d}xlt|D ]`}t| |}	t|	}
|rP|
rFq|}d}q&|
sVq&|| ||  d}|d7 }|dkr&||kr&P q&W ||kr|s|| |d   |S )Nr   TFr9   ro   )r   r   r{   r   r   )ry   r   r   r   r   r   r`   r   Zin_whitespace_blockr   Zis_whitespacer(   r(   r)   split_whitespace_impl  s0    
z,unicode_split.<locals>.split_whitespace_impl)ro   )Nro   )r   r   ZOmittedIntegerZIntegerLiteralr   ZNoneTypegetattr)ry   r   r   r   r   r(   r(   r)   unicode_split  s    
$
 r   c             C   s   t |}|dkrdS t | }|d | }| j}x$|D ]}|t |7 }t||j}q4W t||}|d }t|d|dt | t |}	xRtd|D ]D}
t||	| d| |	|7 }	||
 }t||	|dt | |	t |7 }	qW |S )Nr    r9   )r   r/   r   rx   _strncpyr   )r   r   Z	parts_lenr   r   r   presultpart
dst_offsetr`   r(   r(   r)   	join_list1  s(    

r   joinc             C   s\   t |tjr(t |jtjrXdd }|S n0t |tjr@dd }|S t |tjrXdd }|S d S )Nc             S   s
   t | |S )N)r   )r   r   r(   r(   r)   join_list_implS  s    z$unicode_join.<locals>.join_list_implc             S   s   dd |D }t | |S )Nc             S   s   g | ]}|qS r(   r(   ).0r   r(   r(   r)   
<listcomp>Z  s    z8unicode_join.<locals>.join_iter_impl.<locals>.<listcomp>)r   )r   r   
parts_listr(   r(   r)   join_iter_implY  s    z$unicode_join.<locals>.join_iter_implc                s$    fddt t D }t| |S )Nc                s   g | ]} | qS r(   r(   )r   rz   )r   r(   r)   r   `  s    z7unicode_join.<locals>.join_str_impl.<locals>.<listcomp>)r   r   r   )r   r   r   r(   )r   r)   join_str_impl_  s    z#unicode_join.<locals>.join_str_impl)r   r   ZListZdtyper   ZIterableType)r   r   r   r   r   r(   r(   r)   unicode_joinO  s    r   Tc             C   sB   | dkr|rdS |S n| dk r&| |7 } | dk s6| |kr>t d| S )a%  
    Parameters
    ----------
    idx : int or None
        the index
    length : int
        the string length
    is_start : bool; optional with defaults to True
        Is it the *start* or the *stop* of the slice?

    Returns
    -------
    norm_idx : int
        normalized index
    Nr   zstring index out of range)
IndexError)r`   r   Zis_startr(   r(   r)   normalize_str_idxg  s    r   c             C   s   |||}dd }||fS )zFix slice object.
    c       	      S   sF   |j \}}|\}}| |||}t| ||| t||| | S )N)r_   make_helperr   Zguard_invalid_sliceZ	fix_slicerH   )	rI   rJ   rj   r_   	slicetypeZ
lengthtypesliceobjr   slicer(   r(   r)   rd     s    
z!_normalize_slice.<locals>.codegenr(   )rh   r   r   rj   rd   r(   r(   r)   _normalize_slice  s    
r   c             C   s   t |}dd }||fS )z2Compute the span from the given slice object.
    c             S   s,   |j \}|\}| |||}t||}|S )N)r_   r   r   Zget_slice_length)rI   rJ   rj   r_   r   r   r   Zresult_sizer(   r(   r)   rd     s
    z_slice_span.<locals>.codegen)r   r"   )rh   r   rj   rd   r(   r(   r)   _slice_span  s    
r   c       
      C   sx   |j | j krHt|j }|| }|| }|| }t| j||j||dd n,x*t|D ]}	t| ||	 t|||	  qRW d S )Nr9   )Zalign)r/   r<   r   r-   r   ru   r{   )
dstr   srcZ
src_offsetr   Z
byte_widthZsrc_byte_offsetZdst_byte_offsetrB   rz   r(   r(   r)   r     s    
r   c             C   s@   t | tjr<t |tjr$dd }|S t |tjr<dd }|S d S )Nc             S   s0   t |t| }t| jd}t|dt| | |S )Nr9   r   )r   r   rx   r/   ru   r{   )rw   r`   retr(   r(   r)   getitem_char  s    z%unicode_getitem.<locals>.getitem_charc             S   sx   t |t| }t|}t| j|}|jdkr@t|d| |j| n4|j}x,t|D ] }t	||t
| | ||j7 }qPW |S )Nr9   r   )r   r   r   rx   r/   stepr   startr   ru   r{   )rw   r`   Z	slice_idxspanr   Zcurrz   r(   r(   r)   getitem_slice  s    
z&unicode_getitem.<locals>.getitem_slice)r   r   r   r   Z	SliceType)rw   r`   r   r   r(   r(   r)   unicode_getitem  s    r   c             C   s(   t | tjr$t |tjr$dd }|S d S )Nc             S   s   | j |j  }t| j|j}t||}x&tt| D ]}t||t| | q2W x.tt|D ]}t|t| | t|| qZW |S )N)r.   r   r/   rx   r   r   ru   r{   )ry   r   Z
new_lengthZnew_kindr   rz   jr(   r(   r)   concat_impl  s    
z#unicode_concat.<locals>.concat_impl)r   r   r   )ry   r   r   r(   r(   r)   unicode_concat  s    	r   )Nro   )T)_operatorZnumpyrv   Zllvmlite.irr   r   Znumba.extendingr   r   r   r   r   r	   r
   r   r   Znumba.targets.imputilsr   r   Znumbar   r   r   Znumba.pythonapir   r   r   r   Znumba.targetsr   Znumba._helperlibr   Znumba.targets.hashingr   Znumba.unsafe.bytesr   r   r$   r   rD   rN   ZStringLiteralrp   rP   rQ   rV   rX   re   rk   rm   rn   rt   rx   r{   r|   r   r   r   ru   r   r<   r   r   r   r   r   eqr   ner   ltr   gtr   ler   ger   containsr   r   r   r   r   r   r   r   r   r   r   getitemr   rr   iaddr   r(   r(   r(   r)   <module>   s   ,
	
		$
	
D