B
     \-$                 @   s"  d dl mZmZ d dlZd dlmZmZmZmZm	Z	 d dlm
  mZ d dlmZ d dlmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lm Z  ddl!m"Z" ddl#m$Z$m%Z% ddl&m'Z' G dd dejZ(e)dej*Z+G dd deZ,G dd deZ-dS )    )print_functionabsolute_importN)TypeBuilderLINKAGE_INTERNALConstantICMP_EQ)typingtypescgutils	debuginfo
dispatcher)cached_property)BaseContext)MinimalCallConv)	cmathimpl)	cmathdecl)itanium_mangler   )nvvm)codegen	nvvmutils)	jitdevicec                   s$   e Zd Zdd Z fddZ  ZS )CUDATypingContextc             C   s8   ddl m}m} | |j | |j | tj d S )Nr   )cudadeclcudamath) r   r   install_registryregistryr   )selfr   r    r    0lib/python3.7/site-packages/numba/cuda/target.pyload_additional_registries   s    z,CUDATypingContext.load_additional_registriesc                sh   t |tjrXy
|j}W n@ tk
rV   |js4tdt||j	dd}||_|}Y nX t
t| |S )Nz<using cpu function on device but its compilation is disableddebug)r#   )
isinstancer   Z
DispatcherZ!_CUDATypingContext__cudajitdeviceAttributeErrorZ_can_compile
ValueErrorr   Ztargetoptionsgetsuperr   resolve_value_type)r   valZjd)	__class__r    r!   r)   !   s    

z$CUDATypingContext.resolve_value_type)__name__
__module____qualname__r"   r)   __classcell__r    r    )r+   r!   r      s   r   z	[^a-z0-9]c               @   s   e Zd ZdZdZejZdd Zdd Z	dd Z
dd	 Zed
d Zedd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )CUDATargetContextTc             C   s   | j |S )N)_internal_codegenZ_create_empty_module)r   namer    r    r!   create_module@   s    zCUDATargetContext.create_modulec             C   s   t d| _ttj| _d S )Nznumba.cuda.jit)r   ZJITCUDACodegenr1   llZcreate_target_datar   Zdefault_data_layout_target_data)r   r    r    r!   initC   s    zCUDATargetContext.initc             C   sH   ddl m}m}m} | |j | |j | |j | tj d S )Nr   )cudaimpl	printimpl	libdevice)r   r7   r8   r9   r   r   r   )r   r7   r8   r9   r    r    r!   r"   G   s
    z,CUDATargetContext.load_additional_registriesc             C   s   | j S )N)r1   )r   r    r    r!   r   N   s    zCUDATargetContext.codegenc             C   s   | j S )N)r5   )r   r    r    r!   target_dataQ   s    zCUDATargetContext.target_datac             C   s   t | S )N)CUDACallConv)r   r    r    r!   	call_convU   s    zCUDATargetContext.call_convc             C   s   t ||S )N)r   Zmangle)r   r2   argtypesr    r    r!   manglerY   s    zCUDATargetContext.manglerc             C   s>   |   d}|| | j||||d}t|j ||fS )aY  
        Adapt a code library ``codelib`` with the numba compiled CUDA kernel
        with name ``fname`` and arguments ``argtypes`` for NVVM.
        A new library is created with a wrapper function that can be used as
        the kernel entry point for the given kernel.

        Returns the new code library and the wrapper function.
        r   )r#   )r   Zcreate_libraryZadd_linking_librarygenerate_kernel_wrapperr   Zfix_data_layoutZ_final_module)r   Zcodelibfnamer=   r#   librarywrapperr    r    r!   prepare_cuda_kernel\   s    	

z%CUDATargetContext.prepare_cuda_kernelc          
      sT  |  |}t|j}tt |}| dtt | j	t
jg| }j||d}	tj|	jdd}
j||
d t d} fdd}|d}g }g }x0d	D ](}||d
|  ||d|  qW || j}| j||	t
j||\}}|r t||j |  W dQ R X |||j t|jj }t!j|j|j|j|jg}j|dd}|"||||j#g}|$t%||}t&'|}||f x.t(d	|D ] \}}|)|}|*|| qW x.t(d	|D ] \}}|+|}|*|| qW W dQ R X W dQ R X |  t,-  |. |/  |0 j  S )z
        Generate the kernel wrapper in the given ``library``.
        The function being wrapped have the name ``fname`` and argument types
        ``argtypes``.  The wrapper function is returned.
        zcuda.kernel.wrapper)r2   Zcudapy)nsr   c                s,   j t  j|  d}t|jj|_|S )N)r2   )	add_global_variabler   intr2   r   nulltypepointeeinitializer)Zpostfixgv)wrapfnwrapper_moduler    r!   define_error_gv   s    
zBCUDATargetContext.generate_kernel_wrapper.<locals>.define_error_gvZ__errcode__Zxyzz	__tid%s__z__ctaid%s__NZ___numba_cas_hack)1Zget_arg_packerlistZargument_typesr   ZfunctionZvoidr3   rF   r<   Zget_return_typer
   ZpyobjectZadd_functionr   Zprepend_namespacer2   r   Zappend_basic_blockappendZfrom_argumentsargsZcall_functionr   Z	if_likelyZis_okZret_voidZif_thennot_Zis_python_excr   rG   rH   rI   lccallcodeZicmpr   r   ZSRegBuilderziptidZstoreZctaidr   Zset_cuda_kernelZadd_ir_modulefinalizeZget_function)r   rA   r@   r=   r#   ZarginfoZargtysZwrapfntyZfntyfuncZprefixedbuilderrN   Zgv_excZgv_tidZgv_ctaidiZcallargsZstatus_oldZcasfntyZcasfnZxchgZchangedZsregZdimptrr*   r    )rL   rM   r!   r?   l   sZ    








&

z)CUDATargetContext.generate_kernel_wrapperc             C   s   |  || |}| S )zn
        Return dummy value.

        XXX: We should be able to move cuda.const.array_like into here.
        )Z
make_arrayZ	_getvalue)r   rZ   typZaryar    r    r!   make_constant_array   s    z%CUDATargetContext.make_constant_arrayc             C   sv   t |}ddt|g}|j|}|dkrX|j|j|t	j
d}t|_d|_||_|jjj}t ||t	j
S )zi
        Unlike the parent version.  This returns a a pointer in the constant
        addrspace.
        $Z__conststring__N)r2   	addrspaceT)r   Zstringzjoinr   Zmangle_identifierglobalsr'   rE   rH   r   ADDRSPACE_CONSTANTr   ZlinkageZglobal_constantrJ   rI   elementZbitcastZ
as_pointer)r   modstringtextr2   rK   Zchartyr    r    r!   insert_const_string   s    



z%CUDATargetContext.insert_const_stringc             C   s"   |j }| ||}| ||tjS )z
        Insert a constant string in the constant addresspace and return a
        generic i8 pointer to the data.

        This function attempts to deduplicate.
        )modulerk   insert_addrspace_convr   rf   )r   rZ   ri   lmodrK   r    r    r!   insert_string_const_addrspace   s    z/CUDATargetContext.insert_string_const_addrspacec             C   s*   |j }|jj}t|||}|||gS )zI
        Perform addrspace conversion according to the NVVM spec
        )rl   rH   rI   r   rm   rT   )r   rZ   r^   rc   rn   Z	base_typeZconvr    r    r!   rm      s    z'CUDATargetContext.insert_addrspace_convc             C   s   dS )zRun O1 function passes
        Nr    )r   rY   r    r    r!   optimize_function   s    z#CUDATargetContext.optimize_functionN)r,   r-   r.   Zimplement_powi_as_math_callZstrict_alignmentr   ZNvvmDIBuilderZ	DIBuilderr3   r6   r"   r   propertyr:   r   r<   r>   rC   r?   ra   rk   ro   rm   rp   r    r    r    r!   r0   :   s"   L
	r0   c               @   s   e Zd ZdS )r;   N)r,   r-   r.   r    r    r    r!   r;      s   r;   ).Z
__future__r   r   reZllvmlite.llvmpy.corer   r   r   r   r   ZllvmpyZcorerS   Zllvmlite.bindingZbindingr4   Znumbar	   r
   r   r   r   Znumba.utilsr   Znumba.targets.baser   Znumba.targets.callconvr   Znumba.targetsr   Znumba.typingr   r   Zcudadrvr   r   r   r   Z
decoratorsr   r   compileIZVALID_CHARSr0   r;   r    r    r    r!   <module>   s&    C