B
     \x                 @   s   d dl mZmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ dd	lmZ d d
lmZmZmZ G dd deZG dd deZG dd deZG dd deZG dd deZdS )    )absolute_importprint_functionN)TargetDescriptor)TargetOptions)cuda)jitautojit)devicearray   )CUDATargetDesc)UFuncMechanismGenerializedUFuncGUFuncCallStepsc               @   s`   e Zd ZeZi i fddZi fddZedd Zdd Z	dd
dZ
dd Zdd Zdd ZdS )CUDADispatcherc             C   s&   |rt || _|| _|j| _d | _d S )N)AssertionErrorpy_functargetoptions__doc__doc	_compiled)selfr   localsr    r   4lib/python3.7/site-packages/numba/cuda/dispatcher.py__init__   s
    zCUDADispatcher.__init__c             K   sX   | j d kst|rt| j }|| t|f|| j}|| _ t|drT|j| _d S )N_npm_context_)	r   r   r   copyupdater   r   hasattrr   )r   Zsigr   r   Zoptionskernelr   r   r   compile   s    


zCUDADispatcher.compilec             C   s"   | j d krt| jf| j| _ | j S )N)r   r   r   r   )r   r   r   r   compiled#   s    
zCUDADispatcher.compiledc             O   s   | j ||S )N)r!   )r   argskwsr   r   r   __call__)   s    zCUDADispatcher.__call__Tc             C   s   dS )z@Disable the compilation of new signatures at call time.
        Nr   )r   valr   r   r   disable_compile,   s    zCUDADispatcher.disable_compilec             O   s   | j j||S )N)r!   	configure)r   r"   r#   r   r   r   r'   2   s    zCUDADispatcher.configurec             G   s   | j j| S )N)r!   __getitem__)r   r"   r   r   r   r(   5   s    zCUDADispatcher.__getitem__c             C   s   t | j|S )N)getattrr!   )r   keyr   r   r   __getattr__8   s    zCUDADispatcher.__getattr__N)T)__name__
__module____qualname__r   Ztargetdescrr   r    propertyr!   r$   r&   r'   r(   r+   r   r   r   r   r      s   

r   c               @   sL   e Zd ZdZdd Zedd Zejdd Zdd Zdd
dZ	dd Z
dS )CUDAUFuncDispatcherzD
    Invoke the CUDA ufunc specialization for the given inputs.
    c             C   s   || _ d| _d S )Nr   )	functions_maxblocksize)r   Ztypes_to_retty_kernelsr   r   r   r   A   s    zCUDAUFuncDispatcher.__init__c             C   s   | j S )N)r2   )r   r   r   r   max_blocksizeE   s    z!CUDAUFuncDispatcher.max_blocksizec             C   s
   || _ d S )N)Z_max_blocksize)r   Zblkszr   r   r   r3   I   s    c             O   s   t | j||S )a  
        *args: numpy arrays or DeviceArrayBase (created by cuda.to_device).
               Cannot mix the two types in one call.

        **kws:
            stream -- cuda stream; when defined, asynchronous mode is used.
            out    -- output array. Can be a numpy array or DeviceArrayBase
                      depending on the input arguments.  Type must match
                      the input arguments.
        )CUDAUFuncMechanismZcallr1   )r   r"   r#   r   r   r   r$   M   s    zCUDAUFuncDispatcher.__call__r   c          	   C   s   t t| j d dks"td|jdks4td|jd }g }|dkrTtdn|dkrd|d S |pnt	 }|
 N t|r|}nt||}| |||}tjd|jd}|j||d	 W d Q R X |d S )
Nr      zmust be a binary ufuncr
   zmust use 1d arrayzReduction on an empty array.)r
   )dtype)stream)lenlistr1   keysr   ndimshape	TypeErrorr   r7   Zauto_synchronizer	   Zis_cuda_ndarray	to_device_CUDAUFuncDispatcher__reducenpZarrayr6   copy_to_host)r   argr7   ngpu_memsmemoutZbufr   r   r   reduceZ   s"    "



zCUDAUFuncDispatcher.reducec       
      C   s   |j d }|d dkrd||d \}}|| || | |||}|| | ||||dS ||d \}}	|| ||	 | ||	||d |d dkr| |||S |S d S )Nr   r5   r
   )rF   r7   )r<   splitappendr?   )
r   rE   rD   r7   rC   ZfatcutZthincutrF   leftrightr   r   r   Z__reducew   s    





zCUDAUFuncDispatcher.__reduceN)r   )r,   r-   r.   r   r   r/   r3   setterr$   rG   r?   r   r   r   r   r0   <   s   
r0   c               @   sJ   e Zd ZdgZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dS )_CUDAGUFuncCallSteps_streamc             C   s
   t |S )N)r   is_cuda_array)r   objr   r   r   is_device_array   s    z$_CUDAGUFuncCallSteps.is_device_arrayc             C   s
   t |S )N)r   as_cuda_array)r   rP   r   r   r   as_device_array   s    z$_CUDAGUFuncCallSteps.as_device_arrayc             C   s   t j|| jdS )N)r7   )r   r>   rN   )r   hostaryr   r   r   r>      s    z_CUDAGUFuncCallSteps.to_devicec             C   s   |j || jd}|S )N)r7   )rA   rN   )r   devaryrT   rF   r   r   r   to_host   s    z_CUDAGUFuncCallSteps.to_hostc             C   s   t j||| jdS )N)r<   r6   r7   )r   device_arrayrN   )r   r<   r6   r   r   r   rW      s    z!_CUDAGUFuncCallSteps.device_arrayc             C   s   | j dd| _d S )Nr7   r   )kwargsgetrN   )r   r   r   r   prepare_inputs   s    z#_CUDAGUFuncCallSteps.prepare_inputsc             C   s   |j || jd|  d S )N)r7   )forallrN   )r   r   Znelemr"   r   r   r   launch_kernel   s    z"_CUDAGUFuncCallSteps.launch_kernelN)r,   r-   r.   	__slots__rQ   rS   r>   rV   rW   rZ   r\   r   r   r   r   rM      s   rM   c               @   s(   e Zd Zedd Zdd Zdd ZdS )CUDAGenerializedUFuncc             C   s   t S )N)rM   )r   r   r   r   _call_steps   s    z!CUDAGenerializedUFunc._call_stepsc             C   s   t j|d|j|jdS )N)r   )r<   stridesr6   gpu_data)r	   DeviceNDArrayr6   ra   )r   aryr<   r   r   r   _broadcast_scalar_input   s    z-CUDAGenerializedUFunc._broadcast_scalar_inputc             C   s6   t |t |j }d| |j }tj|||j|jdS )N)r   )r<   r`   r6   ra   )r8   r<   r`   r	   rb   r6   ra   )r   rc   ZnewshapeZnewaxZ
newstridesr   r   r   _broadcast_add_axis   s    z)CUDAGenerializedUFunc._broadcast_add_axisN)r,   r-   r.   r/   r_   rd   re   r   r   r   r   r^      s   r^   c               @   sP   e Zd ZdZdZdZdd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd ZdS )r4   z'
    Provide OpenCL specialization
    r   Ac             C   s   |j ||d|  d S )N)r7   )r[   )r   funccountr7   r"   r   r   r   launch   s    zCUDAUFuncMechanism.launchc             C   s
   t |S )N)r   rO   )r   rP   r   r   r   rQ      s    z"CUDAUFuncMechanism.is_device_arrayc             C   s
   t |S )N)r   rR   )r   rP   r   r   r   rS      s    z"CUDAUFuncMechanism.as_device_arrayc             C   s   t j||dS )N)r7   )r   r>   )r   rT   r7   r   r   r   r>      s    zCUDAUFuncMechanism.to_devicec             C   s   |j |dS )N)r7   )rA   )r   rU   r7   r   r   r   rV      s    zCUDAUFuncMechanism.to_hostc             C   s   t j|||dS )N)r<   r6   r7   )r   rW   )r   r<   r6   r7   r   r   r   rW      s    zCUDAUFuncMechanism.device_arrayc                sn    fddt tD }tt j }dg| t j }x|D ]}d||< qHW tj| j jdS )Nc                s,   g | ]$}| j ks$ j| | kr|qS r   )r;   r<   ).0ax)rc   r<   r   r   
<listcomp>   s    
z7CUDAUFuncMechanism.broadcast_device.<locals>.<listcomp>r   )r<   r`   r6   ra   )	ranger8   r<   r9   r`   r	   rb   r6   ra   )r   rc   r<   Z
ax_differsZ
missingdimr`   rk   r   )rc   r<   r   broadcast_device   s    
z#CUDAUFuncMechanism.broadcast_deviceN)r,   r-   r.   r   ZDEFAULT_STREAMZARRAY_ORDERri   rQ   rS   r>   rV   rW   rn   r   r   r   r   r4      s   r4   )Z
__future__r   r   Znumpyr@   Znumba.targets.descriptorsr   Znumba.targets.optionsr   Znumbar   Z
numba.cudar   r   Znumba.cuda.cudadrvr	   Z
descriptorr   Znumba.npyufunc.deviceufuncr   r   r   objectr   r0   rM   r^   r4   r   r   r   r   <module>   s   -S