B
     \                 @   s   d dl mZmZmZ d dlZd dlmZmZm	Z	 d dl
mZ d dlm  m  mZ d dlm  mZ G dd deZG dd deZG d	d
 d
e	ZG dd deZdS )    )absolute_importdivisionprint_functionN)UFuncMechanismGenerializedUFuncGUFuncCallSteps)dgpu_presentc               @   s*   e Zd ZdZdd Zdd Zd
ddZd	S )HsaUFuncDispatcherzC
    Invoke the HSA ufunc specialization for the given inputs.
    c             C   s
   || _ d S )N)	functions)selfZtypes_to_retty_kernels r   1lib/python3.7/site-packages/numba/roc/dispatch.py__init__   s    zHsaUFuncDispatcher.__init__c             O   s   t | j||S )a?  
        *args: numpy arrays
        **kws:
            stream -- hsa stream; when defined, asynchronous mode is used.
            out    -- output array. Can be a numpy array or DeviceArrayBase
                      depending on the input arguments.  Type must match
                      the input arguments.
        )HsaUFuncMechanismZcallr
   )r   argsZkwsr   r   r   __call__   s    	zHsaUFuncDispatcher.__call__r   c             C   s   t d S )N)NotImplementedError)r   argstreamr   r   r   reduce   s    zHsaUFuncDispatcher.reduceN)r   )__name__
__module____qualname____doc__r   r   r   r   r   r   r   r	      s   r	   c               @   sH   e Zd ZdZdZdZdd Zdd Zdd	 Zd
d Z	dd Z
dd ZdS )r   z'
    Provide OpenCL specialization
    r   Ac             C   s   t rt|S t|tjS d S )N)r   devicearrayis_hsa_ndarray
isinstancenpndarray)r   objr   r   r   is_device_array)   s    
z!HsaUFuncMechanism.is_device_arrayc             C   s   t rdS t|tjS d S )NF)r   r   r   r   )r   r    r   r   r   is_host_array/   s    zHsaUFuncMechanism.is_host_arrayc             C   s   t rt|S |S d S )N)r   api	to_device)r   hostaryr   r   r   r   r$   5   s    
zHsaUFuncMechanism.to_devicec             C   s<   d}d}||d  | }||d  | }|||f |  d S )N         r   )r   funccountr   r   ZilpZtpbZ
blockcountr   r   r   launch;   s
    zHsaUFuncMechanism.launchc             C   s$   t rtj||dS tj||dS d S )N)shapedtype)r   r#   device_arrayr   empty)r   r,   r-   r   r   r   r   r.   D   s    zHsaUFuncMechanism.device_arrayc                s~   t rtdnl fddttD }tt j }dg| t j }x|D ]}d||< qVW tj| j	 dS d S )Nzdevice broadcast_device NIYc                s,   g | ]$}| j ks$ j| | kr|qS r   )ndimr,   ).0ax)aryr,   r   r   
<listcomp>N   s    
z6HsaUFuncMechanism.broadcast_device.<locals>.<listcomp>r   )r,   stridesr-   buffer)
r   r   rangelenr,   listr5   r   r   r-   )r   r3   r,   Z
ax_differsZ
missingdimr5   r2   r   )r3   r,   r   broadcast_deviceJ   s    

z"HsaUFuncMechanism.broadcast_deviceN)r   r   r   r   ZDEFAULT_STREAMZARRAY_ORDERr!   r"   r$   r+   r.   r:   r   r   r   r   r   "   s   	r   c               @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )_HsaGUFuncCallStepsr   c             C   s   t rt|S dS d S )NT)r   r   r   )r   r    r   r   r   r!   _   s    
z#_HsaGUFuncCallSteps.is_device_arrayc             C   s   t rt|S |S d S )N)r   r#   r$   )r   r%   r   r   r   r$   e   s    
z_HsaGUFuncCallSteps.to_devicec             C   s   t r||}|S d S )N)r   Zcopy_to_host)r   Zdevaryr%   outr   r   r   to_hostk   s    
z_HsaGUFuncCallSteps.to_hostc             C   s$   t rtj||dS tj||dS d S )N)r,   r-   )r   r#   r.   r   r/   )r   r,   r-   r   r   r   r.   r   s    z _HsaGUFuncCallSteps.device_arrayc             C   s   | |t|d|  d S )N@   )Z	configuremin)r   ZkernelZnelemr   r   r   r   launch_kernelx   s    z!_HsaGUFuncCallSteps.launch_kernelN)	r   r   r   	__slots__r!   r$   r=   r.   r@   r   r   r   r   r;   \   s   r;   c               @   s(   e Zd Zedd Zdd Zdd ZdS )HSAGenerializedUFuncc             C   s   t S )N)r;   )r   r   r   r   _call_steps}   s    z HSAGenerializedUFunc._call_stepsc             C   s4   t rtj|d|j|jdS tjjj||fddS d S )N)r   )r,   r5   r-   	dgpu_data)r,   r5   )	r   r   DeviceNDArrayr-   rD   r   libZstride_tricksZ
as_strided)r   r3   r,   r   r   r   _broadcast_scalar_input   s    
z,HSAGenerializedUFunc._broadcast_scalar_inputc             C   sB   t |t |j }d| |j }tr:tj|||j|jdS td S )N)r   )r,   r5   r-   rD   )	r8   r,   r5   r   r   rE   r-   rD   r   )r   r3   ZnewshapeZnewaxZ
newstridesr   r   r   _broadcast_add_axis   s    
z(HSAGenerializedUFunc._broadcast_add_axisN)r   r   r   propertyrC   rG   rH   r   r   r   r   rB   |   s   
rB   )Z
__future__r   r   r   Znumpyr   Znumba.npyufunc.deviceufuncr   r   r   Znumba.roc.hsadrv.driverr   Znumba.roc.hsadrv.devicearrayZrocZhsadrvr   Znumba.roc.apir#   objectr	   r   r;   rB   r   r   r   r   <module>   s   : 