B
     \                 @   sZ  d dl mZmZ d dlZd dlmZ d dlmZ ddl	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZ dd	lmZ dd
l m!Z! ye" W n e#k
r   e$Z"Y nX G dd de%Z&dd Z'dd Z(ej)ddfddZ*dd Z+d'ddZ,dd Z-dd Z.dd Z/dd  Z0d!d" Z1ej)ddfd#d$Z2ej)ddfd%d&Z3dS )(    )absolute_importprint_functionN)mviewbuf)get_context   )get_global_idget_global_sizeget_local_idget_local_sizeget_group_idget_work_dimget_num_groupsbarrier	mem_fencesharedwavebarrieractivelanepermute_wavewidth
ds_permuteds_bpermute)jit)CLK_LOCAL_MEM_FENCECLK_GLOBAL_MEM_FENCE)hsa)devicearrayc               @   s$   e Zd Zdd Zdd Zdd ZdS )_AutoDeregisterc             C   s
   || _ d S )N)args)selfr    r   ,lib/python3.7/site-packages/numba/roc/api.py__init__*   s    z_AutoDeregister.__init__c             C   s   d S )Nr   )r   r   r   r   	__enter__-   s    z_AutoDeregister.__enter__c             C   s   t | j  d S )N)
deregisterr   )r   exc_typeZexc_valZexc_tbr   r   r   __exit__0   s    z_AutoDeregister.__exit__N)__name__
__module____qualname__r   r    r#   r   r   r   r   r   )   s   r   c              G   sB   x8| D ]0}t |tjr*t|jj|j qtt	|qW t
| S )zRegister data into the HSA system

    Returns a contextmanager for use in with-context for auto deregistration.

    Use in context:

        with hsa.register(array):
            do_work_on_HSA(array)

    )
isinstancenpndarray_hsadrvZhsa_memory_registerctypesdatanbytes	TypeErrortyper   )r   r,   r   r   r   register4   s
    
r0   c              G   s>   x8| D ]0}t |tjr*t|jj|j qtt	|qW dS )z(Deregister data from the HSA system
    N)
r'   r(   r)   r*   Zhsa_memory_deregisterr+   r,   r-   r.   r/   )r   r,   r   r   r   r!   G   s    
r!   Cc             C   s$   t | |||\} }}tj| ||dS )zdevice_array(shape, dtype=np.float, strides=None, order='C')

    Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
    )shapestridesdtype)_prepare_shape_strides_dtyper   ZDeviceNDArray)r2   r4   r3   orderr   r   r   device_arrayP   s    r7   c             C   s   t | j| j| jdS )z<Call roc.devicearray() with information from the array.
    )r2   r4   r3   )r7   r2   r4   r3   )Zaryr   r   r   device_array_likeZ   s    r8   Tc             C   s4   |pt  }|dkrt| }|r0|j| ||d |S )a  to_device(obj, context, copy=True, to=None)

    Allocate and transfer a numpy ndarray or structured scalar to the device.

    To copy host->device a numpy array::

        ary = numpy.arange(10)
        d_ary = roc.to_device(ary)

    The resulting ``d_ary`` is a ``DeviceNDArray``.

    To copy device->host::

        hary = d_ary.copy_to_host()

    To copy device->host to an existing array::

        ary = numpy.empty(shape=d_ary.shape, dtype=d_ary.dtype)
        d_ary.copy_to_host(ary)

    N)streamcontext)r   r   Zfrom_array_likeZcopy_to_device)objr9   r:   copytor   r   r   	to_device`   s    

r>   c               C   s   t  S )N)r*   Zcreate_streamr   r   r   r   r9      s    r9   c             C   s   t | }dg| }|dkr\|j|d< xtt|d D ] }||d  | |d   ||< q6W nL|dkr|j|d< x8td|D ] }||d  | |d   ||< qzW ntdt|S )Nr   r1   r   Fzmust be either C/F order)lenitemsizereversedrange
ValueErrortuple)r2   r4   r6   Zndr3   dr   r   r   _fill_stride_by_order   s    

"
"rH   c             C   sZ   t |}t| ttfr| f} t|ttfr4|f}n| dkr@d} |pNt| ||}| ||fS )Nr   )r   )r(   r4   r'   intlongrH   )r2   r3   r4   r6   r   r   r   r5      s    
r5   c             C   s<   t | t |kstdt | }t| |||\}}|| S )z_Get the byte size of a contiguous memory buffer given the shape, strides
    and itemsize.
    z# dim mismatch)rA   AssertionErrorr   Zmemoryview_get_extents_info)r2   r3   rB   ndimser   r   r   _memory_size_from_info   s    rO   c       
      C   sv   ddl m} t||||\}}}t|||j}dd | D }| j|| |d}tj	|||||d}	|	j
tjdS )Nr   )devicesc             S   s   g | ]
}|j qS r   )Z_agent).0cr   r   r   
<listcomp>   s    z_host_array.<locals>.<listcomp>)	finegrainZallow_access_to)r2   r3   r4   r6   buffer)r/   )hsadrvrP   r5   rO   rB   Zget_all_contextsZget_cpu_contextZmemhostallocr(   r)   Zviewr   Z	HostArray)
rT   r2   r4   r3   r6   rP   ZbytesizeZagentsZbufZarrr   r   r   _host_array   s    rW   c             C   s   t d| |||dS )zacoarsegrain_array(shape, dtype=np.float, strides=None, order='C')
    Similar to np.empty().
    F)rT   r2   r4   r3   r6   )rW   )r2   r4   r3   r6   r   r   r   coarsegrain_array   s    rX   c             C   s   t d| |||dS )z`finegrain_array(shape, dtype=np.float, strides=None, order='C')

    Similar to np.empty().
    F)rT   r2   r4   r3   r6   )rW   )r2   r4   r3   r6   r   r   r   finegrain_array   s    rY   )NNTN)4Z
__future__r   r   Znumpyr(   Znumbar   Znumba.roc.hsadrv.devicesr   Zstubsr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   Z
decoratorsr   Zenumsr   r   Zhsadrv.driverr   r*   rV   r   rJ   	NameErrorrI   objectr   r0   r!   floatr7   r8   r>   r9   rH   r5   rO   rW   rX   rY   r   r   r   r   <module>   s2   @
	

 	