B
     \j0                 @   s  d Z ddlmZmZmZ ddlZddlZddlZddlZddl	m
Z
 ddlZddlmZ ddlmZ ddlmZmZmZ dd	lmZ ye W n ek
r   eZY nX d
d Zdd Zdd ZG dd deZG dd deZ G dd dej!Z"d ddZ#dZ$dd Z%dd Z&d!ddZ'dS )"z
A HSA dGPU backed ND Array is recognized by checking the __hsa_memory__
attribute on the object.  If it exists and evaluate to True, it must define
shape, strides, dtype and size attributes similar to a NumPy ndarray.
    )print_functionabsolute_importdivisionN)c_void_p)driver   )devices)
dummyarraytypesnumpy_support)HsaContextMismatchErrorc             C   s   t | ddS )z#Check if an object is a HSA ndarray__hsa_ndarray__F)getattr)obj r   ;lib/python3.7/site-packages/numba/roc/hsadrv/devicearray.pyis_hsa_ndarray   s    r   c                sF   t    fdd}|dt |dt |dtj |dttf dS )z+Verify the HSA ndarray interface for an objc                s6   t  | st| tt | |s2td| |f d S )Nz%s must be of type %s)hasattrAttributeError
isinstancer   )attrtyp)r   r   r   requires_attr!   s    
z3verify_hsa_ndarray_interface.<locals>.requires_attrshapestridesdtypesizeN)require_hsa_ndarraytuplenpr   intlong)r   r   r   )r   r   verify_hsa_ndarray_interface   s    

r"   c             C   s   t | stddS )z8Raises ValueError if is_hsa_ndarray(obj) evaluates Falsezrequire an hsa ndarray objectN)r   
ValueError)r   r   r   r   r   -   s    r   c               @   sb   e Zd ZdZdZdZdddZedd Zedd	 Z	ed
d Z
dddZdddZdd ZdS )DeviceNDArrayBasezGBase class for an on dGPU NDArray representation cf. numpy.ndarray
    TNc             C   s  t |ttfr|f}t |ttfr(|f}t|| _t|| jkrHtdtjd|||j	| _
t|| _t|| _t|| _tt| j| _| jdkr
|dkrddlm} || j| j| jj	| _t | j}n0t|dd}|dkrtd|dkst|| _n
d}d| _|| _dS )a   
        Args
        ----

        shape
            array shape.
        strides
            array strides.
        dtype
            data type as numpy.dtype.
        dgpu_data
            user provided device memory for the ndarray data buffer
        zstrides not match ndimr   N)_memory_size_from_infoZ_hsa_memsize_z'dgpu_data as no _hsa_memsize_ attribute)r   r    r!   lenndimr#   r	   ArrayZ	from_descitemsize_dummyr   r   r   r   r   Zprodr   Znumba.roc.apir%   
alloc_sizer   Zget_contextZmempoolallocr   AssertionError	dgpu_data)selfr   r   r   r-   r%   szr   r   r   __init__9   s6    



zDeviceNDArrayBase.__init__c             C   s   | j jS )N)r-   context)r.   r   r   r   _contexth   s    zDeviceNDArrayBase._contextc             C   s   t | j}t|| jdS )zn
        Magic attribute expected by Numba to get the numba type that
        represents this object.
        A)r   Z
from_dtyper   r
   r(   r'   )r.   r   r   r   r   _numba_type_l   s    zDeviceNDArrayBase._numba_type_c             C   s   | j dkrtdS | j jS dS )z:Returns the ctypes pointer to the GPU data buffer
        Nr   )r-   r   device_ctypes_pointer)r.   r   r   r   r5   u   s    
z'DeviceNDArrayBase.device_ctypes_pointerc             C   s   |j dkrdS |dk	rF| jdk	rL| j| }}|j|jkrLt||dn| j}| j}|dkrtj  t	|t
rt| j| || qt| j| || nBt	|t
rtj| j|j| |||d ntj| jt | |||d dS )zCopy `ary` to `self`.

        If `ary` is a HSA memory, perform a device-to-device transfer.
        Otherwise, perform a a host-to-device transfer.

        If `stream` is a stream object, an async copy to used.
        r   N)expectgot)dst_ctxsrc_ctxdstsrcr   stream)r   r-   r2   Zunproxyr   r+   _driverhsaimplicit_syncr   DeviceNDArrayZdGPU_to_dGPUZhost_to_dGPUZasync_dGPU_to_dGPUZasync_host_to_dGPUr   get_cpu_context)r.   aryr<   r1   r6   r7   r/   r   r   r   copy_to_device~   s.    

	



z DeviceNDArrayBase.copy_to_devicec             C   sf  |dkrt j| jt jd}n|j| jkr0td|j| jkrhd}|j|krT| j|kshtd| j|jf |j| jkrd| jjff}|j|kr| j|kstd| j|jf |}| jdkst	d	| j
}| jdkr| j}|dkrtj  t||| | ntjt | j
|| ||d
 |dkr^| jdkrDt j| j| j|d}nt j| j| j| j|d}n|}|S )a  Copy ``self`` to ``ary`` or create a new Numpy ndarray
        if ``ary`` is ``None``.

        The transfer is synchronous: the function returns after the copy
        is finished.

        Always returns the host array.

        Example::

            import numpy as np
            from numba import hsa

            arr = np.arange(1000)
            d_arr = hsa.to_device(arr)

            my_kernel[100, 100](d_arr)

            result_array = d_arr.copy_to_host()
        N)r   r   zincompatible dtype)r   )r   z&incompatible shape; device %s; host %sr   z(incompatible strides; device %s; host %sr   zNegative memory size)r8   r9   r:   r;   r   r<   )r   r   buffer)r   r   r   rD   )r   emptyr+   Zbyter   	TypeErrorr   r   r)   r,   r2   r=   r>   r?   ZdGPU_to_hostZasync_dGPU_to_hostr   rA   r   ndarray)r.   rB   r<   ZhostaryZ
scalshapesZscalstridesr1   r/   r   r   r   copy_to_host   sF    






zDeviceNDArrayBase.copy_to_hostc             C   s   | j S )zEReturns a device memory object that is used as the argument.
        )r-   )r.   r   r   r   
as_hsa_arg   s    zDeviceNDArrayBase.as_hsa_arg)N)NN)NN)__name__
__module____qualname____doc____hsa_memory__r   r0   propertyr2   r4   r5   rC   rH   rI   r   r   r   r   r$   3   s   
/		
1
Gr$   c               @   s2   e Zd ZdZdd Zdd Zdd Zdd	d
ZdS )r@   z
    An on-dGPU array type
    c             C   s   | j jS )zA
        Return true if the array is Fortran-contiguous.
        )r*   Zis_f_contig)r.   r   r   r   is_f_contiguous   s    zDeviceNDArray.is_f_contiguousc             C   s   | j jS )z;
        Return true if the array is C-contiguous.
        )r*   Zis_c_contig)r.   r   r   r   is_c_contiguous  s    zDeviceNDArray.is_c_contiguousc             O   s   t |dkr&t|d ttfr&|d }t| }|| jkrP|| j| j| j| jdS | j	j
||\}}|| j	jgkr||j|j| j| jdS tddS )z
        Reshape the array without changing its contents, similarly to
        :meth:`numpy.ndarray.reshape`. Example::

            d_arr = d_arr.reshape(20, 50, order='F')
        r   r   )r   r   r   r-   zoperation requires copyingN)r&   r   r   listtyper   r   r   r-   r*   reshapeextentNotImplementedError)r.   ZnewshapeZkwsclsnewarrextentsr   r   r   rT     s    


zDeviceNDArray.reshapeCc             C   sL   t | }| jj|d\}}|| jjgkr@||j|j| j| jdS tddS )zr
        Flatten the array without changing its contents, similar to
        :meth:`numpy.ndarray.ravel`.
        )order)r   r   r   r-   zoperation requires copyingN)	rS   r*   ravelrU   r   r   r   r-   rV   )r.   r[   rW   rX   rY   r   r   r   r\   $  s    
zDeviceNDArray.ravelN)rZ   )rJ   rK   rL   rM   rP   rQ   rT   r\   r   r   r   r   r@      s
   r@   c               @   s   e Zd ZdZedd ZdS )	HostArrayTc             C   s   | j tS )N)ctypesZdata_asr   )r.   r   r   r   r5   7  s    zHostArray.device_ctypes_pointerN)rJ   rK   rL   rN   rO   r5   r   r   r   r   r]   4  s   r]   c             C   s*   | j dkr| d} t| j| j| j|dS )z/Create a DeviceNDArray object that is like ary.r   r   )r-   )r'   rT   r@   r   r   r   )rB   r-   r   r   r   from_array_like<  s    

r_   zArray contains non-contiguous buffer and cannot be transferred as a single memory region. Please ensure contiguous buffer with numpy .ascontiguousarray()c             C   s*   t | j}| j| | j|  }|| jkS )N)r   Zargmaxr   r   nbytes)rB   ir   r   r   r   _single_bufferK  s    rb   c             C   sF   | j d sB| j d sB| jd dkr.t| d S t| r:dS ttd S )NZC_CONTIGUOUSZF_CONTIGUOUSr   T)flagsr   sentry_contiguousrb   r#   errmsg_contiguous_buffer)rB   r   r   r   rd   Q  s    rd   Tc             C   sB   t | r| dfS t|  t| }|r6|j| ||d |dfS dS )z
    Create a DeviceArray like obj and optionally copy data from
    host to device. If obj already represents device memory, it is returned and
    no copy is made.
    F)r<   r1   TN)r=   Zis_device_memoryrd   r_   rC   )r   r1   r<   copyZdevobjr   r   r   auto_device^  s    
rg   )N)NT)(rM   Z
__future__r   r   r   warningsZmathrf   weakrefr^   r   Znumpyr   Znumba.roc.hsadrvr   r=    r   Znumbar	   r
   r   errorr   r!   	NameErrorr    r   r"   r   objectr$   r@   rG   r]   r_   re   rb   rd   rg   r   r   r   r   <module>   s6   
 J8
	