ó
\K]c           @@ sZ  d  Z  d d l m Z m Z d d l Z d d l Z d d l m Z m	 Z	 m
 Z
 d d l m Z m Z m Z y e Wn e k
 r e Z n Xe	 j Z e	 j Z e	 j Z e d d „ ƒ Z d „  Z d	 „  Z e d e d d
 „ ƒ Z e e j d d d d „ ƒ Z e e j d d d „ ƒ Z e e j d d d e e d „ ƒ Z  e j! e d d d „ ƒ ƒ Z" d „  Z# d „  Z$ d „  Z% d d „ Z& e d „  ƒ Z' e e j! d „  ƒ ƒ Z( e e j! d „  ƒ ƒ Z) e d „ Z* e
 j+ Z+ d „  Z, d „  Z- d „  Z. d „  Z/ d e d „ Z0 d „  Z1 e j! d „  ƒ Z2 e e
 j3 ƒ Z3 e e
 j4 ƒ Z4 e e
 j5 ƒ Z5 d S(   s%   
API that are reported to numba.cuda
i    (   t   print_functiont   absolute_importNi   (   t   devicearrayt   devicest   driver(   t   Int   Outt   InOutc      	   C@ s  |  j  d ƒ } | d k rH |  j  d ƒ } | d k	 rH t d ƒ ‚ qH n  |  d } |  j  d ƒ } t j |  d ƒ } t | | | d d	 ƒ\ } } } t j | | | j ƒ } t j	 |  d
 d ƒ } t j
 t ƒ  | d | d | ƒ}	 t j d | d | d | d |	 ƒ }
 |
 S(   s½   Create a DeviceNDArray from a cuda-array-interface description.
    The *owner* is the owner of the underlying memory.
    The resulting DeviceNDArray will acquire a reference from it.
    t   versioni   t   masks   Masked arrays are not supportedt   shapet   stridest   typestrt   ordert   Ct   datai    t   sizet   ownert   dtypet   gpu_dataN(   t   gett   Nonet   NotImplementedErrort   npR   t   _prepare_shape_strides_dtypeR   t   memory_size_from_infot   itemsizet   get_devptr_for_active_ctxt   MemoryPointert   current_contextR   t   DeviceNDArray(   t   descR   R   R	   R
   R   R   R   t   devptrR   t   da(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   from_cuda_array_interface   s"    
c         C@ s2   t  |  ƒ s t d ƒ ‚ n t |  j d |  ƒSd S(   sõ   Create a DeviceNDArray from any object that implements
    the cuda-array-interface.

    A view of the underlying GPU buffer is created.  No copying of the data
    is done.  The resulting DeviceNDArray will acquire a reference from `obj`.
    s1   *obj* doesn't implement the cuda array interface.R   N(   t   is_cuda_arrayt	   TypeErrorR"   t   __cuda_array_interface__(   t   obj(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   as_cuda_array8   s    c         C@ s   t  |  d ƒ S(   sw   Test if the object has defined the `__cuda_array_interface__`.

    Does not verify the validity of the interface.
    R%   (   t   hasattr(   R&   (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyR#   F   s    c         C@ sQ   | d k r1 t j |  d | d | ƒ\ } } | S| rM | j |  d | ƒn  | S(   sË  to_device(obj, stream=0, copy=True, to=None)

    Allocate and transfer a numpy ndarray or structured scalar to the device.

    To copy host->device a numpy array::

        ary = np.arange(10)
        d_ary = cuda.to_device(ary)

    To enqueue the transfer to a stream::

        stream = cuda.stream()
        d_ary = cuda.to_device(ary, stream=stream)

    The resulting ``d_ary`` is a ``DeviceNDArray``.

    To copy device->host::

        hary = d_ary.copy_to_host()

    To copy device->host to an existing array::

        ary = np.empty(shape=d_ary.shape, dtype=d_ary.dtype)
        d_ary.copy_to_host(ary)

    To enqueue the transfer to a stream::

        hary = d_ary.copy_to_host(stream=stream)
    t   streamt   copyN(   R   R   t   auto_devicet   copy_to_device(   R&   R)   R*   t   tot   new(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt	   to_deviceN   s    !R   c      	   C@ s@   t  |  | | | ƒ \ }  } } t j d |  d | d | d | ƒ S(   s’   device_array(shape, dtype=np.float, strides=None, order='C', stream=0)

    Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
    R
   R   R   R)   (   R   R   R   (   R
   R   R   R   R)   (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   device_arrayu   s    c         C@ sp   t  |  | | | ƒ \ }  } } t j |  | | j ƒ } t ƒ  j | ƒ } t j d |  d | d | d | d | ƒ S(   s¢   pinned_array(shape, dtype=np.float, strides=None, order='C')

    Allocate a np.ndarray with a buffer that is pinned (pagelocked).
    Similar to np.empty().
    R
   R   R   R   t   buffer(   R   R   R   R   R   t   memhostallocR   t   ndarray(   R
   R   R   R   t   bytesizeR1   (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   pinned_array   s    !c         C@ sª   t  |  | | | ƒ \ }  } } t j |  | | j ƒ } t ƒ  j | d t ƒ} t j d |  d | d | d | d | ƒ }	 t j j	 |	 d t
 j ƒ}
 |
 j | d | ƒ|
 S(	   s&  mapped_array(shape, dtype=np.float, strides=None, order='C', stream=0, portable=False, wc=False)

    Allocate a mapped ndarray with a buffer that is pinned and mapped on
    to the device. Similar to np.empty()

    :param portable: a boolean flag to allow the allocated device memory to be
              usable in multiple devices.
    :param wc: a boolean flag to enable writecombined allocation which is faster
        to write by the host and to read by the device, but slower to
        write by the host and slower to write by the device.
    t   mappedR
   R   R   R   R1   t   typeR)   (   R   R   R   R   R   R2   t   TrueR   R3   t   viewR   t   MappedNDArrayt   device_setup(   R
   R   R   R   R)   t   portablet   wcR4   R1   t   nparyt
   mappedview(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   mapped_array‘   s    !	c         c@ sƒ   t  j | ƒ } t  j | ƒ | j } t j j |  Œ  }  t j d |  | d | ƒ} | j	 t
 ƒ  d | d | d | ƒV| j ƒ  d S(   s¨  
    A context manager that opens a IPC *handle* (*CUipcMemHandle*) that is
    represented as a sequence of bytes (e.g. *bytes*, tuple of int)
    and represent it as an array of the given *shape*, *strides* and *dtype*.
    The *strides* can be omitted.  In that case, it is assumed to be a 1D
    C contiguous array.

    Yields a device array.

    The IPC handle is closed automatically when context manager exits.
    t   offsetR
   R   R   N(   R   R   t   prodR   R   t   drvapit   cu_ipc_mem_handlet	   IpcHandleR   t
   open_arrayR   t   close(   t   handleR
   R   R   RA   R   t	   ipchandle(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   open_ipc_arrayª   s    c           C@ s   t  ƒ  j ƒ  S(   s    Synchronize the current context.(   R   t   synchronize(    (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyRK   Ä   s    c         C@ s‹   t  j | ƒ } t |  t t f ƒ r0 |  f }  n  t | t t f ƒ rQ | f } n- |  d k rf d }  n  | p{ t |  | | ƒ } |  | | f S(   Ni   (    (   i   (   R   R   t
   isinstancet   intt   longt   _fill_stride_by_order(   R
   R   R   R   (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyR   É   s    	c         C@ sÞ   t  |  ƒ } d g | } | d k rt | j | d <xŸ t t | d ƒ ƒ D]$ } | | d |  | d | | <qI Wn` | d k rÈ | j | d <xD t d | ƒ D]$ } | | d |  | d | | <q Wn t d ƒ ‚ t | ƒ S(   Ni    R   iÿÿÿÿi   t   Fs   must be either C/F order(   t   lenR   t   reversedt   ranget
   ValueErrort   tuple(   R
   R   R   t   ndR   t   d(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyRO   Ö   s    %%c      	   C@ s(   t  d |  j d |  j d |  j d | ƒ S(   s=   Call cuda.devicearray() with information from the array.
    R
   R   R   R)   (   R0   R
   R   R   (   t   aryR)   (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   device_array_likeæ   s    c           C@ s   t  ƒ  j ƒ  S(   sW   stream()

    Create a CUDA stream that represents a command queue for the device.
    (   R   t   create_stream(    (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyR)   í   s    c          g@ s]   g  } xK |  D]C } t  ƒ  j | t j | ƒ t j | ƒ d t ƒ} | j | ƒ q Wd Vd S(   sI   A context manager for temporary pinning a sequence of host ndarrays.
    R6   N(   R   t   mempinR   t   host_pointert   host_memory_sizet   Falset   append(   t   arylistt   pmlistRX   t   pm(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   pinnedö   s    	c          o@ sý   | s d | k s t  d ƒ ‚ | j d d ƒ } g  } g  } xs |  D]k } t ƒ  j | t j | ƒ t j | ƒ d t ƒ} | j | ƒ t	 j
 | d | d | ƒ} | j | ƒ qD Wz' t | ƒ d k rÔ | d Vn | VWd x | D] } | j ƒ  qä WXd S(   sK   A context manager for temporarily mapping a sequence of host ndarrays.
    R)   s    Only accept 'stream' as keyword.i    R6   R   i   N(   t   AssertionErrorR   R   R[   R   R\   R]   R8   R_   R   t   from_array_likeRQ   t   free(   R`   t   kwsR)   Ra   t
   devarylistRX   Rb   t   devary(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyR6     s"    		c         C@ s   t  ƒ  j d |  ƒ } | S(   ss   
    Create a CUDA event. Timing data is only recorded by the event if it is
    created with ``timing=True``.
    t   timing(   R   t   create_event(   Rj   t   evt(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   event!  s    c         C@ s   t  j |  ƒ } | j S(   s’   
    Make the context associated with device *device_id* the current context.

    Returns a Device instance.

    Raises exception on error.
    (   R   t   get_contextt   device(   t	   device_idt   context(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   select_device-  s    c           C@ s
   t  ƒ  j S(   s5   Get current device associated with the current thread(   R   Ro   (    (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   get_current_device9  s    c           C@ s   t  j S(   s%   Return a list of all detected devices(   R   t   gpus(    (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   list_devices>  s    c           C@ s   t  j ƒ  d S(   s‡   
    Explicitly clears all contexts in the current thread, and destroys all
    contexts if the current thread is the main thread.
    N(   R   t   reset(    (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyRG   C  s    c         C@ s   t  j |  d | d | ƒS(   NR)   R*   (   R   R+   (   RX   R)   R*   (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   _auto_deviceK  s    c          C@ s!  t  ƒ  }  t d t |  ƒ ƒ d } xÍ |  D]Å } g  } | j } | d d | f g 7} | d | j f g 7} | d | j f g 7} | d k  r— d } n d	 } | d
 7} t d | j | j | f ƒ x( | D]  \ } } t d | | f ƒ qË Wq* Wt d ƒ t d | t |  ƒ f ƒ | d k S(   s¥   
    Detect supported CUDA hardware and print a summary of the detected hardware.

    Returns a boolean indicating whether any supported devices were detected.
    s   Found %d CUDA devicesi    s   compute capabilitys   %d.%ds   pci device ids
   pci bus idi   s   [NOT SUPPORTED: CC < 2.0]s   [SUPPORTED]i   s   id %d    %20s %40ss   %40s: %ss   Summary:s   	%d/%d devices are supported(   i   i    (   Ru   t   printRQ   t   compute_capabilityt   PCI_DEVICE_IDt
   PCI_BUS_IDt   idt   name(   t   devlistt   supported_countt   devt   attrst   cct   supportt   keyt   val(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   detectO  s&    			

c          c@ s(   t  ƒ  j }  |  j ƒ  
 d VWd QXd S(   sV  
    Temporarily disable memory deallocation.
    Use this to prevent resource deallocation breaking asynchronous execution.

    For example::

        with defer_cleanup():
            # all cleanup is deferred in here
            do_speed_critical_code()
        # cleanup can occur here

    Note: this context manager can be nested.
    N(   R   t   deallocationst   disable(   t   deallocs(    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   defer_cleanupm  s    (6   t   __doc__t
   __future__R    R   t
   contextlibt   numpyR   t   cudadrvR   R   R   t   argsR   R   R   RN   t	   NameErrorRM   t   require_contextRn   R   Rt   R   R"   R'   R#   R8   R/   t   floatR0   R5   R^   R@   t   contextmanagerRJ   RK   R   RO   RY   R)   Rc   R6   Rm   t   event_elapsed_timeRr   Rs   Ru   RG   Rw   R†   RŠ   t	   profilingt   profile_startt   profile_stop(    (    (    s-   lib/python2.7/site-packages/numba/cuda/api.pyt   <module>   sb   
					&										