ó
\K]c        
   @` só  d  d l  m Z m Z m Z d  d l m Z d  d l Z d  d l m Z d  d l j	 j
 Z d  d l j Z d  d l m Z d  d l m Z d  d l m Z d  d l m Z d  d	 l m Z d
 d l m Z d
 d l m Z m Z e ƒ  Z e j Z e d e j ƒ d „  ƒ Z e d e j ƒ d „  ƒ Z  e d e j ƒ d „  ƒ Z! e d e j ƒ d „  ƒ Z" e d e j ƒ d „  ƒ Z# e d e j ƒ d „  ƒ Z$ d „  Z% x- e j& j' ƒ  D] Z( e e( ƒ e% e( ƒ ƒ q¬We d e j) ƒ d „  ƒ Z* d  a+ d „  Z, e d e j e j- ƒ d „  ƒ Z. e d e j/ e j- ƒ d „  ƒ Z0 e d  e j e j- ƒ d! „  ƒ Z1 e d  e j/ e j- ƒ d" „  ƒ Z2 e e j3 ƒ d# „  ƒ Z4 e e j5 e j6 ƒ d$ „  ƒ Z7 e e j8 e j6 ƒ d% „  ƒ Z9 e e j: e j6 ƒ d& „  ƒ Z; e e j< ƒ d' „  ƒ Z= e e j> ƒ d( „  ƒ Z? e e j@ ƒ d) „  ƒ ZA e e jB e j6 ƒ d* „  ƒ ZC e e jD e j6 e j6 e j6 e j6 e j6 ƒ e e jD e j6 e j6 e jE e j6 e j6 ƒ e e jD e j6 e j6 e jF e j6 e j6 ƒ e e jD e j6 e j6 e jG e j6 e j6 ƒ d+ „  ƒ ƒ ƒ ƒ ZH e e jI e j6 e j6 e jJ ƒ d, „  ƒ ZK e e jL e j6 e j6 ƒ e e jL e j6 e jE ƒ e e jL e j6 e jF ƒ e e jL e j6 e jG ƒ d- „  ƒ ƒ ƒ ƒ ZM e e jN e j6 e j6 ƒ e e jN e j6 e jE ƒ e e jN e j6 e jF ƒ e e jN e j6 e jG ƒ d. „  ƒ ƒ ƒ ƒ ZO e e jP e j- ƒ d/ „  ƒ ZQ e e jR e j- e j- e j- ƒ d0 „  ƒ ZS e e jT e jU ƒ d1 „  ƒ ZV e e jT e jW ƒ d2 „  ƒ ZX e e jY e j- ƒ d3 „  ƒ ZZ e e j[ e j- ƒ d4 „  ƒ Z\ e e j] e j- e j- e j- ƒ d5 „  ƒ Z^ e e_ e jF e jF ƒ d6 „  ƒ Z` e e_ e jG e jF ƒ e e_ e jF e jG ƒ e e_ e jG e jG ƒ d7 „  ƒ ƒ ƒ Za e eb e jF e jF ƒ d8 „  ƒ Zc e eb e jG e jF ƒ e eb e jF e jG ƒ e eb e jG e jG ƒ d9 „  ƒ ƒ ƒ Zd e ee e jF ƒ e ee e jG ƒ d: „  ƒ ƒ Zf d; „  Zg d< „  Zh e e ji jj e j) e j e j- ƒ e e ji jj e j) e j/ e j- ƒ e e ji jj e j) e jk e j- ƒ eh d= „  ƒ ƒ ƒ ƒ Zl e e ji j_ e j) e j e j- ƒ e e ji j_ e j) e jk e j- ƒ e e ji j_ e j) e j/ e j- ƒ eh d> „  ƒ ƒ ƒ ƒ Zm e e ji jb e j) e j e j- ƒ e e ji jb e j) e jk e j- ƒ e e ji jb e j) e j/ e j- ƒ eh d? „  ƒ ƒ ƒ ƒ Zn e e ji jo e j) e j- e j- ƒ d@ „  ƒ Zp dA „  Zq er dB „ Zs dC dD „ Zt d S(E   i    (   t   print_functiont   absolute_importt   division(   t   reduceN(   t   Type(   t   Registry(   t   cgutils(   t   six(   t   types(   t   IS_PY3i   (   t   nvvm(   t	   nvvmutilst   stubss   ptx.grid.1dc         C` s+   t  | ƒ d k s t ‚ t j | d d ƒS(   Ni   t   dim(   t   lent   AssertionErrorR   t   get_global_id(   t   contextt   buildert   sigt   args(    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt
   ptx_grid1d   s    s   ptx.grid.2dc         C` sI   t  | ƒ d k s t ‚ t j | d d ƒ\ } } t j | | | g ƒ S(   Ni   R   i   (   R   R   R   R   R   t
   pack_array(   R   R   R   R   t   r1t   r2(    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt
   ptx_grid2d   s    s   ptx.grid.3dc         C` sO   t  | ƒ d k s t ‚ t j | d d ƒ\ } } } t j | | | | g ƒ S(   Ni   R   i   (   R   R   R   R   R   R   (   R   R   R   R   R   R   t   r3(    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt
   ptx_grid3d#   s    s   ptx.gridsize.1dc         C` sR   t  | ƒ d k s t ‚ t j | d ƒ } t j | d ƒ } | j | | ƒ } | S(   Ni   s   ntid.xs   nctaid.x(   R   R   R   t	   call_sregt   mul(   R   R   R   R   t   ntidxt   nctaidxt   res(    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_gridsize1d*   s
    s   ptx.gridsize.2dc   
      C` sš   t  | ƒ d k s t ‚ t j | d ƒ } t j | d ƒ } t j | d ƒ } t j | d ƒ } | j | | ƒ } | j | | ƒ }	 t j | | |	 g ƒ S(   Ni   s   ntid.xs   nctaid.xs   ntid.ys   nctaid.y(   R   R   R   R   R   R   R   (
   R   R   R   R   R   R   t   ntidyt   nctaidyR   R   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_gridsize2d4   s    s   ptx.gridsize.3dc         C` sÓ   t  | ƒ d k s t ‚ t j | d ƒ } t j | d ƒ } t j | d ƒ } t j | d ƒ } t j | d ƒ } t j | d ƒ }	 | j | | ƒ }
 | j | | ƒ } | j | |	 ƒ } t j | |
 | | g ƒ S(   Ni   s   ntid.xs   nctaid.xs   ntid.ys   nctaid.ys   ntid.zs   nctaid.z(   R   R   R   R   R   R   R   (   R   R   R   R   R   R   R"   R#   t   ntidzt   nctaidzR   R   R   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_gridsize3dB   s    c         ` s   ‡  f d †  } | S(   Nc         ` s   | s t  ‚ t j | ˆ  ƒ S(   N(   R   R   R   (   R   R   R   R   (   t   sreg(    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_sreg_implW   s    (    (   R(   R)   (    (   R(   s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_sreg_templateV   s    s   ptx.cmem.arylikec         C` s#  | j  } | \ } | j } g  t j | j d d ƒ ƒ D] } |  j t j | ƒ ^ q7 } t j	 j
 t j d ƒ | ƒ }	 t j }
 | j |	 j d d d |
 ƒ} t j | _ t | _ |	 | _ |  j | j ƒ } |  j | ƒ } d | d j ƒ  | _ t j | t j d ƒ |
 ƒ } | j t j t j d ƒ |
 ƒ ƒ } | j | | g ƒ } |  j  | ƒ |  | ƒ } g  | j! D] } |  j t j" | ƒ ^ ql} g  | j# D] } |  j t j" | ƒ ^ q—} |  j$ | d	 | j | | j% j ƒ d
 t& j' | | ƒ d t& j' | | ƒ d | j( d | j) d d  ƒ| j+ ƒ  S(   Nt   ordert   Ai   t   namet   _cudapy_cmemt	   addrspacei   i   t   datat   shapet   stridest   itemsizet   parentt   meminfo(,   t   modulet   return_typeR   t	   iterbytest   tobytest   get_constantR   t   bytet   lct   Constantt   arrayR   t   intR
   t   ADDRSPACE_CONSTANTt   add_global_variablet   typet   LINKAGE_INTERNALt   linkaget   Truet   global_constantt   initializert   get_data_typet   dtypet   get_abi_sizeoft
   bit_lengtht   alignR   t   insert_addrspace_convt   bitcastt   pointert   callt
   make_arrayR1   t   intpR2   t   populate_arrayR0   R   R   R3   R4   t   Nonet	   _getvalue(   R   R   R   R   t   lmodt   arrt   arytyt   it	   constvalst   constaryR/   t   gvt   lldtypeRL   t   convt   addrspaceptrt   genptrt   aryt   st   kshapet   kstrides(    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_cmem_arylikee   s:    			:				$++		c         C` s   t  d 7a  d j |  t  ƒ S(   sÍ   Due to bug with NVVM invalid internalizing of shared memory in the
    PTX output.  We can't mark shared memory to be internal. We have to
    ensure unique name is generated for shared memory symbol.
    i   s   {0}_{1}(   t   _unique_smem_idt   format(   R-   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   _get_unique_smem_id”   s    
s   ptx.smem.allocc         C` sC   | \ } } t  |  | d | f d | d t d ƒ d t j d t ƒS(   NR1   RI   t   symbol_namet   _cudapy_smemR/   t   can_dynsized(   t   _generic_arrayRh   R
   t   ADDRSPACE_SHAREDRE   (   R   R   R   R   t   lengthRI   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_smem_alloc_intpž   s
    	c         C` s@   | \ } } t  |  | d | d | d t d ƒ d t j d t ƒS(   NR1   RI   Ri   Rj   R/   Rk   (   Rl   Rh   R
   Rm   RE   (   R   R   R   R   R1   RI   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_smem_alloc_array§   s
    	s   ptx.lmem.allocc         C` s=   | \ } } t  |  | d | f d | d d d t j d t ƒS(   NR1   RI   Ri   t   _cudapy_lmemR/   Rk   (   Rl   R
   t   ADDRSPACE_LOCALt   False(   R   R   R   R   Rn   RI   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_lmem_alloc_intp°   s
    	c         C` s:   | \ } } t  |  | d | d | d d d t j d t ƒS(   NR1   RI   Ri   Rq   R/   Rk   (   Rl   R
   Rr   Rs   (   R   R   R   R   R1   RI   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_lmem_alloc_array¹   s
    	c         C` sc   | s t  ‚ d } | j } t j t j ƒ  d ƒ } | j | d | ƒ} | j | d ƒ |  j ƒ  S(   Ns   llvm.nvvm.barrier0R-   (    (    (   R   R6   R   t   functiont   voidt   get_or_insert_functionRP   t   get_dummy_value(   R   R   R   R   t   fnameRV   t   fntyt   sync(    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_syncthreadsÂ   s    	c         C` s[   d } | j  } t j t j d ƒ t j d ƒ f ƒ } | j | d | ƒ} | j | | ƒ S(   Ns   llvm.nvvm.barrier0.popci    R-   (   R6   R   Rv   R?   Rx   RP   (   R   R   R   R   Rz   RV   R{   R|   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_syncthreads_countÍ   s
    	'c         C` s[   d } | j  } t j t j d ƒ t j d ƒ f ƒ } | j | d | ƒ} | j | | ƒ S(   Ns   llvm.nvvm.barrier0.andi    R-   (   R6   R   Rv   R?   Rx   RP   (   R   R   R   R   Rz   RV   R{   R|   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_syncthreads_andÖ   s
    	'c         C` s[   d } | j  } t j t j d ƒ t j d ƒ f ƒ } | j | d | ƒ} | j | | ƒ S(   Ns   llvm.nvvm.barrier0.ori    R-   (   R6   R   Rv   R?   Rx   RP   (   R   R   R   R   Rz   RV   R{   R|   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_syncthreads_orß   s
    	'c         C` sc   | s t  ‚ d } | j } t j t j ƒ  d ƒ } | j | d | ƒ} | j | d ƒ |  j ƒ  S(   Ns   llvm.nvvm.membar.ctaR-   (    (    (   R   R6   R   Rv   Rw   Rx   RP   Ry   (   R   R   R   R   Rz   RV   R{   R|   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_threadfence_blockè   s    	c         C` sc   | s t  ‚ d } | j } t j t j ƒ  d ƒ } | j | d | ƒ} | j | d ƒ |  j ƒ  S(   Ns   llvm.nvvm.membar.sysR-   (    (    (   R   R6   R   Rv   Rw   Rx   RP   Ry   (   R   R   R   R   Rz   RV   R{   R|   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_threadfence_systemó   s    	c         C` sc   | s t  ‚ d } | j } t j t j ƒ  d ƒ } | j | d | ƒ} | j | d ƒ |  j ƒ  S(   Ns   llvm.nvvm.membar.glR-   (    (    (   R   R6   R   Rv   Rw   Rx   RP   Ry   (   R   R   R   R   Rz   RV   R{   R|   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_threadfence_deviceþ   s    	c         C` sb   d } | j  } t j t j ƒ  t j d ƒ f ƒ } | j | d | ƒ} | j | | ƒ |  j ƒ  S(   Ns   llvm.nvvm.bar.warp.synci    R-   (   R6   R   Rv   Rw   R?   Rx   RP   Ry   (   R   R   R   R   Rz   RV   R{   R|   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_warp_sync	  s    	$c         C` sü  | \ } } } } } | j  d }	 |	 t j k rR | j | t j |	 j ƒ ƒ } n  d }
 | j } t j t j	 t j d ƒ t j d ƒ f ƒ t j d ƒ t j d ƒ t j d ƒ t j d ƒ t j d ƒ f ƒ } | j
 | d |
 ƒ} |	 j d k r~| j | | | | | | f ƒ } |	 t j k rø| j | d ƒ } | j | d ƒ } | j | t j ƒ  ƒ } t j | | | f ƒ } qønz| j | t j d ƒ ƒ } | j | |  j t j d ƒ ƒ } | j | t j d ƒ ƒ } | j | | | | | | f ƒ } | j | | | | | | f ƒ } | j | d ƒ } | j | d ƒ } | j | d ƒ } | j | t j d ƒ ƒ } | j | t j d ƒ ƒ } | j | |  j t j d ƒ ƒ } | j | | ƒ } |	 t j k rà| j | t j ƒ  ƒ } n  t j | | | f ƒ } | S(   s‹  
    The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic function supports
    both 32 and 64 bit ints and floats, so for feature parity, i64, f32, and f64 are implemented.
    Floats by way of bitcasting the float to an int, then shuffling, then bitcasting back.
    And 64-bit values by packing them into 2 32bit values, shuffling thoose, and then packing back together.
    i   s   llvm.nvvm.shfl.sync.i32i    i   R-   i    i@   (   R   R   t   real_domainRN   R   R?   t   bitwidthR6   Rv   t   structRx   RP   t   float32t   extract_valuet   floatR   t   make_anonymous_structt   trunct   lshrR:   t   i8t   zextt   shlt   or_t   float64t   double(   R   R   R   R   t   maskt   modet   valuet   indext   clampt
   value_typeRz   RV   R{   t   funct   rett   rvt   predt   fvt   value1t
   value_lshrt   value2t   ret1t   ret2t   rv1t   rv2t   rv1_64t   rv2_64t   rv_shl(    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_shfl_sync_i32  sB    !	$E!!!!!c         C` s‹   d } | j  } t j t j t j d ƒ t j d ƒ f ƒ t j d ƒ t j d ƒ t j d ƒ f ƒ } | j | d | ƒ} | j | | ƒ S(   Ns   llvm.nvvm.vote.synci    i   R-   (   R6   R   Rv   R‡   R?   Rx   RP   (   R   R   R   R   Rz   RV   R{   Rš   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_vote_syncC  s    	*-c         C` sÆ   | \ } } | j  d j } | j  d t j k rP | j | t j | ƒ ƒ } n  d j | ƒ } | j } t j	 t j d ƒ t j d ƒ t j | ƒ f ƒ }	 | j
 |	 d | ƒ}
 | j |
 | | f ƒ S(   Ni   s   llvm.nvvm.match.any.sync.i{}i    R-   (   R   R†   R   R…   RN   R   R?   Rg   R6   Rv   Rx   RP   (   R   R   R   R   R”   R–   t   widthRz   RV   R{   Rš   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_match_any_syncM  s    	3c         C` sÞ   | \ } } | j  d j } | j  d t j k rP | j | t j | ƒ ƒ } n  d j | ƒ } | j } t j	 t j
 t j d ƒ t j d ƒ f ƒ t j d ƒ t j | ƒ f ƒ }	 | j |	 d | ƒ}
 | j |
 | | f ƒ S(   Ni   s   llvm.nvvm.match.all.sync.i{}i    R-   (   R   R†   R   R…   RN   R   R?   Rg   R6   Rv   R‡   Rx   RP   (   R   R   R   R   R”   R–   R«   Rz   RV   R{   Rš   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_match_all_sync]  s    	*!c         C` s   | j  | d ƒ S(   Ni    (   t   ctpop(   R   R   R   R   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_popcn  s    c         C` s   | j  | Œ  S(   N(   t   fma(   R   R   R   R   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_fmas  s    c         C` sO   | j  j t j j t j j d ƒ t j j d ƒ f ƒ d ƒ } | j | | ƒ S(   Ni    t	   __nv_brev(   R6   Rx   R<   R   Rv   R?   RP   (   R   R   R   R   t   fn(    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_brev_u4x  s    	-	c         C` sO   | j  j t j j t j j d ƒ t j j d ƒ f ƒ d ƒ } | j | | ƒ S(   Ni@   t   __nv_brevll(   R6   Rx   R<   R   Rv   R?   RP   (   R   R   R   R   R³   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_brev_u8ƒ  s    	-	c         C` s#   | j  | d |  j t j d ƒ ƒ S(   Ni    (   t   ctlzR:   R   t   boolean(   R   R   R   R   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_clzŽ  s    c         C` s#   | j  | d |  j t j d ƒ ƒ S(   Ni    (   t   cttzR:   R   R¸   (   R   R   R   R   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_ffs•  s    c         C` s"   | \ } } } | j  | | | ƒ S(   N(   t   select(   R   R   R   R   t   testt   at   b(    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_selpœ  s    c         C` sU   | j  j t j j t j j ƒ  t j j ƒ  t j j ƒ  f ƒ d ƒ } | j | | ƒ S(   Nt
   __nv_fmaxf(   R6   Rx   R<   R   Rv   RŠ   RP   (   R   R   R   R   R³   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt
   ptx_max_f4¢  s    			c         C` s›   | j  j t j j t j j ƒ  t j j ƒ  t j j ƒ  f ƒ d ƒ } | j | |  j | | d | j d t	 j ƒ |  j | | d | j d t	 j ƒ g ƒ S(   Nt	   __nv_fmaxi    i   (
   R6   Rx   R<   R   Rv   R“   RP   t   castR   R   (   R   R   R   R   R³   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt
   ptx_max_f8¬  s    				#c         C` sU   | j  j t j j t j j ƒ  t j j ƒ  t j j ƒ  f ƒ d ƒ } | j | | ƒ S(   Nt
   __nv_fminf(   R6   Rx   R<   R   Rv   RŠ   RP   (   R   R   R   R   R³   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt
   ptx_min_f4¼  s    			c         C` s›   | j  j t j j t j j ƒ  t j j ƒ  t j j ƒ  f ƒ d ƒ } | j | |  j | | d | j d t	 j ƒ |  j | | d | j d t	 j ƒ g ƒ S(   Nt	   __nv_fmini    i   (
   R6   Rx   R<   R   Rv   R“   RP   RÄ   R   R   (   R   R   R   R   R³   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt
   ptx_min_f8Æ  s    				#c         C` s„   t  s t d ƒ ‚ n  | j j t j j t j j d ƒ t j j ƒ  f ƒ d ƒ } | j	 | |  j
 | | d | j d t j ƒ g ƒ S(   Ns#   round returns a float on Python 2.xi@   t   __nv_llrinti    (   R	   t   NotImplementedErrorR6   Rx   R<   R   Rv   R?   R“   RP   RÄ   R   R   (   R   R   R   R   R³   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt	   ptx_roundÖ  s    				c         C` s˜   | t  j k r3 t  j d | d d ƒ } | g } n t j | | d t | ƒ ƒ} g  t | | ƒ D]' \ } } |  j | | | t  j ƒ ^ qa } | | f S(   s4   
    Convert integer indices into tuple of intp
    RI   t   counti   (	   R   t   integer_domaint   UniTupleR   t   unpack_tupleR   t   zipRÄ   RR   (   R   R   t   indtyt   indst   indicest   tRY   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   _normalize_indicesæ  s    :c         ` s   ‡  f d †  } | S(   Nc         ` sê   | j  \ } } } | \ } } }	 | j }
 t |  | | | ƒ \ } } |
 | k rj t d |
 | f ƒ ‚ n  | j t | ƒ k r¡ t d | j t | ƒ f ƒ ‚ n  |  j | ƒ |  | | ƒ } t j | | | | ƒ } ˆ  |  | |
 | |	 ƒ S(   Ns   expect %s but got %ss#   indexing %d-D array with %d-D index(	   R   RI   RÖ   t	   TypeErrort   ndimR   RQ   R   t   get_item_pointer(   R   R   R   R   RX   RÒ   t   valtyRa   RÓ   t   valRI   RÔ   t   laryt   ptr(   t   dispatch_fn(    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   impõ  s    	(    (   RÞ   Rß   (    (   RÞ   s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   _atomic_dispatcherô  s    c         C` sˆ   | t  j k r7 | j } | j t j | ƒ | | f ƒ S| t  j k rn | j } | j t j | ƒ | | f ƒ S| j d | | d ƒ Sd  S(   Nt   addt	   monotonic(	   R   Rˆ   R6   RP   R   t   declare_atomic_add_float32R’   t   declare_atomic_add_float64t
   atomic_rmw(   R   R   RI   RÝ   RÛ   RV   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_atomic_add_tuple  s    		c         C` sÛ   | j  } | t j k r7 | j t j | ƒ | | f ƒ S| t j k re | j t j | ƒ | | f ƒ S| t j t j	 f k r– | j
 d | | d d ƒS| t j t j f k rÇ | j
 d | | d d ƒSt d | ƒ ‚ d  S(   Nt   maxt   orderingRâ   t   umaxs&   Unimplemented atomic max with %s array(   R6   R   R’   RP   R   t   declare_atomic_max_float64Rˆ   t   declare_atomic_max_float32t   int32t   int64Rå   t   uint32t   uint64R×   (   R   R   RI   RÝ   RÛ   RV   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_atomic_max  s    	c         C` sÛ   | j  } | t j k r7 | j t j | ƒ | | f ƒ S| t j k re | j t j | ƒ | | f ƒ S| t j t j	 f k r– | j
 d | | d d ƒS| t j t j f k rÇ | j
 d | | d d ƒSt d | ƒ ‚ d  S(   Nt   minRè   Râ   t   umins&   Unimplemented atomic min with %s array(   R6   R   R’   RP   R   t   declare_atomic_min_float64Rˆ   t   declare_atomic_min_float32Rì   Rí   Rå   Rî   Rï   R×   (   R   R   RI   RÝ   RÛ   RV   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_atomic_min,  s    	c         C` sÆ   | j  \ } } } | \ } } }	 | j }
 |  j | ƒ |  | | ƒ } |  j t j d ƒ } t j | | | | f ƒ } | j t j k r² | j	 } | j
 t j | ƒ | | |	 f ƒ St d |
 ƒ ‚ d  S(   Ni    s3   Unimplemented atomic compare_and_swap with %s array(   R   RI   RQ   R:   R   RR   R   RÙ   Rì   R6   RP   R   t   declare_atomic_cas_int32R×   (   R   R   R   R   RX   t   oldtyRÚ   Ra   t   oldRÛ   RI   RÜ   t   zeroRÝ   RV   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   ptx_atomic_cas_tuple>  s    		"c         C` s   t  j t j |  j ƒ S(   N(   t   llt   create_target_dataR
   t   data_layoutt   address_size(   R   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   _get_target_dataQ  s    c         C` sa  t  t j | ƒ } |  j | ƒ } t j | | ƒ }	 | t j k r] t j	 | |	 d | ƒ}
 nî | j
 } | j |	 | | ƒ } |  j | ƒ | _ | d k r½ | r® t j | _ qÒ t d ƒ ‚ n t j j |	 ƒ | _ | t j k rô t d | ƒ ‚ n  t j | t j d ƒ | ƒ } | j t j t j d ƒ | ƒ ƒ } | j | | g ƒ }
 t |  | |
 | | ƒ S(   NR-   i    s   array length <= 0s   unsupported type: %si   (   R   t   operatorR   RH   R   R>   R
   Rr   R   t   alloca_onceR6   RA   RJ   RL   R<   t   LINKAGE_EXTERNALRD   t
   ValueErrorR=   t   undefRG   R   t   number_domainR×   R   RM   R?   RN   RO   RP   t   _make_array(   R   R   R1   RI   Ri   R/   Rk   t	   elemcountR]   t   larytyt   dataptrRV   t   gvmemR^   R_   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyRl   U  s&    	$t   Cc         C` sŽ  t  | ƒ } t j d | d | d d ƒ } |  j | ƒ |  | ƒ } t |  ƒ }	 |  j | ƒ }
 |
 j |	 ƒ } | g } x9 t t | d ƒ ƒ D]! \ } } | j	 | | d ƒ qŒ Wg  t | ƒ D] } | ^ q¾ } g  | D] } |  j
 t j | ƒ ^ q× } g  | D] } |  j
 t j | ƒ ^ qÿ } |  j | d | j | | j j ƒ d t j | | ƒ d	 t j | | ƒ d
 |  j
 t j | ƒ d d  ƒ| j ƒ  S(   NRI   RØ   t   layoutR  i   iÿÿÿÿR0   R1   R2   R3   R5   (   R   R   t   ArrayRQ   Rÿ   RH   t   get_abi_sizet	   enumeratet   reversedt   appendR:   RR   RS   RN   R0   RB   R   R   RT   RU   (   R   R   R	  RI   R1   R  RØ   RX   Ra   t
   targetdataR]   R3   t   rstridesRY   t   lastsizeRb   R2   Rc   Rd   (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyR    s&    	#(((u   t
   __future__R    R   R   t	   functoolsR   R   t   llvmlite.llvmpy.coreR   t   llvmpyt   coreR<   t   llvmlite.bindingt   bindingRû   t   numba.targets.imputilsR   t   numbaR   R   R   t   numba.utilsR	   t   cudadrvR
   t    R   R   t   registryt   lowerRR   R   R   R   R!   R$   R'   R*   t   SREG_MAPPINGt   keysR(   R  Re   Rf   Rh   t   AnyRo   RÏ   Rp   Rt   Ru   t   syncthreadsR}   t   syncthreads_countt   i4R~   t   syncthreads_andR   t   syncthreads_orR€   t   threadfence_blockR   t   threadfence_systemR‚   t   threadfenceRƒ   t   syncwarpR„   t   shfl_sync_intrinsicRŽ   t   f4t   f8R©   t   vote_sync_intrinsicR¸   Rª   t   match_any_syncR¬   t   match_all_syncR­   t   popcR¯   R°   R±   t   brevt   u4R´   t   u8R¶   t   clzR¹   t   ffsR»   t   selpRÀ   Rç   RÂ   RÅ   Rñ   RÇ   RÉ   t   roundRÌ   RÖ   Rà   t   atomicRá   t   TupleRæ   Rð   Rõ   t   compare_and_swapRú   Rÿ   Rs   Rl   R  (    (    (    s2   lib/python2.7/site-packages/numba/cuda/cudaimpl.pyt   <module>   s¬   		
		,	
!	!	!	!				
***?-*
--**!
'!
'		!!!!!!!!!-	+