σ
\K]c           @@ s  d  d l  m Z m Z d  d l Z d  d l m Z d  d l Z d  d l Z d  d l	 m
 Z
 d  d l m Z m Z d d l m Z d d l m Z m Z m Z m Z d d	 l m Z d d
 l m Z d  d l m Z m Z d d l m Z d  d l	 m Z d  d l m Z d  d l m Z d  d l  m! Z! e! d    Z" e# d  Z$ e# d  Z% d   Z& d e' f d     YZ( d e' f d     YZ) d   Z* d   Z+ d e' f d     YZ, e d d d d  g  Z- d! e' f d"     YZ. d# e, f d$     YZ/ d%   Z0 d&   Z1 d' e, f d(     YZ2 d S()   i    (   t   print_functiont   absolute_importN(   t
   namedtuple(   t   ConcreteTemplate(   t   typest   compileri   (   t   hlc(   t   devicest   drivert   enumst   drvapi(   t   HsaKernelLaunchError(   t   gcn_occupancy(   t   hsat   dgpu_present(   t   devicearray(   t   AbstractTemplate(   t   ctypes_support(   t   config(   t   global_compiler_lockc   
      C@ s’   d d l  m } | j } | j } t j   } | j d  | j d  | j d  t j d | d | d |  d	 | d
 | d | d i   } | j	 }	 |	 j
   | S(   Ni   (   t   HSATargetDesct
   no_compilet   no_cpython_wrappert   nrtt	   typingctxt	   targetctxt   funct   argst   return_typet   flagst   locals(   t
   descriptorR   R   R   R   t   Flagst   sett   unsett   compile_extrat   libraryt   finalize(
   t   pyfuncR   R   t   debugR   R   R   R   t   cresR$   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   compile_hsa   s"    				
c         C@ sy   t  |  t j | d | } | j j | j j  } | j j | | j	 j
  } t d | j d | j d | j	 j
  } | S(   NR'   t   llvm_modulet   namet   argtypes(   R)   R   t   voidR$   t   get_functiont   fndesct   llvm_func_namet   target_contextt   prepare_hsa_kernelt	   signatureR   t	   HSAKernelt   moduleR+   (   R&   R   R'   R(   R   t   kernelt   hsakern(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   compile_kernel3   s    	c         @ s§   t  |  | | d |     j j   j j  }   j j |  t     d t f    f d     Y}   j	 j
  |    j g }   j j
    j |   S(   NR'   t   device_function_templatec           @ s   e  Z  Z   j g Z RS(    (   t   __name__t
   __module__t   keyR3   t   cases(    (   R(   t   devfn(    s1   lib/python2.7/site-packages/numba/roc/compiler.pyR9   C   s   (   R)   R$   R.   R/   R0   R1   t   mark_hsa_devicet   DeviceFunctionR   t   typing_contextt   insert_user_function(   R&   R   R   R'   R   R9   t   libs(    (   R(   R>   s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   compile_device=   s    c         @ sU   d d l  m } t |     d t f   f d     Y} | j } | j   |    S(   s%   Compile a DeviceFunctionTemplate
    i   (   R   R9   c           @ s   e  Z   Z   f d    Z RS(   c         @ s   | s t     j |  S(   N(   t   AssertionErrort   compile(   t   selfR   t   kws(   t   dft(    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   genericW   s    (   R:   R;   R<   RJ   (    (   RI   (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyR9   T   s   (   R   R   t   DeviceFunctionTemplateR   R   RB   (   R&   R   R9   R   (    (   RI   s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   compile_device_templateM   s    	RK   c           B@ s#   e  Z d  Z e d  Z d   Z RS(   s#   Unmaterialized device function
    c         C@ s   | |  _  | |  _ i  |  _ d  S(   N(   t   py_funcR'   t   _compileinfos(   RG   R&   R'   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   __init__c   s    		c         C@ sΚ   | |  j  k rΆ t |  j d | d |  j } | j j | j j  } | j	 j
 |  |  j  } | |  j  | <| j g } | r | j	 j |  | j |  qΓ | j	 j |  | j |  n |  j  | } | j S(   s‘   Compile the function for the given argument types.

        Each signature is compiled once by caching the compiled function inside
        this object.
        R'   N(   RN   R)   RM   t   NoneR'   R$   R.   R/   R0   R1   R?   RB   t   add_user_functionR3   (   RG   R   R(   R   t   first_definitionRC   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRF   i   s    

(   R:   R;   t   __doc__t   FalseRO   RF   (    (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRK   `   s   R@   c           B@ s   e  Z d    Z RS(   c         C@ s   | |  _  d  S(   N(   R(   (   RG   R(   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRO      s    (   R:   R;   RO   (    (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyR@      s   c         C@ s*   t  |  t t f  s |  g St |   Sd  S(   N(   t
   isinstancet   tuplet   list(   t   val(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   _ensure_list   s    c         C@ s7   t  |   } x$ t | |  D] } |  j d  q Wd  S(   Ni   (   t   lent   ranget   append(   RX   t   sizet   nt   _(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   _ensure_size_or_append   s    t   HSAKernelBasec           B@ sG   e  Z d  Z d   Z d   Z d d d  Z d d d  Z d   Z RS(   s.   Define interface for configurable kernels
    c         C@ s   d |  _  d |  _ d  |  _ d  S(   Ni   (   i   (   i   (   t   global_sizet
   local_sizeRP   t   stream(   RG   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRO      s    		c         C@ s   t  j  |   S(   N(   t   copy(   RG   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRe      s    c         C@ s   t  |  } | d k	 r\ t  |  } t t |  t |   } t | |  t | |  n  |  j   } t |  | _ | r t |  n d | _ | | _	 | S(   sC   Configure the OpenCL kernel
        local_size can be None
        N(
   RY   RP   t   maxRZ   R`   Re   RV   Rb   Rc   Rd   (   RG   Rb   Rc   Rd   R]   t   clone(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt	   configure’   s    	i@   c         C@ s   |  j  | t | |  d | S(   s6   Simplified configuration for 1D kernel launch
        Rd   (   Rh   t   min(   RG   t   nelemRc   Rd   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   forall΅   s    c         C@ s   t  | d  } t  | d  } t t |  t |   } t | |  t | |  g  t | |  D] \ } } | | ^ qe } |  j | | | d  S(   s*  Mimick CUDA python's square-bracket notation for configuration.
        This assumes a the argument to be:
            `griddim, blockdim, stream`
        The blockdim maps directly to local_size.
        The actual global_size is computed by multiplying the local_size to
        griddim.
        i    i   i   (   RY   Rf   RZ   R`   t   zipRh   (   RG   R   t   griddimt   blockdimR]   t   gt   lt   gs(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   __getitem__Ί   s    ,N(	   R:   R;   RS   RO   Re   RP   Rh   Rk   Rr   (    (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRa      s   		t   _CachedEntryt   symbolt
   executablet   kernarg_regiont   _CachedProgramc           B@ s   e  Z d    Z d   Z RS(   c         C@ s   | |  _  | |  _ i  |  _ d  S(   N(   t   _entry_namet   _binaryt   _cache(   RG   t
   entry_namet   binary(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRO   Ρ   s    		c         C@ s  t  j   } |  j j |  } | d  k rd j |  j  } | j } t |  j	  } t
 j t |  j	  } | j |  } t j   } t j j t
 j |  t |  j	  d  t
 j |   t j |  }	 t j   }
 |
 j | |	  |
 j   |
 j | |  } | j j } x6 | D]. } | j r| j t j  rA| } PqAqqW| d  k	 sWt   t! d | d |
 d |  } | |  j | <n  | | f S(   Ns   {0}Rt   Ru   Rv   ("   R   t   get_contextRz   t   getRP   t   formatRx   t   agentt	   bytearrayRy   t   ctypest   c_byteRZ   t   from_bufferR
   t   hsa_code_object_tR   R   t   hsa_code_object_deserializet	   addressoft   byreft
   CodeObjectt
   Executablet   loadt   freezet
   get_symbolt   regionst   globalst   host_accessiblet   supportsR	   t   HSA_REGION_GLOBAL_FLAG_KERNARGRE   t   _CacheEntry(   RG   t   ctxt   resultRt   R   t   bat   bblobt   bast   code_ptrt   codet   ext   symobjR   t   regRv   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyR~   Χ   s<    		
		(   R:   R;   RO   R~   (    (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRw   Π   s   	R4   c           B@ sD   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z RS(   s   
    A HSA kernel object
    c         C@ s   t  t |   j   | |  _ |  j   \ |  _ |  _ | |  _ t |  |  _	 g  |  _
 t d |  j d |  j  |  _ |  j   d  S(   NR{   R|   (   t   superR4   RO   t   _llvm_modulet   _generateGCNt   assemblyR|   R{   RV   t   argument_typest   _arglocRw   t
   _cacheprogt   _parse_kernel_resource(   RG   R*   R+   R,   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRO     s    			c         C@ s^   t  j d |  j  } t | j d   |  _ t  j d |  j  } t | j d   |  _ d S(   s9   
        Temporary workaround for register limit
        s"   \bwavefront_sgpr_count\s*=\s*(\d+)i   s!   \bworkitem_vgpr_count\s*=\s*(\d+)N(   t   ret   searchR‘   t   intt   groupt   _wavefront_sgpr_countt   _workitem_vgpr_count(   RG   t   m(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyR₯     s    c         C@ ss   t  j |  j  } t j d | d |  j d |  j  } | j ro d } | j d j	 | j
   } t |   n  d  S(   Nt
   group_sizet   vgpr_per_workitemt   sgpr_per_waves2   insufficient resources to launch kernel due to:
{}s   
(   t   npt   prodRc   R   t   get_limiting_factorsR«   Rͺ   t   reasonsR   t   joint   suggestionsR   (   RG   R­   t   limitst   fmtt   msg(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   _sentry_resource_limit  s    			c         C@ s,   t  j   } | j t |  j   | j   S(   N(   R   t   Modulet	   load_llvmt   strR   t   generateGCN(   RG   t   hlcmod(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyR    )  s    c         C@ st   |  j  j   \ } } | j j d k rX t j t j  | j j } | j j |  } n d } | | j | | j f S(   s'   
        Bind kernel to device
        i    N(
   R€   R~   Rt   t   kernarg_segment_sizeR   t   sizeofR   Rv   t   allocateRP   (   RG   R   t   entryt   szt   kernargs(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   bind.  s    c      	   @ sχ  |  j    |  j   \ } }    g  } g  } x3 t |  j |  D] \ } } t | | | |  qA Wd } xx | D]p }	 t j |	  }
 t |
 |  } | | 7}  j | } t j	 | t j
 t |	    } |	 | d <| |
 7} qq W| j } |  j d  k r
t j   n  d  } |  j d  k	 rGt j d  } | j |  j j    n  | j |  d |  j d |  j d | |  j d  k	 r|  j j |  n  x | D] } |   qW d  k	 rσ|  j d  k rΤ  j   qσ|  j j    f d    n  d  S(   Ni    i   t   workgroup_sizet	   grid_sizet   signalc           @ s     j    S(   N(   t   free(    (   Rv   RΔ   (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   <lambda>s  t    (   RΉ   RΕ   Rl   R’   t   _unpack_argumentR   Rΐ   t   _calc_padding_for_alignmentt   valuet   castt   POINTERt   typet   default_queueRd   RP   R   t   implicit_synct   create_signalt   insert_barriert   _get_last_signalt   dispatchRc   Rb   t   _add_signalRΙ   t   _add_callback(   RG   R   R   Rt   t   expanded_valuest   retrt   tyRX   t   baset   avt   alignt   padt   offsetedt   asptrt   qqRΘ   t   wb(    (   Rv   RΔ   s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   __call__<  s@    

!
	(	   R:   R;   RS   RO   R₯   RΉ   R    RΕ   Rε   (    (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyR4     s   						c         @ sΘ  t  |  t j  rbt j } t rm t j  t j	    \   } | ra | j
    f d    n    j } n t j  j j  } t j d  } } |  j  }	 |  j j  }
 | j
 |  | j
 |  | j
 |	  | j
 |
  | j
 |  x1 t  j  D]  } | j
 |  j |   qWxt  j  D]  } | j
 |  j |   q;Wnbt  |  t j  rt t d |     } | j
 |  n'|  t j k rΛt j   } | j
 |  nω |  t j k rωt j   } | j
 |  nΛ |  t j k r-t j t    } | j
 |  n |  t j k rq| j
 t j  j   | j
 t j  j    nS |  t j! k r΅| j
 t j  j   | j
 t j  j    n t" |     d S(   s>   
    Convert arguments to ctypes and append to kernelargs
    c           @ s     j    S(   N(   t   copy_to_host(    (   t   devaryRX   (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRΚ     RΛ   i    s   c_%sN(#   RU   R   t   ArrayR   t	   c_ssize_tR   R   t   auto_deviceR   R}   R\   t   device_ctypes_pointert   c_void_pt   dataR]   t   dtypet   itemsizeR[   t   ndimt   shapet   stridest   Integert   getattrt   float64t   c_doublet   float32t   c_floatt   booleant   c_uint8R¨   t	   complex64t   realt   imagt
   complex128t   NotImplementedError(   Rά   RX   t
   kernelargsRΫ   t   c_intpt   convRν   t   meminfot   parentt   nitemsRο   t   axt   cval(    (   Rη   RX   s1   lib/python2.7/site-packages/numba/roc/compiler.pyRΜ   v  sN    	!c         C@ s,   t  |  |  } | d k r  d S|  | Sd S(   sV   
    Returns byte padding required to move the base pointer into proper alignment
    i    N(   R¨   (   Rί   Rέ   t   rmdr(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRΝ   ―  s    t   AutoJitHSAKernelc           B@ s#   e  Z d    Z d   Z d   Z RS(   c         C@ sE   t  t |   j   | |  _ i  |  _ d d l m } | j |  _ d  S(   Ni   (   R   (   R   R	  RO   RM   t   definitionsR   R   R   (   RG   R   R   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRO   »  s
    		c         G@ s;   |  j  |   } | j |  j |  j |  j  } | |   d  S(   N(   t
   specializeRh   Rb   Rc   Rd   (   RG   R   R6   t   cfg(    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyRε   Δ  s    c         G@ so   t  g  | D] } |  j j |  ^ q
  } |  j j |  } | d  k rk t |  j |  } | |  j | <n  | S(   N(   RV   R   t   resolve_argument_typeR
  R~   RP   R8   RM   (   RG   R   t   aR,   R6   (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyR  Ι  s    %(   R:   R;   RO   Rε   R  (    (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyR	  Ί  s   			(3   t
   __future__R    R   Re   t   collectionsR   R¦   t   numpyR°   t   numba.typing.templatesR   t   numbaR   R   R   t   hsadrvR   R   R	   R
   t   hsadrv.errorR   RΛ   R   t   numba.roc.hsadrv.driverR   R   R   R   R   R   R   t   numba.compiler_lockR   R)   RT   R8   RD   RL   t   objectRK   R@   RY   R`   Ra   R   Rw   R4   RΜ   RΝ   R	  (    (    (    s1   lib/python2.7/site-packages/numba/roc/compiler.pyt   <module>   s@   "
	$		63s	9	