ó
\K]c           @` s^  d  d l  m Z m Z m Z d  d l Z d  d l m Z d  d l m	 Z	 m
 Z
 m Z e	 j d e ƒ d „  ƒ Z d Z e	 j d e ƒ d „  ƒ Z e	 j d e ƒ d	 „  ƒ Z e	 j d e ƒ d
 „  ƒ Z d „  Z e e ƒ Z e	 j d e ƒ d „  ƒ Z d e j f d „  ƒ  YZ d e j f d „  ƒ  YZ d e j f d „  ƒ  YZ e d k rZe j ƒ  n  d S(   i    (   t   print_functiont   absolute_importt   divisionN(   t   unittest_support(   t   roct   intpt   int32t   devicec   	      C` s¤  | j  } d } | d } xƒ | d k rž t j t j ƒ |  | k  r‡ | d |  d d } | d |  d d } | | c | | 7<n  | d 9} | d } q Wt j t j ƒ | | d } t j t j ƒ |  d k rê d | | d <n  d } | } x— | | k  r| d } t j t j ƒ |  | k  r‚| d |  d d } | d |  d d } | | } | | | | <| | c | 7<n  | d 9} qù Wt j t j ƒ | S(   sˆ   Inclusive prefix sum within a single block

    Requires tid should have range [0, data.size) and data.size must be
    power of 2.
    i   i   i    (   t   sizeR   t   barriert   CLK_GLOBAL_MEM_FENCE(	   t   tidt   datat   nt   offsett   dt   ait   bit	   prefixsumt   tmp(    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   device_scan_generic	   s<    	



i@   c         C` sl  t  j ƒ  |  t d @} | d k r? | |  c | |  d 7<n  t  j ƒ  | d k rp | |  c | |  d 7<n  t  j ƒ  | d k r¡ | |  c | |  d 7<n  t  j ƒ  | d k rÒ | |  c | |  d 7<n  t  j ƒ  | d k r| |  c | |  d 7<n  t  j ƒ  | d k r4| |  c | |  d 7<n  t  j ƒ  | rL| |  S| d k rd| |  d Sd Sd S(	   sN   Intra-warp scan

    Note
    ----
    Assume all threads are in lockstep
    i   i   i   i   i   i    i    N(   R   t   wavebarriert	   _WARPSIZE(   R   t   tempt	   inclusivet   lane(    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt	   warp_scan<   s.    






c         C` sP  |  t  d @} |  d ?} | | |  <t j t j ƒ t |  | | ƒ } t j t j ƒ | t  d k ru | |  | | <n  t j t j ƒ | d k r¤ t |  | t ƒ n  t j t j ƒ | d k rÕ | | | d 7} n  t j t j ƒ |  | j d k r| r| | d <q| | | d <n  t j t j ƒ | d } t j t j ƒ | | f S(   s„   
    Args
    ----
    tid:
        thread id
    data: scalar
        input for tid
    temp: shared memory for temporary work
    i   i   i    (   R   R   R	   R
   R   t   TrueR   (   R   R   R   R   R   t   warpidt   warp_scan_resR   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   device_scand   s.    


c         C` s=   t  j d ƒ } t  j ƒ  | | t } t  j | |  ƒ } | S(   Ni    (   R   t   get_local_idR   R   t
   ds_permute(   t   valt   widthR   t   idxt   res(    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt
   shuffle_up™   s
    
c         ` s%   t  j d t ƒ ‡  f d †  ƒ } | S(   NR   c         ` s{  t  j d ƒ } | t d @} t  j ƒ  t |  d ƒ } | d k rU ˆ  |  | ƒ }  n  t  j ƒ  t |  d ƒ } | d k r ˆ  |  | ƒ }  n  t  j ƒ  t |  d ƒ } | d k rÅ ˆ  |  | ƒ }  n  t  j ƒ  t |  d ƒ } | d k rý ˆ  |  | ƒ }  n  t  j ƒ  t |  d ƒ } | d k r5ˆ  |  | ƒ }  n  t  j ƒ  t |  d ƒ } | d k rmˆ  |  | ƒ }  n  t  j ƒ  |  S(   Ni    i   i   i   i   i   i    (   R   R   R   R   R%   (   R!   R   R   t   shuf(   t   dtype(    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   shuf_wave_inclusive_scan¢   s8    






(   R   t   jitR   (   R'   R(   (    (   R'   s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   make_inclusive_scan¡   s    !%c         C` s¶   t  j d ƒ } | t d @} | d ?} t |  ƒ } t  j ƒ  | t d k rZ | | | <n  t  j ƒ  | d k r t | | ƒ n  t  j ƒ  d } | d k r® | | d } n  | | S(   s   
    Args
    ----
    data: scalar
        input for tid
    temp: shared memory for temporary work, requires at least
    threadcount/wavesize storage
    i    i   i   (   R   R   R   t   shuf_wave_inclusive_scan_int32R	   (   R   R   R   R   R   R   t   blocksum(    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   shuf_device_inclusive_scanÍ   s    




t   TestScanc           B` s#   e  Z d  „  Z d „  Z d „  Z RS(   c         C` s¹   t  j d „  ƒ } t j j d d d d ƒj t j ƒ } | j ƒ  } t j d d t j ƒ} | d	 | | ƒ t j	 j
 | d  | d ƒ |  j | d | d ƒ |  j d | d ƒ d  S(
   Nc         S` s   t  j j d d t ƒ} t  j d ƒ } t  j d ƒ } t  j d ƒ } |  | | | <t | | ƒ } | | |  | <| d k r‰ | | | <n  d  S(   Ni@   R'   i    (   R   t   sharedt   arrayR   R   t   get_global_idt   get_group_idR   (   R   t   sumst   sm_dataR   t   gidt   blkidR   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt
   scan_block÷   s    i    i   R   i@   i   R'   iÿÿÿÿ(   i   i@   (   R   R)   t   npt   randomt   randintt   astypeR   t   cumsumt   zerost   testingt   assert_equalt   assertEqual(   t   selfR7   R   t   expectedR3   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   test_single_blockõ   s    'c         C` s  t  j d „  ƒ } t j j d d d d ƒj t j ƒ j d d ƒ } | j d d ƒ } t j	 d d	 t j ƒ} | d | j
 ƒ  | ƒ xv t | j d ƒ D]a } | | } | | } t j j | d
  | d ƒ |  j | d
 | | ƒ |  j d | d ƒ qš Wd  S(   Nc         S` s   t  j j d d t ƒ} t  j d ƒ } t  j d ƒ } t  j d ƒ } |  | | | <t | | ƒ } | | |  | <| d k r‰ | | | <n  d  S(   Ni@   R'   i    (   R   R/   R0   R   R   R1   R2   R   (   R   R3   R4   R   R5   R6   R   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyR7     s    i    i   R   i   i@   t   axisi   R'   iÿÿÿÿiÀ   (   i   i@   (   R   R)   R8   R9   R:   R;   R   t   reshapeR<   R=   t   ravelt   ranget   shapeR>   R?   R@   (   RA   R7   t   nd_datat   nd_expectedR3   t   ndRB   R   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   test_multi_block  s    

c         C` s  t  j d „  ƒ } t j j d d d d ƒj t j ƒ j d d ƒ } | j d d ƒ } t j	 d d	 t j ƒ} | d | j
 ƒ  | ƒ xy t | j d ƒ D]d } | | } | | } t j j | d
  | d ƒ t j j | d
 | | ƒ |  j d | d ƒ qš Wd  S(   Nc         S` s„   t  j j d d t ƒ} t  j d ƒ } t  j d ƒ } t  j d ƒ } |  | | | <t | | ƒ } | | |  | <| | | | f <d  S(   Ni€   R'   i    (   R   R/   R0   R   R   R1   R2   R   (   R   R3   R4   R   R5   R6   R   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyR7   '  s    i    i   R   i   i€   RD   i   R'   iÿÿÿÿi€  (   i   i€   (   i   i€   (   R   R)   R8   R9   R:   R;   R   RE   R<   R=   RF   RG   RH   R>   R?   R@   (   RA   R7   RI   RJ   R3   RK   RB   R   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   test_multi_large_block&  s    

(   t   __name__t
   __module__RC   RL   RM   (    (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyR.   ô   s   		t   TestFasterScanc           B` s#   e  Z d  „  Z d „  Z d „  Z RS(   c         C` s¹   t  j d „  ƒ } t j j d d d d ƒj t j ƒ } | j ƒ  } t j d d t j ƒ} | d	 | | ƒ t j	 j
 | d  | d ƒ |  j | d | d ƒ |  j d | d ƒ d  S(
   Nc         S` s‹   t  j j d d t ƒ} t  j d ƒ } t  j d ƒ } t  j d ƒ } t | |  | | t ƒ \ } } | |  | <| d k r‡ | | | <n  d  S(   Ni@   R'   i    (	   R   R/   R0   R   R   R1   R2   R   t   False(   R   R3   R4   R   R5   R6   t   scanvalR   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyR7   C  s    
i    i   R   i@   i   R'   iÿÿÿÿ(   i   i@   (   R   R)   R8   R9   R:   R;   R   R<   R=   R>   R?   R@   (   RA   R7   R   RB   R3   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyRC   B  s    'c         C` sÍ   t  j d „  ƒ } t j j d d d d ƒj t j ƒ } | j ƒ  } t j d d t j ƒ} | d	 | | ƒ t j	 j
 | d  | d ƒ t | ƒ t | ƒ |  j | d | d ƒ |  j d | d ƒ d  S(
   Nc         S` s‹   t  j j d d t ƒ} t  j d ƒ } t  j d ƒ } t  j d ƒ } t | |  | | t ƒ \ } } | |  | <| d k r‡ | | | <n  d  S(   Ni   R'   i    (	   R   R/   R0   R   R   R1   R2   R   RQ   (   R   R3   R4   R   R5   R6   RR   R   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyR7   Z  s    
i    i   R   i   i   R'   iÿÿÿÿ(   i   i   (   R   R)   R8   R9   R:   R;   R   R<   R=   R>   R?   t   printR@   (   RA   R7   R   RB   R3   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   test_single_larger_blockY  s    '

c         C` s  t  j d „  ƒ } t j j d d d d ƒj t j ƒ j d d ƒ } | j d d ƒ } t j	 d d	 t j ƒ} | d | j
 ƒ  | ƒ xy t | j d ƒ D]d } | | } | | } t j j | d
  | d ƒ t j j | d
 | | ƒ |  j d | d ƒ qš Wd  S(   Nc         S` s‚   t  j j d d t ƒ} t  j d ƒ } t  j d ƒ } t  j d ƒ } t | |  | | t ƒ \ } } | |  | <| | | | f <d  S(   Ni€   R'   i    (	   R   R/   R0   R   R   R1   R2   R   RQ   (   R   R3   R4   R   R5   R6   RR   R   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyR7   r  s    
i    i   R   i   i€   RD   i   R'   iÿÿÿÿi€  (   i   i€   (   i   i€   (   R   R)   R8   R9   R:   R;   R   RE   R<   R=   RF   RG   RH   R>   R?   R@   (   RA   R7   RI   RJ   R3   RK   RB   R   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyRM   q  s    

(   RN   RO   RC   RT   RM   (    (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyRP   @  s   		t   TestShuffleScanc           B` s,   e  Z d  „  Z d „  Z d „  Z d „  Z RS(   c         C` s¹   t  j d „  ƒ } t j d d t j ƒ} t j j d ƒ xa t d ƒ D]S } t j j d | j	 | j	 ƒ j
 t j ƒ } t j | ƒ } | d | | | ƒ qG Wt j j | | | ƒ d  S(   Nc         S` s1   t  j d ƒ } t  j |  | | | ƒ | | <d  S(   Ni    (   R   R   R    (   t   inpt   maskt   outR   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   foo  s    i@   R'   i    i
   i   (   i   i@   (   R   R)   R8   t   arangeR   R9   t   seedRG   R:   R   R;   t
   zeros_likeR>   R?   (   RA   RY   RV   t   iRW   RX   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   test_shuffle_ds_permuteŽ  s    *c         C` sÞ   t  j d „  ƒ } t j d d t j ƒ} t j | ƒ } | d	 | | ƒ | j d d ƒ } | j | j ƒ } xi t | j d ƒ D]T } t j	 j
 | d d  d … f | d d d  … f ƒ t j	 j
 | d
 | d ƒ q‚ Wd  S(   Nc         S` s*   t  j d ƒ } t |  | d ƒ | | <d  S(   Ni    i   (   R   R1   R%   (   RV   RX   R5   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyRY     s    i€   R'   i   i   i@   i    iÿÿÿÿ(   i   i€   (   i    iÿÿÿÿ(   i    i    (   R   R)   R8   RZ   R   R\   RE   RH   RG   R>   R?   (   RA   RY   RV   RX   R]   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   test_shuffle_upœ  s    3c         C` sg   t  j d „  ƒ } t j d d t j ƒ} t j | ƒ } | d | | ƒ t j j | j ƒ  | ƒ d  S(   Nc         S` s'   t  j d ƒ } t |  | ƒ | | <d  S(   Ni    (   R   R1   R+   (   RV   RX   R5   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyRY   ®  s    i@   R'   i   (   i   i@   (	   R   R)   R8   RZ   R   R\   R>   R?   R<   (   RA   RY   RV   RX   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   test_shuf_wave_inclusive_scan­  s
    c         C` ss   t  j d „  ƒ } t j d d t j ƒ} t j | ƒ } | d | j f | | ƒ t j j t j	 | ƒ | ƒ d  S(   Nc         S` sB   t  j d ƒ } t  j j d d t ƒ} t |  | | ƒ | | <d  S(   Ni    i   R'   (   R   R1   R/   R0   R   R-   (   RV   RX   R5   R   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyRY   ¹  s    i€   R'   i   (
   R   R)   R8   RZ   R   R\   R   R>   R?   R<   (   RA   RY   RV   RX   (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   test_shuf_device_inclusive_scan¸  s
    (   RN   RO   R^   R_   R`   Ra   (    (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyRU   Œ  s   			t   __main__(   t
   __future__R    R   R   t   numpyR8   t   numbaR   t   unittestR   R   R   R)   R   R   R   R   R   R%   R*   R+   R-   t   TestCaseR.   RP   RU   RN   t   main(    (    (    s>   lib/python2.7/site-packages/numba/roc/tests/hsapy/test_scan.pyt   <module>   s    0(5	)'LL9