B
     \z5                 @   s  d Z ddlmZmZmZ ddlZddlZddlm  m	Z
 ddlmZmZmZmZ ddlmZ G dd deZeejZG d	d
 d
eZG dd deZG dd deZG dd deZedeZedeZdd Zdd Zdd Z ejde ddZ!dd Z"ejde"ddZ#G dd  d eZ$G d!d" d"eZ%G d#d$ d$eZ&G d%d& d&eZ'G d'd( d(eZ(G d)d* d*eZ)G d+d, d,eZ*G d-d. d.eZ+G d/d0 d0eZ,G d1d2 d2eZ-G d3d4 d4eZ.G d5d6 d6eZ/d7d8 Z0d9d: Z1G d;d< d<eZ2d=d> Z3G d?d@ d@eZ4dAdB Z5G dCdD dDeZ6G dEdF dFeZ7G dGdH dHeZ8G dIdJ dJeZ9G dKdL dLeZ:G dMdN dNeZ;G dOdP dPeZ<G dQdR dReZ=dS )Sz1
This scripts specifies all PTX special objects.
    )print_functionabsolute_importdivisionN)typesirtypingmacro   )nvvmc               @   s(   e Zd ZdZdZdZdd Zdd ZdS )	StubzlA stub object to represent special objects which is meaningless
    outside the context of CUDA-python.
    z<ptx special value> c             C   s   t d|  d S )Nz%s is not instantiable)NotImplementedError)clsr   r   /lib/python3.7/site-packages/numba/cuda/stubs.py__new__   s    zStub.__new__c             C   s   | j S )N)_description_)selfr   r   r   __repr__   s    zStub.__repr__N)__name__
__module____qualname____doc__r   	__slots__r   r   r   r   r   r   r      s
   r   c               @   s8   e Zd ZdZdZedeZedeZ	edeZ
dS )	threadIdxa  
    The thread indices in the current thread block, accessed through the
    attributes ``x``, ``y``, and ``z``. Each index is an integer spanning the
    range from 0 inclusive to the corresponding value of the attribute in
    :attr:`numba.cuda.blockDim` exclusive.
    z<threadIdx.{x,y,z}>ztid.xztid.yztid.zN)r   r   r   r   r   r   MacroSREG_SIGNATURExyzr   r   r   r   r      s
   r   c               @   s8   e Zd ZdZdZedeZedeZ	edeZ
dS )blockIdxa  
    The block indices in the grid of thread blocks, accessed through the
    attributes ``x``, ``y``, and ``z``. Each index is an integer spanning the
    range from 0 inclusive to the corresponding value of the attribute in
    :attr:`numba.cuda.gridDim` exclusive.
    z<blockIdx.{x,y,z}>zctaid.xzctaid.yzctaid.zN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   -   s
   r   c               @   s4   e Zd ZdZedeZedeZedeZ	dS )blockDimz
    The shape of a block of threads, as declared when instantiating the
    kernel.  This value is the same for all threads in a given kernel, even
    if they belong to different blocks (i.e. each block is "full").
    zntid.xzntid.yzntid.zN)
r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    ;   s   r    c               @   s8   e Zd ZdZdZedeZedeZ	edeZ
dS )gridDimzh
    The shape of the grid of blocks, accressed through the attributes ``x``,
    ``y``, and ``z``.
    z<gridDim.{x,y,z}>znctaid.xznctaid.yznctaid.zN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r!   F   s
   r!   warpsizelaneidc               C   s   d S )Nr   r   r   r   r   _ptx_grid1dW   s    r$   c               C   s   d S )Nr   r   r   r   r   _ptx_grid2dZ   s    r%   c             C   sp   | dkrd}t j}n@| dkr0d}t t jd}n$| dkrLd}t t jd}ntdtj|t|t j| gdS )	a  grid(ndim)

    Return the absolute position of the current thread in the entire
    grid of blocks.  *ndim* should correspond to the number of dimensions
    declared when instantiating the kernel.  If *ndim* is 1, a single integer
    is returned.  If *ndim* is 2 or 3, a tuple of the given number of
    integers is returned.

    Computation of the first integer is as follows::

        cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x

    and is similar for the other two indices, but using the ``y`` and ``z``
    attributes.
    r	   zptx.grid.1d   zptx.grid.2d   zptx.grid.3dzargument can only be 1, 2, 3)args)	r   int32UniTuple
ValueErrorr   	Intrinsicr   	signatureintp)ndimfnamerestyper   r   r   grid_expand]   s    r2   zptx.gridT)callablec             C   sp   | dkrd}t j}n@| dkr0d}t t jd}n$| dkrLd}t t jd}ntdtj|t|t j| gdS )	ax  
    Return the absolute size (or shape) in threads of the entire grid of
    blocks. *ndim* should correspond to the number of dimensions declared when
    instantiating the kernel.

    Computation of the first integer is as follows::

        cuda.blockDim.x * cuda.gridDim.x

    and is similar for the other two indices, but using the ``y`` and ``z``
    attributes.
    r	   zptx.gridsize.1dr&   zptx.gridsize.2dr'   zptx.gridsize.3dzargument can only be 1, 2 or 3)r(   )	r   r)   r*   r+   r   r,   r   r-   r.   )r/   r0   r1   r   r   r   gridsize_expand   s    r4   zptx.gridsizec               @   s   e Zd ZdZdZdS )syncthreadsa  
    Synchronize all threads in the same thread block.  This function implements
    the same pattern as barriers in traditional multi-threaded programming: this
    function waits until all threads in the block call it, at which point it
    returns control to all its callers.
    z<syncthreads()>N)r   r   r   r   r   r   r   r   r   r5      s   r5   c               @   s   e Zd ZdZdZdS )syncthreads_countz
    syncthreads_count(predictate)

    An extension to numba.cuda.syncthreads where the return value is a count
    of the threads where predicate is true.
    z<syncthreads_count()>N)r   r   r   r   r   r   r   r   r   r6      s   r6   c               @   s   e Zd ZdZdZdS )syncthreads_andz
    syncthreads_and(predictate)

    An extension to numba.cuda.syncthreads where 1 is returned if predicate is
    true for all threads or 0 otherwise.
    z<syncthreads_and()>N)r   r   r   r   r   r   r   r   r   r7      s   r7   c               @   s   e Zd ZdZdZdS )syncthreads_orz
    syncthreads_or(predictate)

    An extension to numba.cuda.syncthreads where 1 is returned if predicate is
    true for any thread or 0 otherwise.
    z<syncthreads_or()>N)r   r   r   r   r   r   r   r   r   r8      s   r8   c               @   s   e Zd ZdZdZdS )syncwarpzP
    syncwarp(mask)

    Synchronizes a masked subset of threads in a warp.
    z<warp_sync()>N)r   r   r   r   r   r   r   r   r   r9      s   r9   c               @   s   e Zd ZdZdZdS )shfl_sync_intrinsicz
    shfl_sync_intrinsic(mask, mode, value, mode_offset, clamp)

    Nvvm intrinsic for shuffling data across a warp
    docs.nvidia.com/cuda/nvvm-ir-spec/index.html#nvvm-intrin-warp-level-datamove
    z<shfl_sync()>N)r   r   r   r   r   r   r   r   r   r:      s   r:   c               @   s   e Zd ZdZdZdS )vote_sync_intrinsicz
    vote_sync_intrinsic(mask, mode, predictate)

    Nvvm intrinsic for performing a reduce and broadcast across a warp
    docs.nvidia.com/cuda/nvvm-ir-spec/index.html#nvvm-intrin-warp-level-vote
    z<vote_sync()>N)r   r   r   r   r   r   r   r   r   r;      s   r;   c               @   s   e Zd ZdZdZdS )match_any_syncz
    match_any_sync(mask, value)

    Nvvm intrinsic for performing a compare and broadcast across a warp.
    Returns a mask of threads that have same value as the given value from within the masked warp.
    z<match_any_sync()>N)r   r   r   r   r   r   r   r   r   r<      s   r<   c               @   s   e Zd ZdZdZdS )match_all_synca  
    match_all_sync(mask, value)

    Nvvm intrinsic for performing a compare and broadcast across a warp.
    Returns a tuple of (mask, pred), where mask is a mask of threads that have
    same value as the given value from within the masked warp, if they
    all have the same value, otherwise it is 0. Pred is a boolean of whether
    or not all threads in the mask warp have the same warp.
    z<match_all_sync()>N)r   r   r   r   r   r   r   r   r   r=      s   	r=   c               @   s   e Zd ZdZdZdS )threadfence_blockz.
    A memory fence at thread block level
    z<threadfence_block()>N)r   r   r   r   r   r   r   r   r   r>     s   r>   c               @   s   e Zd ZdZdZdS )threadfence_systemz8
    A memory fence at system level: across devices
    z<threadfence_system()>N)r   r   r   r   r   r   r   r   r   r?     s   r?   c               @   s   e Zd ZdZdZdS )threadfencez(
    A memory fence at device level
    z<threadfence()>N)r   r   r   r   r   r   r   r   r   r@     s   r@   c             C   s4   t | tr| S t | tr| fS tdt| d S )Nzinvalid type for shape; got {0})
isinstancetupleint	TypeErrorformattype)shaper   r   r   _legalize_shape  s
    

rH   c             C   sP   t | } t| }d}t||d}t|ttj|tj}t	j
||| |fdS )Nzptx.smem.allocC)r(   )rH   lenr   Arrayr   r-   r*   r.   Anyr   r,   )rG   dtyper/   r0   r1   sigr   r   r   shared_array&  s    rO   c               @   s*   e Zd ZdZdZejdedddgdZdS )	sharedz"
    Shared memory namespace.
    z<shared>zshared.arrayTrG   rM   )r3   argnamesN)	r   r   r   r   r   r   r   rO   arrayr   r   r   r   rP   /  s
   
	rP   c             C   sP   t | } t| }d}t||d}t|ttj|tj}t	j
||| |fdS )Nzptx.lmem.allocrI   )r(   )rH   rJ   r   rK   r   r-   r*   r.   rL   r   r,   )rG   rM   r/   r0   r1   rN   r   r   r   local_arrayF  s    rS   c               @   s*   e Zd ZdZdZejdedddgdZdS )	localz!
    Local memory namespace.
    z<local>zlocal.arrayTrG   rM   )r3   rQ   N)	r   r   r   r   r   r   r   rS   rR   r   r   r   r   rT   O  s
   
rT   c             C   s:   d}ddl m} |j| }t||}tj||| gdS )Nzptx.cmem.aryliker	   )CUDATargetDesc)r(   )Z
descriptorrU   Z	typingctxZresolve_argument_typer   r-   r   r,   )Zndarrayr0   rU   ZarytyrN   r   r   r   const_array_likeb  s
    rV   c               @   s(   e Zd ZdZdZejdeddgdZdS )constz$
    Constant memory namespace.
    z<const>zconst.array_likeTZary)r3   rQ   N)	r   r   r   r   r   r   r   rV   Z
array_liker   r   r   r   rW   l  s
   rW   c               @   s   e Zd ZdZdS )popczK
    popc(val)

    Returns the number of set bits in the given value.
    N)r   r   r   r   r   r   r   r   rX   |  s   rX   c               @   s   e Zd ZdZdS )brevzs
    brev(val)

    Reverse the bitpattern of an integer value; for example 0b10110110
    becomes 0b01101101.
    N)r   r   r   r   r   r   r   r   rY     s   rY   c               @   s   e Zd ZdZdS )clzzF
    clz(val)

    Counts the number of leading zeros in a value.
    N)r   r   r   r   r   r   r   r   rZ     s   rZ   c               @   s   e Zd ZdZdS )ffsz^
    ffs(val)

    Find the position of the least significant bit set to 1 in an integer.
    N)r   r   r   r   r   r   r   r   r[     s   r[   c               @   s   e Zd ZdZdS )selpzp
    selp(a, b, c)

    Select between source operands, based on the value of the predicate source operand.
    N)r   r   r   r   r   r   r   r   r\     s   r\   c               @   s   e Zd ZdZdS )fmazE
    fma(a, b, c)

    Perform the fused multiply-add operation.
    N)r   r   r   r   r   r   r   r   r]     s   r]   c               @   sT   e Zd ZdZdZG dd deZG dd deZG dd deZG d	d
 d
eZ	dS )atomicz$Namespace for atomic operations
    z<atomic>c               @   s   e Zd ZdZdS )z
atomic.addzadd(ary, idx, val)

        Perform atomic ary[idx] += val. Supported on int32, float32, and
        float64 operands only.

        Returns the old value at the index location as if it is loaded
        atomically.
        N)r   r   r   r   r   r   r   r   add  s   r_   c               @   s   e Zd ZdZdS )z
atomic.maxa  max(ary, idx, val)

        Perform atomic ary[idx] = max(ary[idx], val). NaN is treated as a
        missing value, so max(NaN, n) == max(n, NaN) == n. Note that this
        differs from Python and Numpy behaviour, where max(a, b) is always
        a when either a or b is a NaN.

        Supported on int32, int64, uint32, uint64, float32, float64 operands only.

        Returns the old value at the index location as if it is loaded
        atomically.
        N)r   r   r   r   r   r   r   r   max  s   r`   c               @   s   e Zd ZdZdS )z
atomic.minav  min(ary, idx, val)

        Perform atomic ary[idx] = min(ary[idx], val). NaN is treated as a
        missing value, so min(NaN, n) == min(n, NaN) == n. Note that this
        differs from Python and Numpy behaviour, where min(a, b) is always
        a when either a or b is a NaN.

        Supported on int32, int64, uint32, uint64, float32, float64 operands only.
        N)r   r   r   r   r   r   r   r   min  s   	ra   c               @   s   e Zd ZdZdS )zatomic.compare_and_swapzcompare_and_swap(ary, old, val)

        Conditionally assign ``val`` to the first element of an 1D array ``ary``
        if the current value matches ``old``.

        Returns the current value as if it is loaded atomically.
        N)r   r   r   r   r   r   r   r   compare_and_swap  s   rb   N)
r   r   r   r   r   r   r_   r`   ra   rb   r   r   r   r   r^     s   
r^   )>r   Z
__future__r   r   r   operatorZnumpyZllvmlite.llvmpy.coreZllvmpyZcoreZlcZnumbar   r   r   r   Zcudadrvr
   objectr   r-   r)   r   r   r   r    r!   r   r"   r#   r$   r%   r2   Zgridr4   Zgridsizer5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rH   rO   rP   rS   rT   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r   r   r   r   <module>   s\   


	



			
	


