B
     \N:              
   @   s  d Z ddlmZmZ ddlZddlZddlZddlZddlm	Z
 ddlm	Z ddlZddlm  mZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZ dd	 Ze Z d
d Z!G dd dej"Z#dd Z$G dd dej%Z&dd Z'e
 Z(y
e Z)W nX e*k
rT Z+ z8dZ,e-e,e.e+  ddl/m0Z0 e0dd Z1e1 Z)W ddZ+[+X Y nX da2da3dd Z4dd Z5dej6kZ7e8ej69ddZ:e7re:re5  dS )a  
This file implements the code-generator for parallel-vectorize.

ParallelUFunc is the platform independent base class for generating
the thread dispatcher.  This thread dispatcher launches threads
that execute the generated function of UFuncCore.
UFuncCore is subclassed to specialize for the input/output types.
The actual workload is invoked inside the function generated by UFuncCore.
UFuncCore also defines a work-stealing mechanism that allows idle threads
to steal works from other threads.
    )print_functionabsolute_importN)RLock)ufuncbuilder)as_dtype)typesutilscgutilsconfigc              C   s   t j} | dk rtd| S )z*
    Gets the available thread count.
       z(Number of threads specified must be > 0.)r
   ZNUMBA_NUM_THREADS
ValueError)t r   6lib/python3.7/site-packages/numba/npyufunc/parallel.pyget_thread_count   s    r   c                s  t jd}t j||tjt jt j t jt jt jg}|	 
d}|d}|j|dt| d}	|	d}
t |
|	j\}}}}|}| }| }fdd t|jd	 }t jt j gd
 gd  }|j|dd}|tj|} fdd||||gD }||g| fdd||fD   || ||   || ||  ||	j |	j fS )a  Wrap the original CPU ufunc/gufunc with a parallel dispatcher.
    This function will wrap gufuncs and ufuncs something like.

    Args
    ----
    ctx
        numba's codegen context

    innerfunc
        llvm function of the original CPU gufunc

    sig
        type signature of the gufunc

    inner_ndim
        inner dimension of the gufunc (this is len(sig.args) in the case of a
        ufunc)

    Details
    -------

    The kernel signature looks like this:

    void kernel(char **args, npy_intp *dimensions, npy_intp* steps, void* data)

    args - the input arrays + output arrays
    dimensions - the dimensions of the arrays
    steps - the step size for the array (this is like sizeof(type))
    data - any additional data

    The parallel backend then stages multiple calls to this kernel concurrently
    across a number of threads. Practically, for each item of work, the backend
    duplicates `dimensions` and adjusts the first entry to reflect the size of
    the item of work, it also forms up an array of pointers into the args for
    offsets to read/write from/to with respect to its position in the items of
    work. This allows the same kernel to be used for each item of work, with
    simply adjusted reads/writes/domain sizes and is safe by virtue of the
    domain partitioning.

    NOTE: The execution backend is passed the requested thread count, but it can
    choose to ignore it (TBB)!
       Zparallelgufuncwrapperzparallel.gufunc.wrapperz.kernel.)name c                s     | S )N)Zbitcast)arg)builder
byte_ptr_tr   r   as_void_ptrs   s    z(build_gufunc_kernel.<locals>.as_void_ptrr         numba_parallel_forc                s   g | ]} |qS r   r   ).0x)r   r   r   
<listcomp>   s    z'build_gufunc_kernel.<locals>.<listcomp>c                s   g | ]} |qS r   r   )r   r   )intp_tr   r   r      s    )!lcZTypeintZpointerZget_value_typer   ZintpZfunctionZvoidZcodegenZcreate_libraryZcreate_ir_moduleZadd_functionstrZappend_basic_blockZBuilderargsZget_python_apiZ
gil_ensureZsave_threadlenZget_or_insert_functionZget_constantZuintpZinttoptrZcallZrestore_threadZgil_releaseZret_voidZadd_ir_moduleZadd_linking_libraryZget_pointer_to_functionr   )libraryctx	innerfuncsig
inner_ndimZbyte_tZfntyZ
wrapperlibmodZlfuncZbb_entryr"   Z
dimensionsZstepsdataZpyapiZ	gil_stateZthread_stateZarray_countZparallel_for_typarallel_forZfnptrZ	innerargsr   )r   r   r   r   r   build_gufunc_kernel,   sB    ,










r,   c               @   s   e Zd Zdd ZdS )ParallelUFuncBuilderc       
      C   sd   t   |j}|j}|j}|jj}t|||||}dd |jD }|t	
|jjj d}	|||	fS )Nc             S   s   g | ]}t |jjqS r   )npdtyper   num)r   ar   r   r   r      s    z.ParallelUFuncBuilder.build.<locals>.<listcomp>r   )_launch_threadstarget_context	signaturer$   ZfndescZllvm_func_namebuild_ufunc_wrapperr"   appendr.   r/   Zreturn_typer   r0   )
selfcresr'   r%   r4   r$   fnameptr	dtypenumsZ	keepaliver   r   r   build   s    zParallelUFuncBuilder.buildN)__name__
__module____qualname__r<   r   r   r   r   r-      s   r-   c             C   s4   t j| |||d|d}t| |||t|j\}}|S )NF)Zobjmoder8   )r   r5   r,   r#   r"   )r$   r%   r9   r4   r8   r&   r:   r   r   r   r   r5      s    

r5   c                   s,   e Zd Zddi f fdd	Zdd Z  ZS )ParallelGUFuncBuilderNFc                s.   | tdd tt| j|||||d d S )NT)Znopython)py_funcr4   identitycachetargetoptions)updatedictsuperr@   __init__)r7   rA   r4   rB   rC   rD   )	__class__r   r   rH      s    zParallelGUFuncBuilder.__init__c             C   sn   t   t| j|| j| j| jd\}}}g }x8|jjD ],}t|t	j
rL|j}n|}|t|j q4W |||fS )zJ
        Returns (dtype numbers, function ptr, EnvironmentObject)
        )rC   )r2   build_gufunc_wrapperrA   sinsoutrC   r4   r"   
isinstancer   ZArrayr/   r6   r   r0   )r7   r8   r:   envwrapper_namer;   r1   Ztyr   r   r   r<      s    zParallelGUFuncBuilder.build)r=   r>   r?   rH   r<   __classcell__r   r   )rI   r   r@      s   r@   c             C   sz   |j }|j}|j}tj| ||||d\}}	}
tdd |D }tdd |D }t||B }t|||||\}}||	|fS )N)rC   c             s   s   | ]}|D ]
}|V  q
qd S )Nr   )r   termsymr   r   r   	<genexpr>   s    z'build_gufunc_wrapper.<locals>.<genexpr>c             s   s   | ]}|D ]
}|V  q
qd S )Nr   )r   rQ   rR   r   r   r   rS      s    )r$   r3   r4   r   rJ   setr#   r,   )rA   r8   rK   rL   rC   r$   r%   r4   r&   rN   rO   Zsym_inZsym_outr(   r:   r   r   r   r   rJ      s    rJ   a@  Could not obtain multiprocessing lock due to OS level error: %s
A likely cause of this problem is '/dev/shm' is missing orread-only such that necessary semaphores cannot be written.
*** The responsibility of ensuring multiprocessing safe access to this initialization sequence/module import is deferred to the user! ***
)contextmanagerc               c   s
   d V  d S )Nr   r   r   r   r   nop   s    rV   Fc               C   s   t dkrtdnt S dS )zM
    Get the name of the threading layer in use for parallel CPU targets
    Nz#Threading layer is not initialized.)_threading_layerr   r   r   r   r   threading_layer   s    
rX   c           
      s6  t & t trd S ddlm} m}m} dd fdd}ttj	
 }ddd	g}d }t d
k}t dk}t  d d< d d< g }	 fdd}
||krވ|}|s|dkr|	d n|dkr|r|	d |}n|dkr~dg}|	d |dkrnl|dkr*|r|	d |d nF|dkr`|sD|d |rT|	d |d	 nd}t|| ||\}}nH|dkr||\}}|s|	d |r|	d nd}t|| |s|
|	 td|j td|j td|j | d ||j}|t |adaW d Q R X W d Q R X d S )Nr   )	CFUNCTYPEc_void_pc_intc             S   s   d}|  dr6yddlm} W q tk
r2   Y qX nZ|  drhyddlm} W q tk
rd   Y qX n(|  drddlm} nd	}t||  |S )
zZ
                Loads a specific threading layer backend based on string
                Ntbbr   )tbbpoolomp)omppool	workqueue)r`   z/Unknown value specified for threading layer: %s)
startswithr   r]   ImportErrorr_   r`   r   )backendlibmsgr   r   r   select_known_backend  s     


z-_launch_threads.<locals>.select_known_backendc                s0   d}x"| D ]} |}|dk	r
P q
W d}||fS )z_
                Selects from presented backends and returns the first working
                Nr   r   )Zbackendsrd   rc   )rf   r   r   select_from_backends,  s    
z-_launch_threads.<locals>.select_from_backendsr\   r^   r`   ZDarwinZLinuxz3Intel TBB is required, try:
$ conda/pip install tbbZTBBz?Intel OpenMP is required, try:
$ conda/pip install intel-openmpZOSX_OMPc                st   d}d}t | dkrd}t | dkr4| | d   }t | dkrdd fdd| D }|d	|  }t|| d S )
Nz&No threading layer could be loaded.
%szHINT:
%sr   r   r   z
OR
c                s   g | ]} | qS r   r   )r   r   )err_helpersr   r   r   N  s    z<_launch_threads.<locals>.raise_with_hint.<locals>.<listcomp>z
One of:
%s)r#   joinr   )ZrequirederrmsgZhintmsgZhintZoptions)rh   r   r   raise_with_hintF  s    z(_launch_threads.<locals>.raise_with_hint)
threadsafeforksafesafern   rl   rm   z+No threading layer available for purpose %sdefaultz7The threading layer requested '%s' is unknown to Numba.r   do_scheduling_signeddo_scheduling_unsignedT)_backend_init_process_lock_backend_init_thread_lock_is_initializedZctypesrY   rZ   r[   r!   r
   ZTHREADING_LAYERlowerplatformsystemrF   r6   r   llZ
add_symbolr+   rp   rq   launch_threadsNUM_THREADSrW   )rY   rZ   r[   rg   r   Znamedbackendsrd   Z_IS_OSXZ	_IS_LINUXZrequirementsrk   ZlibnameZ	availablere   ry   r   )rh   rf   r   r2   
  sx    











r2   ZNUMBA_DYLD_WORKAROUND);__doc__Z
__future__r   r   sysosrv   warningsZ	threadingr   ZthreadRLockZmultiprocessingZ	procRLockZnumpyr.   Zllvmlite.llvmpy.coreZllvmpyZcorer   Zllvmlite.bindingZbindingrx   Znumba.npyufuncr   Znumba.numpy_supportr   Znumbar   r   r	   r
   r   rz   r,   ZUFuncBuilderr-   r5   ZGUFuncBuilderr@   rJ   rs   rr   OSErrorere   warnr!   
contextlibrU   rV   rt   rW   rX   r2   environZ_DYLD_WORKAROUND_SETr    getZ_DYLD_WORKAROUND_VALr   r   r   r   <module>   sL   
f'

 
