B
    T\0                 @   s  d dl mZmZmZ d dlZd dlZd dlZd dl	m
Z
 d dlmZ ddlmZ ddlmZ ed	krrd d
l	mZ nd d
lmZ edkrddiZni ZdEddZdFddZdGddZdHddZdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ ZdId%d&Zd'd( Zd)d* Z d+d, Z!d-d. Z"d/d0 Z#d1d2 Z$d3d4 Z%dJd5d6Z&d7d8 Z'd9d: Z(d;d< Z)ed	k rld=d> Z*nd?d> Z*ed@Z+dKdBd@Z,e+-ej.ej/ej0fdLdCdDZ1dS )M    )print_functionabsolute_importdivisionN)is_categorical_dtype)	partition   )PANDAS_VERSION   )Dispatchz0.20.0)union_categoricalsz0.23sortFc             C   s$   |dkr| j | S | j ||f S dS )z"
    .loc for known divisions
    N)loc)dfiindexercindexer r   5lib/python3.7/site-packages/dask/dataframe/methods.pyr      s    
r   c             C   s   | j d d |f S )N)iloc)r   r   r   r   r   r   $   s    r   c             C   s:   yt | ||S  tk
r4   | dj dd|f S X dS )z$
    .loc for unknown divisions
    r   N)r   KeyErrorhead)r   r   r   r   r   r   try_loc(   s    r   Tc       	      C   s   | j r
| S |dkrr| jjsr|dk	rD|r6| | j|k } n| | j|k } |dk	rn|r`| | j|k } n| | j|k  } | S t| ||| }|s|j|d|}|jd| }|s|j|d|}|j|d }|S )aY  Index slice start/stop. Can switch include/exclude boundaries.

    Examples
    --------
    >>> df = pd.DataFrame({'x': [10, 20, 30, 40, 50]}, index=[1, 2, 2, 3, 4])
    >>> boundary_slice(df, 2, None)
        x
    2  20
    2  30
    3  40
    4  50
    >>> boundary_slice(df, 1, 3)
        x
    1  10
    2  20
    2  30
    3  40
    >>> boundary_slice(df, 1, 3, right_boundary=False)
        x
    1  10
    2  20
    2  30

    Empty input DataFrames are returned

    >>> df_empty = pd.DataFrame()
    >>> boundary_slice(df_empty, 1, 3)
    Empty DataFrame
    Columns: []
    Index: []
    r   Nleftright)emptyindexZis_monotonicgetattrZget_slice_boundr   )	r   startstopZright_boundaryZleft_boundaryZkindresultZright_indexZ
left_indexr   r   r   boundary_slice2   s(    !r   c             C   s   t |  S )N)pdZnotnullsum)xr   r   r   index_countq   s    r#   c          	   C   sL   y*t jdd t d | | S Q R X W n tk
rF   ttjS X d S )NT)recordalways)warningscatch_warningssimplefilterZeroDivisionErrornpfloat64nan)snr   r   r   mean_aggregatev   s    
r/   c          	   C   st   yRt jdd$ t d | | || d  }W d Q R X |dkrP|| ||  }|S  tk
rn   ttjS X d S )NT)r$   r%   r	   r   )r&   r'   r(   r)   r*   r+   r,   )Zx2r"   r.   Zddofr   r   r   r   var_aggregate   s    
r0   c       
      C   s   t | dkst| \}}}}}}t|tjr2tjntj}|||||gddddgd}dd |j D |_||gd	gd}	tj|||	gft	S )
N   countmeanstdmin)r   c             S   s   g | ]}d  |d qS )z{0:g}%d   )format).0lr   r   r   
<listcomp>   s    z&describe_aggregate.<locals>.<listcomp>max)
lenAssertionError
isinstancer    Series	DataFramer   tolistconcatconcat_kwargs)
valuesr2   r3   r4   r5   qr;   typZpart1Zpart3r   r   r   describe_aggregate   s    rG   c             C   sH   t | tjtjfr4| j| |k |  B || jd dS | |k r@| S |S d S )Nr   )axis)r>   r    r?   r@   whereisnullndim)r"   yr   r   r   cummin_aggregate   s    "rM   c             C   sH   t | tjtjfr4| j| |k|  B || jd dS | |kr@| S |S d S )Nr   )rH   )r>   r    r?   r@   rI   rJ   rK   )r"   rL   r   r   r   cummax_aggregate   s    "rN   c             G   s   t td|}| jf |S )Nr	   )dictr   assign)r   Zpairskwargsr   r   r   rP      s    rP   c             C   s   t j|  |dS )N)name)r    r?   unique)r"   Zseries_namer   r   r   rS      s    rS   c             C   s   | j dd S )Nr   )level)groupbyr!   )r"   r   r   r   value_counts_combine   s    rV   c             C   s   | j dd jddS )Nr   )rT   F)Z	ascending)rU   r!   Zsort_values)r"   r   r   r   value_counts_aggregate   s    rW   c             C   s   | j S )N)nbytes)r"   r   r   r   rX      s    rX   c             C   s   | j S )N)size)r"   r   r   r   rY      s    rY   c             C   s   | j S )N)rD   )r   r   r   r   rD      s    rD   c             C   s,   t j|}t| dkr(| j|||dS | S )Nr   )Zrandom_statefracreplace)r*   ZrandomZRandomStater<   sample)r   staterZ   r[   Zrsr   r   r   r\      s    r\   c             C   s    | j |dd} | j|| _| S )Nr   )rH   )dropcolumnsastype)r   r_   dtyper   r   r   drop_columns   s    rb   c             C   s2   | j |d}|r.| jjdd r.td|S )N)methodr   )rH   zAll NaN partition encountered in `fillna`. Try using ``df.repartition`` to increase the partition size, or specify `limit` in `fillna`.)ZfillnarJ   rD   allany
ValueError)r   rc   Zcheckoutr   r   r   fillna_check   s    rh   c             C   s   | j dd S )Nr   )rT   )rU   r!   )r   r   r   r   	pivot_agg   s    ri   c             C   s   t j| |||ddS )Nr!   )r   r_   rD   aggfunc)r    pivot_table)r   r   r_   rD   r   r   r   	pivot_sum   s    
rl   c             C   s   t j| |||ddtjS )Nr2   )r   r_   rD   rj   )r    rk   r`   r*   r+   )r   r   r_   rD   r   r   r   pivot_count   s    
rm   c             C   s
   |  |S )N)Zget_level_values)r"   r.   r   r   r   _get_level_values   s    rn   c             C   s
   |  |S )N)rn   )r"   r.   r   r   r   rn      s    rB   outerc             C   s<   t | dkr| d S tt| d }|| ||||dS dS )a  Concatenate, handling some edge cases:

    - Unions categoricals between partitions
    - Ignores empty partitions

    Parameters
    ----------
    dfs : list of DataFrame, Series, or Index
    axis : int or str, optional
    join : str, optional
    uniform : bool, optional
        Whether to treat ``dfs[0]`` as representative of ``dfs[1:]``. Set to
        True if all arguments have the same columns and dtypes (but not
        necessarily categories). Default is False.
    r   r   )rH   joinuniformfilter_warningN)r<   concat_dispatchdispatchtype)dfsrH   rp   rq   rr   funcr   r   r   rB      s
    
c          	      s2  |dkr t j f||dtS t d t jr,t d t jrZt jt  d jdS t d t jr d  dd   }t	fdd|D r fddt
jD }t jj|jd	S jftd
d |D  }t|}yt jj|jd	S  tk
r   t |S X  d  dd  S  d j}	t|	t jpdt|	t jodtdd |	jD }
|
rdd  D }tdd  D }n }d }|rt|d t jrntdd |D r|r|}|d jdk}nddd |D }t H tdt |rtdt t jdd |D fd|it }W d Q R X | rR||  jt jfdd|D fd|it}|j}x|jD ]}x$|D ]}| |}|d k	rP qW g }x`|D ]X}||j!kr|||  n6tj"t#|ddd}t j$%||j&j'|j&j(}|| qW t|||< t#|s||_qW |j)|jd}nHt : tdt |rztdt t j|fd|it}W d Q R X nt*|d j+r|d krtdd |D }t j,t|||d jdS t . |rtdt t j|fd|it}W d Q R X |d k	r.||_|S )Nr   )rH   rp   r   )rR   c             3   s&   | ]}t |tjo|j jkV  qd S )N)r>   r    
MultiIndexnlevels)r8   o)firstr   r   	<genexpr>  s   z concat_pandas.<locals>.<genexpr>c                s"   g | ] t  fd dD qS )c                s   g | ]}t | qS r   )rn   )r8   i)r.   r   r   r:      s    z,concat_pandas.<locals>.<listcomp>.<listcomp>)rB   )r8   )rv   )r.   r   r:      s   z!concat_pandas.<locals>.<listcomp>)namesc             s   s   | ]}|j V  qd S )N)Z_values)r8   kr   r   r   r|   $  s    c             s   s   | ]}t |tjV  qd S )N)r>   r    CategoricalIndex)r8   r}   r   r   r   r|   2  s    c             S   s   g | ]}|j d dqS )T)r^   )Zreset_index)r8   r   r   r   r   r:   5  s    c             S   s   g | ]
}|j qS r   )r   )r8   r   r   r   r   r:   6  s    c             s   s   | ]}t |tjV  qd S )N)r>   r    r@   )r8   r   r   r   r   r|   =  s    categoryc             S   s2   g | ]*}t |tjr|n| j|jd idqS )r   )r_   )r>   r    r@   to_framerenamerR   )r8   r   r   r   r   r:   E  s   ignorec             S   s   g | ]}|j d k jqS )r   )dtypesr   T)r8   r   r   r   r   r:   L  s   rp   c                s   g | ]}||j   qS r   )r_   intersection)r8   r   )not_catr   r   r:   S  s   Zi8)ra   )r_   c             S   s   g | ]
}|j qS r   )r   )r8   r   r   r   r   r:   w  s    )r   rR   )-r    rB   rC   r>   Indexr   r   rR   rx   rd   rangery   Zfrom_arraysr~   rD   tupler*   ZconcatenateZfrom_tuples	Exceptionappendr   re   Zlevelsr@   r   r&   r'   r(   RuntimeWarningFutureWarning
differencegetr_   Zfullr<   ZCategoricalZ
from_codescatZ
categoriesZorderedZreindexr   ra   r?   )rv   rH   rp   rq   rr   restZarraysZ	to_concatZ
new_tuplesZ
dfs0_indexZhas_categoricalindexZdfs2ZindZdfs3Zcat_maskrg   Ztemp_indcolr   r\   partsZcodesdatar   )rv   r{   r   r   concat_pandas  s    














"

 
r   )N)N)N)TTr   )N)T)r   ro   FT)r   ro   FT)2Z
__future__r   r   r   r&   Znumpyr*   Zpandasr    Zpandas.api.typesr   Ztoolzr   Zutilsr   r
   r   Zpandas.types.concatrC   r   r   r   r   r#   r/   r0   rG   rM   rN   rP   rS   rV   rW   rX   rY   rD   r\   rb   rh   ri   rl   rm   rn   rs   rB   registerr@   r?   r   r   r   r   r   r   <module>   sX   





 
>	




