B
    T\o              	   @   sD  d dl mZmZmZ d dlZd dlZd dlmZ d dlZd dl	Z	d dl
mZ d dlZd dlZd dlm  mZ d dlmZmZmZmZ yd dlmZ W n  ek
r   d dlmZ Y nX yd dlmZ W n ek
r   dd	 ZY nX d
dlmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z, eej-Z.e.edkZ/e/Z0dd Z1dd Z2dZ3dZ4dd Z5ed[ddZ6dZ7dd  Z8d!d" Z9d\d$d%Z:d]d&d'Z;e,d(Z<e<=ej>ej?fd^d)d*Z@e<=ejAd_d+d,ZBe<=eCd`d-d.ZDe.d/krejEejFejGfZHnejEejFfZHe,d0ZIeI=eCd1d2 ZJeI=ej?d3d4 ZKeI=ejAd5d6 ZLeMd#eNd7eOd8ePd
eQd9eQd9eRd9d9d:ZSd;d< ZTd=d> ZUeI=ej>dad?d@ZVdAdB ZWdCdD ZXdEdF ZYdbdGdHZZdcdIdJZ[dddKdLZ\dMdN Z]dedOdPZ^dQdR Z_dSdT Z`dUdV ZadfdWdXZbdgdYdZZcdS )h    )absolute_importdivisionprint_functionN)LooseVersion)contextmanager)is_categorical_dtype	is_scalar	is_sparseis_period_dtype)is_datetime64tz_dtype)is_interval_dtypec             C   s   dS )NF )dtyper   r   3lib/python3.7/site-packages/dask/dataframe/utils.py<lambda>   s    r      )make_array_nonempty   )is_dask_collection)PY2IteratorMapping)get_deps)get_sync)
asciitableis_arraylikeDispatchz	0.24.0rc1c             C   sD   t | d| }tr6tjtjtjtjtjtjtj	tj
f}nd}t||S )Nr   r   )getattr
HAS_INT_NApdZ	Int8DtypeZ
Int16DtypeZ
Int32DtypeZ
Int64DtypeZ
UInt8DtypeZUInt16DtypeZUInt32DtypeZUInt64Dtype
isinstance)tr   typesr   r   r   is_integer_na_dtype)   s    r#   c             c   s   t |trt|}t|s"| V  nt|}|  } | j}t|rJ|	 }|
|}| jd|d  V  x4tt|d D ] }| j|| ||d   V  qzW | j|d d V  dS )aM   Shard a DataFrame by ranges on its index

    Examples
    --------

    >>> df = pd.DataFrame({'a': [0, 10, 20, 30, 40], 'b': [5, 4 ,3, 2, 1]})
    >>> df
        a  b
    0   0  5
    1  10  4
    2  20  3
    3  30  2
    4  40  1

    >>> shards = list(shard_df_on_index(df, [2, 4]))
    >>> shards[0]
        a  b
    0   0  5
    1  10  4

    >>> shards[1]
        a  b
    2  20  3
    3  30  2

    >>> shards[2]
        a  b
    4  40  1

    >>> list(shard_df_on_index(df, []))[0]  # empty case
        a  b
    0   0  5
    1  10  4
    2  20  3
    3  30  2
    4  40  1
    Nr   r   )r    r   listlennparray
sort_indexindexr   Z
as_orderedZsearchsortedilocrange)df	divisionsr*   indicesir   r   r   shard_df_on_index5   s    '


 r1   z?meta : pd.DataFrame, pd.Series, dict, iterable, tuple, optionalaW  An empty ``pd.DataFrame`` or ``pd.Series`` that matches the dtypes and
column names of the output. This metadata is necessary for many algorithms
in dask dataframe to work.  For ease of use, some alternative inputs are
also available. Instead of a ``DataFrame``, a ``dict`` of ``{name: dtype}``
or iterable of ``(name, dtype)`` can be provided. Instead of a series, a
tuple of ``(name, dtype)`` can be used. If not provided, dask will try to
infer the metadata. This may lead to unexpected results, so providing
``meta`` is recommended. For more information, see
``dask.dataframe.utils.make_meta``.
c                 s   | s fddS | d }d  dd }tjt||dd}d	td
|}|jrd|jkrn|jd||_nPd|dd  }t	
d|j\}}|
dd\}	}
d|||	|dd ||
|_|S )zReplace `$META` in docstring with param description.

    If pad keyword is provided, will pad description by that number of
    spaces (default is 8).c                s   t | f S )N)insert_meta_param_description)f)kwargsr   r   r      s    z/insert_meta_param_description.<locals>.<lambda>r    Zpad   N   )Zinitial_indentZsubsequent_indentwidthz{0}
{1}
z$METAzParameters
%s----------   NzParameters\n[ ]*----------z

r   z{0}{1}{2}
{3}{4}

{5})gettextwrapZwrap_META_DESCRIPTIONformat_META_TYPESjoin__doc__replaceresplit)argsr4   r3   indentZbodyZdescrZparameter_headerfirstZlastZ
parametersrestr   )r4   r   r2   {   s"    


r2   Fc          
   c   s   y
dV  W n t k
r } zbt \}}}dt|}d}|rJ|d7 }|d7 }|| rdd| ndt||}t|W dd}~X Y nX dS )zReraise errors in this block to show metadata inference failure.

    Parameters
    ----------
    funcname : str, optional
        If provided, will be added to the error message to indicate the
        name of the method that failed.
    N zMetadata inference failed{0}.

zYou have supplied a custom function and Dask is unable to 
determine the type of output that that function returns. 

To resolve this please provide a meta= keyword.
The docstring of the Dask function you ran should have more information.

zOOriginal error is below:
------------------------
{1}

Traceback:
---------
{2}z	 in `{0}`)		Exceptionsysexc_infor@   	traceback	format_tbr>   repr
ValueError)funcnameZudfeexc_type	exc_valueexc_tracebacktbmsgr   r   r   raise_on_meta_error   s    

 rX   Z__UNKNOWN_CATEGORIES__c             C   sF   t | d| } t| tjr$t| jjkS t| tjr:t| jkS tddS )zwReturns whether the categories in `x` are known.

    Parameters
    ----------
    x : Series or CategoricalIndex
    _metaz#Expected Series or CategoricalIndexN)	r   r    r   SeriesUNKNOWN_CATEGORIEScat
categoriesCategoricalIndex	TypeError)xr   r   r   has_known_categories   s    
ra   c             C   s   t | tjtjfr|  } t | tjrr| jdk}| r|| j}x\|D ]$}t| | sH| | j	j
g dd qHW n.t | tjrt| jrt| s| j	j
g dd t | jtjrt| js| j
g | _nt | tjrt| s| 
g } | S )zReplace any unknown categoricals with empty categoricals.

    Useful for preventing ``UNKNOWN_CATEGORIES`` from leaking into results.
    categoryT)inplace)r    r   rZ   	DataFramecopydtypesanyr*   ra   r\   set_categoriesr   r   r^   )r`   Zcat_maskcatscr   r   r   strip_unknown_categories   s$    




rk   Tc             C   s   t | tjtjfr|  } t | tjr| jdk}|dkrD|| j}n|j|  sZt	dxL|D ]}| | j
jtgdd q`W n(t | tjrt| jr| j
jtgdd |rt | jtjr| jtg| _nt | tjr| tg} | S )a  Set categories to be unknown.

    Parameters
    ----------
    x : DataFrame, Series, Index
    cols : iterable, optional
        If x is a DataFrame, set only categoricals in these columns to unknown.
        By default, all categorical columns are set to unknown categoricals
    index : bool, optional
        If True and x is a Series or DataFrame, set the clear known categories
        in the index as well.
    rb   Nz Not all columns are categoricalsT)rc   )r    r   rZ   rd   re   rf   r*   ZlocallrP   r\   rh   r[   r   r   r^   )r`   Zcolsr*   maskrj   r   r   r   clear_known_categories   s$    


rn   c             C   sF   t |tr4|dkr4tjttg| |djd d S tjg || |dS )Nrb   )namer*   r   )r   ro   r*   )r    strr   rZ   Categoricalr[   r+   )ro   r   r*   r   r   r   _empty_series   s    rr   	make_metac             C   s   | j d d S )Nr   )r+   )r`   r*   r   r   r   make_meta_pandas
  s    rt   c             C   s   | dd S )Nr   r   )r`   r*   r   r   r   make_meta_index  s    ru   c                sD  t | dr| jS t| r$| dd S  dk	r4t  t| tr^tj fdd|  D  dS t| t	rt
| dkrt| d | d  dS t| tt	frtd	d
 | D std| tj fdd| D dd | D  dS t | ds | dk	r yt| }t|S  tk
r   Y nX t| r2t| S td| dS )au  Create an empty pandas object containing the desired metadata.

    Parameters
    ----------
    x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar
        To create a DataFrame, provide a `dict` mapping of `{name: dtype}`, or
        an iterable of `(name, dtype)` tuples. To create a `Series`, provide a
        tuple of `(name, dtype)`. If a pandas object, names, dtypes, and index
        should match the desired output. If a dtype or scalar, a scalar of the
        same dtype is returned.
    index :  pd.Index, optional
        Any pandas index to use in the metadata. If none provided, a
        `RangeIndex` will be used.

    Examples
    --------
    >>> make_meta([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta('i8')
    1
    rY   Nr   c                s    i | ]\}}t || d |qS ))r*   )rr   ).0rj   d)r*   r   r   
<dictcomp>8  s   z$make_meta_object.<locals>.<dictcomp>)r*   r   r   c             s   s$   | ]}t |tot|d kV  qdS )r   N)r    tupler&   )rv   r0   r   r   r   	<genexpr>=  s    z#make_meta_object.<locals>.<genexpr>z5Expected iterable of tuples of (name, dtype), got {0}c                s    i | ]\}}t || d |qS ))r*   )rr   )rv   rj   rw   )r*   r   r   rx   @  s    c             S   s   g | ]\}}|qS r   r   )rv   rj   rw   r   r   r   
<listcomp>A  s    z$make_meta_object.<locals>.<listcomp>)columnsr*   r   z*Don't know how to create metadata from {0})hasattrrY   r   rs   r    dictr   rd   itemsry   r&   rr   r%   rl   rP   r>   r'   r   _scalar_from_dtyperJ   r   _nonempty_scalarr_   )r`   r*   r   r   )r*   r   make_meta_object  s4    



r   z0.20.0meta_nonemptyc             C   s(   t | rt| S tdt| jdS )zCreate a nonempty pandas object from the given metadata.

    Returns a pandas DataFrame, Series, or Index that contains two rows
    of fake data.
    z5Expected Index, Series, DataFrame, or scalar, got {0}N)r   r   r_   r>   type__name__)r`   r   r   r   meta_nonempty_object\  s    r   c                sL   t j  fddtjD }tj| ttjd}j|_|S )Nc                s,   i | ]$\}}t jd d |f  d|qS )N)idx)_nonempty_seriesr+   )rv   r0   rj   )r   r`   r   r   rx   m  s   z+meta_nonempty_dataframe.<locals>.<dictcomp>)r*   r|   )	r   r*   	enumerater|   r   rd   r'   Zaranger&   )r`   dataresr   )r   r`   r   meta_nonempty_dataframej  s    
r   c             C   s  t | }|tjkr"tjd| jdS |tkr<|ddg| jdS |tjkrZtjddg| jdS |tjkrd}| jd krz|dgnd }tj||d| j| j| jdS |tj	krtj
dd| j| jd	S |tjkrtdd
}| jd kr||d gnd }tj||d| j| jd	S |tjkrbt| jdkr8tjt| j| jd}ntjjddg| j| jd}tj|| jdS |tjkrdd | jD }dd | jD }tj||| jdS tdt | jd S )Nr   )ro   r   abz
1970-01-01z
1970-01-02)startperiodsfreqtzro   )r   r   r   ro   Dr   )orderedr$   )r]   r   c             S   s   g | ]}t |qS r   )_nonempty_index)rv   lr   r   r   r{     s    z#_nonempty_index.<locals>.<listcomp>c             S   s   g | ]}d d gqS )r   r   )rv   r0   r   r   r   r{     s    )levelslabelsnamesz*Don't know how to handle index of type {0})r   r   Z
RangeIndexro   _numeric_index_typesIndexZDatetimeIndexr   r   ZPeriodIndexZperiod_rangeZTimedeltaIndexr'   timedelta64r^   r&   r]   rq   r   r   Z
from_codes
MultiIndexr   r   r_   r>   r   )r   typr   r   r   r   r   r   r   r   u  sB    





r       z
1970-01-01Zfoo)r   VMmSr   UOc             C   sl   | j dkr| dS | j dkr.| tddS | j tkrZt| j  }| j dkrV|| S |S td| d S )N)r0   r3   ur   rj   r   )r   r   zCan't handle dtype: {0})kindr   complex_simple_fake_mappingZastyper_   r>   )r   or   r   r   r     s    




r   c             C   sb   t | tjtjtjfr| S t| rJt| dr4| jntt	| }t
|S tdt	| jd S )Nr   zCan't handle meta of type '{0}')r    r   	Timestamp	TimedeltaPeriodr'   isscalarr}   r   r   r   r_   r>   r   )r`   r   r   r   r   r     s    
r   c             C   s  |d krt | j}| j}t|r<tjd|jd}||g}nHt|rt| j	j
rl| j	j
d gd }| j	j
}nt | j	j
}d }tj||| j	jd}nt|rtjdd g|d}nt|r|j}td|td	|g}nt|rtrt|j}n
t|j}tj||g|d}nvt|rPt|j}tr<tj||g|d}ntj||g|d}n4t|tjkrjt|}nt|}tj||g|d}tj|| j|d
S )Nz
1970-01-01)r   r   r   )r]   r   r   )r   Z2000Z2001)ro   r*   )r   r*   r   r   r   r   r   r   r&   r\   r]   rq   r   r#   r(   r
   r   r   r	   PANDAS_GT_0240r   ZsubtypeZSparseArrayr   r'   r   r   Z_lookuprZ   ro   )sr   r   entryr   ri   r   r   r   r   r     sD    






r   c             C   s$   t t| ddddhko"t| t S )z Looks like a Pandas DataFrame rf   r|   groupbyhead)setdirr    r   )r-   r   r   r   is_dataframe_like  s    r   c             C   s$   t t| ddddhko"t| t S )z Looks like a Pandas Series ro   r   r   r   )r   r   r    r   )r   r   r   r   is_series_like  s    r   c             C   s,   t t| }|ddhko*d|ko*t| t S )z Looks like a Pandas Index ro   r   r   )r   r   r    r   )r   Zattrsr   r   r   is_index_like  s    r   c                s  |rdddhnt    fddt|s8t|s8t|r@t|rRtdt|j t| t|kr|dt|jt| jf }nt|r4t }t	t
dkrd	|d
< tj| j|jgfddi|}fdd|d D }|rdt|jtdddg|f }n4tt|jt| js.d|j| jf }n| S n@| j|jrH| S dt|jtddgd| jfd|jfgf }td|rd| nd|f dS )a  Check that the dask metadata matches the result.

    If metadata matches, ``x`` is passed through unchanged. A nice error is
    raised if metadata doesn't match.

    Parameters
    ----------
    x : DataFrame, Series, or Index
    meta : DataFrame, Series, or Index
        The expected metadata that ``x`` should match
    funcname : str, optional
        The name of the function in which the metadata was specified. If
        provided, the function name will be included in the error message to be
        more helpful to users.
    numeric_equal : bool, optionl
        If True, integer and floating dtypes compare equal. This is useful due
        to panda's implicit conversion of integer to floating upon encountering
        missingness, which is hard to infer statically.
    r0   r3   r   c                s   t | t |krdS t| tr&| dks8t|tr<|dkr<dS t | rtt |rttdkrlt| jksht|jkrldS | |kS | j kr|j kp| |kS )NF-z0.21.0T)r   r    rp   PANDAS_VERSIONr[   r]   r   )r   r   )eq_typesr   r   equal_dtypes  s    $

z check_meta.<locals>.equal_dtypesz>Expected partition to be DataFrame, Series, or Index, got `%s`z,Expected partition of type `%s` but got `%s`z0.23.0Tsortaxisr   c                s&   g | ]\}}} ||s|||fqS r   r   )rv   colr   r   )r   r   r   r{   3  s    zcheck_meta.<locals>.<listcomp>r   zPartition type: `%s`
%sZColumnZFoundZExpectedz]The columns in the computed data do not match the columns in the provided metadata.
 %s
  :%srI   r   zMetadata mismatch found%s.

%sz in `%s`N)r   r   r   r   r   r_   r   r   r~   r   r   r   concatrf   Zfillna
itertuplesr   r'   Zarray_equalZ
nan_to_numr|   r   rP   )r`   metarQ   numeric_equalerrmsgr4   rf   Z
bad_dtypesr   )r   r   r   
check_meta  sB    
r   c             C   sL   t | }|dkr| jj}|r:| d }| d }d||}nd}d|||S )z+Summarized representation of an Index.
    Nr   r$   z
, {} to {}rI   z{}: {} entries{})r&   	__class__r   r>   )r   ro   nr   tailZsummaryr   r   r   index_summaryN  s    r   c             C   sl  dd l m} t| drh|d kr,| jdd}t| |jrdt|jksRtt||r| j	|j	ksft| j
j	|j	ksxtt|tjr|j| j
jkst|rt| | nt| |jrPdt|jkstt|t| j
t|kstt| j
|r(| j	|j	kst| j	|j	f| j
j	|j	ks(t|r8t| | t| j|||jd nt| |jrdt|jksztt|t| jtjstt| jt| j
t|kstt| j
|rt| j|j t| j
j|j |rt| | t| j|||jd n\t| |jjrNt|s<t|tjtjfs<t|rdt| | nd	t| }t||S | S )
Nr   dasksync)Z	schedulerr   rZ   )check_namescheck_dtypesresultrd   z#Unsupported dask instance {0} found)Zdask.dataframeZ	dataframer}   Zcomputer    r   r   r   AssertionErrorro   rY   r   r   r   assert_dask_dtypesrZ   _check_daskr*   rd   r|   tmassert_index_equalcoreZScalarr'   r   r   r   r>   )Zdskr   r   r   ZddrW   r   r   r   r   c  sV     
"
r   c          
   C   s   y`t | tjrVt| jjt| j@ rBdd tt| jjD | j_| j	| j
 d} n| 	 } W n tttfk
rz   Y nX |  S )Nc             S   s   g | ]}d | qS )z-overlapped-index-name-%dr   )rv   r0   r   r   r   r{     s   z_maybe_sort.<locals>.<listcomp>)Zby)r    r   rd   r   r*   r   r|   r,   r&   Zsort_valuestolistr_   
IndexErrorrP   r)   )r   r   r   r   _maybe_sort  s    r   c       	      K   s  |rlt |  t | t| drlt|drltt| j d }tt|j d }||kslt||ft|  t| t	| ||d} t	|||d}|s| j
dd} |j
dd}t| dr|  } t|dr| }t| tjrt| } t|}tj| |f| nt| tjr@t| } t|}tj| |fd|i| n^t| tjr`tj| |f| n>| |krndS t| rt|stnt| |stdS )Nr.   r   )r   r   T)Zdrop	to_pandasr   )assert_divisionsr}   r   r'   Zasarrayr.   r   r   assert_sane_keynamesr   Zreset_indexr   r    r   rd   r   r   Zassert_frame_equalrZ   Zassert_series_equalr   r   ZisnanZallclose)	r   r   r   r   Zcheck_divisionsZcheck_indexr4   ZatZbtr   r   r   	assert_eq  sD    


r   c             C   s`   t | dr| j} t| tstx,| D ]$}t|tr:|d }||r$dS q$W tdj|dd S )Nr   r   Tz/given dask graph doesn't contain label: {label})label)r}   r   r    r   r   ry   
startswithr>   )r   r   kr   r   r   assert_dask_graph  s    



r   c             C   s   t | dsd S t | dsd S | js&d S dd }t| j|  }x\t|d d D ]H\}}t|rP|| | j| kszt	||
 | j|d  k sPt	qPW t|d r||d  | jd kst	||d 
 | jd kst	d S )Nr.   r*   c             S   s8   t | tjr| S y| jdS  tk
r2   | jS X d S )Nr   )r    r   r   r*   Zget_level_valuesAttributeError)r`   r   r   r   r*     s    zassert_divisions.<locals>.indexr$   r   )r}   Zknown_divisionsr   r   Z__dask_keys__r   r&   minr.   r   max)ddfr*   Zresultsr0   r-   r   r   r   r     s    

"r   c             C   s   t | dsd S xr| j D ]d}xt|tr4|d }q W t|ttfsHtt|dk sXtd|ksdtt	s|
dd  stqW d S )Nr   r   d   r5   r   )r}   r   keysr    ry   rp   bytesr   r&   r   rD   isidentifier)r   r   r   r   r   r     s    
r   c             C   s>  ddddh}|r| d t|tjrtxJtj| jj|jgdd D ]*\}}}|j|krb|j|ksD||ksDt	qDW nt|tj
tjfr| jj}|j}|j|kr|j|ks||kst	nt| jdr"| jj}t|dst|st	tt|}n|j}|j|kr|j|ks:||ks:t	nt| jt|ks:t	d	S )
a  Check that the dask metadata matches the result.

    If `numeric_equal`, integer and floating dtypes compare equal. This is
    useful due to the implicit conversion of integer to floating upon
    encountering missingness, which is hard to infer statically.r   r   r   r   )r0   r3   r   )r   r   N)updater    r   rd   r   rY   rf   r   r   r   rZ   r   r   r}   r'   r   r   )r   r   r   r   r   r   r   r   r   r   r      s&    
&"
(r   c             C   sL   t | j\}}|r.ttt| |ksHtnttt| |ksHtd S )N)r   r   r   mapr&   valuesr   )r`   r   eqZdependenciesZ
dependentsr   r   r   assert_max_deps   s    r   )NF)NT)N)N)N)N)N)NT)N)TTN)TTTT)T)T)dZ
__future__r   r   r   rC   r<   Zdistutils.versionr   rK   rM   
contextlibr   Znumpyr'   Zpandasr   Zpandas.util.testingutilZtestingr   Zpandas.api.typesr   r   r	   r
   r   ImportErrorZpandas.core.commonr   
extensionsr   baser   Zcompatibilityr   r   r   r   r   Zlocalr   Zutilsr   r   r   __version__r   r   r   r#   r1   r?   r=   r2   rX   r[   ra   rk   rn   rr   rs   registerrZ   rd   rt   r   ru   objectr   Z
Int64IndexZFloat64IndexZUInt64Indexr   r   r   r   r   Zbool_ZvoidZ
datetime64r   Zstr_Zunicode_r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s   
8
!

>
,
.
L

0 
(
 