B
    T\F                 @   s@  d dl mZmZmZ d dlZd dlZd dlZd dlZd dl	Z
d dlZddlmZmZmZmZmZmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZ dd	lmZ dd
lm Z m!Z!m"Z"m#Z# ddl$m%Z% dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+dd Z,G dd de-Z.d^ddZ/dd Z0d d! Z1d"d# Z2d$d% Z3d&d' Z4d(d) Z5d*d+ Z6d,d- Z7d.d/ Z8d0d1 Z9d2d3 Z:d4d5 Z;d6d7 Z<d8d9 Z=d:d; Z>d<d= Z?d>d? Z@d@dA ZAdBdC ZBdDdE ZCdFdG ZDdHdI ZEdJdK ZFdLdM ZGd_dNdOZHd`dPdQZIdRdS ZJdTdU ZKdadVdWZLG dXdY dYe-ZMG dZd[ d[eMZNG d\d] d]eMZOdS )b    )absolute_importdivisionprint_functionN   )	DataFrameSeriesacamap_partitionsnew_dd_object
no_defaultsplit_out_on_index)drop_columns)shuffle)	make_metainsert_meta_param_descriptionraise_on_meta_erroris_series_likeis_dataframe_like   )tokenize)derived_fromMfuncname
itemgetter)HighLevelGraphc             C   s2   t | ttfr*t| dkr*ttt| S dS dS )z6Determine the correct levels argument to groupby.
    r   r   N)
isinstancetuplelistlenrange)index r!   5lib/python3.7/site-packages/dask/dataframe/groupby.py_determine_levels6   s    r#   c                s   t  ts|S t |tr* fdd|D S t|rV|j jkrV|j |j jkrV|jS t |trt|j jr|j |j jkrt|jS |S dS )zDReplace series with column names in an index wherever possible.
    c                s   g | ]}t  |qS r!   )_normalize_index).0col)dfr!   r"   
<listcomp>F   s    z$_normalize_index.<locals>.<listcomp>N)	r   r   r   r   namecolumns_namesetissubset)r'   r    r!   )r'   r"   r$   ?   s    



r$   c             C   s@   t | tjjjr<|dk	r<t |ttttjfr4t|}| | S | S )z9
    Slice columns if grouped is pd.DataFrameGroupBy
    N)	r   pdcoregroupbyDataFrameGroupByr   r   r,   Index)groupedr*   r!   r!   r"   _maybe_sliceU   s    r4   c                sJ   t |st|r j|jS t|ttfrBt fdd|D S dS dS )z+Check if `df` and `by` have aligned indicesc             3   s   | ]}t  |V  qd S )N)_is_aligned)r%   i)r'   r!   r"   	<genexpr>f   s    z_is_aligned.<locals>.<genexpr>TN)r   r   r    Zequalsr   r   r   all)r'   byr!   )r'   r"   r5   a   s
    r5   c             K   sX   | dd}|dk	r,t| |s,d}t|n |dk	rLt|rL|jt|d | jf |S )a  Groupby, but raise if df and `by` key are unaligned.

    Pandas supports grouping by a column that doesn't align with the input
    frame/series/index. However, the reindexing this causes doesn't seem to be
    threadsafe, and can result in incorrect results. Since grouping by an
    unaligned key is generally a bad idea, we just error loudly in dask.

    For more information see pandas GH issue #15244 and Dask GH issue #1876.r9   Na  Grouping by an unaligned index is unsafe and unsupported.
This can be caused by filtering only one of the object or
grouping key. For example, the following works in pandas,
but not in dask:

df[df.foo < 0].groupby(df.bar)

This can be avoided by either filtering beforehand, or
passing in the name of the column instead:

df2 = df[df.foo < 0]
df2.groupby(df2.bar)
# or
df[df.foo < 0].groupby('bar')

For more information see dask GH issue #1876.)r9   )getr5   
ValueErrorr   updater   r0   )r'   kwargsr9   msgr!   r!   r"   _groupby_raise_unalignedk   s    	
r?   c             O   s(   |  |}|r|| }|j|f||S )N)r0   apply)r'   Zgrouperkeyfuncargsr=   gr!   r!   r"   _groupby_slice_apply   s    
rE   c             C   sR   t | |d}||jkr0t| r&|| }||S t| r@| | } | jdd S d S )N)r9   r   )r?   groupsr   	get_groupZiloc)r'   Zby_keyZget_keyr*   r3   r!   r!   r"   _groupby_get_group   s    

rH   c               @   s   e Zd ZdZdddZdS )Aggregationa&  A user defined aggregation.

    Parameters
    ----------
    name : str
        the name of the aggregation. It should be unique, since intermediate
        result will be identified by this name.
    chunk : callable
        a function that will be called with the grouped column of each
        partition. It can either return a single series or a tuple of series.
        The index has to be equal to the groups.
    agg : callable
        a function that will be called to aggregate the results of each chunk.
        Again the argument(s) will be grouped series. If ``chunk`` returned a
        tuple, ``agg`` will be called with all of them as individual positional
        arguments.
    finalize : callable
        an optional finalizer that will be called with the results from the
        aggregation.

    Examples
    --------

    ``sum`` can be implemented as::

        custom_sum = dd.Aggregation('custom_sum', lambda s: s.sum(), lambda s0: s0.sum())
        df.groupby('g').agg(custom_sum)

    and ``mean`` can be implemented as::

        custom_mean = dd.Aggregation(
            'custom_mean',
            lambda s: (s.count(), s.sum()),
            lambda count, sum: (count.sum(), sum.sum()),
            lambda count, sum: sum / count,
        )
        df.groupby('g').agg(custom_mean)

    Nc             C   s   || _ || _|| _|| _d S )N)chunkaggfinalize__name__)selfr)   rJ   rK   rL   r!   r!   r"   __init__   s    zAggregation.__init__)N)rM   
__module____qualname____doc__rO   r!   r!   r!   r"   rI      s   'rI   c             C   s   || j |ddS )NF)levelsort)r0   )r'   aggfunclevelsr!   r!   r"   _groupby_aggregate   s    rW   c             O   sd   | d}| d}t| |d}t| s0|d kr8||S t|ttttjfrTt|}||| S d S )NrJ   r*   )r9   )	popr?   r   r   r   r   r,   r.   r2   )r'   r    r=   rB   r*   rD   r!   r!   r"   _apply_chunk   s    

rY   c             G   s   t | r|  } |  } |  j}t| |d}| }||j  jdd d}| | d | |< t| |d}| jdd d}|j	|_	t
j|||gddS )	N)r9   c             S   s   | d S )Nz-countr!   )cr!   r!   r"   <lambda>   s    z_var_chunk.<locals>.<lambda>)r*   r   c             S   s   | d S )Nz-x2r!   )rZ   r!   r!   r"   r[      s    r   )axis)r   to_framecopyZ_get_numeric_datar*   r?   sumcountrenamer    r.   concat)r'   r    colsrD   xnZg2x2r!   r!   r"   
_var_chunk   s    
rg   c             C   s   | j |dd S )NF)rS   rT   )r0   r_   )rD   rV   r!   r!   r"   _var_combine  s    rh   c       	      C   s   | j |dd } t| j}| | jd |d   }| | j|d d| d   jdd d}| | j| d d   jdd d}||d |  }|| }d	||d	k < || }tj||| d	k< t|st|S )
NF)rS   rT      r   c             S   s   | d d S )Nr!   )rZ   r!   r!   r"   r[     s    z_var_agg.<locals>.<lambda>)r*   c             S   s   | d d S )Nir!   )rZ   r!   r!   r"   r[     s    r   )	r0   r_   r   r*   ra   npnanr   AssertionError)	rD   rV   ddofZncrd   rf   re   resultdivr!   r!   r"   _var_agg  s    
*$rq   c                s   | d}| d}t| |d}t| dkrz||g tjj t|trhtj	
 fdd|D  _q jj|d _n$||g    | j j    S )NrV   r)   )r9   r   c                s   g | ]} j j|d qS ))rS   )r    get_level_values)r%   rS   )r3   r!   r"   r(   (  s    z%_nunique_df_chunk.<locals>.<listcomp>)rS   )rX   r?   r   r@   r.   r   drop_duplicatesr   r   
MultiIndexfrom_arraysr    rr   nuniqueZastypeZdtypesr*   Zto_dict)r'   r    r=   rV   r)   rD   r!   )r3   r"   _nunique_df_chunk  s    


rw   c             C   s    d g| j j }|  j|ddS )NF)r^   )r    Znlevelsrs   Zrename_axis)r'   namesr!   r!   r"   _drop_duplicates_rename5  s    ry   c                sP   | j |ddt t|tr<tj fdd|D  _n jj	|d _ S )NF)rS   rT   c                s   g | ]} j j|d qS ))rS   )r    rr   )r%   rS   )ro   r!   r"   r(   C  s    z'_nunique_df_combine.<locals>.<listcomp>)rS   )
r0   r@   ry   r   r   r.   rt   ru   r    rr   )r'   rV   r!   )ro   r"   _nunique_df_combine=  s    
rz   c             C   s   | j |dd|  S )NF)rS   rT   )r0   rv   )r'   rV   r)   r!   r!   r"   _nunique_df_aggregateK  s    r{   c             O   s:   t | st|  } t| jd t|d}t| f||S )Nr   )r)   rV   )r   rm   r]   dictr*   r#   rw   )r'   r    Z	_ignored_r=   r!   r!   r"   _nunique_series_chunkO  s    r}   c             C   s   d | |t| |S )Nz{!s}-{!s}-{})formatr   )rB   columnr!   r!   r"   _make_agg_ide  s    r   c                s   t | ts tt|t| } g }t | trxx|  D ]X\}t |trh|fdd| D  q8t |t	sx|g}|fdd|D  q8W nt
dt| t	ttf t fdd|  D  }|rdd |D }|S )a  
    Return a list of ``(result_column, func, input_column)`` tuples.

    Spec can be

    - a function
    - a list of functions
    - a dictionary that maps input-columns to functions
    - a dictionary that maps input-columns to a lists of functions
    - a dictionary that maps input-columns to a dictionaries that map
      output-columns to functions.

    The non-group columns are a list of all column names that are not used in
    the groupby operation.

    Usually, the result columns are mutli-level names, returned as tuples.
    If only a single function is supplied or dictionary mapping columns
    to single functions, simple names are returned as strings (see the first
    two examples below).

    Examples
    --------
    >>> _normalize_spec('mean', ['a', 'b', 'c'])
    [('a', 'mean', 'a'), ('b', 'mean', 'b'), ('c', 'mean', 'c')]

    >>> spec = collections.OrderedDict([('a', 'mean'), ('b', 'count')])
    >>> _normalize_spec(spec, ['a', 'b', 'c'])
    [('a', 'mean', 'a'), ('b', 'count', 'b')]

    >>> _normalize_spec(['var', 'mean'], ['a', 'b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    [(('a', 'var'), 'var', 'a'), (('a', 'mean'), 'mean', 'a'),      (('b', 'var'), 'var', 'b'), (('b', 'mean'), 'mean', 'b'),      (('c', 'var'), 'var', 'c'), (('c', 'mean'), 'mean', 'c')]

    >>> spec = collections.OrderedDict([('a', 'mean'), ('b', ['sum', 'count'])])
    >>> _normalize_spec(spec, ['a', 'b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    [(('a', 'mean'), 'mean', 'a'), (('b', 'sum'), 'sum', 'b'),       (('b', 'count'), 'count', 'b')]

    >>> spec = collections.OrderedDict()
    >>> spec['a'] = ['mean', 'size']
    >>> spec['b'] = collections.OrderedDict([('e', 'count'), ('f', 'var')])
    >>> _normalize_spec(spec, ['a', 'b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    [(('a', 'mean'), 'mean', 'a'), (('a', 'size'), 'size', 'a'),      (('b', 'e'), 'count', 'b'), (('b', 'f'), 'var', 'b')]
    c             3   s    | ]\}} |f| fV  qd S )Nr!   )r%   result_columnrB   )input_columnr!   r"   r7     s   z"_normalize_spec.<locals>.<genexpr>c             3   s    | ]} t |f| fV  qd S )N)r   )r%   rB   )r   r!   r"   r7     s   zunsupported agg spec of type {}c             3   s   | ]}t | V  qd S )N)r   )r%   subspec)	compoundsr!   r"   r7     s   c             S   s   g | ]\}}}|||fqS r!   r!   )r%   _rB   Z	input_colr!   r!   r"   r(     s    z#_normalize_spec.<locals>.<listcomp>)r   r|   collectionsOrderedDictzipitrepeatitemsextendr   r;   r~   typer   anyvalues)specnon_group_columnsZresr   Zuse_flat_columnsr!   )r   r   r"   _normalize_speci  s&    2




r   c             C   s$  t jdt jdi}i }x<| D ]4\}}}t||||f}||g ||f qW x*| D ]}t|dkr\t	d
|q\W i }i }	g }
xv| D ]n\}}}t|tst|||}t|||}|dd |d D  |	dd |d	 D  |
|d
  qW t| }t|	 }	||	|
fS )au  
    Create transformation functions for a normalized aggregate spec.

    Parameters
    ----------
    spec: a list of (result-column, aggregation-function, input-column) triples.
        To work with all arugment forms understood by pandas use
        ``_normalize_spec`` to normalize the argment before passing it on to
        ``_build_agg_args``.

    Returns
    -------
    chunk_funcs: a list of (intermediate-column, function, keyword) triples
        that are applied on grouped chunks of the initial dataframe.

    agg_funcs: a list of (intermediate-column, functions, keword) triples that
        are applied on the grouped concatination of the preprocessed chunks.

    finalizers: a list of (result-column, function, keyword) triples that are
        applied after the ``agg_funcs``. They are used to create final results
        from intermediate representations.
    minmaxr   z%conflicting aggregation functions: {}c             s   s   | ]}|d  |fV  qdS )r   Nr!   )r%   r   r!   r!   r"   r7     s    z"_build_agg_args.<locals>.<genexpr>chunk_funcsc             s   s   | ]}|d  |fV  qdS )r   Nr!   )r%   r   r!   r!   r"   r7     s    aggregate_funcs	finalizer)rk   r   r   r   r:   
setdefaultappendr   r   r;   r~   r   rI   _build_agg_args_singler<   sorted)r   Zknown_np_funcsZby_namer   rB   r   rA   funcsZchunksZaggs
finalizersr   implsr!   r!   r"   _build_agg_args  s*    
r   c             C   s   t jt jft jt jft jt jft jt jft jt jft jt jft jt jfd}|| krjt	| |||| S |dkr~t
| ||S |dkrt| ||S |dkrt| ||S t|trt| ||S td|d S )N)r_   r   r   r`   sizefirstlastvarstdmeanzunknown aggregate {})r   r_   r   r   r`   r   r   r   keys_build_agg_args_simple_build_agg_args_var_build_agg_args_std_build_agg_args_meanr   rI   _build_agg_args_customr;   r~   )r   rB   r   Zsimple_implr!   r!   r"   r     s&    







r   c             C   sL   t ||}|\}}t|tt||dfg|tt||dfg| t|t fdS )N)r   rB   )r   r   r   )r   r|   _apply_func_to_columnr   )r   rB   r   Z	impl_pairZintermediateZ
chunk_implZagg_implr!   r!   r"   r     s    
r   c          
   C   s~   t d|}t d|}t d|}t|tt|tjdf|tt|tjdf|tt|dfgdd |||fD | tt|||dfd	S )
Nr_   Zsum2r`   )r   rB   )r   c             S   s    g | ]}|t t|tjd fqS ))r   rB   )r   r|   r   r_   )r%   r&   r!   r!   r"   r(   .  s   z'_build_agg_args_var.<locals>.<listcomp>)
sum_columncount_columnsum2_column)r   r   r   )r   r|   r   r   r_   r`   _compute_sum_of_squares_finalize_var)r   rB   r   int_sumZint_sum2	int_countr!   r!   r"   r     s    


r   c             C   s,   t | ||}|d \} }}| t|f|d< |S )Nr   )r   _finalize_std)r   rB   r   r   r   r=   r!   r!   r"   r   7  s    r   c          	   C   sb   t d|}t d|}t|tt|tjdf|tt|tjdfgdd ||fD | tt||dfdS )Nr_   r`   )r   rB   c             S   s    g | ]}|t t|tjd fqS ))r   rB   )r   r|   r   r_   )r%   r&   r!   r!   r"   r(   L  s   z(_build_agg_args_mean.<locals>.<listcomp>)r   r   )r   r   r   )r   r|   r   r   r_   r`   _finalize_mean)r   rB   r   r   r   r!   r!   r"   r   @  s    


r   c             C   sr   t t||}|jd kr,| t|t f}n| tt|j|df}t|tt|j|dfg|tt|j	|dfg|dS )N)rB   prefix)rB   r   )r   r   r   )
r   r   rL   operatorr   r|   _apply_func_to_columnsr   rJ   rK   )r   rB   r   r&   r   r!   r!   r"   r   T  s    
r   c             O   s   t |r|jt|d |d}t| f|}t }xX|D ]P\}}}||f|}	t|	trx.t	|	D ]\}
}||d
||
< qfW q<|	||< q<W t|S )a  
    Group a dataframe and apply multiple aggregation functions.

    Parameters
    ----------
    df: pandas.DataFrame
        The dataframe to work on.
    index: list of groupers
        If given, they are added to the keyword arguments as the ``by``
        argument.
    funcs: list of result-colum, function, keywordargument triples
        The list of functions that are applied on the grouped data frame.
        Has to be passed as a keyword argument.
    kwargs:
        All keyword arguments, but ``funcs``, are passed verbatim to the groupby
        operation of the dataframe

    Returns
    -------
    aggregated:
        the aggregated dataframe.
    )r9   r   z{}-{})r   r<   r   rX   r?   r   r   r   r   	enumerater~   r.   r   )r'   r    r=   r   r3   ro   r   rB   Zfunc_kwargsridxsr!   r!   r"   _groupby_apply_funcsl  s    

r   c             C   s"   |d k	r| | n| }| dd S )Nc             S   s   | d   S )Nr   )r_   )rd   r!   r!   r"   r[     s    z)_compute_sum_of_squares.<locals>.<lambda>)r@   )r3   r   baser!   r!   r"   r     s    r   c             C   sD   t | ||d} t }x"|D ]\}}}|| f|||< qW t|S )N)r   rS   )r   r   r   r.   r   )r'   r   finalize_funcsrS   ro   r   rB   r=   r!   r!   r"   _agg_finalize  s
    r   c             C   s   |d kr|| S || | S )Nr!   )df_liker   rB   r!   r!   r"   r     s    r   c                sH   t  r j}n jj}tfdd|D } fdd|D }|| S )Nc             3   s   | ]}|  r|V  qd S )N)
startswith)r%   r&   )r   r!   r"   r7     s    z)_apply_func_to_columns.<locals>.<genexpr>c                s   g | ]} | qS r!   r!   )r%   r&   )r   r!   r"   r(     s    z*_apply_func_to_columns.<locals>.<listcomp>)r   r*   Z_selected_objr   )r   r   rB   r*   r!   )r   r   r"   r     s    r   c             C   s   | | | |  S )Nr!   )r'   r   r   r!   r!   r"   r     s    r   c       
      C   sZ   | | }| | }| | }||d |  }|| }	d|	|	dk < ||	 }t j||| dk< |S )Nr   r   )rk   rl   )
r'   r   r   r   rn   re   rd   rf   ro   rp   r!   r!   r"   r     s    r   c             C   s   t | ||||}t|S )N)r   rk   sqrt)r'   r   r   r   rn   ro   r!   r!   r"   r     s    r   c             C   s,   |j | |j|d}| j|_|| | |S )N)
fill_value)reindexZ	set_indexr    )partZcum_lastr    r*   rB   initialZalignr!   r!   r"   _cum_agg_aligned  s    r   c             C   s0   | j |j }|| j||d|j||d|dS )N)r   )r    unionr   )abrB   r   r   r!   r!   r"   _cum_agg_filled  s    r   c             C   s   | j ||dd S )N)r   r   )add)r   r   r   r!   r!   r"   _cumcount_aggregate  s    r   c               @   s  e Zd ZdZd/ddZedd Zd0dd	Zd
d Ze	e
jjjd1ddZe	e
jjjd2ddZe	e
jjjd3ddZe	e
jjjd4ddZe	e
jjjd5ddZe	e
jjjd6ddZe	e
jjjd7ddZe	e
jjjd8ddZe	e
jjjd9ddZe	e
jjjd:dd Ze	e
jjjd;d!d"Ze	e
jjjd<d#d$Ze	e
jjjd=d%d&Ze	e
jjjd'd( Zd>d)d*Zed+d,d-d. ZdS )?_GroupBya   Superclass for DataFrameGroupBy and SeriesGroupBy

    Parameters
    ----------

    obj: DataFrame or Series
        DataFrame or Series to be grouped
    by: str, list or Series
        The key for grouping
    slice: str, list
        The slice keys applied to GroupBy result
    Nc                s   t  ttfst | _t || _t | jtrJt fdd| jD }n t | jtrf j	| jj	k}nd}|svt
d|| _t | jtrdd | jD }nt | jtr| jj}n| j}| jj|| _d S )Nc             3   s(   | ] }t |tr|j jknd V  qdS )TN)r   r   	divisions)r%   item)r'   r!   r"   r7     s   z$_GroupBy.__init__.<locals>.<genexpr>TzIThe grouped object and index of the groupby must have the same divisions.c             S   s    g | ]}t |tr|jn|qS r!   )r   r   _meta)r%   r   r!   r!   r"   r(     s    z%_GroupBy.__init__.<locals>.<listcomp>)r   r   r   rm   objr$   r    r   r8   r   NotImplementedError_slicer   r0   )rN   r'   r9   sliceZdo_index_partition_align
index_metar!   )r'   r"   rO     s&    

z_GroupBy.__init__c             C   sX   | j j}t| jtr&dd | jD }nt| jtr<| jj}n| j}||}t|| jS )z]
        Return a pd.DataFrameGroupBy / pd.SeriesGroupBy which contains sample data.
        c             S   s    g | ]}t |tr|jn|qS r!   )r   r   _meta_nonempty)r%   r   r!   r!   r"   r(     s    z+_GroupBy._meta_nonempty.<locals>.<listcomp>)	r   r   r   r    r   r   r0   r4   r   )rN   Zsampler   r3   r!   r!   r"   r     s    

z_GroupBy._meta_nonemptyr   c       	      C   s   |d kr|}|| j }t|r$|jn|j}| j| }t| j}tt| jt	sX| j
| jgn| j
g| j tt||dt|||t||d|td
S )N)rJ   r*   )rU   rV   )	rJ   chunk_kwargs	aggregatemetatokensplit_everyaggregate_kwargs	split_outsplit_out_setup)r   r   r)   r*   _token_prefixr#   r    r   r   r   r   rY   r|   rW   r   )	rN   r   rB   rU   r   r   r   r*   rV   r!   r!   r"   _aca_agg)  s    


&

z_GroupBy._aca_aggc                s  | j }t|r|jn|j}t jtr0 jn jg} j| }|d }	|d }
|d }tt	 j
f||||	|d}t|r| n|}|jf  fdd|D }dd |D }tt	|f||d	krd
n|tj||
d}t ||||}|d| 7 }|d| 7 }i }|jd
f||d
f< xtd j
jD ]}|dkrR|j|d f|||f< n(t||d f|j|d f||f|||f< t|j|f||f||d	krd
n|||f|||f< q,W tj|||||gd}t||| j  j
jS )z* Wrapper for cumulative groupby operation z-mapz
-take-lastz	-cum-last)rJ   r*   r   r   c                s6   i | ].}t |r(| jjkr( j| n jj|qS r!   )rk   isscalarr   r*   r    )r%   r6   )rN   r!   r"   
<dictcomp>S  s   z%_GroupBy._cum_agg.<locals>.<dictcomp>c             S   s   g | ]}t j|d qS ))rA   )r.   ZGrouper)r%   Zindr!   r!   r"   r(   \  s    z%_GroupBy._cum_agg.<locals>.<listcomp>Nr   )r*   rJ   r   r   -r   )Zdependencies)r   r   r)   r*   r   r    r   r   r	   rY   r   r]   assignr   r   r   r+   r   Znpartitionsr   r   r   Zfrom_collectionsr
   r   )rN   r   rJ   r   r   r   r*   r    r)   Z	name_partZ	name_lastZname_cumZcumpart_rawZcumpart_raw_frameZcumpart_extZindex_groupersZcumlast_hashZdaskr6   Zgraphr!   )rN   r"   _cum_agg<  sR    







z_GroupBy._cum_aggr   c             C   s,   |r| j j|dS | jdtjtjddS d S )N)r\   cumsumr   )rJ   r   r   )r   r   r   r   r   )rN   r\   r!   r!   r"   r   {  s    z_GroupBy.cumsumc             C   s,   |r| j j|dS | jdtjtjddS d S )N)r\   cumprodr   )rJ   r   r   )r   r   r   r   mul)rN   r\   r!   r!   r"   r     s    z_GroupBy.cumprodc             C   s   | j dtjtddS )Ncumcount)rJ   r   r   )r   r   r   r   )rN   r\   r!   r!   r"   r     s    z_GroupBy.cumcountc             C   s   | j dtj||dS )Nr_   )r   rB   r   r   )r   r   r_   )rN   r   r   r!   r!   r"   r_     s    z_GroupBy.sumc             C   s   | j dtj||dS )Nr   )r   rB   r   r   )r   r   r   )rN   r   r   r!   r!   r"   r     s    z_GroupBy.minc             C   s   | j dtj||dS )Nr   )r   rB   r   r   )r   r   r   )rN   r   r   r!   r!   r"   r     s    z_GroupBy.maxc             C   s   | j dtjtj||dS )Nr`   )r   rB   rU   r   r   )r   r   r`   r_   )rN   r   r   r!   r!   r"   r`     s    
z_GroupBy.countc             C   s6   | j ||d}| j||d}t|r.||j }|| S )N)r   r   )r_   r`   r   r*   )rN   r   r   r   rZ   r!   r!   r"   r     s
    
z_GroupBy.meanc             C   s   | j dtjtj||dS )Nr   )r   rB   rU   r   r   )r   r   r   r_   )rN   r   r   r!   r!   r"   r     s    z_GroupBy.sizec             C   s   t | j}tt| jts$| j| jgn| jg| j ttt| j	d ||dd|i||t
d
}t| jtrr||jd  }| jr|| j }|S )Nr   )rn   rV   rV   )	rJ   r   combiner   r   combine_kwargsr   r   r   r   )r#   r    r   r   r   r   rg   rq   rh   r   r   r   r*   r   )rN   rn   r   r   rV   ro   r!   r!   r"   r     s    
&
z_GroupBy.varc             C   s$   | j |||d}ttj||d}|S )N)r   r   )r   )r   r	   rk   r   )rN   rn   r   r   vro   r!   r!   r"   r     s    z_GroupBy.stdc             C   s   | j dtj||dS )Nr   )r   rB   r   r   )r   r   r   )rN   r   r   r!   r!   r"   r     s    z_GroupBy.firstc             C   s   | j dtj||dS )Nr   )r   rB   r   r   )r   r   r   )rN   r   r   r!   r!   r"   r     s    z_GroupBy.lastc          	   C   s\   | j d }| jj}t|r.| jd k	r.|| j }t|r<|jn|j}tt| j| j	||||dS )NrG   )r   r   )
r   r   r   r   r   r*   r)   r	   rH   r    )rN   rA   r   r   r*   r!   r!   r"   rG     s    

z_GroupBy.get_groupc                s  t jtrt jts$tjr.jh n$t jtrLdd jD  nt  j	rpj	}t |ts|g}n fddjj
D }t||}nlt jtrt |tttfrtd |ig }dd |D }qtd |ig }fdd|D }ntdjt|\}}}t jttfrBtjdkrBtttj}	nd	}	t jtsbjjg}
njgj }
t|
tt|d
tt||	dtt|||	dd||tdS )Nc             S   s$   h | ]}t |tst|r|qS r!   )r   r   rk   r   )r%   r6   r!   r!   r"   	<setcomp>  s    z%_GroupBy.aggregate.<locals>.<setcomp>c                s   g | ]}| kr|qS r!   r!   )r%   r&   )group_columnsr!   r"   r(     s    z&_GroupBy.aggregate.<locals>.<listcomp>c             S   s    g | ]\\}}}}|||fqS r!   r!   )r%   r   r   rB   r   r!   r!   r"   r(     s   c                s    g | ]\}}} j j||fqS r!   )r   r)   )r%   r   rB   r   )rN   r!   r"   r(     s   zaggregate on unknown object {}r   r   )r   )r   rS   )r   r   rS   r   )
rJ   r   r   r   r   r   r   r   r   r   )r   r   r   r    r   rk   r   r   r,   r   r*   r   r   r|   r;   r~   r   r   r   r   r   r   r   )rN   argr   r   r   r   r   r   r   rV   Z
chunk_argsr!   )r   rN   r"   r     sP    




"
z_GroupBy.aggregate   )Zpadc          	      s  | dt}|tkrXd}tj|dd tdt|  jj|f||}W dQ R X t	|}t
 jtrtdd  jD rtd	 j}|jo| j }|rt
 jtr|jf  fd
d jjD } j}	n2t
 jtr|j jd} j}	n|}| j}	t||	}
n|}
|rxt
 jtrxdd  jjD }|
| }t|rh|
t||jj}n|
j|dd}n^|rt
 jtr|
d } jj|_t|r|
td|jj}n|
jddd}n
|
} j}||d< tt|| j|f|dt|i|}|S )a   Parallel version of pandas GroupBy.apply

        This mimics the pandas version except for the following:

        1.  The user should provide output metadata.
        2.  If the grouper does not align with the index then this causes a full
            shuffle.  The order of rows within each group may not be preserved.

        Parameters
        ----------
        func: function
            Function to apply
        args, kwargs : Scalar, Delayed or object
            Arguments and keywords to pass to the function.
        $META

        Returns
        -------
        applied : Series or DataFrame depending on columns keyword
        r   a  `meta` is not specified, inferred from partial data. Please provide `meta` if the result is unexpected.
  Before: .apply(func)
  After:  .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result
  or:     .apply(func, meta=('x', 'f8'))            for series resultr   )
stacklevelzgroupby.apply({0})Nc             s   s   | ]}t |tV  qd S )N)r   r   )r%   r   r!   r!   r"   r7   T  s    z!_GroupBy.apply.<locals>.<genexpr>z?groupby-apply with a multiple Series is currently not supportedc                s   i | ]} j | d | qS )_index_)r    )r%   rZ   )rN   r!   r"   r   ^  s   z"_GroupBy.apply.<locals>.<dictcomp>)_indexc             S   s   g | ]}d | qS )r   r!   )r%   rZ   r!   r!   r"   r(   n  s    z"_GroupBy.apply.<locals>.<listcomp>r   )r\   r   r   ) r:   r   warningswarnr   r~   r   r   r@   r   r   r    r   r   r   r   Zknown_divisionsZ_contains_index_namer   r   r*   r   Z_select_columns_or_indexr   r   r	   r   ZdtypeZdropr)   rE   r   )rN   rB   rC   r=   r   r>   r'   Zshould_shuffleZdf2r    Zdf3rc   Zindex2Zdf4Zdf5r!   )rN   r"   r@   -  sZ    


z_GroupBy.apply)NN)NNr   )r   )r   )N)Nr   )Nr   )Nr   )Nr   )Nr   )Nr   )r   Nr   )r   Nr   )Nr   )Nr   )r   )rM   rP   rQ   rR   rO   propertyr   r   r   r   r.   r/   r0   ZGroupByr   r   r   r_   r   r   r`   r   r   r   r   r   r   rG   r   r   r@   r!   r!   r!   r"   r     sF   
# 
?		
Fr   c                   s`   e Zd ZdZdd Zdd Zdd Zeej	j
jd fd
d	Zeej	j
jdddZ  ZS )r1   zdataframe-groupby-c             C   s@   t |trt| j| j|d}nt| j| j|d}|j| |_|S )N)r9   r   )r   r   r1   r   r    SeriesGroupByr   )rN   rA   rD   r!   r!   r"   __getitem__  s
    
zDataFrameGroupBy.__getitem__c             C   s4   t ttt| t| j tttjj	| j
j S )N)r   r,   dirr   r   __dict__filterr.   compatisidentifierr   r*   )rN   r!   r!   r"   __dir__  s    zDataFrameGroupBy.__dir__c          
   C   s8   y| | S  t k
r2 } zt|W d d }~X Y nX d S )N)KeyErrorAttributeError)rN   rA   er!   r!   r"   __getattr__  s    zDataFrameGroupBy.__getattr__Nr   c                s&   |dkr|   S tt| j|||dS )Nr   )r   r   )r   superr1   r   )rN   r   r   r   )	__class__r!   r"   r     s    zDataFrameGroupBy.aggregatec             C   s   | j |||dS )N)r   r   )r   )rN   r   r   r   r!   r!   r"   rK     s    zDataFrameGroupBy.agg)Nr   )Nr   )rM   rP   rQ   r   r   r  r  r   r.   r/   r0   r1   r   rK   __classcell__r!   r!   )r  r"   r1     s   
r1   c                   s`   e Zd ZdZd fdd	ZdddZeejj	j
d fdd		Zeejj	j
dd
dZ  ZS )r   zseries-groupby-Nc                sv   t |tr\t |trnFt |trPt|dkr4tddd |D }|j| n|j| tt| j	|||d d S )Nr   zNo group keys passed!c             S   s   g | ]}t |ts|qS r!   )r   r   )r%   r   r!   r!   r"   r(     s    z*SeriesGroupBy.__init__.<locals>.<listcomp>)r9   r   )
r   r   r   r   r;   r   r0   r  r   rO   )rN   r'   r9   r   Znon_series_items)r  r!   r"   rO     s    


zSeriesGroupBy.__init__r   c             C   sz   | j jj}t| j}t| jtr&t}nt}t	t| jt
sD| j| jgn| jg| j |ttd||d||dd|i||tdS )Nzseries-groupby-nunique)rV   r)   rV   )
rJ   r   r   r   r   r   r   r   r   r   )r   r   r)   r#   r    r   r   rw   r}   r   r   r{   rz   r   )rN   r   r   r)   rV   rJ   r!   r!   r"   rv     s    

&zSeriesGroupBy.nuniquec                sP   t t| j|||d}| jr&|| j }t|ttfsLt|trL||jd  }|S )N)r   r   r   )	r  r   r   r   r   r   r|   r   r*   )rN   r   r   r   ro   )r  r!   r"   r     s    
zSeriesGroupBy.aggregatec             C   s   | j |||dS )N)r   r   )r   )rN   r   r   r   r!   r!   r"   rK     s    zSeriesGroupBy.agg)NN)Nr   )Nr   )Nr   )rM   rP   rQ   r   rO   rv   r   r.   r/   r0   r   r   rK   r	  r!   r!   )r  r"   r     s   

r   )NN)r   )r   )N)PZ
__future__r   r   r   r   	itertoolsr   r   r   Znumpyrk   Zpandasr.   r/   r   r   r   r	   r
   r   r   methodsr   r   Zutilsr   r   r   r   r   r   r   r   r   r   r   Zhighlevelgraphr   r#   r$   r4   r5   r?   rE   rH   objectrI   rW   rY   rg   rh   rq   rw   ry   rz   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r1   r   r!   r!   r!   r"   <module>   sr   $#	
$	/
Q9	.


   &$