
\c           @` s  d  d l  m Z m Z m Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l	 Z
 d  d l Z d d l m Z m Z m Z m Z m Z m Z m Z m Z d d l m Z m Z d d l m Z d d l m Z m Z m Z m Z m Z d d	 l  m! Z! d d
 l m" Z" m# Z# m$ Z$ m% Z% d d l& m' Z' d   Z( d   Z) d   Z* d   Z+ d   Z, d   Z- d   Z. d e/ f d     YZ0 e1 e1 d  Z2 d   Z3 d   Z4 d   Z5 d   Z6 d   Z7 d   Z8 d   Z9 d   Z: d   Z; d   Z< d    Z= d!   Z> d"   Z? d#   Z@ d$   ZA d%   ZB d&   ZC d'   ZD d(   ZE d)   ZF d*   ZG d+   ZH d,   ZI d-   ZJ d d.  ZK d d/  ZL d0   ZM d1   ZN e1 d2  ZO d3 e/ f d4     YZP d5 eP f d6     YZQ d7 eP f d8     YZR d S(9   i    (   t   absolute_importt   divisiont   print_functionNi   (   t	   DataFramet   Seriest   acat   map_partitionst   new_dd_objectt
   no_defaultt   split_out_on_indext   _extract_meta(   t   drop_columnst   concat(   t   shuffle(   t	   make_metat   insert_meta_param_descriptiont   raise_on_meta_errort   is_series_liket   is_dataframe_likei   (   t   tokenize(   t   derived_fromt   Mt   funcnamet
   itemgetter(   t   HighLevelGraphc         C` sE   t  |  t t f  r= t |   d k r= t t t |     Sd Sd S(   s6   Determine the correct levels argument to groupby.
    i   i    N(   t
   isinstancet   tuplet   listt   lent   range(   t   index(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _determine_levels7   s    'c         C` s   t  |  t  s | St  | t  rB g  | D] } t |  |  ^ q) St |  r | j |  j k r | j |  | j j k r | j St  | t  r t | j  j	 |  j  r | j |  | j j k r t | j  S| Sd S(   sD   Replace series with column names in an index wherever possible.
    N(
   R   R   R   t   _normalize_indexR   t   namet   columnst   _namet   sett   issubset(   t   dfR   t   col(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR    @   s     c         C` s`   t  |  t j j j  r\ | d k	 r\ t  | t t t t j	 f  rQ t |  } n  |  | Sn  |  S(   s9   
    Slice columns if grouped is pd.DataFrameGroupBy
    N(
   R   t   pdt   coret   groupbyt   DataFrameGroupByt   NoneR   R   R$   t   Index(   t   groupedR"   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _maybe_sliceV   s    c         ` sb   t  |  s t |  r+   j j | j  St | t t f  rZ t   f d   | D  St Sd S(   s+   Check if `df` and `by` have aligned indicesc         3` s   |  ] } t    |  Vq d  S(   N(   t   _is_aligned(   t   .0t   i(   R&   (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <genexpr>g   s    N(	   R   R   R   t   equalsR   R   R   t   allt   True(   R&   t   by(    (   R&   s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR0   b   s
    c         K` s   | j  d d  } | d k	 rC t |  |  rC d } t |   n1 | d k	 rt t |  rt | j d t |   n  |  j |   S(   s  Groupby, but raise if df and `by` key are unaligned.

    Pandas supports grouping by a column that doesn't align with the input
    frame/series/index. However, the reindexing this causes doesn't seem to be
    threadsafe, and can result in incorrect results. Since grouping by an
    unaligned key is generally a bad idea, we just error loudly in dask.

    For more information see pandas GH issue #15244 and Dask GH issue #1876.R6   s  Grouping by an unaligned index is unsafe and unsupported.
This can be caused by filtering only one of the object or
grouping key. For example, the following works in pandas,
but not in dask:

df[df.foo < 0].groupby(df.bar)

This can be avoided by either filtering beforehand, or
passing in the name of the column instead:

df2 = df[df.foo < 0]
df2.groupby(df2.bar)
# or
df[df.foo < 0].groupby('bar')

For more information see dask GH issue #1876.N(   t   getR,   R0   t
   ValueErrorR   t   updateR   R*   (   R&   t   kwargsR6   t   msg(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _groupby_raise_unalignedl   s    	c         O` sM   | j  d t  } |  j | d | } | r: | | } n  | j | | |  S(   Nt
   group_keys(   t   popR5   R*   t   apply(   R&   t   groupert   keyt   funct   argsR:   R=   t   g(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _groupby_slice_apply   s
    c         C` sr   t  |  d | } | | j k rG t |   r: | | } n  | j |  St |   r` |  | }  n  |  j d d !Sd  S(   NR6   i    (   R<   t   groupsR   t	   get_groupt   iloc(   R&   t   by_keyt   get_keyR"   R.   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _groupby_get_group   s    t   Aggregationc           B` s   e  Z d  Z d d  Z RS(   s]  User defined groupby-aggregation.

    This class allows users to define their own custom aggregation in terms of
    operations on Pandas dataframes in a map-reduce style. You need to specify
    what operation to do on each chunk of data, how to combine those chunks of
    data together, and then how to finalize the result.

    Parameters
    ----------
    name : str
        the name of the aggregation. It should be unique, since intermediate
        result will be identified by this name.
    chunk : callable
        a function that will be called with the grouped column of each
        partition. It can either return a single series or a tuple of series.
        The index has to be equal to the groups.
    agg : callable
        a function that will be called to aggregate the results of each chunk.
        Again the argument(s) will be grouped series. If ``chunk`` returned a
        tuple, ``agg`` will be called with all of them as individual positional
        arguments.
    finalize : callable
        an optional finalizer that will be called with the results from the
        aggregation.

    Examples
    --------
    We could implement ``sum`` as follows:

    >>> custom_sum = dd.Aggregation(
    ...     name='custom_sum',
    ...     chunk=lambda s: s.sum(),
    ...     agg=lambda s0: s0.sum()
    ... )  # doctest: +SKIP
    >>> df.groupby('g').agg(custom_sum)  # doctest: +SKIP

    We can implement ``mean`` as follows:

    >>> custom_mean = dd.Aggregation(
    ...     name='custom_mean',
    ...     chunk=lambda s: (s.count(), s.sum()),
    ...     agg=lambda count, sum: (count.sum(), sum.sum()),
    ...     finalize=lambda count, sum: sum / count,
    ... )  # doctest: +SKIP
    >>> df.groupby('g').agg(custom_mean)  # doctest: +SKIP

    Though of course, both of these are built-in and so you don't need to
    implement them yourself.
    c         C` s(   | |  _  | |  _ | |  _ | |  _ d  S(   N(   t   chunkt   aggt   finalizet   __name__(   t   selfR!   RM   RN   RO   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   __init__   s    			N(   RP   t
   __module__t   __doc__R,   RR   (    (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyRL      s   1c         C` s   | |  j  d | d t   S(   Nt   levelt   sort(   R*   t   False(   R&   t   aggfunct   levels(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _groupby_aggregate   s    c         O` s   | j  d  } | j  d  } t |  d | } t |   sH | d  k rR | |  St | t t t t j	 f  r t |  } n  | | |  Sd  S(   NRM   R"   R6   (
   R>   R<   R   R,   R   R   R   R$   R(   R-   (   R&   R   R:   RB   R"   RD   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _apply_chunk   s    
c         G` s   t  |   r |  j   }  n  |  j   }  |  j   j } t |  d | } | j   } | | j j   j d d    } |  | d |  | <t |  d | } | j   j d d    } | j	 | _	 t
 | | | g d d S(   NR6   R"   c         S` s   |  d S(   Ns   -count(    (   t   c(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   <lambda>  t    i   c         S` s   |  d S(   Ns   -x2(    (   R\   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR]   	  R^   t   axisi   (   R   t   to_framet   copyt   _get_numeric_dataR"   R<   t   sumt   countt   renameR   R   (   R&   R   t   colsRD   t   xt   nt   g2t   x2(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt
   _var_chunk   s    "c         C` s   |  j  d | d t  j   S(   NRU   RV   (   R*   RW   Rc   (   RD   RY   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _var_combine  s    c   	      C` s   |  j  d | d t  j   }  t |  j  } |  |  j | d  } |  |  j | d d | d !j d d    } |  |  j | d j d d    } | | d | } | | } d | | d k  <| | } t j | | | d k <t |  s t	  | S(	   NRU   RV   i   i   R"   c         S` s   |  d  S(   Ni(    (   R\   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR]     R^   c         S` s   |  d  S(   Ni(    (   R\   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR]     R^   i    (
   R*   RW   Rc   R   R"   Re   t   npt   nanR   t   AssertionError(	   RD   RY   t   ddoft   ncRg   Rj   Rh   t   resultt   div(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _var_agg  s    /%

c         O` s   | j  d  } | j  d  } t |  d | } t |   d k r | | g j t j  } t | t  r t j	 j
 g  | D] } | j j d |  ^ qz  | _ q | j j d |  | _ n2 | | g j   } | j |  j | j j    } | S(   NRY   R!   R6   i    RU   (   R>   R<   R   R?   R   t   drop_duplicatesR   R   R(   t
   MultiIndext   from_arraysR   t   get_level_valuest   nuniquet   astypet   dtypesR"   t   to_dict(   R&   R   R:   RY   R!   RD   R.   RU   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _nunique_df_chunk(  s    	1c         C` s,   d  g |  j j } |  j   j | d t S(   NRa   (   R,   R   t   nlevelsRu   t   rename_axisRW   (   R&   t   names(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _drop_duplicates_rename@  s    c         C` s   |  j  d | d t  j t  } t | t  rj t j j g  | D] } | j	 j
 d |  ^ q@  | _	 n | j	 j
 d |  | _	 | S(   NRU   RV   (   R*   RW   R?   R   R   R   R(   Rv   Rw   R   Rx   (   R&   RY   Rr   RU   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _nunique_df_combineH  s    	1c         C` s    |  j  d | d t  | j   S(   NRU   RV   (   R*   RW   Ry   (   R&   RY   R!   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _nunique_df_aggregateV  s    c         O` sP   t  |   s t  |  j   }  t d |  j d d t |   } t |  | |  S(   NR!   i    RY   (   R   Ro   R`   t   dictR"   R   R}   (   R&   R   t	   _ignored_R:   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _nunique_series_chunkZ  s    "c         C` s   d j  |  | t |  |   S(   Ns   {!s}-{!s}-{}(   t   formatR   (   RB   t   column(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _make_agg_idp  s    c         ` sZ  t  |  t  s3 t j t | t j |     }  n  g  } t  |  t  r x |  j   D]y \  } t  | t  r | j  f d   | j   D  qU t  | t	  s | g } n  | j  f d   | D  qU Wn t
 d j t |      t	 t t f   t   f d   |  j   D  } | rVg  | D] \ } } } | | | f ^ q/} n  | S(   s  
    Return a list of ``(result_column, func, input_column)`` tuples.

    Spec can be

    - a function
    - a list of functions
    - a dictionary that maps input-columns to functions
    - a dictionary that maps input-columns to a lists of functions
    - a dictionary that maps input-columns to a dictionaries that map
      output-columns to functions.

    The non-group columns are a list of all column names that are not used in
    the groupby operation.

    Usually, the result columns are mutli-level names, returned as tuples.
    If only a single function is supplied or dictionary mapping columns
    to single functions, simple names are returned as strings (see the first
    two examples below).

    Examples
    --------
    >>> _normalize_spec('mean', ['a', 'b', 'c'])
    [('a', 'mean', 'a'), ('b', 'mean', 'b'), ('c', 'mean', 'c')]

    >>> spec = collections.OrderedDict([('a', 'mean'), ('b', 'count')])
    >>> _normalize_spec(spec, ['a', 'b', 'c'])
    [('a', 'mean', 'a'), ('b', 'count', 'b')]

    >>> _normalize_spec(['var', 'mean'], ['a', 'b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    [(('a', 'var'), 'var', 'a'), (('a', 'mean'), 'mean', 'a'),      (('b', 'var'), 'var', 'b'), (('b', 'mean'), 'mean', 'b'),      (('c', 'var'), 'var', 'c'), (('c', 'mean'), 'mean', 'c')]

    >>> spec = collections.OrderedDict([('a', 'mean'), ('b', ['sum', 'count'])])
    >>> _normalize_spec(spec, ['a', 'b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    [(('a', 'mean'), 'mean', 'a'), (('b', 'sum'), 'sum', 'b'),       (('b', 'count'), 'count', 'b')]

    >>> spec = collections.OrderedDict()
    >>> spec['a'] = ['mean', 'size']
    >>> spec['b'] = collections.OrderedDict([('e', 'count'), ('f', 'var')])
    >>> _normalize_spec(spec, ['a', 'b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    [(('a', 'mean'), 'mean', 'a'), (('a', 'size'), 'size', 'a'),      (('b', 'e'), 'count', 'b'), (('b', 'f'), 'var', 'b')]
    c         3` s*   |  ]  \ } }   | f |   f Vq d  S(   N(    (   R1   t   result_columnRB   (   t   input_column(    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <genexpr>  s   c         3` s*   |  ]  }   t  |  f |   f Vq d  S(   N(   R   (   R1   RB   (   R   (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <genexpr>  s   s   unsupported agg spec of type {}c         3` s   |  ] } t  |    Vq d  S(   N(   R   (   R1   t   subspec(   t	   compounds(    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <genexpr>  s   (   R   R   t   collectionst   OrderedDictt   zipt   itt   repeatt   itemst   extendR   R8   R   t   typeR   t   anyt   values(   t   spect   non_group_columnst   resR   t   use_flat_columnst   _RB   t	   input_col(    (   R   R   s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _normalize_spect  s&    2$.c         C` s  i d t  j 6d t  j 6} i  } xT |  D]L \ } } } t | j | |   | f } | j | g   j | | f  q' Wx> | j   D]0 } t |  d k r t	 d j
 |    q q Wi  } i  }	 g  }
 x |  D] \ } } } t | t  s
t | j | |   } n  t | | |  } | j d   | d D  |	 j d   | d D  |
 j | d	  q Wt | j    } t |	 j    }	 | |	 |
 f S(
   su  
    Create transformation functions for a normalized aggregate spec.

    Parameters
    ----------
    spec: a list of (result-column, aggregation-function, input-column) triples.
        To work with all arugment forms understood by pandas use
        ``_normalize_spec`` to normalize the argment before passing it on to
        ``_build_agg_args``.

    Returns
    -------
    chunk_funcs: a list of (intermediate-column, function, keyword) triples
        that are applied on grouped chunks of the initial dataframe.

    agg_funcs: a list of (intermediate-column, functions, keword) triples that
        are applied on the grouped concatination of the preprocessed chunks.

    finalizers: a list of (result-column, function, keyword) triples that are
        applied after the ``agg_funcs``. They are used to create final results
        from intermediate representations.
    t   mint   maxi   s%   conflicting aggregation functions: {}c         s` s   |  ] } | d  | f Vq d S(   i    N(    (   R1   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <genexpr>  s    t   chunk_funcsc         s` s   |  ] } | d  | f Vq d S(   i    N(    (   R1   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <genexpr>  s    t   aggregate_funcst	   finalizer(   Rm   R   R   R   R7   t
   setdefaultt   appendR   R   R8   R   R   RL   t   _build_agg_args_singleR9   t   sorted(   R   t   known_np_funcst   by_nameR   RB   R   RA   t   funcst   chunkst   aggst
   finalizersR   t   impls(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _build_agg_args  s*    #c         C` sS  i t  j t  j f d 6t  j t  j f d 6t  j t  j f d 6t  j t  j f d 6t  j t  j f d 6t  j t  j f d 6t  j t  j f d 6t  j t  j f d 6} | | j	   k r t
 |  | | | |  S| d	 k r t |  | |  S| d
 k r t |  | |  S| d k rt |  | |  St | t  r:t |  | |  St d j |    d  S(   NRc   R   R   Rd   t   sizet   firstt   lastt   prodt   vart   stdt   means   unknown aggregate {}(   R   Rc   R   R   Rd   R   R   R   R   t   keyst   _build_agg_args_simplet   _build_agg_args_vart   _build_agg_args_stdt   _build_agg_args_meanR   RL   t   _build_agg_args_customR8   R   (   R   RB   R   t   simple_impl(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s*    c         C` s|   t  | |  } | \ } } t d | t t d | d |  f g d | t t d | d |  f g d |  t |  t   f  S(   NR   R   RB   R   R   (   R   R   t   _apply_func_to_columnR   (   R   RB   R   t	   impl_pairt   intermediatet
   chunk_implt   agg_impl(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c         C` s   t  d |  } t  d |  } t  d |  } t d | t t d | d t j  f | t t d | d t j  f | t t d |  f g d g  | | | f D]' } | t t d | d t j  f ^ q d |  t t d	 | d
 | d |  f  S(   NRc   t   sum2Rd   R   R   RB   R   R   t
   sum_columnt   count_columnt   sum2_column(   R   R   R   R   Rc   Rd   t   _compute_sum_of_squarest   _finalize_var(   R   RB   R   t   int_sumt   int_sum2t	   int_countR'   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR   +  s    :c         C` s<   t  |  | |  } | d \ }  } } |  t | f | d <| S(   NR   (   R   t   _finalize_std(   R   RB   R   R   R   R:   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR   C  s    c         C` s   t  d |  } t  d |  } t d | t t d | d t j  f | t t d | d t j  f g d g  | | f D]' } | t t d | d t j  f ^ qs d |  t t d | d	 |  f  S(
   NRc   Rd   R   R   RB   R   R   R   R   (   R   R   R   R   Rc   Rd   t   _finalize_mean(   R   RB   R   R   R   R'   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR   L  s    7c         C` s   t  t |  |  } | j d  k rB |  t j |  t   f } n! |  t t d | j d |  f } t d | t t d | j	 d |  f g d | t t d | j
 d |  f g d |  S(   NRB   t   prefixR   R   R   R   (   R   R   RO   R,   t   operatorR   R   t   _apply_func_to_columnsR   RM   RN   (   R   RB   R   R'   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR   `  s    $c         O` s  t  |  r% | j d t |   n  | j d  } t |  |  } t j   } xu | D]m \ } } } | | |  }	 t |	 t  r x= t	 |	  D]" \ }
 } | | d j
 | |
  <q WqV |	 | | <qV Wt |   r t |   |  St |  j d  j    |  Sd S(   s  
    Group a dataframe and apply multiple aggregation functions.

    Parameters
    ----------
    df: pandas.DataFrame
        The dataframe to work on.
    index: list of groupers
        If given, they are added to the keyword arguments as the ``by``
        argument.
    funcs: list of result-colum, function, keywordargument triples
        The list of functions that are applied on the grouped data frame.
        Has to be passed as a keyword argument.
    kwargs:
        All keyword arguments, but ``funcs``, are passed verbatim to the groupby
        operation of the dataframe

    Returns
    -------
    aggregated:
        the aggregated dataframe.
    R6   R   s   {}-{}i    N(   R   R9   R   R>   R<   R   R   R   R   t	   enumerateR   R   R   t   headR`   (   R&   R   R:   R   R.   Rr   R   RB   t   func_kwargst   rt   idxt   s(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _groupby_apply_funcsx  s    c         C` s,   | d  k	 r |  | n |  } | j d    S(   Nc         S` s   |  d j    S(   Ni   (   Rc   (   Rg   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR]     R^   (   R,   R?   (   R.   R   t   base(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c         C` sa   t  |  d | d | }  t j   } x* | D]" \ } } } | |  |  | | <q+ Wt |   |  S(   NR   RU   (   R   R   R   R   (   R&   R   t   finalize_funcsRU   Rr   R   RB   R:   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _agg_finalize  s
    c         C` s$   | d  k r | |   S| |  |  S(   N(   R,   (   t   df_likeR   RB   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    
c         ` sg   t  |   r |  j } n |  j j } t   f d   | D  } g  | D] } |  | ^ qG } | |   S(   Nc         3` s$   |  ] } | j     r | Vq d  S(   N(   t
   startswith(   R1   R'   (   R   (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <genexpr>  s    (   R   R"   t   _selected_objR   (   R   R   RB   R"   R'   (    (   R   s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c         C` s   |  | |  | S(   N(    (   R&   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c   
      C` so   |  | } |  | } |  | } | | d | } | | }	 d |	 |	 d k  <| |	 } t  j | | | d k <| S(   Ni   i    (   Rm   Rn   (
   R&   R   R   R   Rp   Rh   Rg   Rj   Rr   Rs   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    




c         C` s%   t  |  | | | |  } t j |  S(   N(   R   Rm   t   sqrt(   R&   R   R   R   Rp   Rr   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c         C` s>   | j  |  j |  j d | } |  j | _ | |  | |  S(   Nt
   fill_value(   t   reindext	   set_indexR   (   t   partt   cum_lastR   R"   RB   t   initialt   align(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _cum_agg_aligned  s    !c         C` sF   |  j  j | j   } | |  j | d | | j | d | d | S(   NR   (   R   t   unionR   (   t   at   bRB   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _cum_agg_filled  s    c         C` s   |  j  | d | d S(   NR   i   (   t   add(   R   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _cumcount_aggregate  s    t   _GroupByc           B` s  e  Z d  Z d d e d  Z e d    Z d d d d  Z d   Z	 e
 e j j j  d d   Z e
 e j j j  d d   Z e
 e j j j  d d	   Z e
 e j j j  d d d d
   Z e
 e j j j  d d d d   Z e
 e j j j  d d d   Z e
 e j j j  d d d   Z e
 e j j j  d d d   Z e
 e j j j  d d d   Z e
 e j j j  d d d   Z e
 e j j j  d d d d   Z e
 e j j j  d d d d   Z e
 e j j j  d d d   Z e
 e j j j  d d d   Z e
 e j j j  d    Z d d  Z e d d  d    Z  RS(   s_   Superclass for DataFrameGroupBy and SeriesGroupBy

    Parameters
    ----------

    obj: DataFrame or Series
        DataFrame or Series to be grouped
    by: str, list or Series
        The key for grouping
    slice: str, list
        The slice keys applied to GroupBy result
    group_keys: bool
        Passed to pandas.DataFrame.groupby()
    c         ` sV  t    t t f  s t  | |  _   |  _ t   |  |  _ t  |  j t  rs t	   f d   |  j D  } n0 t  |  j t  r   j
 |  j j
 k } n t } | s t d   n  | |  _ t  |  j t  r
g  |  j D]$ } t  | t  r | j n | ^ q } n* t  |  j t  r+|  j j } n	 |  j } |  j j j | d | |  _ d  S(   Nc         3` s6   |  ], } t  | t  r* | j   j k n t Vq d  S(   N(   R   R   t   npartitionsR5   (   R1   t   item(   R&   (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <genexpr>  s   sI   The grouped object and index of the groupby must have the same divisions.R=   (   R   R   R   Ro   R=   t   objR    R   R   R4   R   R5   t   NotImplementedErrort   _slicet   _metaR*   (   RQ   R&   R6   t   sliceR=   t   do_index_partition_alignR   t
   index_meta(    (   R&   s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyRR     s(    			7	c         C` s   |  j  j } t |  j t  rU g  |  j D]$ } t | t  rF | j n | ^ q( } n* t |  j t  rv |  j j } n	 |  j } | j | d |  j } t | |  j	  S(   s]   
        Return a pd.DataFrameGroupBy / pd.SeriesGroupBy which contains sample data.
        R=   (
   R   t   _meta_nonemptyR   R   R   R   R*   R=   R/   R   (   RQ   t   sampleR   R   R.   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR   )  s    7	i   c   	      C` s   | d  k r | } n  | |  j  } t |  r9 | j n | j } |  j | } t |  j  } t t	 |  j t
  s |  j |  j g n |  j g |  j d t d t d | d |  d t d | d | d | d t d	 | d
 |  d | d t 	S(   NRM   t   chunk_kwargsR"   t	   aggregatet   metat   tokent   split_everyt   aggregate_kwargsRX   RY   t	   split_outt   split_out_setup(   R,   R   R   R!   R"   t   _token_prefixR   R   R   R   R   R   R[   R   RZ   R	   (	   RQ   R   RB   RX   R   R   R   R"   RY   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _aca_agg<  s    	:c         ` s  |   j   } t |  r$ | j n | j } t   j t  rH   j n	   j g }   j | } | d }	 | d }
 | d } t t	   j
 d | d | d |	 d | | } t |  r | j   n | } | j   f d   | D   } g  | D] } t j d	 |  ^ q } t t	 | d | d k r,d
 n | d t j d | d |
 | } t   | | | |  } | d | 7} | d | 7} i  } | j d
 f | | d
 f <x t d   j
 j  D] } | d k r| j | d f | | | f <n6 t | | d f | j | d f | | f | | | f <t | j | f | | f | | d k rKd
 n | | | f | | | f <qWt j | | d | | | g } t | | |   j     j
 j  S(   s*    Wrapper for cumulative groupby operation s   -maps
   -take-lasts	   -cum-lastRM   R"   R   R   c         ` sM   i  |  ]C } t  j |  r: |   j j k r:   j | n	   j j |  q S(    (   Rm   t   isscalarR   R"   R   (   R1   R2   (   RQ   (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys
   <dictcomp>f  s   	RA   i    t   -i   t   dependenciesN(   R   R   R!   R"   R   R   R   R  R   R[   R   R`   t   assignR(   t   GrouperR,   R   R   R   R#   R   R   R   R   R   t   from_collectionsR   t	   divisions(   RQ   R   RM   R   R   R   R"   R   R!   t	   name_partt	   name_lastt   name_cumt   cumpart_rawt   cumpart_raw_framet   cumpart_extt   indt   index_grouperst   cumlastt   _hasht   daskR2   t   graph(    (   RQ   s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   _cum_aggO  sR    '


%	 !i    c         C` sB   | r |  j  j d |  S|  j d d t j d t j d d Sd  S(   NR_   t   cumsumRM   R   R   i    (   R   R  R  R   R   (   RQ   R_   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR    s    		c         C` sB   | r |  j  j d |  S|  j d d t j d t j d d Sd  S(   NR_   t   cumprodRM   R   R   i   (   R   R  R  R   t   mul(   RQ   R_   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR    s    		c         C` s"   |  j  d d t j d t d d S(   Nt   cumcountRM   R   R   i(   R  R   R  R   (   RQ   R_   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR    s    	c      	   C` sW   |  j  d d d t j d | d |  } | rO | j |  j   | k d t j S| Sd  S(   NR   Rc   RB   R   R   t   other(   R  R   Rc   t   whereRd   Rm   t   NaN(   RQ   R   R   t	   min_countRr   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyRc     s    	
c      	   C` sW   |  j  d d d t j d | d |  } | rO | j |  j   | k d t j S| Sd  S(   NR   R   RB   R   R   R  (   R  R   R   R  Rd   Rm   R  (   RQ   R   R   R  Rr   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    	
c      	   C` s%   |  j  d d d t j d | d |  S(   NR   R   RB   R   R   (   R  R   R   (   RQ   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c      	   C` s%   |  j  d d d t j d | d |  S(   NR   R   RB   R   R   (   R  R   R   (   RQ   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c         C` s.   |  j  d d d t j d t j d | d |  S(   NR   Rd   RB   RX   R   R   (   R  R   Rd   Rc   (   RQ   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyRd     s    c         C` sT   |  j  d | d |  } |  j d | d |  } t |  rL | | j } n  | | S(   NR   R   (   Rc   Rd   R   R"   (   RQ   R   R   R   R\   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s
    c         C` s.   |  j  d d d t j d t j d | d |  S(   NR   R   RB   RX   R   R   (   R  R   R   Rc   (   RQ   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    !c         C` s   t  |  j  } t t |  j t  s6 |  j |  j g n |  j g |  j d t d t d t d |  j	 d d i | d 6| d 6d	 i | d 6d
 | d | d t
 	} t |  j t  r | | j d } n  |  j r | |  j } n  | S(   NRM   R   t   combineR   R   R   Rp   RY   t   combine_kwargsR   R   R   i    (   R   R   R   R   R   R   Rk   Rt   Rl   R  R	   R   R"   R   (   RQ   Rp   R   R   RY   Rr   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    :		c         C` s7   |  j  | d | d | } t t j | d | } | S(   NR   R   R   (   R   R   Rm   R   (   RQ   Rp   R   R   t   vRr   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c      	   C` s%   |  j  d d d t j d | d |  S(   NR   R   RB   R   R   (   R  R   R   (   RQ   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c      	   C` s%   |  j  d d d t j d | d |  S(   NR   R   RB   R   R   (   R  R   R   (   RQ   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    c      
   C` s   |  j  d } |  j j } t |  rD |  j d  k	 rD | |  j } n  t |  rY | j n | j } t t	 |  j |  j
 | | d | d | S(   NRG   R   R   (   R  R   R   R   R   R,   R"   R!   R   RK   R   (   RQ   RA   R   R   R"   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyRG     s    c         C` s  t  |  j t  r t  |  j t  s6 t j |  j  rE |  j h } n1 t  |  j t  rm d   |  j D } n	 t   } |  j	 r |  j	 } t  | t  s | g } q n+ g  |  j j
 D] } | | k r | ^ q } t | |  } n t  |  j t  rt  | t t t f  rWt i | d  6g   } g  | D]$ \ \ } }	 }
 } |	 |
 | f ^ q*} qt i | d  6g   } g  | D]$ \ } }
 } |  j j |
 | f ^ qt} n t d j |  j    t |  \ } } } t  |  j t t f  rt |  j  d k rt t t |  j    } n d } t  |  j t  sF|  j |  j g } n |  j g |  j } t | d t d t d |  d t d	 t d | d
 |  d t d t d | d | d
 |  d d d | d | d t 
S(   Nc         S` s4   h  |  ]* } t  | t  s* t j |  r |  q S(    (   R   R   Rm   R  (   R1   R2   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <setcomp>  s   	 s   aggregate on unknown object {}i   i    RM   R   R   R  R   RU   R   R   R   R   R   R   R   R   (   R   R   R   R   R   Rm   R  R   R$   R   R"   R   R   R   R,   R!   R8   R   R   R   R   R   R   R   R	   (   RQ   t   argR   R   t   group_columnsR   R'   R   R   R   RB   R   R   R   R   RY   t
   chunk_args(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR   	  sR    $			11-		t   padi   c         ` sK  | j  d t  } | t k r t d j t |   d t ; t | | f d t \ } }   j j | | |  } Wd QXd } t	 j
 | d d n  t |  } t   j t  r t d	     j D  r t d
   n    j } | j o | j   j  }	 |	 rt   j t  r3| j d  } t }
 n t }
 t   j t  r|| j   f d     j j D   }   j } nK t   j t  r| j d   j  }   j } n | } | j   j  } t | |  } t   j t  rWg    j j D] } d | ^ q} | | } t |  r?| j t | | j j  } q| j  | d d } n| t   j t  r| d }   j j! | _! t |  r| j t d | j j  } q| j  d d d } n | }   j } |
 r| d j"   j j!  } qn | }   j } | | d <t t# | |   j$ | d t |  d   j% | | } | S(   s   Parallel version of pandas GroupBy.apply

        This mimics the pandas version except for the following:

        1.  The user should provide output metadata.
        2.  If the grouper does not align with the index then this causes a full
            shuffle.  The order of rows within each group may not be preserved.

        Parameters
        ----------
        func: function
            Function to apply
        args, kwargs : Scalar, Delayed or object
            Arguments and keywords to pass to the function.
        $META

        Returns
        -------
        applied : Series or DataFrame depending on columns keyword
        R   s   groupby.apply({0})t   udft   nonemptyNs  `meta` is not specified, inferred from partial data. Please provide `meta` if the result is unexpected.
  Before: .apply(func)
  After:  .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result
  or:     .apply(func, meta=('x', 'f8'))            for series resultt
   stackleveli   c         s` s   |  ] } t  | t  Vq d  S(   N(   R   R   (   R1   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys	   <genexpr>w  s    s?   groupby-apply with a multiple Series is currently not supportedt
   __series__c         ` s$   i  |  ] }   j  | d  |  q S(   t   _index_(   R   (   R1   R\   (   RQ   (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pys
   <dictcomp>  s   	t   _indexR*  R_   i   R   R=   (&   R7   R   R   R   R   R5   R
   R   R?   t   warningst   warnR   R   R   R   R   R   R   t   known_divisionst   _contains_index_nameR   R`   RW   R   R  R"   t   _select_columns_or_indexR   R   R   R   t   dtypet   dropR!   Re   RE   R   R=   (   RQ   RB   RC   R:   R   t	   meta_argst   meta_kwargsR;   R&   t   should_shufflet   convert_back_to_seriest   df2R   t   df3R\   Rf   t   index2t   df4t   df5(    (   RQ   s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR?   O  sj    "			#

		
!	N(!   RP   RS   RT   R,   R5   RR   t   propertyR   R  R  R   R(   R)   R*   t   GroupByR  R  R  Rc   R   R   R   Rd   R   R   R   R   R   R   RG   R   R   R?   (    (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     sJ   $	?				FR+   c           B` sq   e  Z d  Z d   Z d   Z d   Z e e j j	 j
  d d d   Z e e j j	 j
  d d d   Z RS(   s   dataframe-groupby-c         C` sb   t  | t  r0 t |  j d |  j d | } n t |  j d |  j d | } | j | | _ | S(   NR6   R   (   R   R   R+   R   R   t   SeriesGroupByR   (   RQ   RA   RD   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   __getitem__  s
    !c         C` sH   t  t t t |    t |  j  t t t j j	 |  j
 j     S(   N(   R   R$   t   dirR   R   t   __dict__t   filterR(   t   compatt   isidentifierR   R"   (   RQ   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   __dir__  s    "c         C` s2   y |  | SWn t  k
 r- } t |   n Xd  S(   N(   t   KeyErrort   AttributeError(   RQ   RA   t   e(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   __getattr__  s    i   c         C` s8   | d k r |  j    St t |   j | d | d | S(   NR   R   R   (   R   t   superR+   R   (   RQ   R"  R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR     s    
c         C` s   |  j  | d | d | S(   NR   R   (   R   (   RQ   R"  R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyRN     s    N(   RP   RS   R  R?  RE  RI  R   R(   R)   R*   R+   R,   R   RN   (    (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR+     s   	
		R>  c           B` st   e  Z d  Z d d d  Z d d d  Z e e j j	 j
  d d d   Z e e j j	 j
  d d d   Z RS(   s   series-groupby-c         K` s   t  | t  r t  | t  r! q t  | t  r t |  d k rQ t d   n  g  | D] } t  | t  sX | ^ qX } | j j |  q | j j |  n  t t |   j	 | d | d | | d  S(   Ni    s   No group keys passed!R6   R   (
   R   R   R   R   R8   R   R*   RJ  R>  RR   (   RQ   R&   R6   R   R:   R   t   non_series_items(    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyRR     s    i   c         C` s   |  j  j j } t |  j  } t |  j t  r9 t } n t } t	 t |  j t
  sf |  j |  j g n |  j g |  j d | d t d t d d d i | d 6| d 6d	 i | d 6| d 6d
 i | d 6d | d | d t 
S(   NRM   R   R  R   s   series-groupby-nuniqueR   RY   R!   R   R   R   R   R   (   R   R   R!   R   R   R   R   R}   R   R   R   R   R   R	   (   RQ   R   R   R!   RY   RM   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyRy     s    	:c         C` sz   t  t |   j | d | d | } |  j r= | |  j } n  t | t t f  rv t | t  rv | | j d } n  | S(   NR   R   i    (	   RJ  R>  R   R   R   R   R   R   R"   (   RQ   R"  R   R   Rr   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR   	  s    $	%c         C` s   |  j  | d | d | S(   NR   R   (   R   (   RQ   R"  R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyRN     s    N(   RP   RS   R  R,   RR   Ry   R   R(   R)   R*   R>  R   RN   (    (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyR>    s   
(S   t
   __future__R    R   R   R   t	   itertoolsR   R   R,  t   numpyRm   t   pandasR(   R)   R   R   R   R   R   R   R	   R
   t   methodsR   R   R   t   utilsR   R   R   R   R   R   R   R   R   R   R   t   highlevelgraphR   R   R    R/   R0   R<   RE   RK   t   objectRL   R,   RZ   R[   Rk   Rl   Rt   R}   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R+   R>  (    (    (    s5   lib/python2.7/site-packages/dask/dataframe/groupby.pyt   <module>   sn   :("#					
	$	
	9											Q	9	 							2							 $