
\c           @` s<  d  d l  m Z m Z m Z d  d l Z d  d l Z d  d l m Z m Z d  d l	 m
 Z
 m Z d  d l m Z d  d l m Z d  d l Z d  d l Z d  d l m Z m Z d  d l m Z m Z m Z m Z m Z y d  d	 l m Z Wn e k
 re  Z n Xd
 d l! m" Z# d
 d l! m$ Z$ d
 d l% m& Z& m' Z' m( Z( d
 d l! m) Z) d
 d l* m+ Z+ m, Z, m- Z- m. Z. m/ Z/ m0 Z0 d
 d l1 m2 Z2 d
 d l% m3 Z3 m4 Z4 m5 Z5 m6 Z6 m7 Z7 m8 Z8 m9 Z9 m: Z: m; Z; m< Z< m= Z= m> Z> d
 d l? m@ Z@ mA ZA d
 d lB mB ZB mC ZC d
 d lD mE ZE mF ZF mG ZG mH ZH d
 d lI mI ZI mJ ZJ mK ZK d
 d lL mM ZM d d l! mN ZN d d lO mP ZP mQ ZQ d d lR mS ZS mT ZT d d lU mU ZU d d l% mV ZV mW ZW mX ZX mY ZY mZ ZZ m[ Z[ m\ Z\ m] Z] m^ Z^ m_ Z_ m` Z` ma Za mb Zb d Zc e jd d ee  d   Zf d    Zg d! eE e; f d"     YZh ee d#  Zi d$ eE e; f d%     YZj d&   Zk d' ej f d(     YZl d) el f d*     YZm d+ ej f d,     YZn x e jo e jp e jq e, e jr e js e jt e ju e jv e jw e jx e jy e jz e j{ e j| e j} e j~ e j e j e j g D]  Z ej j e  eh j e  q+Wx d- d. d/ d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 d: d; d< g D]J Z e e jn e  Z en j e e  e e jl e  Z el j e e  qWxd d= d> d? d@ dA dB g D]J Z e e jn e  Z en j e e  e e jl e  Z el j e e  qWdC   Z dD   Z dE   Z dF   Z e e dG  Z dH   Z dI   Z e dJ  Z eX e e e ec e e e e e e e e dK   Z e Z ee dL  Z dM   Z eX dN    Z dO   Z dP   Z dQ   Z dR dS  Z e ee ee ee dT  Z ee dU  Z ee dV  Z d
 ee ee dW  Z e dX  Z e dY  Z dZ   Z ee d[  Z e d\  Z d]   Z e ee d^  Z e d_  Z e d`  Z e da  Z e e db  Z e e dc  Z e e dd  Z e e ee de  Z df   Z dg   Z e e j  dh    Z e e j  di dj dk   Z e e dl  rue e j  dm    Z n  dn   Z e' do  Z e j e jl  dp    Z e j e jn  dq    Z e j e jm  dr    Z e j e  ds    Z e j ej  dt    Z du   Z dv   Z dw   Z dx   Z dy   Z d S(z   i    (   t   absolute_importt   divisiont   print_functionN(   t   wrapst   partial(   t   Numbert   Integral(   t   getitem(   t   pformat(   t   cache_readonlyt   hash_pandas_object(   t   merget   firstt   uniquet   partition_allt   remove(   t   Chesti   (   t   array(   t   core(   t   partial_by_ordert   Dispatcht   IndexCallable(   t   threaded(   t   applyt   operator_divt   bind_methodt   string_typest   Iteratort   Sequence(   t   globalmethod(   t   random_state_datat   pseudorandomt   derived_fromt   funcnamet   memory_reprt	   put_linest   Mt	   key_splitt   OperatorMethodMixint   is_arrayliket   typenamet   skip_doctest(   t   Arrayt   normalize_arg(   t	   blockwiset	   Blockwise(   t   DaskMethodsMixint   tokenizet   dont_optimizet   is_dask_collection(   t   delayedt   Delayedt   unpack_collections(   t   HighLevelGraphi   (   t   methods(   t   DatetimeAccessort   StringAccessor(   t   CategoricalAccessort
   categorize(   t   optimize(   t   meta_nonemptyt	   make_metat   insert_meta_param_descriptiont   raise_on_meta_errort   clear_known_categoriest   is_categorical_dtypet   has_known_categoriest   PANDAS_VERSIONt   index_summaryt   is_dataframe_liket   is_series_liket   is_index_liket   valid_divisionst   __no_default__s   compute.use_numexprc         C` s   |  s
 |  St  t t j |    t j  r; t j j |   St |  d  st y t	 j
 |   SWqt t k
 rp |  SXn  g  |  D] } t |  r{ | ^ q{ } | s |  d St j | d t S(   Ni    t   uniform(   t
   isinstanceR   R   t   flattent   npt   ndarrayt   dat   concatenate3t   has_parallel_typet   pdt   Seriest	   Exceptiont   lenR6   t   concatt   True(   t   argst   it   args2(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _concat5   s    !%c         C` s
   t  |   S(   N(   R[   (   t   results(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   finalizeG   s    t   Scalarc           B` s  e  Z d  Z d d  Z d   Z d   Z d   Z d   Z e	 e
 d d d e Z e e j  Z d	   Z d
   Z e d    Z e d    Z d   Z e d    Z d   Z d   Z e d    Z d   Z d   Z e d    Z e d    Z e e  d   Z! e" d  Z# RS(   s+    A Dask object to represent a pandas scalarc         C` s   t  | t  s* t j | | d g  } n  | |  _ | |  _ t |  } t |  sl t |  sl t |  r t	 d j
 t t |      n  | |  _ d  S(   Nt   dependenciess(   Expected meta to specify scalar, got {0}(   RK   R5   t   from_collectionst   daskt   _nameR=   RE   RF   RG   t	   TypeErrort   formatR(   t   typet   _meta(   t   selft   dskt   namet   metat	   divisions(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __init__M   s    		$	c         C` s   |  j  S(   N(   Ra   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __dask_graph__Z   s    c         C` s
   |  j  g S(   N(   t   key(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __dask_keys__]   s    c         C` s   |  j  S(   N(   Rb   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __dask_tokenize__`   s    c         C` s
   |  j  f S(   N(   Rn   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __dask_layers__c   s    Rn   t   dataframe_optimizet   falseyc         C` s
   t  d f S(   N(    (   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __dask_postcompute__j   s    c         C` s   t  |  j |  j |  j f f S(   N(   R^   Rb   Rf   Rk   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __dask_postpersist__m   s    c         C` s   |  j  S(   N(   Rf   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _meta_nonemptyp   s    c         C` s
   |  j  j S(   N(   Rf   t   dtype(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRw   t   s    c         C` sT   t  t t |     } | j |  j  t |  j d  sJ | j d  n  t |  S(   NRw   (	   t   sett   dirRe   t   updatet   __dict__t   hasattrRf   R   t   list(   Rg   t   o(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __dir__x   s
    c         C` s
   d d g S(   s6   Dummy divisions to be compat with Series and DataFrameN(   t   None(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRk      s    c         C` sx   t  |  j  d k  r |  j n |  j d  d } t |  j d  rT d |  j j } n d t |  j  j } d | | f S(   Ni
   i   s   ...Rw   s
   , dtype=%ss	   , type=%ss   dd.Scalar<%s%s>(   RU   Rb   R|   Rf   Rw   Re   t   __name__(   Rg   Ri   t   extra(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __repr__   s
    /c         C` s   t  j |  j    S(   N(   RM   t   asarrayt   compute(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt	   __array__   s    c         C` s   |  j  |  j |  j f S(   N(   Ra   Rb   Rf   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _args   s    c         C` s   |  j  S(   N(   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __getstate__   s    c         C` s   | \ |  _  |  _ |  _ d  S(   N(   Ra   Rb   Rf   (   Rg   t   state(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __setstate__   s    c         C` s   |  j  d f S(   Ni    (   Rb   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRn      s    c         ` s     f d   } | S(   Nc         ` sv   t     d t |   } i   |  j d f f | d f 6}   |  j  } t j | | d |  g } t | | |  S(   Nt   -i    R_   (   R!   R/   Rb   Rv   R5   R`   R^   (   Rg   Ri   Rh   Rj   t   graph(   t   op(    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   f   s
    "(    (   t   clsR   R   (    (   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _get_unary_operator   s    c         ` s      f d   S(   Nc         ` s   t   |  | d   S(   Nt   inv(   t   _scalar_binary(   Rg   t   other(   R   R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   <lambda>   t    (    (   R   R   R   (    (   R   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _get_binary_operator   s    c         C` sb   |  j    } | rR |  j | |  j    } d |  j } t j | | d d } n  t |  j |  S(   s   Convert into a ``dask.delayed`` object.

        Parameters
        ----------
        optimize_graph : bool, optional
            If True [default], the graph is optimized before converting into
            ``dask.delayed`` objects.
        s   delayed-R_   (    (   Rm   t   __dask_optimize__Ro   Rb   R5   R`   R3   Rn   (   Rg   t   optimize_graphRh   Ri   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt
   to_delayed   s    	N($   R   t
   __module__t   __doc__R   Rl   Rm   Ro   Rp   Rq   R   R;   R0   R   t   staticmethodR   t   gett   __dask_scheduler__Rt   Ru   t   propertyRv   Rw   R   Rk   R   R   R   R   R   Rn   t   classmethodR   t   FalseR   RW   R   (    (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR^   K   s2   												
c         C` s  d j  t |   t | |   } | g } i  } t |  } t | t  rj | j |  | j d f } n t |  rz t	 S| } | r | j
 i |  | | j d f f | d f 6 n, | j
 i |  | j d f | f | d f 6 t |  }	 t |	  }
 | r|  |
 | j  } n |  | j |
  } t j | | d | } | t k	 ru| | | | | j j   | j j   g  St | | |  Sd  S(   Ns   {0}-{1}i    R_   (   Rd   R!   R/   t   get_parallel_typeRK   R^   t   appendRb   R1   t   NotImplementedRz   R=   R<   Rv   R5   R`   t   indext   mint   max(   R   Rg   R   R   Ri   R_   Rh   t   return_typet	   other_keyt
   other_metat   other_meta_nonemptyRj   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR      s.    !	/,t   _Framec        
   B` s  e  Z d  Z d   Z d   Z d   Z d   Z d   Z e e	 d d d e
 Z e e j  Z d	   Z d
   Z e d    Z e d    Z e d    Z e d    Z e d    Z d   Z d   Z d   Z ds d  Z ds d  Z d   Z e d    Z d   Z  e d    Z! d   Z" e d    Z# e# j$ d    Z# e% d  Z& e d    Z' d   Z( d   Z) e* e+ j,  ds d  d!   Z- d"   Z. d#   Z/ e/ Z0 d$   Z1 d%   Z2 d&   Z3 e3 Z4 d'   Z5 e6 d( d)  d*    Z7 e6 d( d)  d+    Z8 e6 d( d)  ds ds e9 ds ds ds ds ds d,   Z: e* e+ j,  d-    Z; ds d.  Z< d/ d  e= d0  Z> d/ e= d1  Z? e d2    Z@ d3   ZA e d4    ZB ds ds ds e% d5  ZC e* e+ j,  ds ds ds ds d6   ZD e* e+ j,  ds ds d7   ZE e* e+ j,  ds ds d8   ZF ds ds e% ds d9  ZG e* e+ j,  ds ds e% d:   ZH ds d;  ZI d< e% d=  ZJ d>   ZK d?   ZL e= d@  ZM eN dA    ZO eN e% dB   ZP ds ds e% ds dC dD  ZQ e* e+ j,  d  dC dE   ZR e* e+ j,  d  ds dC dF   ZS ds e= e% ds dG  ZT e* e+ j,  dH    ZU e* e+ j,  ds e= e% ds dI   ZV e* e+ j,  ds e= e% ds dJ   ZW e* e+ j,  ds e= e% ds ds ds dK   ZX e* e+ j,  ds e= e% ds ds ds dL   ZY e* e+ j,  ds e= e% ds dM   ZZ e* e+ j,  ds e= e% ds dN   Z[ e* e+ j,  ds e= e% dO   Z\ e* e+ j,  ds e= e% dP   Z] e* e+ j,  ds e% dQ   Z^ e* e+ j,  ds e= e% ds ds dR   Z_ e* e+ j,  ds e= d  e% ds ds dS   Z` e* e+ j,  ds e= d  e% ds ds dT   Za e* e+ j,  ds ds d  e% dU   Zb dV dC dW dX  Zc e* e+ j,  e% ds dW dY   Zd e= ds ds dZ  Ze e* e+ j,  ds e= ds ds d[   Zf e* e+ j,  ds e= ds ds d\   Zg e* e+ j,  ds e= ds d]   Zh e* e+ j,  ds e= ds d^   Zi e* e+ j,  ej jk d_   Zl e* e+ j,  ej jk d`   Zm e* e+ j,  da    Zn e* e+ j,  db    Zo e* e+ j,  dc    Zp e* e+ j,  dd    Zq e* e+ j,  de    Zr e* e+ js  e% df   Zt e* e+ j,  dg ds ds dh   Zu e* e+ j,  ds e= di   Zv e* e+ j,  dj    Zw eN dk    Zx e* e+ j,  ds ds dl   Zy e* e+ j,  dm    Zz e* e+ j,  dn    Z{ ds do  Z| e dp    Z} dq   Z~ dr   Z RS(t   s   Superclass for DataFrame and Series

    Parameters
    ----------
    dsk: dict
        The dask graph to compute this DataFrame
    name: str
        The key prefix that specifies which keys in the dask comprise this
        particular DataFrame / Series
    meta: pandas.DataFrame, pandas.Series, or pandas.Index
        An empty pandas object with names, dtypes, and indices matching the
        expected output.
    divisions: tuple of index values
        Values along which we partition our blocks on the index
    c         C` s   t  | t  s* t j | | d g  } n  | |  _ | |  _ t |  } |  j |  s t d j t	 |   j
 t t	 |      n  | |  _ t |  |  _ d  S(   NR_   s/   Expected meta to specify type {0}, got type {1}(   RK   R5   R`   Ra   Rb   R=   t   _is_partition_typeRc   Rd   Re   R   R(   Rf   t   tupleRk   (   Rg   Rh   Ri   Rj   Rk   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRl      s    				c         C` s   |  j  S(   N(   Ra   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRm      s    c         C` s)   g  t  |  j  D] } |  j | f ^ q S(   N(   t   ranget   npartitionsRb   (   Rg   RY   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRo     s    c         C` s
   |  j  f S(   N(   Rb   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRq     s    c         C` s   |  j  S(   N(   Rb   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRp     s    Rn   Rr   Rs   c         C` s
   t  d f S(   N(    (   R]   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRt     s    c         C` s"   t  |   |  j |  j |  j f f S(   N(   Re   Rb   Rf   Rk   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRu     s    c         C` s   t  S(   N(   t   new_dd_object(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _constructor  s    c         C` s   t  |  j  d S(   s   Return number of partitionsi   (   RU   Rk   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c      	   C` s(   |  j  t j t j d d d t d t S(   s   Size of the Series or DataFrame as a Delayed object.

        Examples
        --------
        >>> series.size  # doctest: +SKIP
        dd.Scalar<size-ag..., dtype=int64>
        t   tokent   sizeRj   t   split_every(   t	   reductionR6   R   RM   t   sumt   intR   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    	!c         C` s   t  |  j  S(   s/    A non-empty version of `_meta` with fake data.(   R<   Rf   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRv   )  s    c         C` s   |  j  |  j |  j |  j f S(   N(   Ra   Rb   Rf   Rk   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   .  s    c         C` s   |  j  S(   N(   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   2  s    c         C` s"   | \ |  _  |  _ |  _ |  _ d  S(   N(   Ra   Rb   Rf   Rk   (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   5  s    c         C` s   t  |  j |  j |  j |  j  S(   s    Make a copy of the dataframe

        This is strictly a shallow copy of the underlying computational graph.
        It does not affect the underlying data
        (   R   Ra   Rb   Rf   Rk   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   copy8  s    c         K` s%   |  j    |  _ t j |  j  } | S(   N(   R   t	   _computedRM   R   (   Rg   Rw   t   kwargst   x(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   A  s    c         C` s
   t   d  S(   N(   t   NotImplementedError(   Rg   R   t   context(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __array_wrap__F  s    c      
   O` s   | j  d d  } xj | | D]^ } t | t j  rJ | j d k rJ q q t | t t t t t	 j
 t	 j t	 j f  s t Sq W| d k r | j d  k	 r t S| j d k r t St | | |  Sn t Sd  S(   Nt   outt   __call__i   (    (    (   R   RK   RM   RN   t   shapeR   R^   R   R*   RR   t	   DataFrameRS   t   IndexR   t	   signatureR   t   noutt   elemwise(   Rg   t   numpy_ufunct   methodt   inputsR   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __array_ufunc__I  s    !c         C` s   t  S(   N(   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt	   _elemwisea  s    c         C` s
   t   d  S(   N(   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt
   _repr_datae  s    c         C` s]   d j  |  j  } |  j r6 t j |  j d | } n# t j d g |  j d d | } | S(   Ns   npartitions={0}Ri   R   i   (   Rd   R   t   known_divisionsRR   R   Rk   (   Rg   Ri   Rk   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _repr_divisionsh  s
    	#c      
   C` sX   |  j    j d d d t  } d j d |  j j d | d t |  j  d t |  j	   S(	   Nt   max_rowsi   t   show_dimensionss>   Dask {klass} Structure:
{data}
Dask Name: {name}, {task} taskst   klasst   dataRi   t   task(
   R   t	   to_stringR   Rd   t	   __class__R   R%   Rb   RU   Ra   (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   r  s    c         C` s)   |  j  t d d |  j d d |  j j S(   s   Return dask Index instanceR   R   s   -indexRj   (   t   map_partitionst   getattrRb   Rf   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   z  s    c         C` sI   | j  |  _  t t j |  |  } | j |  _ | j |  _ | j |  _ d  S(   N(   Rk   R   R6   t   assign_indexRa   Rb   Rf   (   Rg   t   valuet   result(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s
    c         C` s   |  j  t j d | j   S(   sy  Reset the index to the default index.

        Note that unlike in ``pandas``, the reset ``dask.dataframe`` index will
        not be monotonically increasing from 0. Instead, it will restart at 0
        for each partition (e.g. ``index1 = [0, ..., 10], index2 = [0, ...]``).
        This is due to the inability to statically know the full length of the
        index.

        For DataFrame with multi-level index, returns a new DataFrame with
        labeling information in the columns under the index names, defaulting
        to 'level_0', 'level_1', etc. if any are None. For a standard index,
        the index name will be used (if set), otherwise a default 'index' or
        'level_0' (if 'index' is already taken) will be used.

        Parameters
        ----------
        drop : boolean, default False
            Do not try to insert index into dataframe columns.
        t   drop(   R   R$   t   reset_indext   clear_divisions(   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         C` s&   t  |  j  d k o% |  j d d k	 S(   s#   Whether divisions are already knowni    N(   RU   Rk   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         C` s3   d |  j d } t |   |  j |  j |  j |  S(   s    Forget division information i   N(   N(   R   R   Re   Ra   Rb   Rf   (   Rg   Rk   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         C` s   d | k o |  j  k  n r d t |  |  j f } |  j | | d !} i |  j | f | d f 6} t j | | d |  g } t | | |  j |  Sd j |  j   } t	 |   d S(   s=   Get a dask DataFrame/Series representing the `nth` partition.i    s   get-partition-%s-%si   R_   s   n must be 0 <= n < {0}N(
   R   t   strRb   Rk   R5   R`   R   Rf   Rd   t
   ValueError(   Rg   t   nRi   Rk   t   layerR   t   msg(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   get_partition  s    i   c         K` s   |  j  j |   d | k rF | d d  k	 rF t } i | d d 6} n
 d  } } | j d t  t k rw t d   n  t j } t	 |  d | d | d |  j
 d d	 d
 | d | d | d | | S(   Nt   subsett   colst   keeps   drop_duplicates with keep=Falset   chunkt	   aggregateRj   R   s   drop-duplicatesR   t	   split_outt   split_out_setupt   split_out_setup_kwargs(   Rv   t   drop_duplicatesR   t   split_out_on_colsR   RW   R   R   R$   t   acaRf   (   Rg   R   R   R   R   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    
	c      	   C` s+   |  j  t t j d d d t d t j   S(   NR   RU   Rj   R   (   R   RU   RM   R   R   R   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __len__  s    c         C` s   t  d j |  j j    d  S(   Ns>   The truth value of a {0} is ambiguous. Use a.any() or a.all().(   R   Rd   R   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __bool__  s    	c         ` s     f d   } | S(   Nc           ` s   t  d j t       d  S(   Ns    cannot convert the series to {0}(   Rc   Rd   R   (    (   t	   cast_type(    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   wrapper  s    	(    (   Rg   R   R   (    (   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _scalarfunc  s    c         C` s   |  j  t  S(   N(   R   t   float(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt	   __float__  s    c         C` s   |  j  t  S(   N(   R   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __int__  s    c         C` s   |  j  t  S(   N(   R   t   complex(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __complex__  s    t   padi   c         O` s   t  | |  | |  S(   s
   Apply Python function on each DataFrame partition.

        Note that the index and divisions are assumed to remain unchanged.

        Parameters
        ----------
        func : function
            Function applied to each partition.
        args, kwargs :
            Arguments and keywords to pass to the function. The partition will
            be the first argument, and these will be passed *after*. Arguments
            and keywords may contain ``Scalar``, ``Delayed`` or regular
            python objects. DataFrame-like args (both dask and pandas) will be
            repartitioned to align (if necessary) before applying the function.
        $META

        Examples
        --------
        Given a DataFrame, Series, or Index, such as:

        >>> import dask.dataframe as dd
        >>> df = pd.DataFrame({'x': [1, 2, 3, 4, 5],
        ...                    'y': [1., 2., 3., 4., 5.]})
        >>> ddf = dd.from_pandas(df, npartitions=2)

        One can use ``map_partitions`` to apply a function on each partition.
        Extra arguments and keywords can optionally be provided, and will be
        passed to the function after the partition.

        Here we apply a function with arguments and keywords to a DataFrame,
        resulting in a Series:

        >>> def myadd(df, a, b=1):
        ...     return df.x + df.y + a + b
        >>> res = ddf.map_partitions(myadd, 1, b=2)
        >>> res.dtype
        dtype('float64')

        By default, dask tries to infer the output metadata by running your
        provided function on some fake data. This works well in many cases, but
        can sometimes be expensive, or even fail. To avoid this, you can
        manually specify the output metadata with the ``meta`` keyword. This
        can be specified in many forms, for more information see
        ``dask.dataframe.utils.make_meta``.

        Here we specify the output is a Series with no name, and dtype
        ``float64``:

        >>> res = ddf.map_partitions(myadd, 1, b=2, meta=(None, 'f8'))

        Here we map a function that takes in a DataFrame, and returns a
        DataFrame with a new column:

        >>> res = ddf.map_partitions(lambda df: df.assign(z=df.x * df.y))
        >>> res.dtypes
        x      int64
        y    float64
        z    float64
        dtype: object

        As before, the output metadata can also be specified manually. This
        time we pass in a ``dict``, as the output is a DataFrame:

        >>> res = ddf.map_partitions(lambda df: df.assign(z=df.x * df.y),
        ...                          meta={'x': 'i8', 'y': 'f8', 'z': 'f8'})

        In the case where the metadata doesn't change, you can also pass in
        the object itself directly:

        >>> res = ddf.map_partitions(lambda df: df.head(), meta=df)

        Also note that the index and divisions are assumed to remain unchanged.
        If the function you're mapping changes the index/divisions, you'll need
        to clear them afterwards:

        >>> ddf.map_partitions(func).clear_divisions()  # doctest: +SKIP
        (   R   (   Rg   t   funcRX   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    Oc         O` s)   d d l  m } | | |  | | | |  S(   s  Apply a function to each partition, sharing rows with adjacent partitions.

        This can be useful for implementing windowing functions such as
        ``df.rolling(...).mean()`` or ``df.diff()``.

        Parameters
        ----------
        func : function
            Function applied to each partition.
        before : int
            The number of rows to prepend to partition ``i`` from the end of
            partition ``i - 1``.
        after : int
            The number of rows to append to partition ``i`` from the beginning
            of partition ``i + 1``.
        args, kwargs :
            Arguments and keywords to pass to the function. The partition will
            be the first argument, and these will be passed *after*.
        $META

        Notes
        -----
        Given positive integers ``before`` and ``after``, and a function
        ``func``, ``map_overlap`` does the following:

        1. Prepend ``before`` rows to each partition ``i`` from the end of
           partition ``i - 1``. The first partition has no rows prepended.

        2. Append ``after`` rows to each partition ``i`` from the beginning of
           partition ``i + 1``. The last partition has no rows appended.

        3. Apply ``func`` to each partition, passing in any extra ``args`` and
           ``kwargs`` if provided.

        4. Trim ``before`` rows from the beginning of all but the first
           partition.

        5. Trim ``after`` rows from the end of all but the last partition.

        Note that the index and divisions are assumed to remain unchanged.

        Examples
        --------
        Given a DataFrame, Series, or Index, such as:

        >>> import dask.dataframe as dd
        >>> df = pd.DataFrame({'x': [1, 2, 4, 7, 11],
        ...                    'y': [1., 2., 3., 4., 5.]})
        >>> ddf = dd.from_pandas(df, npartitions=2)

        A rolling sum with a trailing moving window of size 2 can be computed by
        overlapping 2 rows before each partition, and then mapping calls to
        ``df.rolling(2).sum()``:

        >>> ddf.compute()
            x    y
        0   1  1.0
        1   2  2.0
        2   4  3.0
        3   7  4.0
        4  11  5.0
        >>> ddf.map_overlap(lambda df: df.rolling(2).sum(), 2, 0).compute()
              x    y
        0   NaN  NaN
        1   3.0  3.0
        2   6.0  5.0
        3  11.0  7.0
        4  18.0  9.0

        The pandas ``diff`` method computes a discrete difference shifted by a
        number of periods (can be positive or negative). This can be
        implemented by mapping calls to ``df.diff`` to each partition after
        prepending/appending that many rows, depending on sign:

        >>> def diff(df, periods=1):
        ...     before, after = (periods, 0) if periods > 0 else (0, -periods)
        ...     return df.map_overlap(lambda df, periods=1: df.diff(periods),
        ...                           periods, 0, periods=periods)
        >>> diff(ddf, 1).compute()
             x    y
        0  NaN  NaN
        1  1.0  1.0
        2  2.0  1.0
        3  3.0  1.0
        4  4.0  1.0

        If you have a ``DatetimeIndex``, you can use a ``pd.Timedelta`` for time-
        based windows.

        >>> ts = pd.Series(range(10), index=pd.date_range('2017', periods=10))
        >>> dts = dd.from_pandas(ts, npartitions=2)
        >>> dts.map_overlap(lambda df: df.rolling('2D').sum(),
        ...                 pd.Timedelta('2D'), 0).compute()
        2017-01-01     0.0
        2017-01-02     1.0
        2017-01-03     3.0
        2017-01-04     5.0
        2017-01-05     7.0
        2017-01-06     9.0
        2017-01-07    11.0
        2017-01-08    13.0
        2017-01-09    15.0
        2017-01-10    17.0
        dtype: float64
        i   (   t   map_overlap(   t   rollingR   (   Rg   R   t   beforet   afterRX   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   5  s    kc
         K` s   | d k r | } n  | d k rE |	 r6 t d   n  | } | }	 n  | rW | j   n i  } | | d <|	 ry |	 j   n i  }	 | |	 d <| r | j   n i  } | | d <t |  d t d t d t d | d	 | d
 | d | d | d |	 |
 	S(   sF  Generic row-wise reductions.

        Parameters
        ----------
        chunk : callable
            Function to operate on each partition. Should return a
            ``pandas.DataFrame``, ``pandas.Series``, or a scalar.
        aggregate : callable, optional
            Function to operate on the concatenated result of ``chunk``. If not
            specified, defaults to ``chunk``. Used to do the final aggregation
            in a tree reduction.

            The input to ``aggregate`` depends on the output of ``chunk``.
            If the output of ``chunk`` is a:

            - scalar: Input is a Series, with one row per partition.
            - Series: Input is a DataFrame, with one row per partition. Columns
              are the rows in the output series.
            - DataFrame: Input is a DataFrame, with one row per partition.
              Columns are the columns in the output dataframes.

            Should return a ``pandas.DataFrame``, ``pandas.Series``, or a
            scalar.
        combine : callable, optional
            Function to operate on intermediate concatenated results of
            ``chunk`` in a tree-reduction. If not provided, defaults to
            ``aggregate``. The input/output requirements should match that of
            ``aggregate`` described above.
        $META
        token : str, optional
            The name to use for the output keys.
        split_every : int, optional
            Group partitions into groups of this size while performing a
            tree-reduction. If set to False, no tree-reduction will be used,
            and all intermediates will be concatenated and passed to
            ``aggregate``. Default is 8.
        chunk_kwargs : dict, optional
            Keyword arguments to pass on to ``chunk`` only.
        aggregate_kwargs : dict, optional
            Keyword arguments to pass on to ``aggregate`` only.
        combine_kwargs : dict, optional
            Keyword arguments to pass on to ``combine`` only.
        kwargs :
            All remaining keywords will be passed to ``chunk``, ``combine``,
            and ``aggregate``.

        Examples
        --------
        >>> import pandas as pd
        >>> import dask.dataframe as dd
        >>> df = pd.DataFrame({'x': range(50), 'y': range(50, 100)})
        >>> ddf = dd.from_pandas(df, npartitions=4)

        Count the number of rows in a DataFrame. To do this, count the number
        of rows in each partition, then sum the results:

        >>> res = ddf.reduction(lambda x: x.count(),
        ...                     aggregate=lambda x: x.sum())
        >>> res.compute()
        x    50
        y    50
        dtype: int64

        Count the number of rows in a Series with elements greater than or
        equal to a value (provided via a keyword).

        >>> def count_greater(x, value=0):
        ...     return (x >= value).sum()
        >>> res = ddf.x.reduction(count_greater, aggregate=lambda x: x.sum(),
        ...                       chunk_kwargs={'value': 25})
        >>> res.compute()
        25

        Aggregate both the sum and count of a Series at the same time:

        >>> def sum_and_count(x):
        ...     return pd.Series({'count': x.count(), 'sum': x.sum()},
        ...                      index=['count', 'sum'])
        >>> res = ddf.x.reduction(sum_and_count, aggregate=lambda x: x.sum())
        >>> res.compute()
        count      50
        sum      1225
        dtype: int64

        Doing the same, but for a DataFrame. Here ``chunk`` returns a
        DataFrame, meaning the input to ``aggregate`` is a DataFrame with an
        index with non-unique entries for both 'x' and 'y'. We groupby the
        index, and sum each group to get the final result.

        >>> def sum_and_count(x):
        ...     return pd.DataFrame({'count': x.count(), 'sum': x.sum()},
        ...                         columns=['count', 'sum'])
        >>> res = ddf.reduction(sum_and_count,
        ...                     aggregate=lambda x: x.groupby(level=0).sum())
        >>> res.compute()
           count   sum
        x     50  1225
        y     50  3725
        s+   `combine_kwargs` provided with no `combine`t	   aca_chunkt   aca_combinet   aca_aggregateR   R   t   combineRj   R   R   t   chunk_kwargst   aggregate_kwargst   combine_kwargsN(   R   R   R   R   t   _reduction_chunkt   _reduction_aggregatet   _reduction_combine(   Rg   R   R   R  Rj   R   R   R  R  R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s$    g		


c         O` se   t  | t  rQ | \ } } | | k r: t d |   n  |  | | <| | |   S| |  | |  Sd  S(   Ns1   %s is both the pipe target and a keyword argument(   RK   R   R   (   Rg   R   RX   R   t   target(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   pipe"  s    
c   
      ` s%  t  j t    d  s' t d   n  t  j |  } t    |  } d |      f d   t |  D } g  } x t t	     D]  d  | f     f d   t  j  D } t
 j  t | |  d  g } t   |   j  j  }	 | j |	  q W| S(   s   Pseudorandomly split dataframe into different pieces row-wise

        Parameters
        ----------
        frac : list
            List of floats that should sum to one.
        random_state: int or np.random.RandomState
            If int create a new RandomState with this as the seed
        Otherwise draw from the passed RandomState

        Examples
        --------

        50/50 split

        >>> a, b = df.random_split([0.5, 0.5])  # doctest: +SKIP

        80/10/10 split, consistent random_state

        >>> a, b, c = df.random_split([0.8, 0.1, 0.1], random_state=123)  # doctest: +SKIP

        See Also
        --------
        dask.DataFrame.sample
        i   s   frac should sum to 1s   split-c         ` s:   i  |  ]0 \ } } t   j | f   | f  | f  q S(    (   t   pd_splitRb   (   t   .0RY   R   (   t   fracRi   Rg   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>O  s   	s   split-%d-%sc         ` s.   i  |  ]$ } t   | f   f  | f  q S(    (   R   (   R  t   j(   RY   Ri   t   name2(    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>U  s   	R_   (   RM   t   allcloseR   R   R   R   R/   t	   enumerateR   RU   R5   R`   R   Re   Rf   Rk   R   (
   Rg   R  t   random_statet
   state_dataR   R   R   t   dsk2R   t   out_df(    (   R  RY   Ri   R  Rg   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   random_split0  s     
$!i   c         C` s  | d k r |  j  } n  | |  j  k rK d } t | j |  j  |    n  d | | |  j f } | d k r	d | |  j f } i  } x9 t |  D]+ } t j |  j | f | f | | | f <q Wt g  t |  D] } | | f ^ q f }	 t |	 | f | | d f <n% i t |  j d f | f | d f 6} t	 j
 | | d |  g }
 t |
 | |  j |  j d |  j | g  } | r| j   } n  | S(   s1   First n rows of the dataset

        Parameters
        ----------
        n : int, optional
            The number of rows to return. Default is 5.
        npartitions : int, optional
            Elements are only taken from the first ``npartitions``, with a
            default of 1. If there are fewer than ``n`` rows in the first
            ``npartitions`` a warning will be raised and any found rows
            returned. Pass -1 to use all partitions.
        compute : bool, optional
            Whether to compute the result, default is True.
        is$   only {} partitions, head received {}s   head-%d-%d-%si   s   head-partial-%d-%si    R_   (   R   R   Rd   Rb   R   R$   t   headR[   t	   safe_headR5   R`   R   Rf   Rk   R   (   Rg   R   R   R   R   Ri   t   name_pRh   RY   RV   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  \  s(    )+%c         C` s   d | |  j  f } i t j |  j  |  j d f | f | d f 6} t j | | d |  g } t | | |  j |  j d  } | r | j	   } n  | S(   sl    Last n rows of the dataset

        Caveat, the only checks the last n rows of the last partition.
        s
   tail-%d-%si   i    R_   i(
   Rb   R$   t   tailR   R5   R`   R   Rf   Rk   R   (   Rg   R   R   Ri   Rh   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    /c         C` s   d d l  m } | |   S(   s    Purely label-location based indexer for selection by label.

        >>> df.loc["b"]  # doctest: +SKIP
        >>> df.loc["b":"d"]  # doctest: +SKIP
        i   (   t   _LocIndexer(   t   indexingR  (   Rg   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   loc  s    c   	      ` s  t  | t  s | f } n  d d l m } | | |  j f  } t d   | D  } d t |  |    t j |  j   d t	 | j
   } g  | D] \ } } |  j | ^ q |  j | d d d g }   f d   t |  D } t j   | d	 |  g } t |   |  j |  S(
   Ni   (   t   normalize_indexc         s` s7   |  ]- } t  | t  r+ t | | d   n | Vq d S(   i   N(   RK   R   t   slice(   R  t   k(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>  s   s   blocks-Rw   ii   c         ` s+   i  |  ]! \ } } t  |    | f  q S(    (   R   (   R  RY   Rn   (   Ri   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	 R_   (   RK   R   t   array.slicingR  R   R/   RM   R   Ro   t   objectt   tolistRk   R  R5   R`   R   Rf   (	   Rg   R   R  t   new_keyst   _RY   Rk   Rh   R   (    (   Ri   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _partitions  s    	%@c         C` s   t  |  j  S(   sG   Slice dataframe by partitions

        This allows partitionwise slicing of a Dask Dataframe.  You can perform normal
        Numpy-style slicing but now rather than slice elements of the array you
        slice along partitions so, for example, ``df.partitions[:5]`` produces a new
        Dask Dataframe of the first five partitions.

        Examples
        --------
        >>> df.partitions[0]  # doctest: +SKIP
        >>> df.partitions[:3]  # doctest: +SKIP
        >>> df.partitions[::10]  # doctest: +SKIP

        Returns
        -------
        A Dask DataFrame
        (   R   R'  (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt
   partitions  s    c         C` s   | d k	 r( | d k	 r( t j d  n  | d k	 rA t |  |  S| d k	 r` t |  | d | S| d k	 r| t |  d | St d   d S(   s   Repartition dataframe along new divisions

        Parameters
        ----------
        divisions : list, optional
            List of partitions to be used. If specified npartitions will be
            ignored.
        npartitions : int, optional
            Number of partitions of output. Only used if divisions isn't
            specified.
        freq : str, pd.Timedelta
            A period on which to partition timeseries data like ``'7D'`` or
            ``'12h'`` or ``pd.Timedelta(hours=12)``.  Assumes a datetime index.
        force : bool, default False
            Allows the expansion of the existing divisions.
            If False then the new divisions lower and upper bounds must be
            the same as the old divisions.

        Examples
        --------
        >>> df = df.repartition(npartitions=10)  # doctest: +SKIP
        >>> df = df.repartition(divisions=[0, 5, 10, 20])  # doctest: +SKIP
        >>> df = df.repartition(freq='7d')  # doctest: +SKIP
        sV   When providing both npartitions and divisions to repartition only npartitions is used.t   forcet   freqs8   Provide either divisions= or npartitions= to repartitionN(   R   t   warningst   warnt   repartition_npartitionst   repartitiont   repartition_freqR   (   Rg   Rk   R   R*  R)  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR.    s    c         ` s    j  |  }   d  k r6 | d  k	 r6 t d   n  t | t  rX | j j d } n | }  j j d | d   d | d |  } | d k s   d  k r	t |  r t	 |  r d } i | d 6} n | f } i  }  j
 t j d   d | d | d | | | S  d k rCd
   d  | d  k r3d n | d }	 }
 n2 d    j d  d | d  k rkd n | }	 }
 | d  k rd t           f d   t  j  D } t j  | d  g } t |  |  j  } n  } | j t j |	 |
 d   d | d | S(   Ns%   fillna with set limit and method=Nonei    R   R   t   limitt   axisi   Rj   R   t   ffillt   bfills   fillna-chunk-c         ` s=   i  |  ]3 } t  j  j | f   |  k f  | f  q S(    (   R6   t   fillna_checkRb   (   R  RY   (   R   Ri   Rg   t
   skip_check(    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	R_   (    (   R   R2  (   t   _validate_axisR   R   RK   R   Rv   t   valuest   fillnaRF   R1   R   R$   R   R/   R   R5   R`   R   Rk   R   (   Rg   R   R   R0  R1  t
   test_valueRj   RX   R   R   R   Rh   R   t   parts(    (   R   Ri   Rg   R5  s2   lib/python2.7/site-packages/dask/dataframe/core.pyR8    sB    	"c         C` s   |  j  d d d | d |  S(   NR   R2  R0  R1  (   R8  (   Rg   R1  R0  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR2    s    c         C` s   |  j  d d d | d |  S(   NR   R3  R0  R1  (   R8  (   Rg   R1  R0  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR3  "  s    c   	      ` s  | d k	 rb d } t | t  rS d | k o8 d k n rS t j |  |   qb t |   n    d k r} t d   n  | d k r t j j   } n  d t	     |   t
  j |  }      f d   t |  D } t j  | d  g } t |   j  j  S(	   s   Random sample of items

        Parameters
        ----------
        n : int, optional
            Number of items to return is not supported by dask. Use frac
            instead.
        frac : float, optional
            Fraction of axis items to return.
        replace : boolean, optional
            Sample with or without replacement. Default = False.
        random_state : int or ``np.random.RandomState``
            If int we create a new RandomState with this as the seed
            Otherwise we draw from the passed RandomState

        See Also
        --------
        DataFrame.random_split
        pandas.DataFrame.sample
        sl   sample does not support the number of sampled items parameter, 'n'. Please use the 'frac' parameter instead.i    i   s   frac must not be Nones   sample-c         ` s@   i  |  ]6 \ } } t  j  j | f |    f  | f  q S(    (   R6   t   sampleRb   (   R  RY   R   (   R  Ri   t   replaceRg   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>M  s   	R_   N(   R   RK   R   R+  R,  R   RM   t   randomt   RandomStateR/   R   R   R  R5   R`   R   Rf   Rk   (	   Rg   R   R  R<  R  R   R  Rh   R   (    (   R  Ri   R<  Rg   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR;  &  s     +	c         C` s"   |  j  t j d | d | d | S(   Nt
   to_replaceR   t   regex(   R   R$   R<  (   Rg   R?  R   R@  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR<  S  s    c         C` s  d d l  m } | t k r: t |  j t  j    } n  |  j t j  } t	 | t
  r t |  } t |  |  j k r t d j t |  |  j    n  |  j d k r | | f  } n | | t |  j  f f  } | | _ n$ | d k	 rt d j |    n  | S(   s  Convert a dask DataFrame to a dask array.

        Parameters
        ----------
        lengths : bool or Sequence of ints, optional
            How to determine the chunks sizes for the output array.
            By default, the output array will have unknown chunk lengths
            along the first axis, which can cause some later operations
            to fail.

            * True : immediately compute the length of each partition
            * Sequence : a sequence of integers to use for the chunk sizes
              on the first axis. These values are *not* validated for
              correctness, beyond ensuring that the number of items
              matches the number of partitions.

        Returns
        -------
        i    (   t   normalize_chunkssR   The number of items in 'lengths' does not match the number of partitions. {} != {}i   s$   Unexpected value for 'lengths': '{}'N(   t   dask.array.coreRA  RW   R   R   RU   R   RM   R   RK   R   R   R   Rd   t   ndimt   columnst   _chunksR   (   Rg   t   lengthsRA  t   arrt   chunks(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   to_dask_arrayX  s"    t   ac         K` s)   d d l  m } | |  | | | | |  S(   s.    See dd.to_hdf docstring for more information i   (   t   to_hdf(   t   ioRK  (   Rg   t   path_or_bufRn   t   modeR   R   RK  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRK    s    c         K` s    d d l  m } | |  | |  S(   s.    See dd.to_csv docstring for more information i   (   t   to_csv(   RL  RO  (   Rg   t   filenameR   RO  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRO    s    c         O` s#   d d l  m } | |  | | |  S(   s/    See dd.to_json docstring for more information i   (   t   to_json(   RL  RQ  (   Rg   RP  RX   R   RQ  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRQ    s    c         C` s~   |  j    } |  j   } | r^ |  j | |  j     } d |  j } t j | | d d } n  g  | D] } t | |  ^ qe S(   s  Convert into a list of ``dask.delayed`` objects, one per partition.

        Parameters
        ----------
        optimize_graph : bool, optional
            If True [default], the graph is optimized before converting into
            ``dask.delayed`` objects.

        Examples
        --------
        >>> partitions = df.to_delayed()  # doctest: +SKIP

        See Also
        --------
        dask.dataframe.from_delayed
        s   delayed-R_   (    (   Ro   Rm   R   Rb   R5   R`   R3   (   Rg   R   t   keysR   Ri   R!  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         ` s     f d   S(   Nc         ` s   t    |   S(   N(   R   (   Rg   (   R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     R   (    (   R   R   (    (   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         ` s$   | r   f d   S  f d   Sd  S(   Nc         ` s   t    | |   S(   N(   R   (   Rg   R   (   R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     R   c         ` s   t    |  |  S(   N(   R   (   Rg   R   (   R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     R   (    (   R   R   R   (    (   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    i    c         C` s   d d l  m } t | t  r= | d k  r= t d   q= n  | d k	 r t | t  sg t d   n  | d k  r t d   q n  | |  d | d | d | d	 | d
 | d | S(   si  Provides rolling transformations.

        Parameters
        ----------
        window : int, str, offset
           Size of the moving window. This is the number of observations used
           for calculating the statistic. The window size must not be so large
           as to span more than one adjacent partition. If using an offset
           or offset alias like '5D', the data must have a ``DatetimeIndex``

           .. versionchanged:: 0.15.0

              Now accepts offsets and string offset aliases

        min_periods : int, default None
            Minimum number of observations in window required to have a value
            (otherwise result is NA).
        center : boolean, default False
            Set the labels at the center of the window.
        win_type : string, default None
            Provide a window type. The recognized window types are identical
            to pandas.
        axis : int, default 0

        Returns
        -------
        a Rolling object on which to call a method to compute a statistic

        Notes
        -----
        The `freq` argument is not supported.
        i    (   t   Rollings   window must be >= 0s   min_periods must be an integers   min_periods must be >= 0t   windowt   min_periodsR*  t   centert   win_typeR1  N(   t   dask.dataframe.rollingRS  RK   R   R   R   (   Rg   RT  RU  R*  RV  RW  R1  RS  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    "c         C` s   |  j  |  } t | t  s- t d   n  | d k r[ |  j t j d d d | d d S| d k rs | d f n
 d | f \ } } |  j t j | | d d d | S(   Ns   periods must be an integeri   R   t   difft   periodsR1  i    (   R6  RK   R   Rc   R   R$   RY  R   (   Rg   RZ  R1  R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRY    s    +c         C` s  |  j  |  } t | t  s- t d   n  | d k ra |  j t j d d d | d | d d S| d  k r | d k r | d f n
 d | f \ } } |  j t j | | d d d | S|  j	 j | d | } |  j t j d d d | d | d	 | d
 t
 } t | | d | S(   Ns   periods must be an integeri   R   t   shiftRZ  R*  R1  i    Rj   t   transform_divisions(   R6  RK   R   Rc   R   R$   R[  R   R   Rv   R   t   maybe_shift_divisions(   Rg   RZ  R*  R1  R   R   Rj   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR[    s    +	c   
      C` s   |  j  |  } t |  j |  d | d |  } |  j | } t t |  } | d k r |  j | d | d | d | d | }	 t | |	  S|  j | d | d | d | d | d | }	 t |  t	  r t
 |  j  t |  j  f |	 _ n  t | |	  Sd  S(   NR1  t   skipnai   Rj   R   R   (   R6  R   Rv   t   _token_prefixR$   R   t
   handle_outR   RK   R   R   RD  R   Rk   (
   Rg   Ri   R1  R^  R   R   Rj   R   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _reduction_agg  s    !	$c         C` s2   t  |  d  |  j j   } |  j t j d | S(   Nt   absRj   (   t   _raise_if_object_seriesRv   Rb  R   R$   (   Rg   Rj   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRb  #  s    c      
   C` s%   |  j  d d | d | d | d | S(   Nt   allR1  R^  R   R   (   Ra  (   Rg   R1  R^  R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRd  )  s    c      
   C` s%   |  j  d d | d | d | d | S(   Nt   anyR1  R^  R   R   (   Ra  (   Rg   R1  R^  R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRe  .  s    c      
   C` sc   |  j  d d | d | d | d | } | r[ | j |  j   j d |  | k d t j S| Sd  S(   NR   R1  R^  R   R   R   (   Ra  t   wheret   notnullR   RM   t   NaN(   Rg   R1  R^  R   Rw   R   t	   min_countR   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   3  s    $
c      
   C` sc   |  j  d d | d | d | d | } | r[ | j |  j   j d |  | k d t j S| Sd  S(   Nt   prodR1  R^  R   R   R   (   Ra  Rf  Rg  R   RM   Rh  (   Rg   R1  R^  R   Rw   R   Ri  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRj  >  s    $
c      
   C` s%   |  j  d d | d | d | d | S(   NR   R1  R^  R   R   (   Ra  (   Rg   R1  R^  R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   I  s    c      
   C` s%   |  j  d d | d | d | d | S(   NR   R1  R^  R   R   (   Ra  (   Rg   R1  R^  R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   N  s    c         C` s  d } |  j  |  } |  j j d | d |  } | d k rk t t j |  d | d |  j | d | d | St |  } t |  g d t d t	 d	 t
 d | d
 i | d 6d |  j | d | d | d | 	} t |  t  r t |  j  t |  j  f | _ n  | Sd  S(   Nt   idxmaxR1  R^  i   Rj   R   R   R   R  R  t   scalarR   t   fn(   R6  Rv   Rk  R   R$   R_  RF   R   t   idxmaxmin_chunkt   idxmaxmin_aggt   idxmaxmin_combineRK   R   R   RD  R   Rk   (   Rg   R1  R^  R   Rm  Rj   Rl  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRk  S  s     $c         C` s   d } |  j  |  } |  j j d |  } | d k re t t j |  d | d |  j | d | d | St |  } t |  g d t	 d t
 d	 t d | d
 i | d 6d |  j | d | d | d | 	} t |  t  r t |  j  t |  j  f | _ n  | Sd  S(   Nt   idxminR1  i   Rj   R   R^  R   R   R  R  Rl  R   Rm  (   R6  Rv   Rk  R   R$   Rq  R_  RF   R   Rn  Ro  Rp  RK   R   R   RD  R   Rk   (   Rg   R1  R^  R   Rm  Rj   Rl  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRq  g  s     $c      
   C` s   |  j  |  } |  j d } | d k r_ |  j j d |  } |  j t j d | d | d | S|  j j   } |  j t j d t j d | d | d | } t |  t	  r t
 |  j  t |  j  f | _ n  | Sd  S(   Nt   counti   R1  Rj   R   R   R   (   R6  R_  Rv   Rr  R   R$   R   R   RK   R   R   RD  R   Rk   (   Rg   R1  R   R   Rj   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRr  {  s    $c         C` s9  |  j  |  } t |  d  |  j j d | d |  } | d k r t t j |  d | d |  j d d | d | } t | |  S|  j   } | j	 d | d |  }	 | j
 d |  }
 |  j d t |  | |  } t t j |	 |
 d | d | } t |  t  r(t |  j  t |  j  f | _ n  t | |  Sd  S(	   Nt   meanR1  R^  i   Rj   R   R   s   mean-%s(   R6  Rc  Rv   Rs  R   R$   R_  R`  t   _get_numeric_dataR   Rr  R/   R6   t   mean_aggregateRK   R   R   RD  R   Rk   (   Rg   R1  R^  R   Rw   R   Rj   R   t   numt   sR   Ri   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRs    s"    $c         C` s\  |  j  |  } t |  d  |  j j d | d |  } | d k r t t j |  d | d |  j d d | d | d | } t | |  S|  j   }	 d |	 j	 d | d	 |  }
 d |	 d
 j	 d | d	 |  } |	 j
 d	 |  } |  j d } t t j | |
 | d | d | d | } t |  t  rKt |  j  t |  j  f | _ n  t | |  Sd  S(   Nt   varR1  R^  i   Rj   R   t   ddofg      ?R   i   (   R6  Rc  Rv   Rx  R   R$   R_  R`  Rt  R   Rr  R6   t   var_aggregateRK   R   R   RD  R   Rk   (   Rg   R1  R^  Ry  R   Rw   R   Rj   R   Rv  R   t   x2R   Ri   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRx    s$     $c         C` s   |  j  |  } t |  d  |  j j d | d |  } | d k r t t j |  d | d |  j d d | d | d | } t | |  S|  j d | d | d |  }	 |  j d }
 t t	 j
 |	 d | d |
 } t | |  Sd  S(	   Nt   stdR1  R^  i   Rj   R   Ry  R   (   R6  Rc  Rv   R|  R   R$   R_  R`  Rx  RM   t   sqrt(   Rg   R1  R^  Ry  R   Rw   R   Rj   R   t   vRi   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR|    s    c         C` s$  |  j  |  } t |  d  |  j j d | d | d |  } | d k r~ t t j |  d | d |  j d d | d | d | S|  j   } | j d | d | d |  } | j	 d |  } |  j d }	 t t
 j | | d | d |	 }
 t |  t  rt |  j  t |  j  f |
 _ n  |
 Sd  S(	   Nt   semR1  R^  Ry  i   Rj   R   R   (   R6  Rc  Rv   R  R   R$   R_  Rt  Rx  Rr  RM   R}  RK   R   R   RD  R   Rk   (   Rg   R1  R^  Ry  R   Rj   Rv  R~  R   Ri   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    !"$g      ?t   defaultc      
   ` s   j  |  } d t   |  } | d k rw t  t  rO t d   n  t t j   | d | d  d f St  d   j	 j  d | }  j
   } t     f d	   | j D  } g  | D] } | j d
 f ^ q }	 t | d
 t  rvi t j |	 | j d | j f | d
 f 6}
 t j | |
 d | } t | j  t | j  f } t | | | |  Si t j |	 d f | d
 f 6}
 t j | |
 d | } t | | | | d
 j  Sd S(   s   Approximate row-wise and precise column-wise quantiles of DataFrame

        Parameters
        ----------
        q : list/array of floats, default 0.5 (50%)
            Iterable of numbers ranging from 0 to 1 for the desired quantiles
        axis : {0, 1, 'index', 'columns'} (default 0)
            0 or 'index' for row-wise, 1 or 'columns' for column-wise
        method : {'default', 'tdigest', 'dask'}, optional
            What method to use. By default will use dask's internal custom
            algorithm (``'dask'``).  If set to ``'tdigest'`` will use tdigest
            for floats and ints and fallback to the ``'dask'`` otherwise.
        s   quantiles-concat--i   s+   'q' must be scalar when axis=1 is specifiedR   Rj   t   f8t   quantileR1  c         3` s%   |  ] } t   |     Vq d  S(   N(   R  (   R  t   c(   R   t   qRg   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>  s    i    R_   N(   R6  R/   RK   R}   R   R   R$   R  Rc  Rf   Rt  R   RD  Rb   R^   RR   RS   R   Ri   R5   R`   R   R   R6   RV   R   Rk   (   Rg   R  R1  R   t   keynameRj   Rv  t	   quantilest   _qt   qnamesR   R   Rk   (    (   R   R  Rg   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s(    %"+c         C` s  |  j    } |  j d k r? t | j  d k r? t d   n- |  j d k rl |  j d k rl t d   n  | d k r d d d	 g } n< t j |  } t j	 | d  } t j
 |  } t |  } | j d
 |  | j d
 |  | j d
 |  | j d
 |  | j | d | | j d
 |  g } g  | D] } | j d f ^ q0} d t |  |  } i t j | f | d f 6}	 t j | |	 d | }
 | j j   } t |
 | | d d d g S(   s.   Currently, only numeric describe is supported i   i    s)   DataFrame contains only non-numeric data.i   R#  s,   Cannot compute ``describe`` on object dtype.g      ?g      ?g      ?R   R   s
   describe--R_   Rk   N(   Rt  RC  RU   RD  R   Rw   R   RM   R   R   R   R}   Rr  Rs  R|  R   R  R   Rb   R/   R6   t   describe_aggregateR5   R`   Rf   t   describeR   (   Rg   R   t   percentilest   percentiles_methodRv  t   statsRw  t   stats_namesRi   R   R   Rj   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s.    $"c         C` s  |  j  |  } | d k rU d j |  j |  } |  j | d | | }	 t | |	  Sd j |  j |  }
 t | |  d |
 d |  | } d j |  j |  } t t | | d t j g   d | } t |   } d j |  j | |  } d j |  j | |  } i  } | j	 d	 f | | d	 f <x t
 d |  j  D] } | d k rd| j	 | d f | | | f <n0 | | | d f | j	 | d f f | | | f <| | j	 | f | | f f | | | f <q2Wt j | | d
 | | g } t | | | |  j  |  j  }	 t | |	  Sd S(   s"    Wrapper for cumulative operation i   s   {0}{1}(axis=1)R   s
   {0}{1}-mapRj   s   {0}{1}-take-lasts
   {0}{1}-{2}s   {0}{1}-cum-last-{2}i    R_   N(   R6  Rd   R_  R   R`  t
   _take_lastRR   RS   R/   Rb   R   R   R5   R`   R   Rf   Rk   (   Rg   t   op_nameR   R   R1  R^  R  R   Ri   R   t   name1t   cumpartR  t   cumlastt   suffixt   cnameR   RY   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _cum_agg!  s2    		 0,!c         C` sF   |  j  d d t j d t j d | d | d t d | d |  d | S(   Nt   cumsumR   R   R1  R^  R  R   (   R  R$   R  t   operatort   addt   dict(   Rg   R1  R^  Rw   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  J  s    		c         C` sF   |  j  d d t j d t j d | d | d t d | d |  d | S(   Nt   cumprodR   R   R1  R^  R  R   (   R  R$   R  R  t   mulR  (   Rg   R1  R^  Rw   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  S  s    		c         C` sF   |  j  d d t j d t j d | d | d t d | d |  d | S(   Nt   cummaxR   R   R1  R^  R  R   (   R  R$   R  R6   t   cummax_aggregateR  (   Rg   R1  R^  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  \  s    		c         C` sF   |  j  d d t j d t j d | d | d t d | d |  d | S(   Nt   cumminR   R   R1  R^  R  R   (   R  R$   R  R6   t   cummin_aggregateR  (   Rg   R1  R^  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  e  s    		c         C` s   t  t j |  | |  S(   N(   R   R$   Rf  (   Rg   t   condR   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRf  n  s    c         C` s   t  t j |  | |  S(   N(   R   R$   t   mask(   Rg   R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  t  s    c         C` s   |  j  t j  S(   N(   R   R$   Rg  (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRg  x  s    c         C` s   |  j  t j  S(   N(   R   R$   t   isnull(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  |  s    c         C` s/   t  t d  r |  j t j  St d   d  S(   Nt   isnasN   Need more recent version of Pandas to support isna. Please use isnull instead.(   R|   RR   R   R$   R  R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   t  |  j  r' t t j t j f } n	 t f } t | |  r^ t d t t	 |     n  |  j
 j |  } |  j t j t |  d | S(   Ns   Passing a %r to `isin`Rj   (   RE   Rf   R   RR   RS   R   RK   R   R(   Re   Rv   t   isinR   R$   R2   (   Rg   R7  t	   bad_typesRj   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    	c         C` s   t  |  j  r0 t |  r0 |  j j |  } n |  j j |  } t | d  r g  | j   D]6 \ } } t |  r^ t | d d   d  k r^ | ^ q^ } t	 | d | } n3 t |  r t | d d   d  k r t	 |  } n  |  j
 t j d | d | S(   Nt   itemst
   categoriesR   Rw   Rj   (   RE   Rf   RA   Rv   t   astypeR|   R  R   R   R@   R   R$   (   Rg   Rw   Rj   R!  R~  t   set_unknown(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    0c         C` sV   d d l  m } t | t t f  r: d } t |   n  | |  | g d d d | S(   Ni   (   RV   s)   append doesn't support list or dict inputt   joint   outert   interleave_partitions(   t   multiRV   RK   R}   R  R   (   Rg   R   R  RV   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    R  c      	   ` s  t  t j |  | | d | d | \ } } |  j t j | d | d | d | } t |  | | | |  } d |     f d   t | j    D }	 |	 j | j  t	 |	   | | j
  }
 d |   f d   t | j    D } | j | j  t	 |  | | j
  } |
 | f S(   NR1  t
   fill_valueR  s   align1-c         ` s.   i  |  ]$ \ } } t  | d  f   | f  q S(   i    (   R   (   R  RY   Rn   (   R  (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	s   align2-c         ` s.   i  |  ]$ \ } } t  | d  f   | f  q S(   i   (   R   (   R  RY   Rn   (   R  (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	(   t   _emulateR$   t   alignR   R/   R  Ro   Rz   Ra   R   Rk   (   Rg   R   R  R1  R  t   meta1t   meta2t   alignedR   t   dsk1t   result1R  t   result2(    (   R  R  s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s     	

c         C` s"   |  j  t j | | d | d | S(   NR  t	   overwrite(   R   R$   R  (   Rg   R   R   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   |  j  t j |  S(   N(   R   R$   t   combine_first(   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s
   t   d S(   s7    bind operator method like DataFrame.add to this class N(   R   (   R   Ri   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _bind_operator_method  s    c         C` s)   d d l  m } | |  | d | d | S(   Ni   (   t	   Resamplert   closedt   label(   t   tseries.resampleR  (   Rg   t   ruleR  R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   resample  s    c         ` sD   j  j |   j s( t d   n  t j j j |  }  j d | }  j	 j
 |  } | j   py t | d  } |  j d k r  j } n  j | d  | f } d t  |       f d   t |  D } t j  j | f d  | | t d f |   | f <t j   | d  g } t |    |  S(	   Ns0   `first` is not implemented for unknown divisionsi    t   _inci   s   first-c         ` s(   i  |  ] }  j  | f   | f  q S(    (   Rb   (   R  RY   (   Ri   Rg   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	 R  R_   (   Rv   R   R   R   RR   t   tseriest   frequenciest	   to_offsetRk   R  t   _get_partitionst
   isAnchoredR|   R   R/   R   R6   t   boundary_sliceRb   R   RW   R5   R`   R   (   Rg   t   offsett   datet   endt   include_rightt   divsRh   R   (    (   Ri   Rg   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s     	c         ` s-   j  j |   j s( t d   n  t j j j |  }  j d | }  j	 j
 |  } | d k rx  j } n | f  j | d } d t  |       f d   t t |  j   D } t j  j | f | d  t t d f |   d f <t j   | d  g } t |    |  S(	   Ns/   `last` is not implemented for unknown divisionsii    i   s   last-c         ` s6   i  |  ], \ } }  j  | d  f   | d  f  q S(   i   (   Rb   (   R  RY   R  (   Ri   Rg   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	R  R_   (   Rv   R   R   R   RR   R  R  R  Rk   R  R  R/   R  R   R   R6   R  Rb   R   RW   R   R5   R`   R   (   Rg   R  R  t   startR  Rh   R   (    (   Ri   Rg   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   last   s     	c         C` sJ   d d l  m } t |  g d | j d | j d | j d | d d d	 t S(
   s9  Approximate number of unique rows.

        This method uses the HyperLogLog algorithm for cardinality
        estimation to compute the approximate number of unique rows.
        The approximate error is 0.406%.

        Parameters
        ----------
        split_every : int, optional
            Group partitions into groups of this size while performing a
            tree-reduction. If set to False, no tree-reduction will be used.
            Default is 8.

        Returns
        -------
        a float representing the approximate number of elements
        i   (   t   hyperloglogR   R  R   R   t   bi   Rj   (   R   R  R   t   compute_hll_arrayt   reduce_statet   estimate_countR   (   Rg   R   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   nunique_approx  s
    		c         C` s   |  j  t j  S(   s    Return a dask.array of the values of this dataframe

        Warning: This creates a dask.array without precise shape information.
        Operations that depend on shape information, like slicing or reshaping,
        will not work.
        (   R   R6   R7  (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR7  2  s    c         C` se   |  j  j d k	 od t |  od t j |  s= t | t  od | |  j  j k od | t |  d d  k S(   s   
        Test whether a key is an index level reference

        To be considered an index level reference, `key` must match the index name
        and must NOT match the name of any column (if a dataframe).
        RD  N(    (	   R   Ri   R   R1   RM   t   isscalarRK   R   R   (   Rg   Rn   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _is_index_level_reference<  s
    c         ` s:   t  | t  r) t   f d   | D  S  j |  Sd S(   sb   
        Test whether the input contains a reference to the index of the DataFrame/Series
        c         3` s   |  ] }   j  |  Vq d  S(   N(   R  (   R  R   (   Rg   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>N  s    N(   RK   R}   Re  R  (   Rg   t   columns_or_index(    (   Rg   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _contains_index_nameI  s    N(   R   R   R   Rl   Rm   Ro   Rq   Rp   R   R;   R0   R   R   R   R   R   Rt   Ru   R   R   R   R   Rv   R   R   R   R   R   R   R   R   R   R   R   R   R   t   setterR   R   R   R   R   R    RR   R   R   R   R   t   __nonzero__R   R   R   t   __long__R   R>   R   R   t
   no_defaultR   R  R  RW   R  R  R  R'  R(  R.  R8  R2  R3  R;  R<  RI  RK  RO  RQ  R   R   R   R   R   RY  R[  Ra  Rb  Rd  Re  R   Rj  R   R   Rk  Rq  Rr  Rs  Rx  R|  R  R  R  R  R  R  R  R  RM   t   nanRf  R  Rg  R  R  R  R  RS   R   R  R  R  R  R  R   R  R  R7  R  R  (    (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR      s(  														
									Qn		|,+
	'/-/			0		)(	
	c         C` sD   t  |  t  r@ t |  d  r@ |  j t k r@ t d |   n  d S(   sv   
    Utility function to raise an error if an object column does not support
    a certain operation like `mean`.
    Rw   s%   `%s` not supported with object seriesN(   RK   RS   R|   Rw   R#  R   (   R   R!   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRc  S  s    -RS   c           B` s  e  Z d  Z e j Z e e  Z d Z	 d@ d  Z e d    Z e j d    Z e d    Z e d    Z e d    Z e d    Z e d	    Z e d
    Z d   Z e d    Z d   Z d   Z d@ e e d  Z e e j  d d   Z e e j  d@ d d d   Z d d d  Z  d d  Z! d   Z" e e j  d d@ d   Z# e e j  d    Z$ e% d d   Z& e e j  d@ d   Z' e e j  e d   Z( d@ d  d!  Z) e e j  d@ d"   Z* e e j  d@ d  d#   Z+ e e j  d$ d@ d%   Z, e e j  d$ d@ d&   Z- e e j  d'    Z. e/ d( d)  e e j  d@ e0 d*    Z1 e e j  d+    Z2 e e j  e3 d,   Z4 e e j  d@ d@ d@ d-   Z5 e e j  d.    Z6 e e j  d/    Z7 e e j  d0 d@ d@ d1   Z8 e e j  d@ d2   Z9 e e j  d3    Z: e e j  d4    Z; e d5  Z< e e j  d@ d6   Z= e e j  d$ d7   Z> e% d8    Z? e% d9    Z@ e/ d( d)  e3 e0 dA d:   ZA e e j  d@ e d;   ZB e e j  d< d@ e d=   ZC e e j  d  e d>   ZD e e j  e3 e d?   ZE RS(B   s   Parallel Pandas Series

    Do not use this class directly.  Instead use functions like
    ``dd.read_csv``, ``dd.read_parquet``, or ``dd.from_pandas``.

    Parameters
    ----------

    dsk: dict
        The dask graph to compute this Series
    _name: str
        The key prefix that specifies which keys in the dask comprise this
        particular Series
    meta: pandas.Series
        An empty ``pandas.Series`` with names, dtypes, and index matching the
        expected output.
    divisions: tuple of index values
        Values along which we partition our blocks on the index

    See Also
    --------
    dask.dataframe.DataFrame
    s   series-c         C` s   t  | t  ro t |  d k ro t  | d d t j  r[ | d d j d k r[ d  } qo | d d j } n  t j	 | d | d |  j
 S(   Ni    i   R   Ri   (    (   RK   R   RU   RM   RN   R   R   R   RR   RS   Ri   (   Rg   R   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   x  s
    !1	c         C` s
   |  j  j S(   N(   Rf   Ri   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRi     s    c         C` s7   | |  j  _ t |  |  } | j |  _ | j |  _ d  S(   N(   Rf   Ri   t   _rename_daskRa   Rb   (   Rg   Ri   t   renamed(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRi     s    c         C` s   d S(   s    Return dimensionality i   (    (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRC    s    c         C` s
   |  j  f S(   s	  
        Return a tuple representing the dimensionality of a Series.

        The single element of the tuple is a Delayed result.

        Examples
        --------
        >>> series.shape  # doctest: +SKIP
        # (dd.Scalar<size-ag..., dtype=int64>,)
        (   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         C` s
   |  j  j S(   s    Return data type (   Rf   Rw   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRw     s    c         C` s
   t  |   S(   s    Namespace of datetime methods (   R7   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   dt  s    c         C` s
   t  |   S(   N(   R9   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   cat  s    c         C` s
   t  |   S(   s    Namespace for string methods (   R8   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         C` sk   t  t t |     } | j |  j  x6 d d g D]( } t |  j |  s5 | j |  q5 q5 Wt |  S(   NR  R   (	   Rx   Ry   Re   Rz   R{   R|   Rf   R   R}   (   Rg   R~   t   accessor(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c      	   C` s(   |  j  t j t j d d d t d t S(   s    Number of bytes R   t   nbytesRj   R   (   R   R6   R  RM   R   R   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   t  |  j |  j  S(   N(   t   _repr_data_seriesRf   R   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         C` s   |  j  d
 k	 r0 d j d |  j  d |  j  } n d j d |  j  } d j d |  j j d |  j   d | d t |  j  d	 t	 |  j
   S(   s    have to overwrite footer s   Name: {name}, dtype: {dtype}Ri   Rw   s   dtype: {dtype}sG   Dask {klass} Structure:
{data}
{footer}
Dask Name: {name}, {task} tasksR   R   t   footerR   N(   Ri   R   Rd   Rw   R   R   R   R%   Rb   RU   Ra   (   Rg   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         C` s`  d d l  m } m } m } | |  sA | |  re | |  re | rM |  n	 |  j   } | | _ n |  j t j |  } |  j	 r| rt
 |  s | |  rt j t |  j d  d |  j } | j |  j }	 |	 j s d }
 t |
   n  t |	 j    | _ q| j   } n  | r\| j |  _ | j |  _ | j |  _ | j |  _ |  } n  | S(   s  Alter Series index labels or name

        Function / dict values must be unique (1-to-1). Labels not contained in
        a dict / Series will be left as-is. Extra labels listed don't throw an
        error.

        Alternatively, change ``Series.name`` with a scalar value.

        Parameters
        ----------
        index : scalar, hashable sequence, dict-like or callable, optional
            If dict-like or callable, the transformation is applied to the
            index. Scalar or hashable sequence-like will alter the
            ``Series.name`` attribute.
        inplace : boolean, default False
            Whether to return a new Series or modify this one inplace.
        sorted_index : bool, default False
            If true, the output ``Series`` will have known divisions inferred
            from the input series and the transformation. Ignored for
            non-callable/dict-like ``index`` or when the input series has
            unknown divisions. Note that this may only be set to ``True`` if
            you know that the transformed index is monotonicly increasing. Dask
            will check that transformed divisions are monotonic, but cannot
            check all the values between divisions, so incorrectly setting this
            can result in bugs.

        Returns
        -------
        renamed : Series

        See Also
        --------
        pandas.Series.rename
        i    (   t	   is_scalart   is_list_liket   is_dict_likei   R   sG   sorted_index=True, but the transformed index isn't monotonic_increasing(   t   pandas.api.typesR  R  R  R   Ri   R   R$   t   renameR   t   callableRR   RS   R   R   Rk   R   t   is_monotonic_increasingR   R   R$  R   Ra   Rb   Rf   (   Rg   R   t   inplacet   sorted_indexR  R  R  t   rest   oldt   newR   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s,    #%			i    c         C` s   t  t j |  |  S(   N(   R   R$   t   round(   Rg   t   decimals(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    R  c         C` s@   t  t j |  | | |  } t t j |  j  j    | _ | S(   N(   R   R$   t   to_timestampR   RR   R   Rk   (   Rg   R*  t   howR1  t   df(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    !g      ?R  c         C` s   t  |  | d | S(   s   Approximate quantiles of Series

        Parameters
        ----------
        q : list/array of floats, default 0.5 (50%)
            Iterable of numbers ranging from 0 to 1 for the desired quantiles
        method : {'default', 'tdigest', 'dask'}, optional
            What method to use. By default will use dask's internal custom
            algorithm (``'dask'``).  If set to ``'tdigest'`` will use tdigest
            for floats and ints and fallback to the ``'dask'`` otherwise.
        R   (   R  (   Rg   R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    g      ?c         C` s#   d d l  m } | |  | d | S(   sA    Approximate quantiles of Series used for repartitioning
        i   (   t   partition_quantilest   upsample(   t   partitionquantilesR  (   Rg   R   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _repartition_quantiles-  s    c         C` s   t  | t  r |  j | j k r d t |  |  } t t j | |  |  } t j | | d |  | g } t | | |  j	 |  j  St
 d   d  S(   Ns   index-%sR_   s[   Series getitem in only supported for other series objects with matching partition structure(   RK   RS   Rk   R/   t   partitionwise_graphR  R   R5   R`   Rf   R   (   Rg   Rn   Ri   Rh   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __getitem__3  s    !Re  c         C` s   |  S(   N(    (   Rg   R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRt  >  s    c         c` sO   xH t  |  j  D]7 } |  j |  j   } x | j   D] } | Vq8 Wq Wd  S(   N(   R   R   R   R   t	   iteritems(   Rg   RY   Rw  t   item(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  B  s    c         C` sB   | d k r$ t d j |    n  i d d  6d d 6j | |  S(   Ni    R   s   No axis named {0}(   i    R   N(   R   R   Rd   R   (   R   R1  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR6  I  s    c         K` s#   d d l  m } | |  d | | S(   Ni    (   t   SeriesGroupByt   by(   t   dask.dataframe.groupbyR  (   Rg   R   R   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   groupbyP  s    c         C` s   t  t |   j d |  S(   NR   (   t   superRS   Rr  (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRr  U  s    i   c         C` s@   t  |  d t j d t j d |  j d d d | d |  j d | S(	   s   
        Return Series of unique values in the object. Includes NA values.

        Returns
        -------
        uniques : Series
        R   R   Rj   R   R   R   t   series_nameR   (   R   R6   R   Rf   Ri   (   Rg   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   Y  s    c         C` s   |  j  d |  j   S(   NR   (   R   Rr  (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   nuniquee  s    c         C` sL   t  |  d t j d t j d t j d |  j j   d d d | d | d	 t S(
   NR   R   R  Rj   R   s   value-countsR   R   R   (   R   R$   t   value_countsR6   t   value_counts_aggregatet   value_counts_combineRf   t   split_out_on_index(   Rg   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  i  s    		i   c         C` s7   t  |  d t j d t j d |  j d d d | d | S(   NR   R   Rj   R   s   series-nlargestR   R   (   R   R$   t   nlargestRf   (   Rg   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR
  r  s    c         C` s7   t  |  d t j d t j d |  j d d d | d | S(   NR   R   Rj   R   s   series-nsmallestR   R   (   R   R$   t	   nsmallestRf   (   Rg   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  x  s    c         C` s   t  t |   j |  S(   N(   R  RS   R  (   Rg   R7  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  ~  s    R   i   c         ` s  t    t  p1 t    p1 t    o1 t    sR t d j t       n  d t |           f d   t	 |  j
    D } t j  | d |  g } | t k r t t j |    d  d t } n$ t | d t t |   d d   } t |  | |  j  S(   Ns4   arg must be pandas.Series, dict or callable. Got {0}s   map-c         ` s4   i  |  ]* \ } } t  j |    f  | f  q S(    (   R$   t   map(   R  RY   R!  (   t   argt	   na_actionRi   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	 R_   R  t   udfR   (   RK   R  R  RF   R1   Rc   Rd   Re   R/   R  Ro   R5   R`   R  R  R$   R  RW   R=   R   R   RS   Rk   (   Rg   R  R  Rj   Rh   R   (    (   R  R  Ri   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    	$$c         C` s   |  j  t j  S(   N(   R   R$   t   dropna(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s"   |  j  t j d | d | d | S(   Nt   leftt   rightt	   inclusive(   R   R$   t   between(   Rg   R  R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s7   | d  k	 r t d   n  |  j t j d | d | S(   Ns   'out' must be Nonet   lowert   upper(   R   R   R   R$   t   clip(   Rg   R  R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   |  j  t j d | S(   Nt	   threshold(   R   R$   t
   clip_lower(   Rg   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   |  j  t j d | S(   NR  (   R   R$   t
   clip_upper(   Rg   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    R  c         C` s(   t  t |   j | d | d | d | S(   NR  R1  R  (   R  RS   R  (   Rg   R   R  R1  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    !c         C` s   |  j  t j | | d | S(   NR  (   R   R$   R  (   Rg   R   R   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   |  S(   N(    (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   squeeze  s    c         C` s   |  j  t j |  S(   N(   R   R$   R  (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   d d l  m } | |  |  S(   s!    Create a Dask Bag from a Series i   (   t   to_bag(   RL  R  (   Rg   R   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s%   |  j  t j | d |  j j |  S(   NRj   (   R   R$   t   to_frameRf   (   Rg   Ri   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   |  j    j d |  S(   NR   (   R   R   (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         ` s>   d d d   f d  } t   j  | _ t |  | |  d S(   s4    bind operator method like Series.add to this class i    c      
   ` sj   | d  k	 r t d   n  |  j |  } t   |  | d | d | } t   |  | d | d | d | S(   Ns   level must be NoneR1  R  Rj   (   R   R   R6  R  R   (   Rg   R   t   levelR  R1  Rj   (   R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   meth  s    N(   R   R)   R   R   (   R   Ri   R   R  (    (   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         ` s>   d d d   f d  } t   j  | _ t |  | |  d S(   s5    bind comparison method like Series.eq to this class i    c         ` sx   | d  k	 r t d   n  |  j |  } | d  k rL t   |  | d | St   d | } t | |  | d | Sd  S(   Ns   level must be NoneR1  R  (   R   R   R6  R   R   (   Rg   R   R  R  R1  R   (   t
   comparison(    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    N(   R   R)   R   R   (   R   Ri   R   R  (    (   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _bind_comparison_method  s    
c         K` sq   | t  k rO t t j |  j | d | d | d t | } t j t |   n  t	 t j |  | | | d | | S(   s&   Parallel version of pandas.Series.apply

        Parameters
        ----------
        func : function
            Function to apply
        convert_dtype : boolean, default True
            Try to find better dtype for elementwise function results.
            If False, leave as dtype=object.
        $META
        args : tuple
            Positional arguments to pass to function in addition to the value.

        Additional keyword arguments will be passed as keywords to the function.

        Returns
        -------
        applied : Series or DataFrame if func returns a Series.

        Examples
        --------
        >>> import dask.dataframe as dd
        >>> s = pd.Series(range(5), name='x')
        >>> ds = dd.from_pandas(s, npartitions=2)

        Apply a function elementwise across the Series, passing in extra
        arguments in ``args`` and ``kwargs``:

        >>> def myadd(x, a, b=1):
        ...     return x + a + b
        >>> res = ds.apply(myadd, args=(2,), b=1.5)

        By default, dask tries to infer the output metadata by running your
        provided function on some fake data. This works well in many cases, but
        can sometimes be expensive, or even fail. To avoid this, you can
        manually specify the output metadata with the ``meta`` keyword. This
        can be specified in many forms, for more information see
        ``dask.dataframe.utils.make_meta``.

        Here we specify the output is a Series with name ``'x'``, and dtype
        ``float64``:

        >>> res = ds.apply(myadd, args=(2,), b=1.5, meta=('x', 'f8'))

        In the case where the metadata doesn't change, you can also pass in
        the object itself directly:

        >>> res = ds.apply(lambda x: x + 1, meta=ds)

        See Also
        --------
        dask.Series.map_partitions
        t   convert_dtypeRX   R  Rj   (
   R  R  R$   R   Rv   RW   R+  R,  t   meta_warningR   (   Rg   R   R"  Rj   RX   t   kwds(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    7c         C` s_   d d l  m } t | t  s. t d   n  | |  | g d d } t | | d t d | S(   Ni   (   RV   s%   other must be a dask.dataframe.SeriesR1  Rl  R   (   R  RV   RK   RS   Rc   t   cov_corrRW   (   Rg   R   RU  R   RV   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   cov.	  s
    t   pearsonc      	   C` s   d d l  m } t | t  s. t d   n  | d k rI t d   n  | |  | g d d } t | | d t d t d	 | S(
   Ni   (   RV   s%   other must be a dask.dataframe.SeriesR'  s-   Only Pearson correlation has been implementedR1  t   corrRl  R   (   R  RV   RK   RS   Rc   R   R%  RW   (   Rg   R   R   RU  R   RV   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR(  6	  s    c         C` sL   t  | t  s t d   n  |  j | d k r6 |  n |  j |  d | S(   Ns   lag must be an integeri    R   (   RK   R   Rc   R(  R[  (   Rg   t   lagR   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   autocorrC	  s    'c         C` s4   |  j  t j d | d | } t t  | j    S(   NR   t   deep(   R   R$   t   memory_usageR2   R   R   (   Rg   R   R+  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR,  J	  s    N(    (F   R   R   R   RR   RS   t   _partition_typeR   RF   R   R_  R   R   R   Ri   R  RC  R   Rw   R	   R  R  R   R   R  R   R   R   R  R    R  R   R  R  R  R  Rt  R  R   R6  R  Rr  R   R  R  R
  R  R  R>   R  R  R  RW   R  R  R  R  R  R  R  R  R  R  R   R  R!  R   R&  R(  R*  R,  (    (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRS   \  s   					=	?R   c           B` sa  e  Z e j Z e e  Z d  Z d d d d d d d d d	 d
 d d d d d h Z	 d d d d d d d d d d d d d d h Z
 d   Z d   Z e d     Z d- d!  Z d" e d#  Z e e j  e d$   Z e e j  e d%   Z e d&  Z e e j  d' d- d(   Z e e j  d)    Z e e j d* d+ g e d- d,   Z RS(.   s   index-t
   nanosecondt   microsecondt   millisecondt	   dayofyeart   minutet   hourt   dayt	   dayofweekt   secondt   weekt   weekdayt
   weekofyeart   montht   quartert   yeart   knownt   as_knownt
   as_unknownt   add_categoriesR  t   remove_categoriest   reorder_categoriest
   as_orderedt   codest   remove_unused_categoriest   set_categoriest   as_unorderedt   orderedt   rename_categoriesc         C` sa   t  |  j  r. | |  j k r. t |  j |  S| |  j k rM t |  j |  St d |   d  S(   Ns"   'Index' object has no attribute %r(   RA   Rw   t   _cat_attributesR   R  t   _dt_attributesR  t   AttributeError(   Rg   Rn   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __getattr__`	  s
    c         C` sK   t  t |   j   } | j |  j  t |  j  rG | j |  j  n  | S(   N(   R  R   R   t   extendRK  RA   Rw   RJ  (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   g	  s
    c         C` s%   d } t  | j |  j j    d  S(   Ns%   '{0}' object has no attribute 'index'(   RL  Rd   R   R   (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   n	  s    c         C` s   t  j | d |  j S(   NRi   (   RR   R   Ri   (   Rg   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   s	  s    i   c         C` s   d | |  j  f } i t j |  j  d f t d |  f | d f 6} t j | | d |  g } t | | |  j |  j d   } | r | j	   } n  | S(   s\    First n items of the Index.

        Caveat, this only checks the first partition.
        s
   head-%d-%si    R_   i   (
   Rb   R  R   R   R5   R`   R   Rf   Rk   R   (   Rg   R   R   Ri   Rh   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  v	  s    1c         C` s2   |  j  t j d |  j j   d |  j d d | S(   NRj   R   R   R   (   R   R$   R   Rv   R_  (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   	  s    c         C` s2   |  j  t j d |  j j   d |  j d d | S(   NRj   R   R   R   (   R   R$   R   Rv   R_  (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   	  s    c      	   C` s(   |  j  t j t j d d d t d | S(   NR   s   index-countRj   R   (   R   R6   t   index_countRM   R   R   (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRr  	  s    i   c         C` s   t  |  j t j  rl | d  k	 r0 t d   n  |  j j |  } |  j t	 j | d | d d d t
 } nE |  j j | d | } |  j t	 j | d d d | d | d t
 } | d  k r | j } n  t | | d | S(   Ns*   PeriodIndex doesn't accept `freq` argumentRj   R   R[  R\  R*  (   RK   Rf   RR   t   PeriodIndexR   R   Rv   R[  R   R$   R   R*  R]  (   Rg   RZ  R*  Rj   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR[  	  s    	c         C` s   |  j  t j d |  j j   S(   NRj   (   R   R$   t	   to_seriesRf   (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRQ  	  s    t   ua_argsR   c         C` s   | s t     n  t d k rI |  j t j | | d |  j j | |  S| d  k	 rm t d j t    n |  j t j d |  j j   Sd  S(   Ns   0.24.0Rj   sN   The 'name' keyword was added in pandas 0.24.0. Your version of pandas is '{}'.(	   R   RC   R   R$   R  Rf   R   R   Rd   (   Rg   R   Ri   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  	  s    	N(   R   R   RR   R   R-  R   RG   R   R_  RK  RJ  RM  R   R   R   R   R   RW   R  R    R   R   R   Rr  R[  RQ  R  (    (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   P	  s4   							R   c           B` s'  e  Z d  Z e j Z e e  Z d Z	 dP d  Z e d    Z e j d    Z e d    Z d   Z d   Z d   Z d	   Z d
   Z d   Z d   Z e d    Z e d    Z e d    Z e e j  d    Z e e j  d    Z e e j  dP dP d   Z e e dP dP e d  Z  e e j  d dP dP d   Z! e e j  d dP dP d   Z" e e j  dP d   Z# e$ e%  dP dP dP d   Z% e e j  d    Z& e e j d d g dP dP d   Z' d   Z( e e j  dP d   Z) e e j  d dP dP d    Z* e e j  dP dP dP d!   Z+ e e j  d"    Z, e e j  d#    Z- e e j  dP d$   Z. e e j  dP d% d& d'   Z/ e d(  Z0 d)   Z1 e e j  d d*   Z2 d dP d+  Z3 e4 d& d,   Z5 e e j  d& d- d.   Z6 d/ dP dP dP e e dQ e dP dP d2 
 Z7 e e j  dP d3 d4 d4 dP dP d5   Z8 e e j  e d6   Z9 e e j  d7    Z: e e j  e d8 d9   Z; e4 d:    Z< e4 d;    Z= e> d< d=  d& dP e dP dR e? d>   Z@ e e j  d? d@   ZA e e j  d& dA   ZB e e j  dP e dB   ZC e e j  dC dP e dD   ZD dP e e dE  ZE e e j  e e dF   ZF dP dP dP dG dH  ZG e dI  ZH e e j  d dJ   ZI dK   ZJ dL ZK dM   ZL dN   ZM dO   ZN RS(S   sc  
    Parallel Pandas DataFrame

    Do not use this class directly.  Instead use functions like
    ``dd.read_csv``, ``dd.read_parquet``, or ``dd.from_pandas``.

    Parameters
    ----------
    dsk: dict
        The dask graph to compute this DataFrame
    name: str
        The key prefix that specifies which keys in the dask comprise this
        particular DataFrame
    meta: pandas.DataFrame
        An empty ``pandas.DataFrame`` with names, dtypes, and index matching
        the expected output.
    divisions: tuple of index values
        Values along which we partition our blocks on the index
    s
   dataframe-c         C` s   t  | t  ro t |  d k ro t  | d d t j  r[ | d d j d k r[ d  } qo | d d j } n  t j	 | d | d |  j
 S(   Ni    i   R   RD  (    (   RK   R   RU   RM   RN   R   R   R   RR   R   RD  (   Rg   R   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   	  s
    !1	c         C` s
   |  j  j S(   N(   Rf   RD  (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRD  	  s    c         C` s7   t  |  |  } | j |  _ | j |  _ | j |  _ d  S(   N(   R  Rf   Rb   Ra   (   Rg   RD  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRD  	  s    c         C` s   d d l  m } | |   S(   sL  Purely integer-location based indexing for selection by position.

        Only indexing the column positions is supported. Trying to select
        row positions will raise a ValueError.

        See :ref:`dataframe.indexing` for more.

        Examples
        --------
        >>> df.iloc[:, [2, 0, 1]]  # doctest: +SKIP
        i   (   t   _iLocIndexer(   R  RS  (   Rg   RS  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   iloc	  s    c   	      C` s  d t  |  |  } t j |  s7 t | t t f  r t |  j j t j	 t j
 f  rx | |  j j k rx |  j | Sn  |  j t |  } t t j | |  |  } t j | | d |  g } t | | | |  j  St | t  rPd d l m } t d   | j | j | j f D  } | rB| |  j j  rB|  j | qP|  j | Sn  t | t j t f  st  |  rt! |  st" |  r|  j t |  } t t j | |  |  } t j | | d |  g } t | | | |  j  St | t#  r|  j | j k r5d d l$ m% } | |  | g  \ }  } n  t t j | |  |  } t j | | d |  | g } t | | |  |  j  St& |   d  S(   Ns
   getitem-%sR_   i    (   t   is_float_dtypec         s` s   |  ] } t  | t  Vq d  S(   N(   RK   R   (   R  RY   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>
  s   i   (   t   _maybe_align_partitions('   R/   RM   R  RK   R   R   Rf   R   RR   t   DatetimeIndexRP  RD  R  t   _extract_metaR  R  R   R5   R`   R   Rk   R   R  RU  Re  R  t   stept   stopRw   RT  RN   R}   R1   RF   RG   RS   R  RV  R   (	   Rg   Rn   Ri   Rj   Rh   R   RU  t   is_integer_sliceRV  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  	  s<    $!	%c         ` s   t  | t t f  rR t    t  rR |  j   f d   t |   j  D   } nf t  | t j  r t    t  r t |  } |  j   f d   | D   } n |  j i   | 6  } | j	 |  _	 | j
 |  _
 | j |  _ | j |  _ d  S(   Nc         ` s#   i  |  ] \ } }   | |  q S(    (    (   R  R!  R  (   R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>)
  s   	c         ` s   i  |  ] }   |  q S(    (    (   R  R!  (   R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>.
  s   	 (   RK   R   R}   R   t   assignt   zipRD  RR   R   Ra   Rb   Rf   Rk   (   Rg   Rn   R   R  (    (   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __setitem__'
  s    $""c         C` s@   |  j  | g d d } | j |  _ | j |  _ | j |  _ d  S(   NR1  i   (   R   Ra   Rb   Rf   (   Rg   Rn   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   __delitem__7
  s    c         C` sc   y t  j |  d  j } Wn t k
 r2 d } n X| | k rL | |  | <n t  j |  | |  d  S(   NRf   (    (   R#  t   __getattribute__RD  RL  t   __setattr__(   Rg   Rn   R   RD  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRa  =
  s    
c         C` s+   | |  j  k r |  | St d |   d  S(   Ns&   'DataFrame' object has no attribute %r(   RD  RL  (   Rg   Rn   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRM  H
  s    c         C` sL   t  t t |     } | j |  j  | j d   |  j D  t |  S(   Nc         s` s<   |  ]2 } t  | t j j  r t j j |  r | Vq d  S(   N(   RK   RR   t   compatR   t   isidentifier(   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>Q
  s    (   Rx   Ry   Re   Rz   R{   RD  R}   (   Rg   R~   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   N
  s    c         C` s   |  j  j   S(   N(   RD  R$  (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _ipython_key_completions_V
  s    c         C` s   d S(   s    Return dimensionality i   (    (   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRC  Y
  s    c         C` s2   t  |  j  } t t  |  j |  } | | f S(   sB  
        Return a tuple representing the dimensionality of the DataFrame.

        The number of rows is a Delayed result. The number of columns
        is a concrete integer.

        Examples
        --------
        >>> df.size  # doctest: +SKIP
        (Delayed('int-07f06075-5ecc-4d77-817e-63c69a9188a8'), 2)
        (   RU   RD  R2   R   R   (   Rg   t   col_sizet   row_size(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   ^
  s    c         C` s
   |  j  j S(   s    Return data types (   Rf   t   dtypes(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRg  o
  s    c         C` s   |  j  j   S(   N(   Rf   t   get_dtype_counts(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRh  t
  s    c         C` s   |  j  j   S(   N(   Rf   t   get_ftype_counts(   Rg   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRi  x
  s    c         C` s,   |  j  j d | d |  j } |  t |  S(   Nt   includet   exclude(   Rf   t   select_dtypesRD  R}   (   Rg   Rj  Rk  t   cs(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRl  |
  s    c      
   K` s   | r t  d   n  | } ~ | d k	 r7 t |  n  | ri d d l m }	 |	 |  | d | d | | Sd d l m }
 |
 |  | d | d | d | | Sd S(	   s   Set the DataFrame index (row labels) using an existing column

        This realigns the dataset to be sorted by a new column.  This can have a
        significant impact on performance, because joins, groupbys, lookups, etc.
        are all much faster on that column.  However, this performance increase
        comes with a cost, sorting a parallel dataset requires expensive shuffles.
        Often we ``set_index`` once directly after data ingest and filtering and
        then perform many cheap computations off of the sorted dataset.

        This function operates exactly like ``pandas.set_index`` except with
        different performance costs (it is much more expensive).  Under normal
        operation this function does an initial pass over the index column to
        compute approximate qunatiles to serve as future divisions.  It then passes
        over the data a second time, splitting up each input partition into several
        pieces and sharing those pieces to all of the output partitions now in
        sorted order.

        In some cases we can alleviate those costs, for example if your dataset is
        sorted already then we can avoid making many small pieces or if you know
        good values to split the new index column then we can avoid the initial
        pass over the data.  For example if your new index is a datetime index and
        your data is already sorted by day then this entire operation can be done
        for free.  You can control these options with the following parameters.

        Parameters
        ----------
        df: Dask DataFrame
        index: string or Dask Series
        npartitions: int, None, or 'auto'
            The ideal number of output partitions.   If None use the same as
            the input.  If 'auto' then decide by memory use.
        shuffle: string, optional
            Either ``'disk'`` for single-node operation or ``'tasks'`` for
            distributed operation.  Will be inferred by your current scheduler.
        sorted: bool, optional
            If the index column is already sorted in increasing order.
            Defaults to False
        divisions: list, optional
            Known values on which to separate index values of the partitions.
            See https://docs.dask.org/en/latest/dataframe-design.html#partitions
            Defaults to computing this with a single pass over the data. Note
            that if ``sorted=True``, specified divisions are assumed to match
            the existing partitions in the data. If this is untrue, you should
            leave divisions empty and call ``repartition`` after ``set_index``.
        inplace : bool, optional
            Modifying the DataFrame in place is not supported by Dask.
            Defaults to False.
        compute: bool
            Whether or not to trigger an immediate computation. Defaults to False.

        Examples
        --------
        >>> df2 = df.set_index('x')  # doctest: +SKIP
        >>> df2 = df.set_index(d.x)  # doctest: +SKIP
        >>> df2 = df.set_index(d.timestamp, sorted=True)  # doctest: +SKIP

        A common case is when we have a datetime column that we know to be
        sorted and is cleanly divided by day.  We can set this index for free
        by specifying both that the column is pre-sorted and the particular
        divisions along which is is separated

        >>> import pandas as pd
        >>> divisions = pd.date_range('2000', '2010', freq='1D')
        >>> df2 = df.set_index('timestamp', sorted=True, divisions=divisions)  # doctest: +SKIP
        s%   The inplace= keyword is not supportedi   (   t   set_sorted_indexR   Rk   (   t	   set_indexR   N(   R   R   t   check_divisionst   shuffleRn  Ro  (   Rg   R   R   t   sortedR   Rk   R  R   t
   pre_sortedRn  Ro  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRo  
  s    Ci   c         C` sC   d } t  |  d t j d t j d |  j d | d | d | d | S(	   Ns   dataframe-nlargestR   R   Rj   R   R   R   RD  (   R   R$   R
  Rf   (   Rg   R   RD  R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR
  
  s    c         C` sC   d } t  |  d t j d t j d |  j d | d | d | d | S(	   Ns   dataframe-nsmallestR   R   Rj   R   R   R   RD  (   R   R$   R  Rf   (   Rg   R   RD  R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  
  s    c         K` s#   d d l  m } | |  d | | S(   Ni    (   t   DataFrameGroupByR   (   R  Rt  (   Rg   R   R   Rt  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  
  s    c      	   K` s   t  |  d | d | d | | S(   NRD  R   R   (   R:   (   Rg   RD  R   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR:   
  s    c         K` s   x | j    D] \ } } t | t  p^ t |  p^ t |  p^ t j j j |  p^ t	 |  s t
 d j t |  j    n  t |  r | |   | | <q q Wt t | j    d   } |  j j t | d t   } t t j |  d | | S(   Ns*   Column assignment doesn't support type {0}t   nonemptyRj   (    (   R  RK   R^   RF   R  RR   t   apit   typesR  RG   Rc   Rd   Re   R   R}   R   Rv   R\  RX  RW   R   R6   (   Rg   R   R!  R~  t   pairst   df2(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR\  
  s    !	RR  R   c         C` s4   | d  k	 r t d   n  |  j t j d  d | S(   Ns   Cannot rename index.RD  (   R   R   R   R$   R  (   Rg   R   RD  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  
  s    c         K` s   |  j  t j | |  S(   s   Filter dataframe with complex expression

        Blocked version of pd.DataFrame.query

        This is like the sequential version except that this will also happen
        in many threads.  This may conflict with ``numexpr`` which will use
        multiple threads itself.  We recommend that you set numexpr to use a
        single thread

            import numexpr
            numexpr.set_nthreads(1)

        See also
        --------
        pandas.DataFrame.query
        (   R   R$   t   query(   Rg   t   exprR   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRz    s    c         K` s   | d  k r t } n  d | k rB | t d  f k rB t d   n  |  j j | d | | } |  j t j | d | d | | S(   Nt   =s4   Inplace eval not supported. Please use inplace=FalseR  Rj   (   R   R   RW   R   Rf   t   evalR   R$   (   Rg   R{  R  R   Rj   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR}    s    	Re  c         C` s"   |  j  t j d | d | d | S(   NR  R   t   thresh(   R   R$   R  (   Rg   R  R   R~  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  #  s    c         C` s7   | d  k	 r t d   n  |  j t j d | d | S(   Ns   'out' must be NoneR  R  (   R   R   R   R$   R  (   Rg   R  R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  '  s    c         C` s   |  j  t j d | S(   NR  (   R   R$   R  (   Rg   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  -  s    c         C` s   |  j  t j d | S(   NR  (   R   R$   R  (   Rg   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  1  s    c         C` s   | d k r7 t |  j  d k r0 |  |  j d S|  SnW | d k ra t d j t |      n- | d k r t d j | t |      n  d  S(   Ni   i    s)   {0} does not support squeeze along axis 0s   No axis {0} for object type {1}(   Ni   (   i    i   N(   R   RU   RD  R   Rd   Re   R   (   Rg   R1  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  5  s    		R  i    c         C` s@   t  t j |  | | |  } t t j |  j  j    | _ | S(   N(   R   R$   R  R   RR   R   Rk   (   Rg   R*  R  R1  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  E  s    !c         C` s   d d l  m } | |  |  S(   s   Convert to a dask Bag of tuples of each row.

        Parameters
        ----------
        index : bool, optional
            If True, the index is included as the first element of each tuple.
            Default is False.
        i   (   R  (   RL  R  (   Rg   R   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  K  s    	c         O` s#   d d l  m } | |  | | |  S(   s2    See dd.to_parquet docstring for more information i   (   t
   to_parquet(   RL  R  (   Rg   t   pathRX   R   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  W  s    c         C` s   |  j    j d | d t  S(   NR   R   (   R   R   R   (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   \  s    c         C` s^   |  j  j   } t | j  t |  j  k  rV |  j d } |  j t j d | d | S|  Sd  S(   Ns   -get_numeric_dataRj   R   (   Rf   Rt  RU   RD  R_  R   R$   (   Rg   R  R   t   numericsRi   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRt  b  s    c         C` sI   | d k r$ t d j |    n  i d d  6d d 6d d 6j | |  S(   Ni    i   R   RD  s   No axis named {0}(   i    i   R   RD  N(   R   R   Rd   R   (   R   R1  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR6  n  s    t   raisec         C` sJ   |  j  |  } | d k r: |  j t j | d | d | St d   d  S(   Ni   R1  t   errorss$   Drop currently only works for axis=1(   R6  R   R$   R   R   (   Rg   t   labelsR1  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   u  s    t   innert   _xt   _yc         C` st   t  |  s t d   n  d d l m } | |  | d | d | d | d | d | d	 | d
 | d |
 d |	 d | 
S(   s  Merge the DataFrame with another DataFrame

        This will merge the two datasets, either on the indices, a certain column
        in each dataset or the index in one dataset and the column in another.

        Parameters
        ----------
        right: dask.dataframe.DataFrame
        how : {'left', 'right', 'outer', 'inner'}, default: 'inner'
            How to handle the operation of the two objects:
            - left: use calling frame's index (or column if on is specified)
            - right: use other frame's index
            - outer: form union of calling frame's index (or column if on is
              specified) with other frame's index, and sort it
              lexicographically
            - inner: form intersection of calling frame's index (or column if
              on is specified) with other frame's index, preserving the order
              of the calling's one
        on : label or list
            Column or index level names to join on. These must be found in both
            DataFrames. If on is None and not merging on indexes then this
            defaults to the intersection of the columns in both DataFrames.
        left_on : label or list, or array-like
            Column to join on in the left DataFrame. Other than in pandas
            arrays and lists are only support if their length is 1.
        right_on : label or list, or array-like
            Column to join on in the right DataFrame. Other than in pandas
            arrays and lists are only support if their length is 1.
        left_index : boolean, default False
            Use the index from the left DataFrame as the join key.
        right_index : boolean, default False
            Use the index from the right DataFrame as the join key.
        suffixes : 2-length sequence (tuple, list, ...)
            Suffix to apply to overlapping column names in the left and
            right side, respectively
        indicator : boolean or string, default False
            If True, adds a column to output DataFrame called "_merge" with
            information on the source of each row. If string, column with
            information on source of each row will be added to output DataFrame,
            and column will be named value of string. Information column is
            Categorical-type and takes on a value of "left_only" for observations
            whose merge key only appears in `left` DataFrame, "right_only" for
            observations whose merge key only appears in `right` DataFrame,
            and "both" if the observation’s merge key is found in both.
        npartitions: int, None, or 'auto'
            The ideal number of output partitions. This is only utilised when
            performing a hash_join (merging on columns only). If `None`
            npartitions = max(lhs.npartitions, rhs.npartitions)
        shuffle: {'disk', 'tasks'}, optional
            Either ``'disk'`` for single-node operation or ``'tasks'`` for
            distributed operation.  Will be inferred by your current scheduler.

        Notes
        -----

        There are three ways to join dataframes:

        1. Joining on indices. In this case the divisions are
           aligned using the function ``dask.dataframe.multi.align_partitions``.
           Afterwards, each partition is merged with the pandas merge function.

        2. Joining one on index and one on column. In this case the divisions of
           dataframe merged by index (:math:`d_i`) are used to divide the column
           merged dataframe (:math:`d_c`) one using
           ``dask.dataframe.multi.rearrange_by_divisions``. In this case the
           merged dataframe (:math:`d_m`) has the exact same divisions
           as (:math:`d_i`). This can lead to issues if you merge multiple rows from
           (:math:`d_c`) to one row in (:math:`d_i`).

        3. Joining both on columns. In this case a hash join is performed using
           ``dask.dataframe.multi.hash_join``.

        s   right must be DataFramei   (   R   R  t   ont   left_ont   right_ont
   left_indext   right_indext   suffixesR   t	   indicatorRq  (   RE   R   R  R   (   Rg   R  R  R  R  R  R  R  R  R  R   Rq  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   |  s    MR  R   c   	      C` sn   t  |  s t d   n  d d l m } | |  | d | d | d  k d t d | d | | g d	 | d
 | S(   Ns   other must be DataFramei   (   R   R  R  R  R  R  R   Rq  (   RE   R   R  R   R   RW   (	   Rg   R   R  R  t   lsuffixt   rsuffixR   Rq  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s^   t  | t  r$ d } t |   n t |  rB | j   j } n  t t |   j | d | S(   NsM   Unable to appending dd.Series to dd.DataFrame.Use pd.Series to append as row.R  (	   RK   RS   R   RF   R  t   TR  R   R   (   Rg   R   R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c         c` sO   xH t  |  j  D]7 } |  j |  j   } x | j   D] } | Vq8 Wq Wd  S(   N(   R   R   R   R   t   iterrows(   Rg   RY   R  t   row(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    t   Pandasc         c` s[   xT t  |  j  D]C } |  j |  j   } x% | j d | d |  D] } | VqD Wq Wd  S(   NR   Ri   (   R   R   R   R   t
   itertuples(   Rg   R   Ri   RY   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         ` sA   d d d    f d  } t  j  | _ t |    |  d S(   s7    bind operator method like DataFrame.add to this class RD  c         ` s   | d  k	 r t d   n  |  j |  } | d	 k r t | t  rc d j    } t |   q t |  r t  |  d | d | d | } t	  |  d | d | d | d | Sn  t  |  | d | d | } t	  |  | d | d | d | S(
   Ns   level must be Nonei   RD  s#   Unable to {0} dd.Series with axis=1R   R1  R  Rj   (   i   RD  (
   R   R   R6  RK   RS   Rd   R   RF   R  R   (   Rg   R   R1  R  R  R   Rj   (   Ri   R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    	N(   R   R)   R   R   (   R   Ri   R   R  (    (   Ri   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         ` s;   d d   f d  } t   j  | _ t |  | |  d S(   s8    bind comparison method like DataFrame.eq to this class RD  c         ` s@   | d  k	 r t d   n  |  j |  } t   |  | d | S(   Ns   level must be NoneR1  (   R   R   R6  R   (   Rg   R   R1  R  (   R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    N(   R   R)   R   R   (   R   Ri   R   R  (    (   R   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR!    s    R   i   c      	   K` s   |  j  |  } i | d 6| d 6| d 6d d 6}	 t d k rP | j d d  n  | j |	  | d k r~ d }
 t |
   n  | t k r t t j	 |  j
 | d	 | d
 t | } t j t |   n  t t j	 |  | d	 | d | | S(   s{   Parallel version of pandas.DataFrame.apply

        This mimics the pandas version except for the following:

        1.  Only ``axis=1`` is supported (and must be specified explicitly).
        2.  The user should provide output metadata via the `meta` keyword.

        Parameters
        ----------
        func : function
            Function to apply to each column/row
        axis : {0 or 'index', 1 or 'columns'}, default 0
            - 0 or 'index': apply function to each column (NOT SUPPORTED)
            - 1 or 'columns': apply function to each row
        $META
        args : tuple
            Positional arguments to pass to function in addition to the array/series

        Additional keyword arguments will be passed as keywords to the function

        Returns
        -------
        applied : Series or DataFrame

        Examples
        --------
        >>> import dask.dataframe as dd
        >>> df = pd.DataFrame({'x': [1, 2, 3, 4, 5],
        ...                    'y': [1., 2., 3., 4., 5.]})
        >>> ddf = dd.from_pandas(df, npartitions=2)

        Apply a function to row-wise passing in extra arguments in ``args`` and
        ``kwargs``:

        >>> def myadd(row, a, b=1):
        ...     return row.sum() + a + b
        >>> res = ddf.apply(myadd, axis=1, args=(2,), b=1.5)

        By default, dask tries to infer the output metadata by running your
        provided function on some fake data. This works well in many cases, but
        can sometimes be expensive, or even fail. To avoid this, you can
        manually specify the output metadata with the ``meta`` keyword. This
        can be specified in many forms, for more information see
        ``dask.dataframe.utils.make_meta``.

        Here we specify the output is a Series with name ``'x'``, and dtype
        ``float64``:

        >>> res = ddf.apply(myadd, axis=1, args=(2,), b=1.5, meta=('x', 'f8'))

        In the case where the metadata doesn't change, you can also pass in
        the object itself directly:

        >>> res = ddf.apply(lambda row: row + 1, axis=1, meta=ddf)

        See Also
        --------
        dask.DataFrame.map_partitions
        R1  t	   broadcastt   rawt   reduces   0.23.0t   result_typei    sE   dd.DataFrame.apply only supports axis=1
  Try: df.apply(func, axis=1)RX   R  Rj   N(   R6  R   RC   t
   setdefaultRz   R   R  R  R$   R   Rv   RW   R+  R,  R#  R   (   Rg   R   R1  R  R  R  RX   Rj   R$  t   pandas_kwargsR   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   '  s"    ?
RI   c         C` s   t  t j |  | d | S(   NRj   (   R   R$   t   applymap(   Rg   R   Rj   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   t  t j |  |  S(   N(   R   R$   R  (   Rg   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   t  |  | d | S(   NR   (   R%  (   Rg   RU  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR&    s    R'  c         C` s1   | d k r t  d   n  t |  | t d | S(   NR'  s-   Only Pearson correlation has been implementedR   (   R   R%  RW   (   Rg   R   RU  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR(    s    c         C` s  | d k r$ d d l } | j } n  t t |    g } t |  j  d k r | j d  | j d t |   j  t	 | |  d Si  } | r | j
 i |  j d 6|  j   d 6 n  | r | j
 i |  j t j d t d 6 n  t t | j   t j | j       } | r| d } | d } | j t |   | j d j t |  j    d d	 l m }	 t g  |  j D] }
 t |	 |
   ^ q{ d
 } d | } g  t |  j | |  j  D]- } | j |	 | d  | d | d  ^ q} n t |  j d d g } | j |  g  t |  j j   j    d t D] }
 d |
 ^ q@} | j d j d j! |    | r| d j"   } | j d j t# |    n  t	 | |  d S(   s6   
        Concise summary of a Dask DataFrame.
        i    Ns   Index: 0 entriess   Empty %sR   Rr  R,  s    Data columns (total {} columns):(   t   pprint_thingi   s   {!s:<%d} {} non-null {}i   i   Ri   t   ColumnsRn   s   %s(%d)s
   dtypes: {}s   , s   memory usage: {}
($   R   t   syst   stdoutR   Re   RU   RD  R   R   R#   Rz   R   Rr  R   R$   R,  RW   R  R]  RR  RO   R   R7  RD   Rd   t   pandas.io.formats.printingR  R   Rg  RN  Rr  R  R  R  R   R"   (   Rg   t   buft   verboseR,  R  t   linest   computationsR   t   countsR  R!  t   spacet   column_templateR   t   column_infot   dtype_countst
   memory_int(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   info  sB    ')*

2
L8c         C` s:   |  j  t j d | d | } | j | j  j   } | S(   NR   R+  (   R   R$   R,  R  R   R   (   Rg   R   R+  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR,    s    Rs  c      
   C` s2   d d l  m } | |  d | d | d | d | S(   s/  
        Create a spreadsheet-style pivot table as a DataFrame. Target ``columns``
        must have category dtype to infer result's ``columns``.
        ``index``, ``columns``, ``values`` and ``aggfunc`` must be all scalar.

        Parameters
        ----------
        values : scalar
            column to aggregate
        index : scalar
            column to be index
        columns : scalar
            column to be columns
        aggfunc : {'mean', 'sum', 'count'}, default 'mean'

        Returns
        -------
        table : DataFrame
        i   (   t   pivot_tableR   RD  R7  t   aggfunc(   t   reshapeR  (   Rg   R   RD  R7  R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   d d l  m } | |   S(   Ni   (   t
   to_records(   RL  R  (   Rg   R   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` sO   |  j    j d | d t  } |  j j d | d t |  j  d t |  j   S(   NR   R   R   Ri   R   (	   R   t   to_htmlR   t	   _HTML_FMTRd   R%   Rb   RU   Ra   (   Rg   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    	!c         C` sV   |  j  } |  j } g  | j   D] \ } } t | d | ^ q } t j | d d S(   NR   R1  i   (   Rf   R   R  R  RR   RV   (   Rg   Rj   R   R&  Rw  t   series_list(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    		1sg   <div><strong>Dask DataFrame Structure:</strong></div>
{data}
<div>Dask Name: {name}, {task} tasks</div>c         C` sU   |  j    j d d d t d t  } |  j j d | d t |  j  d t |  j	   S(   NR   i   R   t   notebookR   Ri   R   (
   R   R  R   RW   R  Rd   R%   Rb   RU   Ra   (   Rg   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _repr_html_  s    	!c         C` s{   t  | t  r | n | g } g  | D] } |  j |  r% | ^ q% } |  | } |  j |  rw | j d |  j  } n  | S(   s~  
        Parameters
        ----------
        columns_or_index
            Column or index name, or a list of these

        Returns
        -------
        dd.DataFrame
            Dask DataFrame with columns corresponding to each column or
            index level in columns_or_index.  If included, the column
            corresponding to the index level is named _index
        t   _index(   RK   R}   t   _is_column_label_referenceR  R\  R   (   Rg   R  R   t   column_namest   selected_df(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _select_columns_or_index  s    	(
c         C` s8   t  |  o7 t j |  s+ t | t  o7 | |  j k S(   s   
        Test whether a key is a column label reference

        To be considered a column label reference, `key` must match the name of at
        least one column.
        (   R1   RM   R  RK   R   RD  (   Rg   Rn   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    N(   R  R  (    (O   R   R   R   RR   R   R-  R   RE   R   R_  R   R   R   RD  R  RT  R  R^  R_  Ra  RM  R   Rd  RC  R   Rg  R    Rh  Ri  Rl  RW   R   Ro  R
  R  R  R   R:   R\  R  Rz  R}  R  R  R  R  R  R  R  R  R   Rt  R   R6  R   R   R  R   R  R  R  R!  R>   R  R   R  R  R&  R(  R  R,  R  R  R  R   R  R  R  R  (    (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   	  s   			+							S					U
!V1				R  t   subR  t   divt   truedivt   floordivt   modt   powt   raddt   rsubt   rmult   rdivt   rtruedivt	   rfloordivt   rmodt   rpowt   ltt   gtt   let   get   net   eqc         ` sA   t    t  o@   j d k o@   j o@ t   f d   |  D  S(   sP   
    This Series is broadcastable against another dataframe in the sequence
    i   c         3` sE   |  ]; } t  | t  r   j t | j  t | j  f k Vq d  S(   N(   RK   R   Rk   R   RD  R   (   R  R  (   Rw  (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>L  s   (   RK   RS   R   R   Re  (   t   dfsRw  (    (   Rw  s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   is_broadcastableE  s
    	c      
   ` s  | j  d t  } | j  d d  } | j  d t  } t |   d t |  | |  } t |  } d d l m } | |  } g  | D]$ } t	 | t
 t t f  r | ^ q }	 g  |	 D] }
 t	 |
 t
  r |
 ^ q } x t |	  D] \ }   t	   t  sq n  t   f d   | D  s@d t |   } t |   n    j d k r   j d	   t   j d  D      |	 | <q q W| d
 j } | ret	 | d
 t  ret |  d k reyi |  g  | D]. } | | d
 k rt j | j  n | ^ q|   } t	 | t j  r&| j   } n  Wn t k
 r:qeXt |  sed g | d
 j d } qen  t t |  } t t | |   } g  t |  D]0 \ } } t	 | t
 t t f  s| | f ^ q} t |  | | |  } t  j! | | d |	 } | t k rt |  d k rCt d   |	 D  rCd } t" |   n  g  |	 D]3 } | |  skt	 | t  rt| j# n | j$ ^ qJ} t% t |     t& d |  d | |  } Wd QXn  t' | | | |  } t( | |  S(   s   Elementwise operation for Dask dataframes

    Parameters
    ----------
    op: callable
        Function to apply across input dataframes
    *args: DataFrames, Series, Scalars, Arrays,
        The arguments of the operation
    **kwrags: scalars
    meta: pd.DataFrame, pd.Series (optional)
        Valid metadata for the operation.  Will evaluate on a small piece of
        data if not provided.
    transform_divisions: boolean
        If the input is a ``dask.dataframe.Index`` we normally will also apply
        the function onto the divisions and apply those transformed divisions
        to the output.  You can pass ``transform_divisions=False`` to override
        this behavior

    Examples
    --------
    >>> elemwise(operator.add, df.x, df.y)  # doctest: +SKIP
    Rj   R   R\  R   i   (   RV  c         3` s5   |  ]+ }   j  p, t   j  d   | j k Vq d S(   i    N(   RH  RU   R   (   R  R  (   RJ  (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>y  s    s[   When combining dask arrays with dataframes they must match chunking exactly.  Operation: %sc         S` s#   i  |  ] \ } } | | d   q S(   i   (    (   R  RY   t   d(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	 i    R_   i   c         s` s   |  ] } t  | d   Vq d S(   R   N(   R|   (   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>  s    s>   elemwise with 2 or more DataFrames and Scalar is not supportedt   functionR   N()   t   popR  R   RW   R!   R/   t   _maybe_from_pandasR  RV  RK   R   R^   R*   R  Rd  R   RC  t   rechunkR   Rk   R   RU   RR   R$  RT   RH   R   R   R  R}   R   R  R5   R`   R   Rf   Rv   R?   R   R   R`  (   R   RX   R   Rj   R   R\  Rb   RV  R  t   dasksR  R  RY   R   Rk   t   _is_broadcastableR   Rh   R   R  R:  R   (    (   RJ  s2   lib/python2.7/site-packages/dask/dataframe/core.pyR   P  s^     1(&+8	*)=!c         C` s  t  |  t  rX t |   d k r. |  d }  qX t |   d k rO t d   qX d }  n  |  d k	 r t |   t |  k r t d t t |    t t |   f   n  t  |  t  rt |  j	  t | j	  k rt
 d t t |  j	   t t | j	   f   qn  t  |  t t t f  rq| j |  _ | j |  _ | j |  _ t  |  t  s| j |  _ qnA |  d k	 rd t |   j t |  j f } t |   n | Sd S(   s    Handle out parameters

    If out is a dask.DataFrame, dask.Series or dask.Scalar then
    this overwrites the contents of it with the result
    i   i    s(   The out parameter is not fully supportedsD   Mismatched types between result and out parameter. out=%s, result=%ssL   Mismatched columns count between result and out parameter. out=%s, result=%ssH   The out parameter is not fully supported. Received type %s, expected %s N(   RK   R   RU   R   R   Re   Rc   R   R   RD  R   RS   R^   Rf   Rb   Ra   Rk   R   (   R   R   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR`    s4    	$+4c         C` sa   d d l  m } g  |  D]@ } t |  s5 t |  rQ t |  rQ | | d  n | ^ q }  |  S(   Ni   (   t   from_pandas(   RL  R  RF   RE   R1   (   R  R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    Jc         ` su   | r |   | p i    n    t   d t  t   rN  j  n   | ;    f d   t |  D S(   NR   c         ` s&   i  |  ] }   j   | k |  q S(    (   RT  (   R  RY   (   R  t   h(    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	 (   R
   R   RF   t   _valuesR   (   R  t   npartsR   R   (    (   R  R  s2   lib/python2.7/site-packages/dask/dataframe/core.pyt
   hash_shard  s    
c         ` sE   t  j d t    | d  j t      f d   t |  D S(   s,    Split dataframe into k roughly equal parts i    i   c         ` s/   i  |  ]% }   j   |  | d  !|  q S(   i   (   RT  (   R  RY   (   R  Rk   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	 (   RM   t   linspaceRU   R  R   R   (   R  R!  (    (   R  Rk   s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   split_evenly  s    (c         C` s=   |  j  } t | t j  r9 t j g  d | j   } n  | S(   NR   (   R   RK   RR   t
   MultiIndexR   R   (   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR	    s    	c         C` s   |  | S(   N(    (   R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    c          ` st   d k r t    n  | d k r0 t   } n   j |  | j |  | d k rz | rk t d   n  | } | } n% | d k r t   } n  | j |  t  t t f  s  g  n  g   D] } t | t  r | ^ q } t d   | D  } t	 |  d k rt d   n  | j
   } |	 d k r@d }	 n@ |	 t k rU| }	 n+ |	 d k  sqt |	 t  rt d   n  t | p | f |   | | |	 |
 | | 
 } d j | pt   |    t	   d k r)t  d	 t  r) r)   f d
   t  d	 j    D } n%      f d   t |  D } |
 r|
 d k rd | } d | } x{ t |  D]m } t   d	 | d	 f |
 | | f | | | f <x9 t |
  D]+ } t | | f | f | | d	 | | f <qWqW|   n d }
 d j | pt |  |  } | } d	 } x | |	 k rx t t |	 t |    D] \ } } x t |
  D] } t g  | D] }   | | | f ^ qf } | rt | | g | f | | | d | | f <qo| | f | | | d | | f <qoWqVW| d } |   | d 7} q.Wx t |
  D] } d j | p@t |  |  } t g  t |  D] }   | | | f ^ qYf } | rt | | g | f | | | f <q%| | f | | | f <q%W| t k rt  d t   } t | t | g  d t | } n  t | d | r/t t | d	  d d  n d } t j | | d | } d g |
 d } t | | | |  S(   s   Apply a function to blocks, then concat, then apply again

    Parameters
    ----------
    args :
        Positional arguments for the `chunk` function. All `dask.dataframe`
        objects should be partitioned and indexed equivalently.
    chunk : function [block-per-arg] -> block
        Function to operate on each block of data
    aggregate : function concatenated-block -> block
        Function to operate on the concatenated result of chunk
    combine : function concatenated-block -> block, optional
        Function to operate on intermediate concatenated results of chunk
        in a tree-reduction. If not provided, defaults to aggregate.
    $META
    token : str, optional
        The name to use for the output keys.
    chunk_kwargs : dict, optional
        Keywords for the chunk function only.
    aggregate_kwargs : dict, optional
        Keywords for the aggregate function only.
    combine_kwargs : dict, optional
        Keywords for the combine function only.
    split_every : int, optional
        Group partitions into groups of this size while performing a
        tree-reduction. If set to False, no tree-reduction will be used,
        and all intermediates will be concatenated and passed to ``aggregate``.
        Default is 8.
    split_out : int, optional
        Number of output partitions. Split occurs after first chunk reduction.
    split_out_setup : callable, optional
        If provided, this function is called on each chunk before performing
        the hash-split. It should return a pandas object, where each row
        (excluding the index) is hashed. If not provided, the chunk is hashed
        as is.
    split_out_setup_kwargs : dict, optional
        Keywords for the `split_out_setup` function only.
    kwargs :
        All remaining keywords will be passed to ``chunk``, ``aggregate``, and
        ``combine``.

    Examples
    --------
    >>> def chunk(a_block, b_block):
    ...     pass

    >>> def agg(df):
    ...     pass

    >>> apply_concat_apply([a, b], chunk=chunk, aggregate=agg)  # doctest: +SKIP
    s+   `combine_kwargs` provided with no `combine`c         s` s   |  ] } | j  Vq d  S(   N(   R   (   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>G  s    i   s1   All arguments must have same number of partitionsi   i   s#   split_every must be an integer >= 2s   {0}-chunk-{1}i    c         ` s1   i  |  ]' \ } }  | f   d  | d  f  q S(   i    (    (   R  RY   Rn   (   RJ  R   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>[  s   	c      	   ` sb   i  |  ]X } t   g   D]* } t | t  r= | j | f n | ^ q  f   d  | d  f  q S(   i    (   R   RK   R   Rb   (   R  RY   R   (   RJ  RX   R   R  (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>^  s   	s   split-%ss   shard-%ss   {0}-combine-{1}s   {0}-agg-{1}R  R   R_   N(    R   R  Rz   R   RK   R   R}   R   Rx   RU   R  R   R   R/   Rd   R!   R  Ro   R   R  R   R   R[   R   R  R  RW   R=   R   R5   R`   R   (    RX   R   R   R  Rj   R   R  R  R  R   R   R   R   R   R  R  R   t	   token_keyRh   t   split_prefixt   shard_prefixRY   R  R  R!  t   deptht   part_it   indst   conct
   meta_chunkR   Rk   (    (   RJ  RX   R   R  s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   apply_concat_apply  s    9	(					,

-	(+,(
1"	+c         C` s   t  |  t t f  r) | r" |  j S|  j St  |  t  rX g  |  D] } t | |  ^ q? St  |  t  r t g  |  D] } t | |  ^ qq  St  |  t  r i  } x% |  D] } t |  | |  | | <q W| St  |  t	  r t
 d   n |  Sd S(   sO   
    Extract internal cache data (``_meta``) from dd.DataFrame / dd.Series
    s>   Cannot infer dataframe metadata with a `dask.delayed` argumentN(   RK   R^   R   Rv   Rf   R}   RX  R   R  R3   R   (   R   Ru  R  R  R!  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRX    s     &c      	   O` sN   t  t |   d | j d t  $ |  t | t  t | t    SWd QXd S(   s   
    Apply a function using args / kwargs. If arguments contain dd.DataFrame /
    dd.Series, using internal cache (``_meta``) for calculation
    R  N(   R?   R!   R  R   RX  RW   (   R   RX   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    %c         O` s1  | j  d t  } | j  d d  } | j  d t  } t |   sH t  | d k	 ri t | | |  } n! t |   } t |  | | |  } d j | |  } d d l	 m
 } t |  } | |  } g  | D] } t | t  r | ^ q }	 |	 rt t |	 d  d d  n d }
 | t k r;t |  d	 t | | } n t | d |
 } t d
   | D  ri t |  t g  | D] } | j d f ^ qvf | f | d f 6} t j | | d | } t | | |  St |  pt |  st t | g  d |
 } n  t |  } g  } g  } x | D] } t | t  rV| j |  | j |  q!n  t |  } t |  \ } } | r| j |  | j |  q!| j |  q!Wi  } xO | j   D]A \ } } t |  } t |  \ } } | j |  | | | <qWt  t! | d | d |  d | | | } |	 d j" } | rt |	 d t#  rt$ |	  d k ryi |  g  | D]. } | |	 d k rt% j# | j"  n | ^ ql|   } t | t% j#  r| j&   } n  Wn t' k
 rqXt( |  sd g |	 d j) d } qn  t j | | d | } t* | | | |  S(   s   Apply Python function on each DataFrame partition.

    Parameters
    ----------
    func : function
        Function applied to each partition.
    args, kwargs :
        Arguments and keywords to pass to the function.  At least one of the
        args should be a Dask.dataframe. Arguments and keywords may contain
        ``Scalar``, ``Delayed`` or regular python objects. DataFrame-like args
        (both dask and pandas) will be repartitioned to align (if necessary)
        before applying the function.
    $META
    Rj   R   R\  s   {0}-{1}i   (   RV  i    R   R  c         s` s   |  ] } t  | t  Vq d  S(   N(   RK   R^   (   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>  s    R_   t   _funcRf   N(+   R  R  R   RW   R  t   AssertionErrorR/   R!   Rd   R  RV  R  RK   R   R   R=   R  Rd  R   R   Rb   R5   R`   R^   RQ   R'   R[   R   R+   R4   RN  R  R  t   apply_and_enforceRk   R   RU   RR   R$  RT   RH   R   R   (   R   RX   R   Rj   Ri   R\  R   RV  R  R  t
   meta_indexR  R   R   RZ   R_   t   arg2t   collectionst   kwargs3R!  R~  Rh   Rk   RJ  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s    ((>	+8	c          O` s   | j  d  } | j  d  } | |  |   } t |  sQ t |  sQ t |  r t |  sa | St |  r t j t j | j  t j | j   s t	 d   q | j } n	 | j
 } t | |  S| S(   ss   Apply a function, and enforce the output to match meta

    Ensures the output has the same columns, even if empty.R  Rf   sR   The columns in the computed data do not match the columns in the provided metadata(   R  RE   RF   RG   RU   RM   t   array_equalt
   nan_to_numRD  R   Ri   t   _rename(   RX   R   R   Rj   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  !  s    $	c         C` sW  t  | t  s t  |  t k r& | St  |  t  rD t |   }  n  t |  r t |   rh |  j }  n  t  |  t j	  s t j	 |   }  n  t
 |   t
 | j  k r t |   t | j  k r |  j | j  r | S| j d t  } |  | _ | St |  st |  rSt |   s't |   r3|  j }  n  | j |  k rF| S| j |   S| S(   su  
    Rename columns of pd.DataFrame or name of pd.Series.
    Not for dd.DataFrame or dd.Series.

    Parameters
    ----------
    columns : tuple, string, pd.DataFrame or pd.Series
        Column names, Series name or pandas instance which has the
        target column names / name.
    df : pd.DataFrame or pd.Series
        target DataFrame / Series to be renamed
    R+  (   RK   R   R  R  R   R}   RE   RD  RR   R   RU   Re   t   equalsR   R   RF   RG   Ri   R  (   RD  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  9  s0    	c         C` s   t  |  t  s t  t | |  j  } d j t |  |   } t t | | |   } t j	 | | d |  g } t
 | | | |  j  S(   s  
    Destructively rename columns of dd.DataFrame or name of dd.Series.
    Not for pd.DataFrame or pd.Series.

    Internaly used to overwrite dd.DataFrame.columns and dd.Series.name
    We can't use map_partition because it applies function then rename

    Parameters
    ----------
    df : dd.DataFrame or dd.Series
        target DataFrame / Series to be renamed
    names : tuple, string
        Column names/Series name
    s
   rename-{0}R_   (   RK   R   R  R  Rf   Rd   R/   R  R5   R`   R   Rk   (   R  t   namest   metadataRi   Rh   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  f  s    R  c         ` sm  t  j   } | j d k r7 | j d d  |  n  t |  t  sL t  d d d g } | | k rv t d   n  | d k r d } n | } t |  t  r t	 j |  j
  j   } n |  j
 j   } t |  r |  j    f d   } t } n d	   } t }  g  t  j   d
  t |    }	 t   d k rd |	  t	 j g  d t }
 t i t	 j g  d |  j d |
  d f 6 |  j d d g  St  j   t  j   g } |  j   }  | d k rt  j |  j t  j  st  j |  j t  j  rd d l m } | d d  d d l m  m  } d |	    f d   t! |  j"    D } d |	 } i | |  t# |  f  | d f 6} n d d l m$   m% } d |	      f d   t! |  j"    D } d |	 } i | |   g |  j& t# |  f  | d f 6} t' | |  } t( j) | | d |  g } | | | | |  S(   s  Approximate quantiles of Series.

    Parameters
    ----------
    q : list/array of floats
        Iterable of numbers ranging from 0 to 100 for the desired quantiles
    method : {'default', 'tdigest', 'dask'}, optional
        What method to use. By default will use dask's internal custom
        algorithm (``'dask'``).  If set to ``'tdigest'`` will use tdigest for
        floats and ints and fallback to the ``'dask'`` otherwise.
    i    t   kindt	   mergesortR  Ra   t   tdigests1   method can only be 'default', 'dask' or 'tdigest'c         ` s   t  j |   d    f S(   N(   RR   RS   R   (   t   tsk(   t   df_nameR  (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     R   c         S` s   t  |  d f S(   Ni    (   R   (   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     R   id   s
   quantiles-Rw   Ri   R   (   t   import_requiredt   cricks=   crick is a required dependency for using the t-digest method.(   t   _tdigest_chunkt   _percentiles_from_tdigests   quantiles_tdigest-1-c         ` s4   i  |  ]* \ } }   t  | d  f f  | f  q S(   R7  (   R   (   R  RY   Rn   (   R  Ri   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	s   quantiles_tdigest-2-(   t   _percentilet   merge_percentiless   quantiles-1-c         ` s7   i  |  ]- \ } }   t  | d  f  f  | f  q S(   R7  (   R   (   R  RY   Rn   (   R  Ri   t   qs(    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	s   quantiles-2-R_   N(*   RM   R   RC  t   sortRK   RS   R  R   R   RR   Rv   R  RF   Ri   R^   R   R/   RU   R   Rf   R   R   R   R  t
   issubdtypeRw   t   floatingt   integert
   dask.utilsR  t   dask.array.percentileR  R  R  Ro   Rr  R  R   R   R   R5   R`   (   R  R  R   t	   q_ndarrayt   allowed_methodst   internal_methodRj   t   finalize_tskR   R   t   empty_indext   new_divisionsR  R  t   val_dskR  t	   merge_dskR   Rh   R   (    (   R  R  R  Ri   R  R  s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    sj    						
+0



c         ` s  | d k r d } n | d k  r0 t d   n  | t k rH |  j } n+ | d k  sd t | t  rs t d   n  |  j   }  | r t |  j  d k r t d   n  t	 |  | | |  }  r d n d } d j
 | |  j       f d   t |  j    D } d	 j
 | |  j  } |  j }	   }
 d
 } x |	 | k r| t |  }
 x[ t t | t |	    D]> \ } } t g  | D] }   | f ^ q f | |
 | f <qoW| d }	 |
   | d 7} q7Wd j
 | |  } t g  t |	  D] }   | f ^ q|  j |  | f | | d
 f <t j | | d |  g } | rVt | | d  St g  |  j D] } | d f ^ qcd |  j } t | | | |  j d
 |  j d f  S(   sg  DataFrame covariance and pearson correlation.

    Computes pairwise covariance or correlation of columns, excluding NA/null
    values.

    Parameters
    ----------
    df : DataFrame
    min_periods : int, optional
        Minimum number of observations required per pair of columns
        to have a valid result.
    corr : bool, optional
        If True, compute the Pearson correlation. If False [default], compute
        the covariance.
    scalar : bool, optional
        If True, compute covariance between two variables as a scalar. Only
        valid if `df` has 2 columns.  If False [default], compute the entire
        covariance/correlation matrix.
    split_every : int, optional
        Group partitions into groups of this size while performing a
        tree-reduction. If set to False, no tree-reduction will be used.
        Default is False.
    i   s   min_periods must be >= 2s#   split_every must be an integer >= 2s(   scalar only valid for 2 column dataframeR(  R&  s   {0}-chunk-{1}c         ` s.   i  |  ]$ \ } } t  |  f   | f  q S(    (   t   cov_corr_chunk(   R  RY   R   (   RJ  R(  (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>	  s   	s   {0}-combine-{1}-i    i   s   {0}-{1}R_   R  R   iN(   R   R   R   R   RK   R   Rt  RU   RD  R/   Rd   Rb   R  Ro   R   R   R   t   cov_corr_combinet   cov_corr_aggR5   R`   R^   R=   R   (   R  RU  R(  Rl  R   R   R!   Rh   t   prefixR!  R  R  R  R  RY   Ri   R   R  Rj   (    (   RJ  R(  s2   lib/python2.7/site-packages/dask/dataframe/core.pyR%    sF    		(6
%1c         C` s  |  j  d |  j  d f } t j |  } t j |  } |  j d d t }  xX t |   D]J \ } } |  | j   } |  | j   j | | <|  | j	   j | | <qZ W|  j
   j } d | j f d | j f d | j f g }	 | rt j d t   t j d  | | j }
 Wd	 QXt j |  } |  j   j } xa t |   D]S \ } } t j j |  | |
 |  d
 } t j | | <t j | d d | | <qEW| j } |	 j d | j f  n  t j | j  d |	 } | | d <| | d <| | d | d <| r| | d <n  | S(   s:   Chunk part of a covariance or correlation computation
    i   t   float64R   R   Rr  R&  t   recordt   alwaysNi   R1  i    t   mRw   (   R   RM   t   zerosR  R   R  Rg  R   R7  Rr  R&  Rw   R+  t   catch_warningsRW   t   simplefilterR  R  t   subtractR  R  t   nansumR   t   empty(   R  R(  R   t   sumsR  t   idxt   colR  R&  Rw   t   muR  R   t   mu_discrepancyR   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  "  s:    *!	

c         C` s  t  j |   j t |   f |  d j  }  t  j |  d  } |  d } t  j | d  } t  j | d  } | d  } | d } | d  } | d }	 t  j d d  Z | |	 | | }
 t  j | |	 | |	 |
 |
 j	 d  d  t  j |  d	 d  } Wd  QXt  j
 | j d
 |  j } | d | d <| d | d <| | d	 <| rt  j | d | d t  j  } | d | } t  j | | t  j  } t  j |  d | | | | d d d } | | d <n  | S(   Ni    R   Rr  ii   t   invalidt   ignorei   R&  Rw   R  R1  (   i    i   i   (   RM   t   concatenateR  RU   R   R  R  t   errstateR  t	   transposeR  Rw   Rf  R  (   R   R(  R  R  t   cum_sumst
   cum_countst   s1t   s2t   n1t   n2R  t   CR   t   nobsR!  t	   counts_naR  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  E  s2    ,




,
 $	c         C` s   t  |  |  } | d } | d } t j | | | k  <| r_ | d } t j | | j  }	 n t j | | t j  d }	 t j d d d d   | |	 }
 Wd  QX| r |
 d St j |
 d	 | d
 | S(   NRr  R&  R  i   R#  R$  t   dividei    RD  R   (   i    i   (	   R  RM   R  R}  R  Rf  R&  RR   R   (   R   R   RU  R(  Rl  R   R  R.  t   m2t   dent   mat(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  e  s    


c         C` sT   t  |  } t t |   | |  } g  t t |   D] } |  j | | k ^ q7 S(   si   Split DataFrame into multiple pieces pseudorandomly

    >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6],
    ...                    'b': [2, 3, 4, 5, 6, 7]})

    >>> a, b = pd_split(df, [0.5, 0.5], random_state=123)  # roughly 50/50 split
    >>> a
       a  b
    1  2  3
    2  3  4
    5  6  7
    >>> b
       a  b
    0  1  2
    3  4  5
    4  5  6
    (   R}   R   RU   R   RT  (   R  t   pR  R   RY   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  v  s    c         ` s   d     | t  k r   j d St   r t  j d d   j d f  }  j re | g   S|    f d    j D d  j S    Sd S(   s   
    take last row (Series) of DataFrame / last value of Series
    considering NaN.

    Parameters
    ----------
    a : pd.DataFrame or pd.Series
    skipna : bool, default True
        Whether to exclude NaN

    c         S` sv   xo t  d t d t |   d   D]' } |  j | } t j |  s# | Sq# W|  |  j   } | j sr | j d Sd  S(   Ni   i
   i(	   R   R   RU   RT  RR   R  t   notnaR  R   (   Rw  RY   t   valt   nonnull(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _last_valid  s    )	ii    i   c         ` s#   i  |  ] }    |  |  q S(    (    (   R  R   (   R9  RJ  (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	 R   N(   R   RT  RE   Re   R  RD  R  (   RJ  R^  t
   series_typ(    (   R9  RJ  s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    	&	

c         C` s   t  |  t t f  s$ t d   n  t |   }  |  t |   k rQ t d   n  t |  d   t t t |  d     k r d } t |   n  d  S(   Ns"   New division must be list or tuples   New division must be sortedis8   New division must be unique, except for the last element(   RK   R}   R   R   Rr  RU   R   (   Rk   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRp    s    ,c         C` s  t  |  t |  d k  r+ t d   n  | r |  d | d k  rZ d } t |   n  |  d | d k r d } t |   q nR |  d | d k r d } t |   n  |  d | d k r d } t |   n  d	   } |  d g } t   }	 |  d }
 d \ } } d } | |   } xt| t |   k  r| t |  k  r|  | | | k  rt j | | d
 f |
 |  | t f |	 | | f <|  | }
 | d
 7} n |  | | | k rt j | | d
 f |
 | | t f |	 | | f <| | }
 | d
 7} n t j | | d
 f |
 | | t f |	 | | f <| | }
 t |   | d
 k sd|  | |  | d
 k  rq| d
 7} n  | d
 7} | j |
  | d
 7} q"W|  d | d k  s| d | d k r>x t | t |   D]c } t |   d } t j | | f |
 | | t f |	 | | f <| | }
 | j |
  | d
 7} qWnj | r| t |   k  rt j | | d
 f |  | |  | t f |	 | | f <| d
 7} n  | j |  d  |	 | | d
 f d  t	 f |	 | | d
 f <d \ } } | |  } x| t |  k  rg  } x5 | | | | k  r?| j | | f  | d
 7} qWxq | r| | | d k r| d | d k s| t |  d
 k r| | k  r| j | | f  | d
 7} qCWt |  d k rt j | d f |  d |  d t f |	 | | d
 f <n~ t |  d
 k r*| d |	 | | d
 f <nQ | s^t d t
 |   t
 |  t
 |  f   n  t j | f |	 | | d
 f <| d
 7} qW|	 S(   s%   dask graph to repartition dataframe by new divisions

    Parameters
    ----------
    a : tuple
        old divisions
    b : tuple, list
        new divisions
    name : str
        name of old dataframe
    out1 : str
        name of temporary splits
    out2 : str
        name of new dataframe
    force : bool, default False
        Allows the expansion of the existing divisions.
        If False then the new divisions lower and upper bounds must be
        the same as the old divisions.

    Examples
    --------
    >>> repartition_divisions([1, 3, 7], [1, 4, 6, 7], 'a', 'b', 'c')  # doctest: +SKIP
    {('b', 0): (<function boundary_slice at ...>, ('a', 0), 1, 3, False),
     ('b', 1): (<function boundary_slice at ...>, ('a', 1), 3, 4, False),
     ('b', 2): (<function boundary_slice at ...>, ('a', 1), 4, 6, False),
     ('b', 3): (<function boundary_slice at ...>, ('a', 1), 6, 7, False)
     ('c', 0): (<function concat at ...>,
                (<type 'list'>, [('b', 0), ('b', 1)])),
     ('c', 1): ('b', 2),
     ('c', 2): ('b', 3)}
    i   s+   New division must be longer than 2 elementsi    sH   left side of the new division must be equal or smaller than old divisionisH   right side of the new division must be equal or larger than old divisions0   left side of old and new divisions are differents1   right side of old and new divisions are differentc         S` s$   t  |   d k o# |  d |  d k S(   s0   Whether last division only contains single labeli   ii(   RU   (   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   _is_single_last_div  s    i   is=   check for duplicate partitions
old:
%s

new:
%s

combined:
%s(   i   i   (   i    i   (   Rp  RU   R   R  R6   R  R   R   R   RW   R   RV   (   RJ  R  Ri   t   out1t   out2R)  R   R;  R  R  t   lowRY   R  R!  t	   last_elemt   _jR  t   tmp(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   repartition_divisions  s     
		
'0
0
0
.
(,
4-S7(c         C` s#  t  |  j d t j  s( t d   n  y |  j d j |  } Wn t k
 rb |  j d } n Xt j d | d |  j d d |  j   } t	 |  s |  j d |  j d g } n\ | d |  j d k r | j
 |  j d  n  | d |  j d k r|  j d g | } n  |  j d |  S(   s7    Repartition a timeseries dataframe by a new frequency i    s0   Can only repartition on frequency for timeseriesR  R  iR*  Rk   (   RK   Rk   RR   t	   TimestampRc   t   ceilR   t
   date_rangeR$  RU   R   R.  (   R  R*  R  Rk   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR/  K  s     c         C` s  d | t  |   f } |  j | k r) |  S|  j | k r0|  j | } g  t | d  D] } t | |  ^ qV } i  } xd t |  D]V } t j g  t | | | | d  D] } |  j | f ^ q f } | | | | f <q Wg  | D] } |  j | ^ q }	 t j	 | | d |  g }
 t
 |
 | |  j |	  St j |  j  } }	 |  j rt j |	 j t j  st j |	 j t j  rt j |	 j t j  r|	 j j d  }	 n  t |	  r|	 j }	 n  t |	  } t j d t j d | | d  d t j d | |  d |	  }	 t j | j t j  rLt j |	  j | j  j   }	 n- t j | j t j  ry|	 j | j  }	 n  t |	 t j  r|	 j   }	 n  t |	  }	 |  j d |	 d <|  j d	 |	 d	 <|  j d
 |	  S| |  j } d t  |  |  } i  } d } d } x t |  j  D] } | | } | |  j d k rJ| | } n t | |  } t  |  j | f | f | | | f <x= t |  D]/ } t! | | f | f | | | f <| d 7} qW| } qWd g | d }	 t j	 | | d |  g }
 t
 |
 | |  j |	  Sd S(   s9    Repartition dataframe to a smaller number of partitions s   repartition-%d-%si   R_   R  R   i    t   xpt   fpiRk   s   split-%sN(#   R/   R   R   R   R6   RV   Rb   Rk   R5   R`   R   Rf   RR   RS   R   RM   R  Rw   t
   datetime64t   numberR7  R  RF   RU   t   interpR  R$  R  RK   RN   R}   R.  R  R   R   (   R  R   t   new_namet   npartitions_ratiot   new_partition_indext   new_partitions_boundariesRh   t   old_partition_indexR   Rk   R   t   original_divisionsR   t   ratiot
   split_nameR  R  RY   R  R!  t   jj(    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR-  a  sr    *
-!"	$
"
c   
      ` s  t  |  |  } t |  t  r d | } d | } t |  j | |  j | | d | } t j | | d |  g } t | | |  j	 |  St
 |   s t |   rd |   d d l m } | |  | d d ! }	 t   f d	   t |	  D  } t |   |  |  St d
   d S(   s   Repartition dataframe along new divisions

    Dask.DataFrame objects are partitioned along their index.  Often when
    multiple dataframes interact we need to align these partitionings.  The
    ``repartition`` function constructs a new DataFrame object holding the same
    data but partitioned on different values.  It does this by performing a
    sequence of ``loc`` and ``concat`` calls to split and merge the previous
    generation of partitions.

    Parameters
    ----------

    divisions : list
        List of partitions to be used
    force : bool, default False
        Allows the expansion of the existing divisions.
        If False then the new divisions lower and upper bounds must be
        the same as the old divisions.

    Examples
    --------

    >>> df = df.repartition([0, 5, 10, 20])  # doctest: +SKIP

    Also works on Pandas objects

    >>> ddf = dd.repartition(df, [0, 5, 10, 20])  # doctest: +SKIP
    s   repartition-split-s   repartition-merge-R)  R_   s   repartition-dataframe-i   (   t   shard_df_on_indexic         3` s'   |  ] \ } }   | f | f Vq d  S(   N(    (   R  RY   R  (   Ri   (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>  s    s    Data must be DataFrame or SeriesN(   R/   RK   R   RB  Rk   Rb   R5   R`   R   Rf   RE   RF   t   utilsRT  R  R  R   (
   R  Rk   R)  R   RA  R   Rh   R   RT  R  (    (   Ri   s2   lib/python2.7/site-packages/dask/dataframe/core.pyR.    s    


"c         K` s,   | |  |  } t  |  r( | j   j S| S(   N(   RF   R  R  (   R   R   R   R~   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         K` sM   t  |  t  r! t j |   }  n  | |  |  } t |  rI | j   j S| S(   N(   RK   R}   RR   RS   RF   R  R  (   R   R  R   R~   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR	    s    c         K` s.   t  |  t  r! t j |   }  n  | |  |  S(   N(   RK   R}   RR   RS   (   R   R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s    c         C` s   | d k r d n d } t  |   d k r] t |  |  d |  } t |  |  d |  } n t j g  d d } } t |  r t j i | d 6| d	 6 St j i | g d 6| g d	 6 S(
   NRk  R   R   i    R^  Rw   t   i8R  R   (   RU   R   RR   RS   RF   R   (   R   Rm  R^  t   minmaxR  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRn    s    c         C` s   | d k r d n d } t  |   d k rx |  j d  }  t |  j |  d |  g } t |  j |  d |  g } n t j g  d d } } t j i | d 6| d	 6 S(
   NRk  R   R   i    R  R^  Rw   RV  R   (   RU   Ro  R   R   RR   RS   R   (   R   Rm  R^  RW  R  R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   idxmaxmin_row  s    !c         C` sM   t  |   d k r |  S|  j d d  j t d | d | j d d d t  S(   Ni    R  Rm  R^  i   R   (   RU   R  R   RX  R   RW   (   R   Rm  R^  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRp    s
    c         C` sU   t  |  | d | d } t |  d k r: t d   n  | rH | d Sd  | _ | S(   NR^  R  i    s*   attempt to get argmax of an empty sequence(   Rp  RU   R   R   Ri   (   R   Rm  R^  Rl  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRo  
  s    	c         C` sP   |  j  d |  } t |  | k rL d } t j | j | t |    n  | S(   NR   s   Insufficient elements for `head`. {0} elements requested, only {1} elements available. Try passing larger `npartitions` to `head`.(   R  RU   R+  R,  Rd   (   R  R   t   rR   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s
    "c         C` s   t  | t  r' t j j j |  } n  t  | t j  r_ | j   sU t | d  r_ |  j	   S|  j
 r t j t t |  j   d |  j } | j | d | j } t |   |  j |  j |  j |  S|  S(   s  Maybe shift divisions by periods of size freq

    Used to shift the divisions for the `shift` method. If freq isn't a fixed
    size (not anchored or relative), then the divisions are shifted
    appropriately. Otherwise the divisions are cleared.

    Parameters
    ----------
    df : dd.DataFrame, dd.Series, or dd.Index
    periods : int
        The number of periods to shift.
    freq : DateOffset, timedelta, or time rule string
        The frequency to shift by.
    t   deltaR   R*  (   RK   R   RR   R  R  R  t
   DateOffsetR  R|   R   R   RS   R   RU   Rk   R[  R   Re   Ra   Rb   Rf   (   R  RZ  R*  R  Rk   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR]    s    
	'"c         K` s4   t  j t  j d  g  } t t  j |  d | | S(   Nt   2000Rj   (   RR   RS   RC  R   t   to_datetime(   R  R   Rj   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR]  <  s    t   nsR  c      	   C` sC   t  j t  j d d | g  } t t  j |  d | d | d | S(   Ni   t   unitR  Rj   (   RR   RS   t	   TimedeltaR   t   to_timedelta(   R  R_  R  Rj   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRa  B  s    !R  c         C` s   t  t j |   S(   N(   R   RR   R  (   R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  J  s    c         C` ss   t  |  d } t |   r: t |   r1 d } qI d } n t |  j  } t j | g d g | d | d |  j S(   s1   A helper for creating the ``_repr_data`` propertyi   s   category[known]s   category[unknown]s   ...R   Ri   (   RU   RA   RB   R   Rw   RR   RS   Ri   (   Rw  R   R   Rw   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR  O  s    		R   c         C` s   t  S(   N(   RS   (   R&  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   get_parallel_type_series_  s    c         C` s   t  S(   N(   R   (   R&  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   get_parallel_type_dataframed  s    c         C` s   t  S(   N(   R   (   R&  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   get_parallel_type_indexi  s    c         C` s   t  S(   N(   R^   (   R~   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   get_parallel_type_objectn  s    c         C` s   t  |  j  S(   N(   R   Rf   (   R~   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   get_parallel_type_frames  s    c           C` s   t  d   t j j   D  S(   Nc         s` s'   |  ] \ } } | t  k	 r | Vq d  S(   N(   Re  (   R  R!  R~  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>y  s    	(   R   R   t   _lookupR  (    (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   parallel_typesx  s    c         C` s   t  |   t |  t    S(   s4    Does this object have a dask dataframe equivalent? (   R   RK   Rh  (   R   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyRQ   }  s    
c   	      C` sh  t  |  r% t |  |  | | |  St |  rKd d l j } t j f t |  d f t d   | j	 d D  } t |  d k r)|  j
 | } t | t  r | d d | j d <| j d	 | _ q)d
 t |  d } xD t t | d   D]) } | j | | f  | | | f | <q Wn  | j |  d | d | d | j St |  |  | | |  Sd S(   s   Generic constructor for dask.dataframe objects.

    Decides the appropriate output class based on the type of `meta` provided.
    i    Ni   c         s` s   |  ] } | f Vq d  S(   N(    (   R  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys	   <genexpr>  s    R  Ri   RH  Rw   (   R  (   i    (   RQ   R   R'   t
   dask.arrayR   RM   R  RU   R   R   t   layersRK   R-   t   new_axest   output_indicesR   R  R*   Rw   (	   Rh   Ri   Rj   Rk   RO   RH  R   R  RY   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR     s     *"c      
   O` sh  g  } i  } x7| D]/} t  | t  rT | j | j d g  | j f | | j <q t  | t  r | j | j d g  d
 | | j <q t  | t  r/| j d k r | j | j d g  n\ | j d k r | j | j d g  n4 | j d k r| j | j d g  n t	 d   | j
 | | j <q | j | d g  q Wt |  | d d | d	 t | | S(   s?  
    Apply a function partition-wise across arguments to create layer of a graph

    This applies a function, ``func``, in an embarrassingly parallel fashion
    across partitions/chunks in the provided arguments.  It handles Dataframes,
    Arrays, and scalars smoothly, and relies on the ``blockwise`` machinery
    to provide a nicely symbolic graph.

    It is most commonly used in other graph-building functions to create the
    appropriate layer of the resulting dataframe.

    Parameters
    ----------
    func: callable
    name: str
        descriptive name for the operation
    *args:
    **kwargs:

    Returns
    -------
    out: Blockwise graph

    Examples
    --------
    >>> subgraph = partitionwise_graph(function, x, y, z=123)  # doctest: +SKIP
    >>> layer = partitionwise_graph(function, df, x, z=123)  # doctest: +SKIP
    >>> graph = HighLevelGraph.from_collections(name, layer, dependencies=[df, x])  # doctest: +SKIP
    >>> result = new_dd_object(graph, name, metadata, df.divisions)  # doctest: +SKIP

    See Also
    --------
    map_partitions
    RY   i   i    R   i   t   ijs/   Can't add multi-dimensional array to dataframest	   numblocksR%  (   i   N(   RK   R   RN  Rb   R   R^   R*   RC  Ri   R   Rn  R   R,   RW   (   R   Ri   RX   R   Rx  Rn  R  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR    s(    #c         C` s   t  |   r. d   |  j j   j   D } n- t |   rU |  j t |  j  f } n d } d } | r~ | d t |  7} n  | S(   sS   
    Provide an informative message when the user is asked to provide metadata
    c         S` s%   i  |  ] \ } } t  |  |  q S(    (   R   (   R  R!  R~  (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pys
   <dictcomp>  s   	 s<  
You did not provide metadata, so Dask is running your function on a small dataset to guess output types. It is possible that Dask will guess incorrectly.
To provide an explicit output types or to silence this message, please provide the `meta=` keyword, as described in the map or apply function that you are using.s8   
  Before: .apply(func)
  After:  .apply(func, meta=%s)
N(	   RE   Rg  t   to_dictR  RF   Ri   R   Rw   R   (   R  t   meta_strR   (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyR#    s    "(   t
   __future__R    R   R   R  R+  t	   functoolsR   R   t   numbersR   R   R   t   pprintR   t   numpyRM   t   pandasRR   t   pandas.utilR	   R
   t   toolzR   R   R   R   R   t   chestR   t   Cachet   ImportErrorR  R   R   RO   R   RU  R   R   R   R   t   compatibilityR   R   R   R   R   R   R   R   R   R   R    R!   R"   R#   R$   R%   R&   R'   R(   R)   t
   array.coreR*   R+   R,   R-   t   baseR.   R/   R0   R1   R2   R3   R4   t   highlevelgraphR5   R6   R  R7   R8   t   categoricalR9   R:   R;   R<   R=   R>   R?   R@   RA   RB   RC   RD   RE   RF   RG   RH   R  t
   set_optionR   R[   R]   R^   R   R   Rc  RS   R   R   Rb  R  t   and_R  R  R  R   R  R  R  R  R  t   negt   or_R  R  R  R  t   xorR   t   _bind_operatorRi   R   R  R  R!  R  R   R`  R  R   R  R  R	  R   R  R   RX  R  R   R  R  R  R  R%  R  R  R  R  RW   R  Rp  RB  R/  R-  R.  R  R	  R  Rn  RX  Rp  Ro  R  R]  R]  Ra  R|   R  R  R   t   registerRb  Rc  Rd  R#  Re  Rf  Rh  RQ   R   R  R#  (    (    (    s2   lib/python2.7/site-packages/dask/dataframe/core.pyt   <module>   s  (
.R"X		s#      x		 o   o"		[	(								i		-	_D# '		G/
	
						;