ó
¦–Õ\c           @` s)  d  Z  d d l m Z m Z m Z d d l m Z m Z d d l Z d d l	 m
 Z
 m Z m Z d d l Z d d l m Z m Z d d l m Z d d	 l m Z d
 d l m Z m Z m Z m Z m Z m Z m Z m Z d
 d l m  Z  d
 d l! m" Z" d
 d l# m# Z# m$ Z$ d
 d l% m& Z& d „  Z' d „  Z( d d „ Z* i d g d 6d
 g d 6d d
 g d 6g  d 6Z+ d „  Z, e- e- d „ Z. e# Z/ d d d# d e0 d „ Z1 d „  Z2 e e j3 ƒ d d d d e0 e0 d$ e0 d d d d „ ƒ Z3 d „  Z4 d „  Z5 d d d  „ Z6 d d! „ Z7 d d e0 d" „ Z8 d S(%   s(	  
Algorithms that Involve Multiple DataFrames
===========================================

The pandas operations ``concat``, ``join``, and ``merge`` combine multiple
DataFrames.  This module contains analogous algorithms in the parallel case.

There are two important cases:

1.  We combine along a partitioned index
2.  We combine along an unpartitioned index or other column

In the first case we know which partitions of each dataframe interact with
which others.  This lets uss be significantly more clever and efficient.

In the second case each partition from one dataset interacts with all
partitions from the other.  We handle this through a shuffle operation.

Partitioned Joins
-----------------

In the first case where we join along a partitioned index we proceed in the
following stages.

1.  Align the partitions of all inputs to be the same.  This involves a call
    to ``dd.repartition`` which will split up and concat existing partitions as
    necessary.  After this step all inputs have partitions that align with
    each other.  This step is relatively cheap.
    See the function ``align_partitions``.
2.  Remove unnecessary partitions based on the type of join we perform (left,
    right, inner, outer).  We can do this at the partition level before any
    computation happens.  We'll do it again on each partition when we call the
    in-memory function.  See the function ``require``.
3.  Embarrassingly parallel calls to ``pd.concat``, ``pd.join``, or
    ``pd.merge``.  Now that the data is aligned and unnecessary blocks have
    been removed we can rely on the fast in-memory Pandas join machinery to
    execute joins per-partition.  We know that all intersecting records exist
    within the same partition


Hash Joins via Shuffle
----------------------

When we join along an unpartitioned index or along an arbitrary column any
partition from one input might interact with any partition in another.  In
this case we perform a hash-join by shuffling data in each input by that
column.  This results in new inputs with the same partition structure cleanly
separated along that column.

We proceed with hash joins in the following stages:

1.  Shuffle each input on the specified column.  See the function
    ``dask.dataframe.shuffle.shuffle``.
2.  Perform embarrassingly parallel join across shuffled inputs.
i    (   t   absolute_importt   divisiont   print_function(   t   wrapst   partialN(   t   merge_sortedt   uniquet   firsti   (   t   tokenizet   is_dask_collection(   t   apply(   t   HighLevelGraphi   (   t   _Framet	   DataFramet   Seriest   map_partitionst   Indext   _maybe_from_pandast   new_dd_objectt   is_broadcastable(   t   from_pandas(   t   methods(   t   shufflet   rearrange_by_divisions(   t   strip_unknown_categoriesc          G` s-  t  t |  ƒ } g  |  D]( } t | t ƒ r | | ƒ r | ^ q } t |  ƒ d k re t d ƒ ‚ n  t d „  | Dƒ ƒ sŠ t d ƒ ‚ n  t t t	 g  | D] } | j
 ^ qš Œ  ƒ ƒ } t | ƒ d k rá | d | d f } n  g  |  D]0 } t | t ƒ r| j | d t ƒn | ^ qè } t ƒ  } g  |  D] } d ^ q.} x× | d  D]Ë } t ƒ  }	 x¬ t | ƒ D]ž \ }
 } t | t ƒ rø| |
 } | j
 } | t | ƒ d k  rè| | | k rè|	 j | j | |
 f ƒ | |
 c d 7<q|	 j d ƒ qg|	 j d ƒ qgW| j |	 ƒ qKW| t | ƒ | f S(	   sr   Mutually partition and align DataFrame blocks

    This serves as precursor to multi-dataframe operations like join, concat,
    or merge.

    Parameters
    ----------
    dfs: sequence of dd.DataFrame, dd.Series and dd.base.Scalar
        Sequence of dataframes to be aligned on their index

    Returns
    -------
    dfs: sequence of dd.DataFrame, dd.Series and dd.base.Scalar
        These must have consistent divisions with each other
    divisions: tuple
        Full divisions sequence of the entire result
    result: list
        A list of lists of keys that show which data exist on which
        divisions
    i    s$   dfs contains no DataFrame and Seriesc         s` s   |  ] } | j  Vq d  S(   N(   t   known_divisions(   t   .0t   df(    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys	   <genexpr>f   s    s]   Not all divisions are known, can't align partitions. Please use `set_index` to set the index.i   t   forceiÿÿÿÿN(   R   R   t
   isinstanceR   t   lent
   ValueErrort   allt   listR   R   t	   divisionst   repartitiont   Truet	   enumeratet   appendt   _namet   Nonet   tuple(   t   dfst   _is_broadcastableR   t   dfs1R"   t   dfs2t   resultt   indst   dt   Lt   it   jt   divs(    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   align_partitionsK   s8    .:		
	&c         ` sÃ   t  t |  ƒ } g  |  D]( } t | t ƒ r | | ƒ r | ^ q } | sN |  S| d j ‰  t ‡  f d †  | Dƒ ƒ s¿ t t | Œ  d ƒ } g  |  D]' } t | t ƒ s¯ | n	 t | ƒ ^ q” S|  S(   s  Align DataFrame blocks if divisions are different.

    Note that if all divisions are unknown, but have equal npartitions, then
    they will be passed through unchanged. This is different than
    `align_partitions`, which will fail if divisions aren't all knowni    c         3` s   |  ] } | j  ˆ  k Vq d  S(   N(   R"   (   R   R   (   R"   (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys	   <genexpr>’   s    (	   R   R   R   R   R"   R    t   iterR5   t   next(   t   argsR+   R   R*   R-   t   a(    (   R"   s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   _maybe_align_partitions„   s    2c         C` s¦   | s |  | f Sx‰ | D] } g  t  | ƒ D]" \ } } | | d k	 r* | ^ q* } t |  t | ƒ t | ƒ d !ƒ }  t | t | ƒ t | ƒ d !ƒ } q W|  | f S(   sI   Clear out divisions where required components are not present

    In left, right, or inner joins we exclude portions of the dataset if one
    side or the other is not present.  We can achieve this at the partition
    level as well

    >>> divisions = [1, 3, 5, 7, 9]
    >>> parts = [(('a', 0), None),
    ...          (('a', 1), ('b', 0)),
    ...          (('a', 2), ('b', 1)),
    ...          (None, ('b', 2))]

    >>> divisions2, parts2 = require(divisions, parts, required=[0])
    >>> divisions2
    (1, 3, 5, 7)
    >>> parts2  # doctest: +NORMALIZE_WHITESPACE
    ((('a', 0), None),
     (('a', 1), ('b', 0)),
     (('a', 2), ('b', 1)))

    >>> divisions2, parts2 = require(divisions, parts, required=[1])
    >>> divisions2
    (3, 5, 7, 9)
    >>> parts2  # doctest: +NORMALIZE_WHITESPACE
    ((('a', 1), ('b', 0)),
     (('a', 2), ('b', 1)),
     (None, ('b', 2)))

    >>> divisions2, parts2 = require(divisions, parts, required=[0, 1])
    >>> divisions2
    (3, 5, 7)
    >>> parts2  # doctest: +NORMALIZE_WHITESPACE
    ((('a', 1), ('b', 0)),
     (('a', 2), ('b', 1)))
    i   i   N(   R%   R(   R)   t   mint   max(   R"   t   partst   requiredR2   R3   t   pt   present(    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   require˜   s    $
5#'t   leftt   rightt   innert   outerc         O` s^   | j  d d  ƒ } |  j | | Ž  } t | ƒ d k rZ | d  k	 rZ | j j | ƒ | _ n  | S(   Nt   empty_index_dtypei    (   t   popR(   t   mergeR   t   indext   astype(   t   lhsR8   t   kwargsRF   t   out(    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   merge_chunkÍ   s
    c         K` s  | j  d d ƒ } | | d <| | d <t |  | ƒ \ \ }  } } } t | | t | ƒ \ } } d t |  | |  } |  j j | j |  }	 |	 j j | d <t	 ƒ  }
 xB t
 | ƒ D]4 \ } \ } } t t | | g | f |
 | | f <q´ Wt j | |
 d |  | g ƒ} t | | |	 | ƒ S(   s3    Join two partitioned dataframes along their index t   howRB   t
   left_indext   right_indexs   join-indexed-RF   t   dependencies(   t   getR5   RA   R>   R   t   _meta_nonemptyRH   RI   t   dtypet   dictR%   R
   RN   R   t   from_collectionsR   (   RK   t   rhsRP   RQ   RL   RO   R"   R=   t   namet   metat   dskR2   R9   t   bt   graph(    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   merge_indexed_dataframesØ   s    

	&t   _xt   _yc	         ` sÎ  | d k r$ t |  j | j ƒ } n  t |  | d | d | ƒ‰ t | | d | d | ƒ‰ t | t ƒ rx d } t }	 n t }	 t | t ƒ rœ d } t }
 n t }
 t d | d | d | d |	 d |
 d | d	 | ƒ ‰  |  j	 j
 | j	 ˆ   } t | t ƒ rt t | ƒ f } n  t | t ƒ r5t t | ƒ f } n  t ˆ ˆ | | ˆ   } d
 | ‰ | j j ˆ  d <‡  ‡ ‡ ‡ f d †  t | ƒ Dƒ } d g | d } t j ˆ | d ˆ ˆ g ƒ} t | ˆ | | ƒ S(   s   Join two DataFrames on particular columns with hash join

    This shuffles both datasets on the joined column and then performs an
    embarrassingly parallel join partition-by-partition

    >>> hash_join(a, 'id', rhs, 'id', how='left', npartitions=10)  # doctest: +SKIP
    t   npartitionsR   RO   t   left_ont   right_onRP   RQ   t   suffixest	   indicators
   hash-join-RF   c         ` sC   i  |  ]9 } t  t ˆ j | f ˆ j | f g ˆ  f ˆ | f “ q S(    (   R
   RN   R'   (   R   R2   (   RL   t   lhs2RY   t   rhs2(    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys
   <dictcomp>  s   	i   RR   N(   R(   R<   Ra   t   shuffle_funcR   R   R$   t   FalseRV   RT   RH   R!   R)   R   RI   RU   t   rangeR   RW   R   (   RK   Rb   RX   Rc   RO   Ra   Rd   R   Re   RP   RQ   RZ   t   tokenR[   R"   R]   (    (   RL   Rf   RY   Rg   s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt	   hash_joinñ   s8    
		
c         ` sÑ  |  j  j | j  ˆ   } | j j ˆ  d <d t |  | ˆ   ‰ |  j d k ré ˆ  d d k ré t |  j ƒ  ƒ ‰ ‡  ‡ ‡ f d †  t | j ƒ  ƒ Dƒ } ˆ  j	 d ƒ s¾ | j
 ˆ  j	 d	 ƒ ƒ rÊ | j } q g  | j D] } d  ^ qÔ } n· | j d k r”ˆ  d d k r”t | j ƒ  ƒ ‰ ‡  ‡ ‡ f d †  t |  j ƒ  ƒ Dƒ } ˆ  j	 d ƒ si|  j
 ˆ  j	 d ƒ ƒ ru|  j } q g  |  j D] } d  ^ q} n t d ƒ ‚ t j ˆ | d |  | g ƒ} t | ˆ | | ƒ S(   NRF   s   merge-i   RO   RD   RC   c         ` s7   i  |  ]- \ } } t  t ˆ | g ˆ  f ˆ | f “ q S(    (   R
   RN   (   R   R2   t	   right_key(   RL   t   left_keyRY   (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys
   <dictcomp>.  s   	RQ   Rc   RB   c         ` s7   i  |  ]- \ } } t  t | ˆ g ˆ  f ˆ | f “ q S(    (   R
   RN   (   R   R2   Rn   (   RL   RY   Rm   (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys
   <dictcomp>9  s   	RP   Rb   s7   single_partition_join has no fallback for invalid callsRR   (   RD   RC   (   RD   RB   (   RT   RH   RI   RU   R   Ra   R   t   __dask_keys__R%   RS   t   _contains_index_nameR"   R(   t   NotImplementedErrorR   RW   R   (   RB   RC   RL   RZ   R[   R"   t   _R]   (    (   RL   Rn   RY   Rm   s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   single_partition_join%  s,    c         C` sT  x5 | | | g D]$ } t  | t ƒ r t d ƒ ‚ q q W| rœ | rœ | rœ | rœ | rœ g  |  j D] } | | j k re | ^ qe } | sœ t } } qœ n  | rÃ | rÃ | rÃ | } } d  } n  t  |  t j t j f ƒ r9t  | t j t j f ƒ r9t j	 |  | d | d | d | d | d | d | d | d	 |	 ƒSt
 |  ƒ sˆ| rs| rs|  j |  | ƒ }  t } t } n  t |  d
 d ƒ}  n  t
 | ƒ s×| rÂ| rÂ| j | | ƒ } t } t } n  t | d
 d ƒ} n  | sì|  j | ƒ oò|  j } | s
| j | ƒ o| j } | rV| rVt |  | d | d | d	 |	 d | d | d | d | ƒS|  j d k rq| d k sŒ| j d k rÃ| d k rÃt |  | d | d | d | d | d | d | d	 |	 ƒS| rÙ|  j rÙ| sï| r
| j r
| r
|  j } | j } | j	 | d | d | d | d | d | d | d | d	 |	 ƒ} | r||  j r|t | | |  j | d | ƒ} |  j ƒ  }  n< | r¸| j r¸t |  | | j | d | ƒ}  | j ƒ  } n  t t |  | d | d | d | d | d | d | d | d | d	 |	 d | j j ƒ
St |  | r|  j n | | | r4| j n | | |
 | d | d	 |	 ƒSd  S(   Ns7   Dask collections not currently allowed in merge columnsRO   t   onRb   Rc   RP   RQ   Rd   Re   Ra   i   RD   RC   RB   R   RZ   RF   (   RD   RC   (   RD   RB   (   R   R   Rq   t   columnsR$   R(   t   pdR   R   RH   R	   t	   set_indexRi   R   Rp   R   R^   Ra   Rs   RT   R   R"   t   clear_divisionsR   RN   RI   RU   Rl   (   RB   RC   RO   Rt   Rb   Rc   RP   RQ   Rd   Re   Ra   R   t
   max_brancht   ot   ct   merge_indexed_leftt   merge_indexed_rightt
   left_emptyt   right_emptyRZ   (    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pyRH   H  s    #+
	!				c         C` sC   t  t t t  |  ƒ ƒ ƒ d k r0 t d ƒ ‚ n  t j |  d d ƒS(   Ni   s,   Concatenated DataFrames of different lengthst   axis(   R   t   sett   mapR   Rv   t   concat(   R*   (    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   concat_and_checkª  s    !c         ` s“   d t  ˆ  Œ  ‰ ‡  ‡ f d †  t ˆ  d j ƒ Dƒ } t j g  ˆ  D] } | j ^ qC d d ƒ} t j ˆ | d ˆ  ƒ} t | ˆ | ˆ  d j	 ƒ S(   Ns   concat-c         ` sA   i  |  ]7 } t  g  ˆ  D] } | j | f ^ q f ˆ | f “ q S(    (   R„   R'   (   R   R2   R   (   R*   RY   (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys
   <dictcomp>³  s   	i    R€   i   RR   (
   R   Rj   Ra   Rv   Rƒ   t   _metaR   RW   R   R"   (   R*   R[   R   RZ   R]   (    (   R*   RY   s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   concat_unindexed_dataframes°  s    +c         ` sH  ˆ  d k } t  j g  |  D] } | j ^ q d ˆ  d ˆ d | ƒ} g  |  D] } t | j ƒ ^ qJ } t |  Œ  \ } } }	 d t ˆ |  Œ ‰ g  |	 D]@ }
 g  t |
 | ƒ D]$ \ } } | d k	 rÈ | n | ^ qª ^ q” } t ‰ t	 ‰ t
 ‡  ‡ ‡ ‡ ‡ f d †  t | ƒ Dƒ ƒ } x | D] } | j | j ƒ qWt | ˆ | | ƒ S(   s9    Concatenate indexed dataframes together along the index i    R€   t   joint   filter_warnings   concat-indexed-c         3` s<   |  ]2 \ } } ˆ | f t  j | ˆ  ˆ ˆ ˆ f f Vq d  S(   N(   R   Rƒ   (   R   R2   t   part(   R€   Rˆ   R‡   RY   t   uniform(    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys	   <genexpr>Î  s   N(   R   Rƒ   R…   R   R5   R   t   zipR(   R$   Ri   RV   R%   t   updatet   daskR   (   R*   R€   R‡   t   warnR   RZ   t   emptiesR-   R"   R=   R‰   t   emptyt   parts2R[   (    (   R€   Rˆ   R‡   RY   RŠ   s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   concat_indexed_dataframes¼  s    .	"Jc         C` s(  t  j g  |  D] } | j ^ q d | d t ƒ} t | ƒ } d j t |  Œ  ƒ } i  } d } x´ |  D]¬ } | j | j ƒ y | j | k t	 }	 Wn t
 t f k
 r± t }	 n Xx\ | j ƒ  D]N }
 |	 rÞ |
 | | | f <n% t  j | |
 g d | f | | | f <| d 7} q¿ Wqe Wt | | | | ƒ S(   s8   Concatenate partitions on axis=0 by doing a simple stackR‡   Rˆ   s
   concat-{0}i    i   (   R   Rƒ   R…   Ri   R   t   formatR   RŒ   R   R$   R   t	   TypeErrorRo   R   (   R*   R"   R‡   R   RZ   R   RY   R[   R2   t   matcht   key(    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   stack_partitions×  s$    1

%c         ` s¾  t  ˆ  t ƒ s t d ƒ ‚ n  t ˆ  ƒ d k r? t d ƒ ‚ n  t ˆ  ƒ d k r‰ | d k r~ t  ˆ  d t ƒ r~ ˆ  d j ƒ  Sˆ  d Sn  | d k r¤ t d ƒ ‚ n  t j | ƒ } g  ˆ  D] } t  | t	 ƒ rº | ^ qº } t
 ˆ  ƒ ‰  | d k rt d „  | Dƒ ƒ rt ˆ  d	 | d
 | ƒSt | ƒ t ˆ  ƒ k r€t d „  ˆ  Dƒ ƒ r€t d „  | Dƒ ƒ d k r€t j d ƒ t ˆ  ƒ St d ƒ ‚ n+t d „  | Dƒ ƒ rzt ‡  f d †  t t ˆ  ƒ d ƒ Dƒ ƒ r!g  } x# ˆ  d  D] } | | j d  7} qâW| ˆ  d j 7} t ˆ  | d
 | ƒS| r7t ˆ  d
 | ƒSd g t g  ˆ  D] } | j ^ qGƒ d } t ˆ  | d
 | ƒSn@ d g t g  ˆ  D] } | j ^ qŠƒ d } t ˆ  | d
 | ƒSd S(   s   Concatenate DataFrames along rows.

    - When axis=0 (default), concatenate DataFrames row-wise:

      - If all divisions are known and ordered, concatenate DataFrames keeping
        divisions. When divisions are not ordered, specifying
        interleave_partition=True allows concatenate divisions each by each.

      - If any of division is unknown, concatenate DataFrames resetting its
        division to unknown (None)

    - When axis=1, concatenate DataFrames column-wise:

      - Allowed if all divisions are known.

      - If any of division is unknown, it raises ValueError.

    Parameters
    ----------
    dfs : list
        List of dask.DataFrames to be concatenated
    axis : {0, 1, 'index', 'columns'}, default 0
        The axis to concatenate along
    join : {'inner', 'outer'}, default 'outer'
        How to handle indexes on other axis
    interleave_partitions : bool, default False
        Whether to concatenate DataFrames ignoring its order. If True, every
        divisions are concatenated each by each.

    Notes
    -----
    This differs in from ``pd.concat`` in the when concatenating Categoricals
    with different categories. Pandas currently coerces those to objects
    before concatenating. Coercing to objects is very expensive for large
    arrays, so dask preserves the Categoricals by taking the union of
    the categories.

    Examples
    --------
    If all divisions are known and ordered, divisions are kept.

    >>> a                                               # doctest: +SKIP
    dd.DataFrame<x, divisions=(1, 3, 5)>
    >>> b                                               # doctest: +SKIP
    dd.DataFrame<y, divisions=(6, 8, 10)>
    >>> dd.concat([a, b])                               # doctest: +SKIP
    dd.DataFrame<concat-..., divisions=(1, 3, 6, 8, 10)>

    Unable to concatenate if divisions are not ordered.

    >>> a                                               # doctest: +SKIP
    dd.DataFrame<x, divisions=(1, 3, 5)>
    >>> b                                               # doctest: +SKIP
    dd.DataFrame<y, divisions=(2, 3, 6)>
    >>> dd.concat([a, b])                               # doctest: +SKIP
    ValueError: All inputs have known divisions which cannot be concatenated
    in order. Specify interleave_partitions=True to ignore order

    Specify interleave_partitions=True to ignore the division order.

    >>> dd.concat([a, b], interleave_partitions=True)   # doctest: +SKIP
    dd.DataFrame<concat-..., divisions=(1, 2, 3, 5, 6)>

    If any of division is unknown, the result division will be unknown

    >>> a                                               # doctest: +SKIP
    dd.DataFrame<x, divisions=(None, None)>
    >>> b                                               # doctest: +SKIP
    dd.DataFrame<y, divisions=(1, 4, 10)>
    >>> dd.concat([a, b])                               # doctest: +SKIP
    dd.DataFrame<concat-..., divisions=(None, None, None, None)>

    Different categoricals are unioned

    >> dd.concat([                                     # doctest: +SKIP
    ...     dd.from_pandas(pd.Series(['a', 'b'], dtype='category'), 1),
    ...     dd.from_pandas(pd.Series(['a', 'c'], dtype='category'), 1),
    ... ], interleave_partitions=True).dtype
    CategoricalDtype(categories=['a', 'b', 'c'], ordered=False)
    s/   dfs must be a list of DataFrames/Series objectsi    s   No objects to concatenatei   RD   RE   s!   'join' must be 'inner' or 'outer'c         s` s   |  ] } | j  Vq d  S(   N(   R   (   R   R   (    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys	   <genexpr>W  s    R€   R‡   c         s` s   |  ] } | j  Vq d  S(   N(   R   (   R   R   (    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys	   <genexpr>Z  s    c         S` s   h  |  ] } | j  ’ q S(    (   Ra   (   R   R   (    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys	   <setcomp>[  s   	 s™   Concatenating dataframes with unknown divisions.
We're assuming that the indexes of each dataframes are 
 aligned. This assumption is not generally safe.sG   Unable to concatenate DataFrame with unknown division specifying axis=1c         s` s   |  ] } | j  Vq d  S(   N(   R   (   R   R   (    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys	   <genexpr>e  s    c         3` s5   |  ]+ } ˆ  | j  d  ˆ  | d j  d k  Vq d S(   iÿÿÿÿi   i    N(   R"   (   R   R2   (   R*   (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pys	   <genexpr>g  s   iÿÿÿÿN(   RD   RE   (   R   R!   R”   R   R   R   t   to_frameR   t   _validate_axisR   R   R    R’   t   warningsRŽ   R†   Rj   R"   R—   R(   t   sumRa   (   R*   R€   R‡   t   interleave_partitionsR   t   dasksR"   (    (   R*   s3   lib/python2.7/site-packages/dask/dataframe/multi.pyRƒ   ô  sH    Q(
--(   R_   R`   (   R_   R`   (9   t   __doc__t
   __future__R    R   R   t	   functoolsR   R   Rš   t   toolzR   R   R   t   pandasRv   t   baseR   R	   t   compatibilityR
   t   highlevelgraphR   t   coreR   R   R   R   R   R   R   R   t   ioR   t    R   R   R   t   utilsR   R5   R:   R(   RA   R>   RN   R$   R^   Rh   Ri   Rl   Rs   RH   R„   R†   R’   R—   Rƒ   (    (    (    s3   lib/python2.7/site-packages/dask/dataframe/multi.pyt   <module>7   sB   :	9	2.		2	#	_		