ó
¦–Õ\c           @` sß  d  d l  m Z m Z m Z m Z d  d l Z d  d l m Z d  d l Z	 d  d l
 Z d  d l j j Z d  d l Z d  d l Z d  d l Z d  d l j Z d  d l m Z d  d l m Z d  d l m Z y d  d l Z Wn e k
 rô e Z n Xy% d  d l Z  e  j! e d ƒ k Z" Wn e k
 r3e Z" n Xy d  d l# j$ Z% Wn e k
 r`e Z% n Xe Z& d Z' e j( j) e& d	 e' ƒZ* e% r°e  j! e d
 ƒ k r°e+ Z, d Z- n e% Z, d Z- e j( j) e, d	 e- ƒZ. d „  Z/ d „  Z0 d „  Z1 d Z2 d Z3 e j4 i g  e5 e2 ƒ D] Z6 e6 d d ^ qd 6g  e5 e2 ƒ D] Z6 e6 d ^ q:d 6d e j7 g  e5 e2 ƒ D] Z6 d e6 ^ qgd d ƒƒZ8 e j9 e8 d e3 ƒZ: e j; d e j< d d e* ƒe j< d d e. ƒg ƒ d  „  ƒ Z= d! „  Z> d" Z? e> d# e? ƒ Z@ e@ d$ „  ƒ ZA e@ d% „  ƒ ZB e j( jC d& e e+ g ƒ e@ d' „  ƒ ƒ ZD e> ƒ  d( „  ƒ ZE e> ƒ  d) „  ƒ ZF e@ d* „  ƒ ZG e@ d+ „  ƒ ZH e@ d, „  ƒ ZI e@ d- „  ƒ ZJ e@ d. „  ƒ ZK d/ „  ZL e@ d0 „  ƒ ZM d1 „  ZN d2 „  ZO e j( jP d	 d3 ƒ e j( jC d4 d g d d d g g  g ƒ d5 „  ƒ ƒ ZQ e j( j) eR e j4 d6 ƒ d	 d7 ƒe> ƒ  d8 „  ƒ ƒ ZS e@ d9 „  ƒ ZT d: „  ZU d; „  ZV d< „  ZW d= „  ZX d> „  ZY d? „  ZZ d@ „  Z[ e@ dA „  ƒ Z\ dB „  Z] e j( jC dC e j4 i dD dE dF g d 6ƒ i  i  f e j4 i d4 dG dH g d 6ƒ i dI dJ 6i  f e j4 i dK dG dL g d 6ƒ i dI dJ 6i  f e j4 i dM dN dO g d 6ƒ i dP dJ 6i  f e j4 i e j^ dG dH dG g ƒ d 6ƒ i dI dJ 6i d g dQ 6f e j4 i e j^ dF dE dF g ƒ d 6ƒ i  i d g dQ 6f e j4 i e_ e` e ja dR dS dT g ƒ ƒ d 6ƒ i  i  f e j4 i dR dS dT g d 6ƒ jb dU ƒ i  i  f e j< e j4 i dD dE dF g d 6ƒ jb dU ƒ i  i  d e j( jP d	 dV ƒ ƒe j4 i dD dE dF g d 6ƒ jb dW ƒ i  i  f e j4 i dD dE dF g d 6ƒ jb dX ƒ i  i  f e j4 i dD dE dF g d 6ƒ jb dY ƒ i  i  f e j4 i dD dE dF g d 6ƒ jb dZ ƒ i  i  f e j4 i dD dF dE g d 6d dD dE dF g ƒi  i  f e j4 i dD dF d g d 6d e j7 dF dE dD g d d[ ƒƒi  i  f e j4 i dF dE dD g d 6dD dE dF g d 6ƒ i  i  f e j4 i dF dE dD g d 6dD dE dF g d 6d\ d d g ƒi  i  f e j4 i dD dE dF g d] 6ƒ i  i  f e j4 i dD dE d g d 6ƒ i  i  f e j4 i d^ d_ d g d` 6ƒ i  i  f e j4 i d^ d_ d g da 6ƒ i  i  f e j4 i d^ d_ d g db 6ƒ i  i  f g ƒ dc „  ƒ Zd dd „  Ze de „  Zf df „  Zg dg „  Zh e> d# e? dh di ƒ dj „  ƒ Zi e@ dk „  ƒ Zj dl „  Zk dm „  Zl dn „  Zm e j( jC do dp dq g ƒ dr „  ƒ Zn ds „  Zo dt „  Zp du „  Zq e> d# e? dv dw ƒ dx „  ƒ Zr e j( jC dy dz d d{ d| g ƒ d} „  ƒ Zs e j; d i i d d~ 6d d€ 6d d‚ 6d dƒ 6i d d~ 6d„ d€ 6d d‚ 6d dƒ 6g d… 6d g d† 6d‡ dˆ 6i i d d~ 6d„ d€ 6d d‚ 6d dƒ 6i d d~ 6d d€ 6d d‚ 6d dƒ 6g d… 6d g d† 6d‡ dˆ 6i i d d‰ 6i dŠ d‹ 6d~ 6d d€ 6dŒ d‚ 6d dƒ 6g dŽ 6i d„ d‰ 6d d~ 6d„ d€ 6d d‚ 6d dƒ 6i d d‰ 6d d~ 6d d€ 6d d‚ 6d dƒ 6g d… 6d g d† 6d‡ dˆ 6g ƒ d „  ƒ Zt d‘ „  Zu d’ „  Zv d“ „  Zw d” „  Zx d• „  Zy d– „  Zz d— „  Z{ d˜ „  Z| d™ „  Z} dš „  Z~ d› „  Z dœ „  Z€ d „  Z d S(ž   i    (   t   absolute_importt   divisiont   print_functiont   unicode_literalsN(   t   LooseVersion(   t	   assert_eq(   t   _parse_pandas_metadata(   t   natural_sort_keyu   0.9.0u   fastparquet not foundt   reasonu   0.13.0u   pyarrow 0.13.0 not supportedu   pyarrow not foundc           C` s   t  r t j t ƒ n  d  S(   N(   t   SKIP_FASTPARQUETt   pytestt   skipt   SKIP_FASTPARQUET_REASON(    (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   check_fastparquet3   s    c           C` s   t  r t j t ƒ n  d  S(   N(   t   SKIP_PYARROWR
   R   t   SKIP_PYARROW_REASON(    (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   check_pyarrow8   s    c         C` s*   |  d k r t  S|  d k r& t r& t  St S(   Nu   fastparquetu   pyarrow(   t   Truet   check_pa_divst   False(   t   engine(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   should_check_divs=   s
    i(   i   i   i   u   xg      @u   yt   indexi
   t   nameu   myindext   npartitionst   paramsu   fastparquett   marksu   pyarrowc         C` s   |  j  S(   N(   t   param(   t   request(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyR   O   s    c          ` sò  d d h ‰  ‡  f d †  ˆ  Dƒ } x~ d t  t f d t t f g D]^ \ } } } | rA t j j d | ƒ } x1 | D]& } | | k rr | | j | ƒ qr qr WqA qA Wxÿ |  j ƒ  D]ñ \ } } | j	 d d ƒ \ } }	 t
 |	 j	 d ƒ ƒ }
 | d k st |
 ƒ d	 k st |
 ƒ j ˆ  ƒ r/t d
 | ƒ ‚ n  t t j | ƒ d | ƒ } t |
 ƒ d	 k rp| |
 j | ƒ q° x. | D]& } |
 | k rw| | j | ƒ qwqwWq° Wt j j d g  t | j ƒ  ƒ D]' \ } } t j d t
 | ƒ | Œ ^ qÄƒ S(   uÁ   Product of both engines for write/read:

    To add custom marks, pass keyword of the form: `mark_writer_reader=reason`,
    or `mark_engine=reason` to apply to all parameters with that engine.u   pyarrowu   fastparquetc         ` s,   i  |  ]" } ˆ  D] } g  | | f “ q q S(    (    (   t   .0t   wt   r(   t   backends(    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pys
   <dictcomp>\   s   	 R   u   _i   u   xfailu   skipi   u   unknown keyword %ru   write_engineu   read_engineR   (   u   xfailu   skip(   u   write_engineu   read_engine(   R	   R   R   R   R
   t   markR   t   appendt   itemst   splitt   tuplet   lent   sett
   differencet
   ValueErrort   getattrt   parametrizet   sortedR   (   t   kwargsR   R   R   R   t   valt   kt   kwt   kindt   restt   keyt   v(    (   R    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   write_read_enginesV   s0    u5   fastparquet fails reading pyarrow written directoriest   xfail_pyarrow_fastparquetc   
      C` sx  t  |  ƒ } t j i t j d d t j ƒd 6t j d d t j ƒd 6t j d d t j ƒd 6t j j	 d d d g d	 d ƒj
 d
 ƒ d 6ƒ } t j | d d ƒ} | j | d t d | ƒt j | ƒ } d | k sà t ‚ d | k sò t ‚ t j | d t d | ƒ} t | j ƒ d k s(t ‚ | j d d ƒ j ƒ  } x1 | j D]& }	 | |	 | |	 k j ƒ  sJt ‚ qJWd  S(   Niè  t   dtypeu   i32u   i64u   fu   hellou   you   peoplet   sizeu   Ou   bhellot	   chunksizeiô  t   write_indexR   u   _common_metadatau   part.0.parquetR   i   t	   scheduleru   sync(   t   strt   pdt	   DataFramet   npt   aranget   int32t   int64t   float64t   randomt   choicet   astypet   ddt   from_pandast
   to_parquetR   t   ost   listdirt   AssertionErrort   read_parquetR&   t	   divisionst   computet   reset_indext   columnst   all(
   t   tmpdirt   write_enginet   read_enginet   tmpt   datat   dft   filest   df2t   outt   column(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt
   test_local   s    "1c         C` sá   t  |  ƒ } t j | d | ƒt j | d | d t | ƒ ƒ} t t | d t | ƒ ƒt j | d | d t ƒ} t t j ƒ  | d t	 ƒt j | d | ƒ} | d k rÄ t t | d t	 ƒn t t j ƒ  | d t	 ƒd  S(   NR   t   infer_divisionst   check_divisionsu   fastparquet(
   R<   t   ddfRI   RG   RM   R   R   R   t   clear_divisionsR   (   RS   RT   RU   t   fnt   ddf2t   ddf2_no_divst   ddf2_default(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt
   test_index˜   s    !u   indexc         C` sµ   t  |  ƒ } t j i d d d g d 6d d d g d 6ƒ d  } | ra | j d d t d t ƒn  t j | d	 d
 ƒ} | j | d | d | ƒt j | d | ƒ} t	 | | ƒ d  S(   Nu   au   bi   i   i   i    t   inplacet   dropR   i   R:   R   (
   R<   R=   R>   t	   set_indexR   RG   RH   RI   RM   R   (   RS   RT   RU   R   Rb   RX   R`   t   read_df(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt
   test_empty²   s    3c         C` sc  | | k o d k n r; t  j d k r; t j d ƒ n  t |  ƒ } t j | d | ƒt  j j t  j j	 | d ƒ ƒ rš t  j
 t  j j	 | d ƒ ƒ n  t  j | ƒ } d | k s» t ‚ t j t  j j	 | d ƒ d | d t | ƒ oñ t | ƒ ƒ} t t | d t | ƒ ot | ƒ ƒt j t  j j	 | d ƒ d | d t ƒ} t t j ƒ  | d t ƒd  S(	   Nu   fastparquetu   ntu   filepath bug.R   u	   _metadatau	   *.parquetR^   R_   (   RJ   R   R
   R   R<   R`   RI   t   patht   existst   joint   unlinkRK   RL   RG   RM   R   R   R   Ra   R   (   RS   RT   RU   Rb   RY   Rc   Rd   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_read_globÀ   s    +!!%c         C` s0  | | k o d k n r; t  j d k r; t j d ƒ n  t |  ƒ }  t j |  d | ƒt g  t  j |  ƒ D]* } | j	 d ƒ sm t  j
 j |  | ƒ ^ qm d t ƒ} t j | d | d t | ƒ oÍ t | ƒ ƒ} t t | d t | ƒ oô t | ƒ ƒt j | d | d t ƒ} t t j ƒ  | d t ƒd  S(	   Nu   fastparquetu   ntu   filepath bug.R   u	   _metadataR3   R^   R_   (   RJ   R   R
   R   R<   R`   RI   R,   RK   t   endswithRl   Rn   R   RG   RM   R   R   R   Ra   R   (   RS   RT   RU   t   fRY   Rc   Rd   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_read_listÚ   s    +*	%c         C` s/  t  |  ƒ } t j | d | ƒt t j | d g  d | d t | ƒ ƒt g  d t | ƒ ƒt t j | d g  d | d t ƒt g  j ƒ  d t	 ƒt t j | d d g d | d t | ƒ ƒt d g d t | ƒ ƒt t j | d d g d | d t ƒt d g j ƒ  d t	 ƒt t j | d d d d g d | d t | ƒ ƒt d g d t | ƒ ƒt t j | d d d d g d | d t ƒt d g j ƒ  d t	 ƒt t j | d d d d d g d | d t | ƒ ƒt d t | ƒ ƒt t j | d d d d d g d | d t ƒt j ƒ  d t	 ƒd  S(	   NR   RQ   R^   R_   u   xR   u   myindexu   y(
   R<   R`   RI   R   RG   RM   R   R   Ra   R   (   RS   RT   RU   Rb   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_columns_indexò   s,    '!*$$$''c         C` s¬   t  |  ƒ } t j | d | ƒt j t t f ƒ ' t j | d d g d | ƒj	 ƒ  Wd  QXt j t
 t f ƒ 4 t j | d d g t t j ƒ d | ƒj	 ƒ  Wd  QXd  S(   NR   RQ   u	   nonesense(   R<   R`   RI   R
   t   raisesR)   t   KeyErrorRG   RM   RO   t	   Exceptiont   listRQ   (   RS   RT   RU   Rb   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_nonsense_column"  s    ("c         C` s÷   t  |  ƒ } t j | d | ƒt j ƒ  } t t j | d t d | d t ƒ| d t d t ƒt t j | d t d d d g d | d t ƒ| d d g d t d t ƒt t j | d t d d	 d g d | d t ƒ| d	 d g d t d t ƒd  S(
   NR   R   R^   t   check_indexR_   RQ   u   xu   yu   myindex(	   R<   R`   RI   RP   R   RG   RM   R   R   (   RS   RT   RU   Rb   Rc   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_columns_no_index0  s    ''c         C` sŒ   t  |  ƒ } t j | d | ƒ| d k rA t rA d } t } n d } t } t j | d | ƒ$ t j	 | d d d | d t
 ƒWd  QXd  S(	   NR   u   pyarrowu   requires pyarrow >=0.9.0u(   not known to be sorted across partitionst   matchR   u   xR^   (   R<   R`   RI   R   t   NotImplementedErrorR)   R
   Ru   RG   RM   R   (   RS   RT   RU   Rb   R|   t   ex(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_infer_divisions_not_sortedH  s    	c      
   C` s½   | d k r1 t  j t d ƒ k r1 t j d ƒ n  t |  ƒ } t j | d | d t ƒ| d k rx t	 rx d } t
 } n d } t } t j | d | ƒ t j | d | d	 t ƒWd  QXd  S(
   Nu   pyarrowu   0.13.0u&   No longer an error from pyarrow 0.13.0R   R:   u   requires pyarrow >=0.9.0u   no index column was discoveredR|   R^   (   t   pat   __version__R   R
   R   R<   R`   RI   R   R   R}   R)   Ru   RG   RM   R   (   RS   RT   RU   Rb   R|   R~   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_infer_divisions_no_indexX  s    !	c         C` s)  t  j j t |  ƒ d ƒ } t j j t j d ƒ t j d ƒ d g d d d g ƒ} t j	 t j
 j d d ƒ d d	 d
 g d | ƒ} | j d t ƒ } | d k rç t j | | d t ƒt j t ƒ  t j | d | ƒ} Wd  QXnÉ d d  l } t j | j j | ƒ | ƒ t j | d | ƒ} t | | ƒ t j | d d	 d | ƒ} t | | d	 ƒ t j | d d	 d
 g d d d g d | ƒ} t | | j d	 d
 g ƒ d d g ƒ t j | d t d | ƒ} t | | ƒ t j | d d	 g d | ƒ} t | | j d	 ƒ d
 g ƒ t j | d d g d | ƒ} t | | j d ƒ d	 d
 g ƒ t j | d d d	 g d | ƒ} t | | j d ƒ d d	 g ƒ t j | d t d d d
 g d | ƒ} t | | d d
 g ƒ x[ d d
 g D]M } t j | d | d d d	 g d | ƒ} t | | j | ƒ d d	 g ƒ qÝWxQ d	 d g D]C } t j t ƒ , t j | d | d d d	 g d | ƒ} Wd  QXq;Wx  d  d | j d ƒ f t d
 | f t d | f d	 d | j d	 ƒ f d	 d
 | j d	 ƒ f g D]A \ }	 }
 } t j | d |	 d |
 d | ƒ} t | | |
 ƒ qàWd  S(   Nu   test.parqueti
   i   t   namesu   x0u   x1i   RQ   u   au   bR   Rh   u   fastparquetR:   R   i    (   RJ   Rl   Rn   R<   R=   t
   MultiIndext   from_arraysR?   R@   R>   RD   t   randnRP   R   t   fastparquett   writeR   R
   Ru   R)   RG   RM   t   pyarrowt   pqt   write_tablet   TableRH   R   Ri   t   None(   RS   R   Rb   R   RX   RZ   R`   R€   t   dt   indt   colt   sol_df(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt#   test_columns_index_with_multi_indexj  sP    +0-& ! ''$1(!c         C` s•   t  |  ƒ } t j i d d d g d 6d d d g d 6ƒ } t j | d	 d ƒ} | j | d
 t d | ƒt j | d | ƒ} t | | d t ƒd  S(   Ni   i   i   u   ai   i   i   u   bR   R:   R   Rz   (	   R<   R=   R>   RG   RH   RI   R   RM   R   (   RS   RT   RU   Rb   RX   R`   Rc   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_no_indexª  s    /c         C` s¶   t  |  ƒ } t j | d | ƒt j | d d g d | d t | ƒ ƒ} t t d g | d t | ƒ ƒt j | d d d d d | d t | ƒ ƒ} t t j | d t | ƒ ƒd  S(   NR   RQ   u   xR^   R_   R   u   myindex(   R<   R`   RI   RG   RM   R   R   t   x(   RS   R   Rb   Rc   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_read_series´  s    * -c         ` sÚ   t  |  ƒ } t j | d ˆ  ƒ‡  f d †  } t | | ƒ j ƒ t | | ƒ j ƒ k s^ t ‚ t | | ƒ j ƒ t | | d d g ƒj ƒ k s— t ‚ t | | d d ƒj ƒ t | | d d g ƒj ƒ k sÖ t ‚ d  S(   NR   c         ` s   t  j |  d ˆ  | S(   NR   (   RG   RM   (   Rb   R-   (   R   (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   readÂ  s    RQ   u   x(   u   x(   R<   R`   RI   R'   t   daskRL   (   RS   R   Rb   R–   (    (   R   sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt
   test_names¾  s    0'ua   parquet column fusion is special cased today we'll need to find a more general solution near-termu   cc         ` sÌ   t  ƒ  t |  ƒ } t j | ƒ t j | ƒ } t t ˆ  | ˆ  ƒ | ˆ  } t j	 j
 d t ƒ   | j | j | j ƒ  ƒ } Wd  QXt | ƒ | j k s  t ‚ t ‡  f d †  | j ƒ  Dƒ ƒ sÈ t ‚ d  S(   Nt   fuse_rename_keysc         3` s   |  ] } | d  ˆ  k Vq d S(   i   N(    (   R   R4   (   t   c(    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pys	   <genexpr>Ü  s    (   R   R<   R`   RI   RG   RM   R   RX   R—   t   configR'   R   t   __dask_optimize__t   __dask_keys__R&   R   RL   RR   t   values(   RS   Rš   Rb   Rc   R”   t   dsk(    (   Rš   sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_optimizeÎ  s    
!u
   to_parquetu   no to_parquet methodc         C` sm   t  |  j d ƒ ƒ } t j i d d d g d 6ƒ } | j | d | ƒt j | d | ƒ} t | | ƒ d  S(   Nu   test.parqueti   i   i   u   xR   (   R<   Rn   R=   R>   RI   RG   RM   R   (   RS   RT   RU   Rb   RX   R`   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_roundtrip_from_pandasß  s
    c         C` sÍ  t  |  ƒ } t j i d d d g d d 6d d ƒ} t j | d d	 ƒ} t j | | d
 | ƒt j | d d d
 | ƒ} | j ƒ  j j	 j
 j ƒ  d d d g k s« t ‚ t j | d d g d
 | ƒ} | j ƒ  j j	 j
 j ƒ  d d d g k sù t ‚ | d k rt j | d
 | ƒ} | j ƒ  j j	 j
 j ƒ  d d d g k sJt ‚ | j d  j ƒ  d | j _ t | | ƒ st ‚ n  t j | d g  d
 | ƒ} | j d  j ƒ  | j | j k j ƒ  sÉt ‚ d  S(   Nu   au   bu   cid   u   xR7   u   categoryR   i   R   t
   categoriesu   pyarrowiè  u   index(   R<   R=   R>   RG   RH   RI   RM   RO   R”   t   catR¢   t   tolistRL   t   locR   R   R   RR   (   RS   RT   RU   RV   RX   R`   Rc   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_categoricalê  s"    )000c         C` s2  t  ƒ  t |  ƒ } t j i t j d d t j ƒd 6t j d d t j ƒd 6t j d d t j ƒd 6t j	 j
 d d d g d	 d ƒj d
 ƒ d 6ƒ } d | j _ t | ƒ d } t j | j |  d d ƒ} t j | j | d d ƒ} | j | ƒ | j | d t ƒt j | d | ƒ} t | | ƒ d S(   u5   Test that appended parquet equal to the original one.iè  R7   u   i32u   i64u   fu   hellou   you   peopleR8   u   Ou   bhellou   indexi   R9   id   R"   R   N(   R   R<   R=   R>   R?   R@   RA   RB   RC   RD   RE   RF   R   R   R&   RG   RH   t   ilocRI   R   RM   R   (   RS   R   RV   RX   t   halft   ddf1Rc   t   ddf3(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_append  s    "c         C` s8  t  ƒ  t |  ƒ } t j i t j d d t j ƒd 6t j d d t j ƒd 6t j d d t j ƒd 6t j	 j
 d d d g d	 d ƒj d
 ƒ d 6ƒ } d | j _ t | ƒ d } t j | j |  d d ƒ} t j | j | d d ƒ} | j | d t ƒ| j | d t ƒt j | d d ƒ} t | | ƒ d S(   u5   Test that appended parquet equal to the original one.iè  R7   u   i32u   i64u   fu   hellou   you   peopleR8   u   Ou   bhellou   indexi   R9   id   R"   R   u   fastparquetN(   R   R<   R=   R>   R?   R@   RA   RB   RC   RD   RE   RF   R   R   R&   RG   RH   R§   RI   R   RM   R   (   RS   RV   RX   R¨   R©   Rc   Rª   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_append_create  s    "c      	   C` s„  t  ƒ  t |  ƒ } t j i t j d d ƒ d 6t j d d ƒ d 6t j d d ƒ d 6ƒ } d	 | j _ t j i t j d d ƒ d 6t j d d ƒ d 6t j d
 d ƒ d 6ƒ } d	 | j _ t j	 | d d ƒ} t j	 | d d ƒ} t j
 | | d d g ƒt j
 | | d d g d t d t ƒt j | ƒ j ƒ  } | j j d ƒ | d <t | j d ƒ t j | | g ƒ | j d t ƒd  S(   Ni    i
   u   lati   u   lonid   in   u   valueu   indexix   i‚   R   i   t   partition_onR"   t   ignore_divisionsu   intRz   (   R   R<   R=   R>   R?   R@   R   R   RG   RH   RI   R   RM   RO   t   lonRF   R   t   sort_valuest   concatRQ   R   (   RS   RV   t   df0t   df1t   dd_df0t   dd_df1R[   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_append_with_partition1  s"    //+c         C` sk  t  ƒ  t |  ƒ } t j i t j j d ƒ d 6t j j d d d g d d ƒd 6t j j d d d g d d ƒd 6ƒ } t j	 | d	 ƒ } | j
 | d
 d g d d ƒt j | d d ƒ} t | j j j ƒ d d d h k sæ t ‚ | j
 | d
 d d g d d ƒt j | d d ƒ} t | j j j ƒ d d d h k sDt ‚ t | j j j ƒ d d d h k snt ‚ t j | d d d g d d ƒ} t | j j j ƒ d d d h k s¹t ‚ d | j k sÎt ‚ t j | d d d d ƒ} t | j j ƒ d d d h k st ‚ d | j k s%t ‚ t j | d d d d ƒ} t | j j ƒ d d d h k sgt ‚ d  S(   Ni2   u   au   xu   yu   zR8   u   bu   ci   R­   R   u   fastparquetRQ   R   (   R   R<   R=   R>   R?   RD   t   randRE   RG   RH   RI   RM   R'   t   bR£   R¢   RL   Rš   RQ   R   (   RS   RV   RŽ   RX   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_partition_on_catsG  s*    "(***!*'c      
   C` s²  t  ƒ  t |  j d ƒ ƒ } t j i t j d d t j ƒd 6t j d d t j ƒd 6t j d d t j	 ƒd 6t j
 j d d d	 g d
 d ƒj d ƒ d 6ƒ } t | ƒ d } t j | j |  d d ƒ} t j | j | d d ƒ} | j | ƒ t j t ƒ   } | j | d t d t ƒWd QXd t | j ƒ k sBt ‚ t |  j d ƒ ƒ } | j | d t ƒ| j | d t d t ƒt j | d d ƒ} t | j d ƒ | ƒ d S(   u#   Test append with write_index=False.u   tmp1.parquetiè  R7   u   i32u   i64u   fu   hellou   you   peopleR8   u   Ou   bhelloi   R9   id   R:   R"   Nu   Appended columnsu   tmp2.parquetR   (   R   R<   Rn   R=   R>   R?   R@   RA   RB   RC   RD   RE   RF   R&   RG   RH   R§   RI   R
   Ru   R)   R   R   t   valueRL   RM   R   Ri   (   RS   RV   RX   R¨   R©   Rc   t   excinfoRª   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_append_wo_indexa  s&    "c         C` sT  t  ƒ  t |  ƒ } t j i t j d d t j ƒd 6t j d d t j ƒd 6t j d d t j ƒd 6t j	 j
 d d d g d	 d ƒj d
 ƒ d 6ƒ } t | ƒ d } t j | j |  d d ƒ} t j | j | d d d ƒ} | j | ƒ t j t ƒ  } | j | d t ƒWd QXd t | j ƒ k s7t ‚ | j | d t d t ƒd S(   u1   Test raising of error when divisions overlapping.iè  R7   u   i32u   i64u   fu   hellou   you   peopleR8   u   Ou   bhelloi   R9   id   i
   R"   Nu   Appended divisionsR®   (   R   R<   R=   R>   R?   R@   RA   RB   RC   RD   RE   RF   R&   RG   RH   R§   RI   R
   Ru   R)   R   Rº   RL   (   RS   RV   RX   R¨   R©   Rc   R»   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt!   test_append_overlapping_divisions{  s    " c   	   	   C` sg  t  ƒ  t |  ƒ } t j i t j d d t j ƒd 6ƒ } t j i t j d d t j ƒd 6ƒ } t j i t j d d t j ƒd 6ƒ } t j	 | d d ƒ} t j	 | d d ƒ} t j	 | d d ƒ} | j
 | ƒ t j t ƒ  } | j
 | d t ƒWd QXd	 t | j ƒ k st ‚ t j t ƒ  } | j
 | d t ƒWd QXd
 t | j ƒ k sct ‚ d S(   u-   Test raising of error when non equal columns.id   R7   u   i32u   i64R9   i   R"   Nu   Appended columnsu   Appended dtypes(   R   R<   R=   R>   R?   R@   RA   RB   RG   RH   RI   R
   Ru   R)   R   Rº   RL   (	   RS   RV   R³   RZ   t   df3R©   Rc   Rª   R»   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_append_different_columns  s    (((c         C` s  t  |  ƒ } t j i d d d g d 6d d d g d 6d	 d
 d g d 6d t j d d d g d d ƒd d d d g ƒ} t j | d d ƒ} t j | | d | ƒ| d k râ t j | ƒ } | j	 d d d d g k sâ t
 ‚ n  t j | d d d | ƒ} t | | d t ƒd  S(   Ni   i   i   u   ai
   i   i   u   bid   iÈ   i,  u   cR   iÿÿÿÿiþÿÿÿiýÿÿÿR   u   myindexRQ   R   R   u   fastparquetR_   (   R<   R=   R>   t   IndexRG   RH   RI   R‡   t   ParquetFileRQ   RL   RM   R   R   (   RS   RT   RU   RV   RX   R`   t   pfRc   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_ordering§  s    $c   	   	   C` s¬  d d  l  } t |  ƒ } t j i t j d d t j ƒd 6t j d d t j ƒd 6ƒ } t j	 | d d ƒ} | j
 | ƒ t j | d d d g d	 | d
 t | ƒ ƒ} t | d d g | d t d t | ƒ ƒd d  l  } | j  t j j | d ƒ ƒ } t j | d d g d	 | ƒj ƒ  } | j d d t ƒt | d g | d t d t ƒt j | d d d g d	 | d
 t | ƒ ƒ} t | d d g | d t d t | ƒ ƒd  S(   Ni    iè  R7   u   i32u   fR9   i2   RQ   R   R^   Rz   R_   u	   *.parquetRg   (   t   globR<   R=   R>   R?   R@   RA   RC   RG   RH   RI   RM   R   R   R   RJ   Rl   Rn   RO   R°   R   (	   RS   R   RÄ   RV   RW   RX   RZ   t   fnsR¾   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt    test_read_parquet_custom_columnsº  s4    "	u   df,write_kwargs,read_kwargsi   i   i   u   au   bu   utf8u   object_encodingu   ccu   bbbt   aR¸   Rš   u   bytesu
   categoriesi¸  iÐ  iè  u   M8[ns]u,   Parquet doesn't support nanosecond precisionu   M8[us]u   M8[ms]u   uint16u   float32u   fooRQ   u   0g      @g       @u   -u   .u    c         C` s‹   t  ƒ  t |  ƒ } | j j d  k r4 d | j _ n  t j | d d ƒ} t j | | |  t j | d | j j | } t	 | | ƒ d  S(   Nu   indexR   i   R   (
   R   R<   R   R   R   RG   RH   RI   RM   R   (   RS   RX   t   write_kwargst   read_kwargsRV   R`   Rc   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_roundtripÚ  s    c      
   C` s£  t  ƒ  t |  ƒ } t j i d d d d d g d 6t d ƒ d 6ƒ } t j | d	 d ƒ} | j j d
 ƒ | d <| j	 | ƒ t j
 | d d g ƒ} t j t ƒ  | j j j Wd  QXt | j j ƒ  j j ƒ d d d h k sè t ‚ | j d „  ƒ j ƒ  } | j ƒ  d d d d g k s$t ‚ t | j | j d t ƒt j t ƒ # t j
 | d d g ƒj ƒ  } Wd  QXt j t ƒ  t j
 | d d g ƒ} Wd  QXd  S(   Ni   i   i   i   i   u   xu   caaabu   yR   u   categoryR¢   u   au   bu   cc         S` s   |  j  j j S(   N(   t   yR£   R¢   (   R”   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   <lambda>  t    t   check_namesu   foo(   R   R<   R=   R>   Rx   RG   RH   RË   RF   RI   RM   R
   Ru   R}   R£   R¢   R'   RO   RL   t   map_partitionsR¤   R   R   t	   TypeErrorR)   (   RS   Rb   RX   R`   Rc   t   cats_set(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_categories  s$    0$$c         C` s®   t  |  ƒ } t j i t d ƒ d 6t d ƒ d 6ƒ } t j | d d ƒ} | | j d k } | j | d | ƒt j | d | ƒ} | j	 ƒ  } t
 | | d t d t ƒd  S(	   Ni
   u   au   bR   i   R   RÎ   Rz   (   R<   R=   R>   t   rangeRG   RH   RÇ   RI   RM   RO   R   R   (   RS   R   Rb   RX   R`   Rc   Rª   t   sol(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_empty_partition  s    )c         C` sŠ   t  |  ƒ } t j ƒ  } d | j _ t j | d d ƒ} | j | d | ƒt j | d | d t	 | ƒ ƒ} t
 | | d t	 | ƒ ƒd  S(   Nu   fooR   i   R   R^   R_   (   R<   t   tmt   makeTimeDataFrameR   R   RG   RH   RI   RM   R   R   (   RS   R   Rb   RX   R`   Rc   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_timestamp_index(  s    !c         C` sœ   t  ƒ  t ƒ  t |  j d ƒ ƒ } t j i d t j d t j d g d 6ƒ } t j	 | d d ƒ} | j
 | ƒ t j | ƒ } | d j d k s˜ t ‚ d  S(   Nu   test.parquetg      ð?i   i   u   c1R   i   (   R   R   R<   Rn   R=   R>   R?   t   nanRG   RH   RI   RŠ   t
   read_tablet
   null_countRL   (   RS   Rb   RX   R`   t   table(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt$   test_to_parquet_default_writes_nulls2  s    +t   xfail_pyarrow_pyarrowua   Race condition writing using pyarrow with partition_on. Fixed on master, but not on pyarrow 0.8.0c         C` s  t  |  ƒ }  t j i t j j d d d g d d ƒd 6t j j d d ƒ d 6t j j d d	 d d ƒd
 6ƒ } t j | d d ƒ} | j	 |  d d g d | ƒt j
 |  d | ƒj ƒ  } xU | j j ƒ  D]D } t | j | j | k ƒ t | j | j | k ƒ k sË t ‚ qË Wd  S(   Nu   Au   Bu   CR8   id   u   au   bi   i   u   cR   i   R­   R   (   R<   R=   R>   R?   RD   RE   t   randintRG   RH   RI   RM   RO   RÇ   t   uniqueR'   R¸   RL   (   RS   RT   RU   RX   RŽ   R[   R.   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_partition_on?  s    +"c      
   C` sÀ  t  |  ƒ } t j i d d d d d g d 6ƒ } t j | d d ƒ} | j d d d	 t ƒ j | d
 t d | ƒt j	 | d t d | d d g ƒj
 ƒ  } t | | ƒ | j d d d	 t ƒ j | d | ƒt j	 | d | ƒj
 ƒ  t | | ƒ | d k r]| j d d d	 t ƒ j | d | ƒt j | ƒ j d d g ƒ } t | ƒ d k s]t ‚ n  | j d d d	 t ƒ j | d | ƒt j	 | d | d d g ƒj
 ƒ  t | ƒ d k s¼t ‚ d  S(   Nu   abu   aau   bau   dau   bbu   atR   i   t   forceR:   R   R   t   filtersu   ==i   u   fastparqueti    (   u   atu   ==u   aa(   u   atu   ==u   aa(   u   atu   ==u   aa(   R<   R=   R>   RG   RH   t   repartitionR   RI   R   RM   RO   R   R‡   RÁ   t	   to_pandasR&   RL   (   RS   RT   RU   Rb   RX   R`   Rc   RZ   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_filtersP  s$    %$%%%"c         C` s  t  ƒ  t |  ƒ }  d } g  } x5 d d d d g D]! } | | g t | d ƒ 7} q2 Wt j i | d 6t j j d | ƒ d 6t j j d d	 d | ƒd 6ƒ } t j	 | d
 d ƒ} | j
 |  d d g d d ƒt j |  d d d d g ƒ} d } | j | k s	t ‚ d  S(   Nid   u   au   bu   cu   di   R8   i   i   R   R­   R   u   fastparquetRã   u   ==i   i1   (   u   au   ==u   b(   i   i1   (   R   R<   t   intR=   R>   R?   RD   Rß   RG   RH   RI   RM   RN   RL   (   RS   R8   t   categoricalsRº   RX   RŽ   R[   t   expected_divisions(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt    test_divisions_read_with_filterso  s     "c         C` sû   t  ƒ  t |  ƒ }  t j i d d d d d d d d g d 6d d d d d d d d g d 6d	 d d d d d d d d g ƒ} t j | d
 d ƒ} | j |  d d g d d ƒt j |  d d d d g ƒ} | j sÜ t	 ‚ d } | j
 | k s÷ t	 ‚ d  S(   Ni    i   i   i   u   uniqueu   id1u   id2u   idR   R   R­   R   u   fastparquetRã   u   ==(   u   idu   ==u   id1(   i    i   i   (   R   R<   R=   R>   RG   RH   RI   RM   t   known_divisionsRL   RN   (   RS   RX   RŽ   R[   Ré   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt*   test_divisions_are_known_read_with_filters†  s     (!c         C` s¡  t  ƒ  t |  ƒ } t j i t j j d d d g d d ƒd 6t j j d d ƒ d 6t j j d d	 d d ƒd
 6ƒ } t j	 | d d ƒ} | j
 | d d g d d ƒt j | ƒ } t j | ƒ j ƒ  } xU | j j ƒ  D]D } t | j | j | k ƒ t | j | j | k ƒ k sÛ t ‚ qÛ Wt j | d d g ƒj ƒ  } t | j | j d k ƒ t | j ƒ k srt ‚ t j t ƒ  t j | d d ƒ} Wd  QXd  S(   Nu   Au   Bu   CR8   id   u   au   bi   i   u   cR   i   R­   R   u   fastparquetRã   u   ==u   pyarrow(   u   au   ==u   B(   R   R<   R=   R>   R?   RD   RE   Rß   RG   RH   RI   R‡   RÁ   RM   RO   RÇ   Rà   R'   R¸   RL   R
   Ru   (   RS   Rb   RX   RŽ   t   pq_fR[   R.   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt&   test_read_from_fastparquet_parquetfile  s     	""B1u	   scheduleru   threadsu	   processesc         C` sø   t  |  ƒ }  t j i d d d d g d 6d d d d	 g d
 6ƒ } d | j _ t j | d d ƒ} | j |  d t d | ƒ} t	 | d ƒ s’ t
 ‚ | j d | ƒ t j j |  ƒ sº t
 ‚ t j |  d | d t | ƒ ƒ} t | | d t | ƒ ƒd  S(   Ni   i   i   i   u   ag      ð?g       @g      @g      @u   bu   indexR   RO   R   u   daskR;   R^   R_   (   R<   R=   R>   R   R   RG   RH   RI   R   t   hasattrRL   RO   RJ   Rl   Rm   RM   R   R   (   RS   R;   R   RX   R`   Rº   Rc   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_to_parquet_lazy¹  s    !c         C` s·   t  ƒ  t |  ƒ } t j i d g d 6d d ƒ} t j | d ƒ } | j | d t d d ƒt j	 | ƒ } | j
 d j t j j j k s‘ t ‚ t j | ƒ j ƒ  } t | | ƒ d  S(	   Nu   nowu   aR7   u   M8[ns]i   R:   t   timesu   int96(   R   R<   R=   R>   RG   RH   RI   R   R‡   RÁ   t   _schemat   typet   parquet_thriftt   Typet   INT96RL   RM   RO   R   (   RS   Rb   RX   R`   RÂ   R[   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_timestamp96Ë  s    %c   	      ` s}  t  ƒ  t |  ƒ } d ‰  t j ‡  f d †  t d d d g ƒ Dƒ ƒ } t j ‡  f d †  t d d d g ƒ Dƒ ƒ } g  } xg d d g D]Y } t j j | | ƒ } t j j | ƒ sÅ t j	 | ƒ n  | j
 t j j | d	 ƒ ƒ qˆ Wt j | d
 | ƒ t j | d | ƒ t j | ƒ } d | j k s1t ‚ | j ƒ  } d | k sOt ‚ t j | j ƒ d d g k j ƒ  syt ‚ d  S(   Ni   c         ` s+   i  |  ]! \ } } t  j j ˆ  ƒ | “ q S(    (   R?   RD   (   R   t   iRš   (   t   N(    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pys
   <dictcomp>Û  s   	u   au   bu   cc         ` s+   i  |  ]! \ } } t  j j ˆ  ƒ | “ q S(    (   R?   RD   (   R   Rø   Rš   (   Rù   (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pys
   <dictcomp>Ý  s   	u
   test_data1u
   test_data2u
   data1.parqi    i   u   dir0(   R   R<   R=   R>   t	   enumerateRJ   Rl   Rn   Rm   t   mkdirR"   R‡   Rˆ   RG   RM   RQ   RL   RO   R?   Rà   t   dir0RR   (	   RS   Rb   R³   RZ   RY   RŽ   t   dnRX   R[   (    (   Rù   sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_drill_scheme×  s(     c      
   C` s/  t  ƒ  t |  ƒ } t j i t j t j j d d d d d d g d d ƒd	 d
 ƒd 6t j t t	 d d ƒ ƒ d	 d ƒd 6t j t t	 d d ƒ ƒ d	 d ƒd 6ƒ } t
 j | d ƒ } | j | ƒ t
 j | d d g ƒ} t | j ƒ d g k sû t ‚ t
 j | ƒ } t | j ƒ t | ƒ k s+t ‚ d  S(   Nu   au   bu   cu   du   eu   fR8   id   R7   u   categoryu
   categoriesi    u   intu   intsu   floatu   floatsi   RQ   (   R   R<   R=   R>   t   SeriesR?   RD   RE   Rx   RÓ   RG   RH   RI   RM   RQ   RL   (   RS   Rb   RX   R`   t   rddf(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_parquet_select_catsð  s    	*
%+t   xfail_fastparquet_pyarrowu   fastparquet gh#251c         C` sö   | d k r t  j d ƒ n  | d k r\ d d  l } | j t d ƒ k  r\ t  j d ƒ q\ n  t j i d d g d	 6d
 t j d d g d d ƒƒ} d | j _	 t
 j | d ƒ } t |  ƒ } | j | d | ƒt
 j | d | ƒ} t | | ƒ d  S(   Nu   fastparquetu)   Fastparquet does not write column_indexesu   pyarrowi    u   0.8.0u*   pyarrow<0.8.0 did not write column_indexesi   i   u   AR   u   au   bR   u   idxu   colsR   (   R
   R   R‰   R   R   R=   R>   RÀ   RQ   R   RG   RH   R<   RI   RM   R   (   RS   RT   RU   R€   RX   R`   RV   t   result(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_columns_name  s    7u   compression,u   defaultu   gzipu   snappyc   	      C` s?  t  |  ƒ } | d k r4 | d k r4 t j d ƒ n  t j i d d d g d d 6d	 d
 d g d d 6ƒ } t j | d d ƒ} | j | d | d | ƒ| d k rõ | d k rõ d d  l } | j	 | ƒ } | j
 d j d j j d	 k sõ t ‚ n  t j | d | d t | ƒ ƒ} t | | d | d k d t | ƒ ƒd  S(   Nu   fastparquetu   snappyu   defaultu   au   bu   ci
   u   xi   i   i   u   yR   t   compressionR   i    R^   Rz   R_   (   u   snappyu   default(   R<   R
   t   importorskipR=   R>   RG   RH   RI   R‡   RÁ   t
   row_groupsRQ   t	   meta_datat   codecRL   RM   R   R   (	   RS   R  R   Rb   RX   R`   R‡   RÂ   R[   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt%   test_writing_parquet_with_compression  s    )u   metadatau   idxu   nameu   int64u
   numpy_typeu   pandas_typeu   Au   columnsu   index_columnsu   0.21.0u   pandas_versionu
   field_nameu   UTF-8u   encodingu   objectu   unicodeu   column_indexesu   __index_level_0__c         C` s   |  j  S(   N(   R   (   R   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   pandas_metadata1  s    /c         C` sÆ   t  |  ƒ \ } } } } | d g k s- t ‚ | d g k sB t ‚ | d  g k sW t ‚ |  d d g k r | i d d 6d d 6k s­ t ‚ n  | i d d 6d d 6k s­ t ‚ t | t ƒ sÂ t ‚ d  S(   Nu   idxu   Au   index_columnsu   __index_level_0__(   R   RL   R   t
   isinstancet   dict(   R  t   index_namest   column_namest   mappingt   column_index_names(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_parse_pandas_metadatac  s    # c    	      C` sí  d  g }  d g } i d  d 6d d 6} d  g } i i d  d 6d d 6d d 6d d 6i d  d 6d d 6d d 6d d 6g d 6d g d	 6d
 d 6} t | ƒ \ } } } } | |  k sµ t ‚ | | k sÇ t ‚ | | k sÙ t ‚ | | k së t ‚ i i d  d 6i d d 6d 6d  d 6d d 6d d 6g d 6i d d 6d  d 6d d 6d d 6d d 6i d d 6d  d 6d  d 6d d 6d d 6g d 6d g d	 6d
 d 6} t | ƒ \ } } } } | |  k s³t ‚ | | k sÅt ‚ | | k s×t ‚ | | k sét ‚ d  S(   Nu   xu   __index_level_0__u   metadatau   nameu   int64u
   numpy_typeu   pandas_typeu   columnsu   index_columnsu   0.21.0u   pandas_versionu
   field_nameu   UTF-8u   encodingu   objectu   unicodeu   column_indexes(   R   R   RL   (	   t   e_index_namest   e_column_namest	   e_mappingt   e_column_index_namest   mdR  R  R  R  (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt%   test_parse_pandas_metadata_null_indext  sR    			






c         C` sÄ   t  ƒ  t |  ƒ d } t j j t j d d d g ƒ t j d d d g ƒ g d d d	 g ƒ} t j | | ƒ t j	 | d
 | ƒ} t
 j i d d d g d 6d d d g d	 6ƒ } t | | ƒ d  S(   Nu
   table.parqi   i   i   i   i   Rƒ   u   Au   BR   (   R   R<   R€   RŒ   R…   t   arrayRŠ   R‹   RG   RM   R=   R>   R   (   RS   R   RV   RÜ   R  t   expected(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_read_no_metadata¨  s    /c          C` s  i i d  d 6i d d 6d 6d  d 6d d 6d d	 6g d
 6i d d 6d  d 6d d 6d d 6d d	 6i d d 6d  d 6d d 6d d 6d d	 6g d 6d g d 6d d 6}  t |  ƒ \ } } } } | d g k sË t ‚ | d g k sà t ‚ | i d d 6d d 6k s t ‚ | d  g k st ‚ d  S(   Nu
   field_nameu   UTF-8u   encodingu   metadatau   nameu   objectu
   numpy_typeu   unicodeu   pandas_typeu   column_indexesu   Au   int64u   __index_level_0__u   columnsu   index_columnsu   0.21.0u   pandas_version(   R   R   RL   (   R  R  R  t   storage_name_mappingR  (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt2   test_parse_pandas_metadata_duplicate_index_columns·  s6    

 c          C` s  i i d  d 6i d d 6d 6d  d 6d d 6d d	 6g d
 6i d d 6d  d 6d d 6d d 6d d	 6i d d 6d  d 6d d 6d d 6d d	 6g d 6d g d 6d d 6}  t |  ƒ \ } } } } | d g k sË t ‚ | d g k sà t ‚ | i d d 6d d 6k s t ‚ | d  g k st ‚ d  S(   Nu
   field_nameu   UTF-8u   encodingu   metadatau   nameu   objectu
   numpy_typeu   unicodeu   pandas_typeu   column_indexesu   Au   int64u   __index_level_0__u   columnsu   index_columnsu   0.21.0u   pandas_version(   R   R   RL   (   R  R  R  R  R  (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt1   test_parse_pandas_metadata_column_with_index_nameÛ  s6    

 c   
      C` s  t  |  ƒ } t j j | d ƒ } t j j | d ƒ } t j d ƒ t j i t j	 j
 d d d g d d ƒd	 6t j	 j	 d d ƒ d
 6t j	 j d d d d ƒd 6ƒ } t j | d d ƒ} i i d d 6d  d 6t d 6d 6i d d 6d d 6d  d 6d 6} | j | d | | | t j | d | d t | ƒ ƒ} t | | d | d k d t | ƒ ƒt j j d d ƒ ( | j | d | d d	 g | | Wd  QXt j | d | ƒj ƒ  } xU | j j ƒ  D]D }	 t | j | j |	 k ƒ t | j | j |	 k ƒ k sÃt ‚ qÃWd  S(   Nu   normalu   partitionedu   snappyu   Au   Bu   CR8   id   u   au   bi   i   u   cR   i   u   compressionu   coerce_timestampsu   use_dictionaryu   pyarrowu   int64u   timesu
   fixed_textu   fastparquetR   R^   Rz   R_   R;   u   syncR­   (   R<   RJ   Rl   Rn   R
   R  R=   R>   R?   RD   RE   Rß   RG   RH   R   R   RI   RM   R   R   R—   R›   R'   RO   RÇ   Rà   R¸   RL   (
   RS   R   Rb   t   path1t   path2RX   R`   t   engine_kwargsR[   R.   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt    test_writing_parquet_with_kwargsÿ  s4    +"!%c      
   C` s?   t  |  ƒ } t j t ƒ  t j | d | d d ƒWd  QXd  S(   NR   t   unknown_keyu   unknown_value(   R<   R
   Ru   RÐ   R`   RI   (   RS   R   Rb   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt(   test_writing_parquet_with_unknown_kwargs$  s    c         C` sE  t  j d ƒ | d k rM d d  l } | j t d ƒ k  rM t  j d ƒ qM n  t |  ƒ } d } i t j j	 d d d | ƒj
 ƒ  d	 d
 6t j j d d d g d | ƒd 6t j j d d d g d | ƒd 6} t j t j | ƒ d ƒ } | j | d d d t d | d d d g ƒt j | d | ƒ} | | j d k j ƒ  d  S(   Nu   snappyu   pyarrowi    u   0.9.0u"   pyarrow<0.9.0 did not support thisi   g333333Ó?R8   i2   u   signal1u   Au   Bu   Cu   fake_categorical1u   Du   Eu   Fu   fake_categorical2i   R  R:   R   R­   (   R
   R  R‰   R   R   R   R<   R?   RD   t   normalt   cumsumRE   RG   RH   R=   R>   RI   R   RM   t   fake_categorical1RO   (   RS   R   R€   Rb   R8   RŽ   RX   t   df_partitioned(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_select_partitioned_column+  s    )"%c         C` sé   | d k r1 t  j t d ƒ k  r1 t j d ƒ n  | d k rb t j t d ƒ k  rb t j d ƒ n  t |  ƒ } t j d g g d d	 g d
 d ƒ} t	 j
 | d ƒ } | j | d | ƒt	 j | d | ƒ} t | | d t d t ƒd  S(   Nu   pyarrowu   0.11.0u#   pyarrow<0.11.0 did not support thisu   fastparquetu   0.3.0u&   fastparquet<0.3.0 did not support thisi    RQ   u   aR7   u   datetime64[ns, UTC]i   R   R_   Rz   (   R€   R   R   R
   R   R‡   R<   R=   R>   RG   RH   RI   RM   R   R   (   RS   R   Rb   RX   RZ   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_with_tz?  s    !$c         C` s  t  ƒ  t |  ƒ } i t j t j d ƒ d ƒ j t j ƒ d 6t j d d ƒ j t j ƒ d 6t j d d ƒ j t j ƒ d 6t j d	 d ƒ j t j	 ƒ d
 6} t
 j | ƒ } t j | d d ƒ} | j | d d d d ƒt j | d d ƒ} | j i t j d 6ƒ j ƒ  d  S(   Ni   i   u   piÿÿÿÿi   u   biþÿÿÿu   ciýÿÿÿu   dR   R   u   pyarrowR­   (   R   R<   R?   t   repeatR@   RF   t   int8t   int16t   float32RC   R=   R>   RG   RH   RI   RM   RO   (   RS   Rl   RW   t   pdfR`   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_arrow_partitioningM  s    ("c          C` s€   t  j t ƒ  }  t j d d d ƒWd  QXd t |  j ƒ k sF t ‚ d t |  j ƒ k sa t ‚ d t |  j ƒ k s| t ‚ d  S(   Nu   fooR   u   arrowu   fastparquet(   R
   Ru   R)   RG   RM   R<   Rº   RL   (   t   info(    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_informative_error_messages`  s
    c         C` sØ   t  ƒ  t |  ƒ } t j i d d d d d g d 6ƒ } | d j d ƒ | d <t j | d d ƒ} t j | | ƒ t j | | d t d t ƒt j	 | ƒ j
 ƒ  } | d j ƒ  d d d d d g d	 k sÔ t ‚ d  S(
   Nu   au   bu   xu   categoryR   i   R"   R®   i   (   R   R<   R=   R>   RF   RG   RH   RI   R   RM   RO   R¤   RL   (   RS   Rl   RX   R`   RŽ   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_append_cat_fpi  s    %c         C` s“   d d  l  } t ƒ  t |  ƒ } t j i d d d d g d 6ƒ } t j | d d ƒ} t j | | ƒ t j	 | ƒ } | j
 | ƒ t j | ƒ d  S(   Ni    i   i   i   i   u   xR   (   t   shutilR   R<   R=   R>   RG   RH   RI   R‡   RÁ   t   rmtreeRM   (   RS   R4  Rl   RX   R`   RÂ   (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   test_passing_parquetfiley  s    "(‚   t
   __future__R    R   R   R   RJ   t   distutils.versionR   t   numpyR?   t   pandasR=   t   pandas.util.testingt   utilt   testingRÖ   R
   R—   t   dask.multiprocessingt   dask.dataframet	   dataframeRG   t   dask.dataframe.utilsR   t   dask.dataframe.io.parquetR   t
   dask.utilsR   R‡   t   ImportErrorR   R‰   R€   R   R   t   pyarrow.parquett   parquetRŠ   R	   R   R!   t   skipift   FASTPARQUET_MARKR   R   R   t   PYARROW_MARKR   R   R   t   nrowsR   R>   RÓ   Rø   RÀ   RX   RH   R`   t   fixtureR   R   R5   t   pyarrow_fastparquet_msgt   write_read_engines_xfailR]   Rf   R+   Rk   Rp   Rs   Rt   Ry   R{   R   R‚   R’   R“   R•   R˜   t   xfailR    Rï   R¡   R¦   R«   R¬   R¶   R¹   R¼   R½   R¿   RÃ   RÆ   t   CategoricalRx   t   mapt	   TimestampRF   R   RÊ   RÒ   RÕ   RØ   RÝ   Rá   Ræ   Rê   Rì   Rî   Rð   R÷   Rþ   R  R  R
  R  R  R  R  R  R  R"  R$  R)  R*  R0  R2  R3  R6  (    (    (    sC   lib/python2.7/site-packages/dask/dataframe/io/tests/test_parquet.pyt   <module>   sb  "


				1'5	$	%0	@
	
	6									 %,,,%87.4	....4C(%%%%7			
				$			*	






		4		$	$	%							