ó
¦–Õ\c        
   @` sÿ  d  d l  m Z m Z m Z d  d l Z d  d l Z d  d l m Z d  d l Z d  d l	 m
 Z
 d  d l Z d  d l m Z m Z m Z d  d l m Z m Z y6 d  d l Z d  d l m Z d  d l m Z m Z Wn e k
 rí d Z n Xy d  d l Z Wn e k
 rd Z n Xy5 d  d l Z d  d	 l m Z e e j ƒ e k Z Wn e k
 rle  Z d Z n Xe j! j" d
 d ƒ sše j# j$ d d ƒ Z% n  d Z& e j' d e j( d d e j# j) e d d ƒƒe j( d d e j# j) e d d ƒƒg ƒ d „  ƒ Z* e j# j) e d d ƒZ+ e j# j) e d d ƒZ, e+ e, d „  ƒ ƒ Z- d „  Z. d „  Z/ d „  Z0 d „  Z1 d „  Z2 d „  Z3 e j# j) e d d ƒd  „  ƒ Z4 d! „  Z5 e+ e, d" „  ƒ ƒ Z6 e+ d# „  ƒ Z7 d$ „  Z8 e j# j) e d d% ƒd& „  ƒ Z9 d S('   i    (   t   print_functiont   divisiont   absolute_importN(   t   LooseVersion(   t   concat(   t
   read_bytest
   open_filest   get_fs(   t   unicodet   PY2(   t   Client(   t   clustert   loop(   t   _MIN_PYARROW_VERSION_SUPPORTEDt   DASK_RUN_HDFS_TESTSt    t   reasons    HDFS tests not configured to runs   /tmp/test-daskt   paramst   hdfs3t   markss   hdfs3 not foundt   pyarrows"   required pyarrow version not foundc         c` sÄ   |  j  d k r* t j d d d d ƒ } n t j j d d d d ƒ } | j t ƒ rj | j t d t	 ƒn  | j
 t ƒ t j j d |  j  ƒ 
 | VWd  QX| j t ƒ rÀ | j t d t	 ƒn  d  S(   NR   t   hostt	   localhostt   portiT  t	   recursivet   hdfs_driver(   t   paramR   t   HDFileSystemR   t   hdfst   connectt   existst   basedirt   rmt   Truet   mkdirt   daskt   configt   set(   t   requestR   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyR   ,   s    s   pyarrow not installeds   hdfs3 not installedc          C` sÜ   d d l  m }  d d l m } t d ƒ \ } } t | | ƒ sG t ‚ t j j	 d d ƒ  t d ƒ \ } } Wd  QXt | |  ƒ sŠ t ‚ | | k sœ t ‚ t
 j t ƒ + t j j	 d d ƒ  t d ƒ Wd  QXWd  QXd  S(   Ni    (   t   HDFS3HadoopFileSystem(   t   PyArrowHadoopFileSystemR   R   R   s   not-a-valid-driver(   t   dask.bytes.hdfs3R'   t   dask.bytes.pyarrowR(   R   t
   isinstancet   AssertionErrorR#   R$   R%   t   pytestt   raisest
   ValueError(   R'   R(   t   fs1t   token1t   fs2t   token2(    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_fs_driver_backendsF   s    c   
      C` sÙ   d } d t  d ƒ } x_ g  t | ƒ D] } d t | f ^ q& D]4 } |  j | d d d ƒ } | j | ƒ Wd  QXq@ Wt d t ƒ \ } } t j | ƒ \ } g  | D] }	 d	 j |	 ƒ ^ q§ | | g k sÕ t	 ‚ d  S(
   Ni
   t   ag     @@s
   %s/file.%dt   wbt   replicationi   s   hdfs://%s/file.*R   (
   t   intt   rangeR   t   opent   writeR   R#   t   computet   joinR,   (
   R   t   nfilest   datat   it   fnt   ft   samplet   valuest   resultst   r(    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_read_bytesZ   s    0c         C` sß   d } d t  d ƒ } x_ g  t | ƒ D] } d t | f ^ q& D]4 } |  j | d d d ƒ } | j | ƒ Wd  QXq@ Wd t } t | ƒ \ } } t j | ƒ \ }	 g  |	 D] }
 d	 j |
 ƒ ^ q­ | | g k sÛ t	 ‚ d  S(
   Ni
   R5   g     @@s
   %s/file.%dR6   R7   i   s   hdfs://localhost:8020%s/file.*R   (
   R8   R9   R   R:   R;   R   R#   R<   R=   R,   (   R   R>   R?   R@   RA   RB   t   pathRC   RD   RE   RF   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_read_bytes_URLi   s    0
c         C` s3  d t  } t d ƒ } t d ƒ } d | } |  j | d d d ƒ+ } x! t | ƒ D] } | j | ƒ qT WWd  QXt d | d	 | ƒ\ } } | d
  d k s£ t ‚ t | d ƒ | k s¿ t ‚ t j	 | d ƒ \ }	 t
 t t |	 ƒ ƒ | | k sú t ‚ x2 |	 D]* }
 t |
 j d ƒ ƒ d h k st ‚ qWd  S(   Ns   %s/fileg     @@g     jø@R5   R6   R7   i   s   hdfs://t	   blocksizei   t   aaaaai    s   utf-8(   R   R8   R:   R9   R;   R   R,   t   lenR#   R<   t   sumt   mapR%   t   decode(   R   RA   t   nblocksRJ   R?   RB   R@   RC   RD   RE   RF   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_read_bytes_big_filex   s    

%c   	      C` sF  d t  d ƒ } d t } |  j | d d d ƒ } | j | ƒ Wd  QXt d t d d	 d
 t ƒ\ } } t d t d d	 d
 t ƒ\ } } t d t d d d
 t ƒ\ } } g  t | ƒ D] } | j ^ q» g  t | ƒ D] } | j ^ qÚ k sø t ‚ g  t | ƒ D] } | j ^ qg  t | ƒ D] } | j ^ q$k sBt ‚ d  S(   Ns   abc
g     @@s   %s/fileR6   R7   i   s   hdfs://%s/*t	   delimiters   
RC   t   c(	   R8   R   R:   R;   R   t   FalseR   t   keyR,   (	   R   R?   RA   t   filt   _t   xt   yt   zRB   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_deterministic_key_namesŽ   s    
"""Jc         C` sÑ   d t  } g  t d ƒ D] } d | ^ q } t | d t | ƒ d d ƒ} x9 t | | ƒ D]( \ } } |  } | j | ƒ Wd  QXq[ Wt d t  ƒ \ } }	 t j t	 t
 |	 ƒ ƒ ƒ \ }
 | |
 k sÍ t ‚ d  S(   Ns
   hdfs://%s/i   s   test data %it   numt   modeR6   s   hdfs://%s/*.part(   R   R9   R   RL   t   zipR;   R   R#   R<   t   listR   R,   (   R   RH   R@   R?   t   filesRV   t   bRB   RC   t   valsRE   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_open_files_write   s    
#	c         C` s·   t  j d ƒ } |  j d t d ƒ  } | j d ƒ Wd  QX|  j d t d ƒ  } | j d ƒ Wd  QX| j d t ƒ } t | | j ƒ s’ t ‚ | j	 j
 ƒ  j ƒ  d k s³ t ‚ d  S(   Ns   dask.dataframes   %s/1.csvR6   s$   name,amount,id
Alice,100,1
Bob,200,2s   %s/2.csvs)   name,amount,id
Charlie,300,3
Dennis,400,4s   hdfs://%s/*.csvi   i   i   i   i   i   i
   (   R-   t   importorskipR:   R   R;   t   read_csvR+   t	   DataFrameR,   t   idRM   R<   (   R   t   ddRB   t   df(    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_read_csv¬   s    sM   pyarrow's hdfs isn't fork-safe, requires multiprocessing `spawn` start methodc         C` s¯  t  j d ƒ } d d  l } | j d ƒ j d ƒ } |  j d t d ƒ  } | j d j ƒ  ƒ Wd  QX|  j d t d ƒ  } | j d	 j ƒ  ƒ Wd  QX|  j d
 t d ƒ  } | j d j ƒ  ƒ Wd  QX| j	 d t ƒ } t
 j j d | ƒ , | j j ƒ  j j ƒ  j t ƒ j ƒ  } Wd  QX| d d d d d d g k sCt ‚ | j	 d t ƒ } t
 j j d | ƒ   | j j ƒ  j ƒ  j ƒ  } Wd  QX| d d d d g k s«t ‚ d  S(   Ns   dask.bagi    t   spawni   s   %s/text.1.txtR6   s   Alice 100
Bob 200
Charlie 300s   %s/text.2.txts   Dan 400
Edith 500
Frank 600s   %s/other.txts   a b
c ds   hdfs://%s/text.*.txtt   pools   hdfs://%s/other.txtR5   Ra   RS   t   d(   R-   Rd   t   multiprocessingt   get_contextt   PoolR:   R   R;   t   encodet	   read_textR#   R$   R%   t   strt   stript   splitRN   RL   R<   R,   t   flatten(   R   t   dbt   mpRl   RB   Ra   t   result(    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_read_text»   s"    -$!c         C` sñ   t  j d ƒ } d } d t } |  j | d ƒ # } | j d j | | g ƒ ƒ Wd  QX| j d | d t ƒ} | d j ƒ  } t	 | ƒ d	 k s— t
 ‚ t t t j | ƒ ƒ | j d
 ƒ g d	 k sË t
 ‚ t	 | d j ƒ  ƒ d k sí t
 ‚ d  S(   Ns   dask.bags   abcdÃ©s   %s/data.txtR6   s   
s   hdfs://t
   collectioni    i   s   utf-8i   (   R-   Rd   R   R:   R;   R=   Rr   RT   R<   RL   R,   R_   RN   R   Rt   RO   (   R   Rw   R?   RA   RB   Ry   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_read_text_unicodeØ   s    
"4c          C` sD   d d l  m }  |  ƒ  } | j ƒ  } t | t j j ƒ s@ t ‚ d  S(   Ni    (   R'   (   R)   R'   t   _get_pyarrow_filesystemR+   R   t
   filesystemt
   FileSystemR,   (   R'   t   dhdfst   pa_hdfs(    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_pyarrow_compatè   s    	c   	      C` sþ   t  t j ƒ d k r% t j d ƒ n  t j d ƒ } d d  l } d d  l } d t } d | } | j	 | j
 j d d ƒ d
 t d ƒ ƒ} | j | d d	 ƒ} | j | d d ƒt |  j | ƒ ƒ sÍ t ‚ | j | d d ƒ} t | ƒ d k sú t ‚ d  S(   Ns   0.13.0s(   pyarrow 0.13.0 not supported for parquets   dask.dataframei    s   %s/test.parquets	   hdfs://%st   sizeiè  i   t   columnst   abcdt   npartitionst   engineR   (   iè  i   (   R   R   t   __version__R-   t   skipRd   t   pandast   numpyR   Rf   t   randomt   normalR_   t   from_pandast
   to_parquetRL   t   lsR,   t   read_parquet(	   R   Rh   t   pdt   npRA   t   hdfs_fnRi   t   ddft   ddf2(    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_parquet_pyarrowñ   s    

c      	   C` sk  t  |  ƒ j j d ƒ r: d d l m } | j |  ƒ }  n d d l m } | j |  ƒ }  i d d g d d d	 d
 d g f t	 6d g d d g f t	 d 6d g d d g f t	 d 6g  d g f t	 d 6} |  j
 t	 d ƒ |  j
 t	 d ƒ xI d „  | j ƒ  Dƒ D]1 } |  j | d d ƒ } | j d ƒ Wd  QXqWt |  j t	 d ƒ ƒ d „  d d d d g Dƒ k srt ‚ t |  j t	 d ƒ ƒ d  „  d! d" d g Dƒ k sªt ‚ t |  j t	 d# ƒ ƒ d$ „  d! d" d% d& g Dƒ k såt ‚ t |  j t	 d' ƒ ƒ d( „  d! d% g Dƒ k st ‚ |  j t	 d ƒ t	 d g k s@t ‚ |  j t	 d) ƒ t	 d) g k sft ‚ |  j t	 d ƒ t	 d g k sŒt ‚ |  j d* ƒ g  k s§t ‚ |  j t	 d+ ƒ g  k sÆt ‚ |  j t	 d, ƒ g  k såt ‚ |  j t	 d- ƒ g  k st ‚ |  j t	 d. ƒ g  k s#t ‚ t |  j t	 d/ ƒ ƒ d0 „  d d d d d1 d d g Dƒ k sgt ‚ d  S(2   NR   i    (   R'   (   R(   RS   t   c2R5   t   a1t   a2t   a3t   b1Rm   t   x1t   x2s   /cs   /c2t   x3s   /c/ds   /c/d/s   /c2/d/c         s` s:   |  ]0 \ } \ } } | D] } t  j | | ƒ Vq q d  S(   N(   t	   posixpathR=   (   t   .0t   dirnameRW   t   filsRB   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pys	   <genexpr>  s   R]   R6   t   000s   /a*c         S` s   h  |  ] } t  | ’ q S(    (   R   (   R¡   t   p(    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pys	   <setcomp>  s   	 s   /as   /a1s   /a2s   /a3s   /c/*c         S` s   h  |  ] } t  | ’ q S(    (   R   (   R¡   R¥   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pys	   <setcomp>!  s   	 s   /c/x1s   /c/x2s   /*/x*c         S` s   h  |  ] } t  | ’ q S(    (   R   (   R¡   R¥   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pys	   <setcomp>$  s   	 s   /c2/x1s   /c2/x2s   /*/x1c         S` s   h  |  ] } t  | ’ q S(    (   R   (   R¡   R¥   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pys	   <setcomp>&  s   	 s   /c/s   /this-path-doesnt-exists	   /missing/s   /missing/x1s
   /missing/*s
   /*/missings   /*c         S` s   h  |  ] } t  | ’ q S(    (   R   (   R¡   R¥   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pys	   <setcomp>3  s   	 s   /b1(   t   typet
   __module__t
   startswithR)   R'   t
   from_hdfs3R*   R(   t   from_pyarrowR   t   mkdirst   itemsR:   R;   R%   t   globR,   (   R   R'   R(   t   treeRA   t   f2(    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt	   test_glob  sB    %	%"%&&&s(   Skipped as distributed is not installed.c         C` sÚ   t  j d ƒ } |  j d t d ƒ  } | j d ƒ Wd  QX|  j d t d ƒ  } | j d ƒ Wd  QXt ƒ  d \ } \ } } t | d d | ƒ9 | j d	 t ƒ } | j j	 ƒ  j
 ƒ  d k sÊ t ‚ Wd  QXWd  QXd  S(   Ns   dask.dataframes   %s/1.csvR6   s$   name,amount,id
Alice,100,1
Bob,200,2s   %s/2.csvs)   name,amount,id
Charlie,300,3
Dennis,400,4t   addressR   s   hdfs://%s/*.csvi   i   i   i   i   i   i
   (   R-   Rd   R:   R   R;   R   R
   Re   Rg   RM   R<   R,   (   R   R   Rh   RB   t   sR5   Ra   Ri   (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   test_distributed6  s    (:   t
   __future__R    R   R   t   osR    t   distutils.versionR   R-   t   toolzR   R#   t   dask.bytes.coreR   R   R   t   dask.compatibilityR   R	   t   distributedR
   t   distributed.utils_testR   R   t   ImportErrort   NoneR   R   R*   R   Rˆ   t   PYARROW_DRIVERRT   t   environt   gett   markR‰   t
   pytestmarkR   t   fixtureR   t   skipifR   t   require_pyarrowt   require_hdfs3R4   RG   RI   RQ   R[   Rc   Rj   Rz   R|   R‚   R—   R°   R³   (    (    (    s9   lib/python2.7/site-packages/dask/bytes/tests/test_hdfs.pyt   <module>   sd   


	%						!		.