ó
¦–Õ\c           @` s§   d  d l  m Z m Z m Z d  d l m Z d d l m Z m Z d d l	 m
 Z
 d d l m Z d d	 l m Z d d
 l m Z d Z d d „ Z d d d „ Z d S(   i    (   t   absolute_importt   divisiont   print_function(   t   LooseVersioni   (   t   _get_pyarrow_dtypest   _meta_from_dtypesi   (   t	   DataFramei   (   t   tokenize(   t   get_fs_token_paths(   t   import_requiredt   read_orcc   	      C` sŠ   t  d d ƒ } d d l } |  j | d ƒ ( } | j | ƒ } | j | | ƒ } Wd QX| j t d ƒ k  rv | j ƒ  S| j d t ƒ Sd S(   s5   Pull out specific data from specific part of ORC files   pyarrow.orcs   Please install pyarrow >= 0.9.0i    Nt   rbs   0.11.0t   date_as_object(	   R	   t   pyarrowt   opent   ORCFilet   read_stripet   __version__R   t	   to_pandast   False(	   t   fst   patht   stripet   columnst   orct   pat   ft   ot   table(    (    s4   lib/python2.7/site-packages/dask/dataframe/io/orc.pyt   _read_orc_stripe   s    
c         C` s  t  d d ƒ } d d l } t | j ƒ d k r? t d ƒ ‚ n  | pH i  } t |  d d d	 | ƒ\ } } } d } g  }	 x~ | D]v }  | j |  d ƒ \ }
 | j |
 ƒ } | d k rÁ | j	 } n | | j	 k rß t
 d
 ƒ ‚ n  |	 j | j ƒ Wd QXq Wt | d d ƒ} | d k	 rUt | ƒ t | ƒ } | rat
 d | t | ƒ f ƒ ‚ qan t | ƒ } t | | g  g  ƒ } d t | |  | ƒ } i  } d } x] t | |	 ƒ D]L \ }  } x= t | ƒ D]/ } t | |  | | f | | | f <| d 7} qÁWq¨Wt | | | d g t | ƒ d ƒ S(   s…  Read dataframe from ORC file(s)

    Parameters
    ----------
    path: str or list(str)
        Location of file(s), which can be a full URL with protocol specifier,
        and may include glob character if a single string.
    columns: None or list(str)
        Columns to load. If None, loads all.
    storage_options: None or dict
        Further parameters to pass to the bytes backend.

    Returns
    -------
    Dask.DataFrame (even if there is only one column)

    Examples
    --------
    >>> df = dd.read_orc('https://github.com/apache/orc/raw/'
    ...                  'master/examples/demo-11-zlib.orc')  # doctest: +SKIP
    s   pyarrow.orcs   Please install pyarrow >= 0.9.0i    Ns   0.10.0sõ   Due to a bug in pyarrow 0.10.0, the ORC reader is unavailable. Please either downgrade pyarrow to 0.9.0, or use the pyarrow master branch (in which this issue is fixed).

For more information see: https://issues.apache.org/jira/browse/ARROW-3009t   modeR   t   storage_optionss,   Incompatible schemas while parsing ORC filest
   categoriess)   Requested columns (%s) not in schema (%s)s	   read-orc-i   (   R	   R   R   R   t   RuntimeErrorR   t   NoneR   R   t   schemat
   ValueErrort   appendt   nstripesR   t   sett   listR   R   t   zipt   rangeR   R   t   len(   R   R   R   R   R   R   t   fs_tokent   pathsR#   t   nstripes_per_fileR   R   t   ext   metat   namet   dskt   Nt   nR   (    (    s4   lib/python2.7/site-packages/dask/dataframe/io/orc.pyR
      sB    N(   R
   (   t
   __future__R    R   R   t   distutils.versionR   t   utilsR   R   t   coreR   t   baseR   t
   bytes.coreR   R	   t   __all__R"   R   R
   (    (    (    s4   lib/python2.7/site-packages/dask/dataframe/io/orc.pyt   <module>   s   