ó
¦–Õ\c        	   @` sé   d  d l  m Z m Z m Z d  d l Z d  d l Z d  d l m Z d d l m	 Z	 d d l
 m Z m Z d d l m Z d d l m Z m Z d	 d
 l m Z e d e ƒ Z d d e d e j e d d d „ Z d „  Z d „  Z d S(   i    (   t   print_functiont   divisiont   absolute_importN(   t   concati   (   t   unicode(   t   system_encodingt   parse_bytes(   t   delayed(   t
   open_filest
   read_bytesi   (   t   from_delayedt   puret   infert   strictc	         C` sÉ  | d k	 r' | d k	 r' t d ƒ ‚ n  t | t t f ƒ rK t | ƒ } n  t |  d d d | d | d | | pr i  }	 | d k r/| d k rÄ g  |	 D]$ }
 t t ƒ t t	 ƒ |
 ƒ ƒ ^ q— } q™g  } xÌ t
 d t |	 ƒ | ƒ D]E } |	 | | | !} t t ƒ t t ƒ t	 | ƒ ƒ } | j | ƒ qã Wnj t |  d | j ƒ  d	 | d
 t d | | p\i  \ } } g  t | ƒ D] } t t ƒ | | | ƒ ^ qu} | s±t d |  ƒ ‚ n  | s»| St | ƒ Sd S(   sª   Read lines from text files

    Parameters
    ----------
    urlpath : string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    blocksize: None, int, or str
        Size (in bytes) to cut up larger files.  Streams by default.
        Can be ``None`` for streaming, an integer number of bytes, or a string
        like "128MiB"
    compression: string
        Compression format like 'gzip' or 'xz'.  Defaults to 'infer'
    encoding: string
    errors: string
    linedelimiter: string
    collection: bool, optional
        Return dask.bag if True, or list of delayed values if false
    storage_options: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.
    files_per_partition: None or int
        If set, group input files into partitions of the requested size,
        instead of one partition per file. Mutually exclusive with blocksize.

    Examples
    --------
    >>> b = read_text('myfiles.1.txt')  # doctest: +SKIP
    >>> b = read_text('myfiles.*.txt')  # doctest: +SKIP
    >>> b = read_text('myfiles.*.txt.gz')  # doctest: +SKIP
    >>> b = read_text('s3://bucket/myfiles.*.txt')  # doctest: +SKIP
    >>> b = read_text('s3://key:secret@bucket/myfiles.*.txt')  # doctest: +SKIP
    >>> b = read_text('hdfs://namenode.example.com/myfiles.*.txt')  # doctest: +SKIP

    Parallelize a large file by providing the number of uncompressed bytes to
    load into each partition.

    >>> b = read_text('largefile.txt', blocksize='10MB')  # doctest: +SKIP

    Returns
    -------
    dask.bag.Bag if collection is True or list of Delayed lists otherwise

    See Also
    --------
    from_sequence: Build bag from Python sequence
    s7   Only one of blocksize or files_per_partition can be sett   modet   rtt   encodingt   errorst   compressioni    t	   delimitert	   blocksizet   samples   No files foundN(   t   Nonet
   ValueErrort
   isinstancet   strR   R   R   R   t   listt   file_to_blockst   ranget   lenR   t   mapt   appendR	   t   encodet   Falset   decodeR
   (   t   urlpathR   R   R   R   t   linedelimitert
   collectiont   storage_optionst   files_per_partitiont   filest   filt   blockst   startt   block_filest   block_linest   _t   b(    (    s,   lib/python2.7/site-packages/dask/bag/text.pyt	   read_text   s2    5	4!1c         c` s)   |   } x | D] } | Vq WWd  QXd  S(   N(    (   t	   lazy_filet   ft   line(    (    s,   lib/python2.7/site-packages/dask/bag/text.pyR   g   s    	c         C` s+   |  j  | | ƒ } t j | ƒ } t | ƒ S(   N(   R"   t   iot   StringIOR   (   t   blockR   R   t   textt   lines(    (    s,   lib/python2.7/site-packages/dask/bag/text.pyR"   m   s    (   t
   __future__R    R   R   R4   t   ost   toolzR   t   compatibilityR   t   utilsR   R   R   t   bytesR   R	   t   coreR
   t   TrueR   t   linesepR0   R   R"   (    (    (    s,   lib/python2.7/site-packages/dask/bag/text.pyt   <module>   s   	S	