ó
¦Õ\c           @@  sŹ   d  d l  m Z d  d l Z d  d l Z d  d l m Z m Z d  d l Z d d l	 m
 Z
 m Z d d d e d d d d	  Z d
   Z e
 d d d d d d d d d d 	  Z d d  Z d   Z d S(   i    (   t   absolute_importN(   t
   open_filest
   read_bytesi   (   t   insert_meta_param_descriptiont	   make_metat   recordss   utf-8t   strictc	         K@  s  | d k r | d k } n  | d k r< | r< t d   n  | |	 d <| oU | d k |	 d <t | d d | d | d |	 j d d  d	 |  j d
 | | p i  }
 g  t |
 |  j    D]' \ } } t j t	  | | |	  ^ q· } | rt j
 |  g  |
 D] } | j ^ qž S| Sd S(   s  Write dataframe into JSON text files

    This utilises ``pandas.DataFrame.to_json()``, and most parameters are
    passed through - see its docstring.

    Differences: orient is 'records' by default, with lines=True; this
    produces the kind of JSON output that is most common in big-data
    applications, and which can be chunked when reading (see ``read_json()``).

    Parameters
    ----------
    df: dask.DataFrame
        Data to save
    url_path: str, list of str
        Location to write to. If a string, and there are more than one
        partitions in df, should include a glob character to expand into a
        set of file names, or provide a ``name_function=`` parameter.
        Supports protocol specifications such as ``"s3://"``.
    encoding, errors:
        The text encoding to implement, e.g., "utf-8" and how to respond
        to errors in the conversion (see ``str.encode()``).
    orient, lines, kwargs
        passed to pandas; if not specified, lines=True when orient='records',
        False otherwise.
    storage_options: dict
        Passed to backend file-system implementation
    compute: bool
        If true, immediately executes. If False, returns a set of delayed
        objects, which can be computed at a later time.
    encoding, errors:
        Text conversion, ``see str.encode()``
    compression : string or None
        String like 'gzip' or 'xz'.
    R   s;   Line-delimited JSON is only available withorient="records".t   orientt   linest   wtt   encodingt   errorst   name_functiont   numt   compressionN(   t   Nonet
   ValueErrorR   t   popt   npartitionst   zipt
   to_delayedt   daskt   delayedt   write_json_partitiont   computet   path(   t   dft   url_pathR   R   t   storage_optionsR   R
   R   R   t   kwargst   outfilest   outfilet   dt   partst   f(    (    s5   lib/python2.7/site-packages/dask/dataframe/io/json.pyt   to_json
   s&    %
	@c         C@  s#   |  } |  j  | |  Wd  QXd  S(   N(   R#   (   R   t   openfileR   R"   (    (    s5   lib/python2.7/site-packages/dask/dataframe/io/json.pyR   G   s    	i   t   inferc
      
   K@  s­  d d l  j } | d k r* | d k } n  | d k rK | rK t d   n  | rs | d k sd | rs t d   n  | p| i  } | rAt |  d d | d | d	 | | \ } } t t j j |   } |	 d k rė t	 | | | |
  }	 n  t
 |	  }	 g  | D]* } t j t	  | | | |
 d
 |	 ^ qž } | j | d
 |	 St |  d d | d | d	 | | } g  | D]$ } t j t  | | | |
  ^ ql} | j | d
 |	 Sd S(   s_  Create a dataframe from a set of JSON files

    This utilises ``pandas.read_json()``, and most parameters are
    passed through - see its docstring.

    Differences: orient is 'records' by default, with lines=True; this
    is appropriate for line-delimited "JSON-lines" data, the kind of JSON output
    that is most common in big-data scenarios, and which can be chunked when
    reading (see ``read_json()``). All other options require blocksize=None,
    i.e., one partition per input file.

    Parameters
    ----------
    url_path: str, list of str
        Location to read from. If a string, can include a glob character to
        find a set of file names.
        Supports protocol specifications such as ``"s3://"``.
    encoding, errors:
        The text encoding to implement, e.g., "utf-8" and how to respond
        to errors in the conversion (see ``str.encode()``).
    orient, lines, kwargs
        passed to pandas; if not specified, lines=True when orient='records',
        False otherwise.
    storage_options: dict
        Passed to backend file-system implementation
    blocksize: None or int
        If None, files are not blocked, and you get one partition per input
        file. If int, which can only be used for line-delimited JSON files,
        each partition will be approximately this size in bytes, to the nearest
        newline character.
    sample: int
        Number of bytes to pre-load, to provide an empty dataframe structure
        to any blocks wihout data. Only relevant is using blocksize.
    encoding, errors:
        Text conversion, ``see bytes.decode()``
    compression : string or None
        String like 'gzip' or 'xz'.
    $META

    Returns
    -------
    dask.DataFrame

    Examples
    --------
    Load single file

    >>> dd.read_json('myfile.1.json')  # doctest: +SKIP

    Load multiple files

    >>> dd.read_json('myfile.*.json')  # doctest: +SKIP

    >>> dd.read_json(['myfile.1.json', 'myfile.2.json'])  # doctest: +SKIP

    Load large line-delimited JSON files using partitions of approx
    256MB size

    >> dd.read_json('data/file*.csv', blocksize=2**28)
    i    NR   s;   Line-delimited JSON is only available withorient="records".sS   JSON file chunking only allowed for JSON-linesinput (orient='records', lines=True).s   
t	   blocksizet   sampleR   t   metat   rtR
   R   (   t   dask.dataframet	   dataframeR   R   R   t   listR   t   coret   flattent   read_json_chunkR   R   t   from_delayedR   t   read_json_file(   R   R   R   R   R&   R'   R
   R   R   R(   R   t   ddt   firstt   chunkst   chunkR!   t   filesR"   (    (    s5   lib/python2.7/site-packages/dask/dataframe/io/json.pyt	   read_jsonL   s0    @	4.c         C@  sg   t  j |  j | |   } | j d  t j | d d d t | } | d  k	 r_ | j r_ | S| Sd  S(   Ni    R   R   R   (	   t   iot   StringIOt   decodet   seekt   pdR7   t   TrueR   t   empty(   R5   R
   R   R   R(   t   sR   (    (    s5   lib/python2.7/site-packages/dask/dataframe/io/json.pyR/   Ŗ   s    c         C@  s/   |  # }  t  j |  d | d | | SWd  QXd  S(   NR   R   (   R<   R7   (   R"   R   R   R   (    (    s5   lib/python2.7/site-packages/dask/dataframe/io/json.pyR1   “   s    	i   (   t
   __future__R    R8   t   pandasR<   t
   dask.bytesR   R   R   t   utilsR   R   R   R=   R#   R   R7   R/   R1   (    (    (    s5   lib/python2.7/site-packages/dask/dataframe/io/json.pyt   <module>   s   		;		[
