B
    T\                 @   sf   d dl mZ d dlZd dlZd dlmZmZ d dlZddd	Z	d
d Z
dddZdddZdd ZdS )    )absolute_importN)
open_files
read_bytesrecordsTutf-8strictc	                s   |dkr|dk}|dkr$|r$t d| d< |o6|dk d< t|df|| dd| j|d|pbi }
 fd	d
t|
|  D }|rt| dd
 |
D S |S dS )a  Write dataframe into JSON text files

    This utilises ``pandas.DataFrame.to_json()``, and most parameters are
    passed through - see its docstring.

    Differences: orient is 'records' by default, with lines=True; this
    produces the kind of JSON output that is most common in big-data
    applications, and which can be chunked when reading (see ``read_json()``).

    Parameters
    ----------
    df: dask.DataFrame
        Data to save
    url_path: str, list of str
        Location to write to. If a string, and there are more than one
        partitions in df, should include a glob character to expand into a
        set of file names, or provide a ``name_function=`` parameter.
        Supports protocol specifications such as ``"s3://"``.
    encoding, errors:
        The text encoding to implement, e.g., "utf-8" and how to respond
        to errors in the conversion (see ``str.encode()``).
    orient, lines, kwargs
        passed to pandas; if not specified, lines=True when orient='records',
        False otherwise.
    storage_options: dict
        Passed to backend file-system implementation
    compute: bool
        If true, immediately executes. If False, returns a set of delayed
        objects, which can be computed at a later time.
    encoding, errors:
        Text conversion, ``see str.encode()``
    compression : string or None
        String like 'gzip' or 'xz'.
    Nr   z;Line-delimited JSON is only available withorient="records".orientlinesZwtname_function)encodingerrorsr
   Znumcompressionc                s"   g | ]\}}t t|| qS  )daskdelayedwrite_json_partition).0Zoutfiled)kwargsr   5lib/python3.7/site-packages/dask/dataframe/io/json.py
<listcomp>=   s   zto_json.<locals>.<listcomp>c             S   s   g | ]
}|j qS r   )path)r   fr   r   r   r   A   s    )
ValueErrorr   popZnpartitionszipZ
to_delayedr   compute)dfurl_pathr   r	   storage_optionsr   r   r   r   r   Zoutfilespartsr   )r   r   to_json	   s&    %


r!   c          	   C   s"   |}| j |f| W d Q R X d S )N)r!   )r   Zopenfiler   r   r   r   r   r   F   s    r      inferc	                s   ddl m}
 dkrdkdkr0r0td|rHdks@sHtd|pNi }|rt| df|||d|\}ttj|}t  fdd	|D }n0t	| d
f |d|}fdd	|D }|

|S )aV  Create a dataframe from a set of JSON files

    This utilises ``pandas.read_json()``, and most parameters are
    passed through - see its docstring.

    Differences: orient is 'records' by default, with lines=True; this
    is appropriate for line-delimited "JSON-lines" data, the kind of JSON output
    that is most common in big-data scenarios, and which can be chunked when
    reading (see ``read_json()``). All other options require blocksize=None,
    i.e., one partition per input file.


    Parameters
    ----------
    url_path: str, list of str
        Location to read from. If a string, can include a glob character to
        find a set of file names.
        Supports protocol specifications such as ``"s3://"``.
    encoding, errors:
        The text encoding to implement, e.g., "utf-8" and how to respond
        to errors in the conversion (see ``str.encode()``).
    orient, lines, kwargs
        passed to pandas; if not specified, lines=True when orient='records',
        False otherwise.
    storage_options: dict
        Passed to backend file-system implementation
    blocksize: None or int
        If None, files are not blocked, and you get one partition per input
        file. If int, which can only be used for line-delimited JSON files,
        each partition will be approximately this size in bytes, to the nearest
        newline character.
    sample: int
        Number of bytes to pre-load, to provide an empty dataframe structure
        to any blocks wihout data. Only relevant is using blocksize.
    encoding, errors:
        Text conversion, ``see bytes.decode()``
    compression : string or None
        String like 'gzip' or 'xz'.

    Returns
    -------
    dask.DataFrame

    Examples
    --------
    Load single file

    >>> dd.read_json('myfile.1.json')  # doctest: +SKIP

    Load multiple files

    >>> dd.read_json('myfile.*.json')  # doctest: +SKIP

    >>> dd.read_json(['myfile.1.json', 'myfile.2.json'])  # doctest: +SKIP

    Load large line-delimited JSON files using partitions of approx
    256MB size

    >> dd.read_json('data/file*.csv', blocksize=2**28)
    r   Nr   z;Line-delimited JSON is only available withorient="records".zSJSON file chunking only allowed for JSON-linesinput (orient='records', lines=True).   
)	blocksizesampler   c          
      s,   g | ]$}t t| d d dqS )Nr   )meta)r   r   read_json_chunk)r   chunk)r   r   firstr   r   r   r      s   zread_json.<locals>.<listcomp>Zrt)r   r   r   c                s    g | ]}t t| qS r   )r   r   read_json_file)r   r   )r   r	   r   r   r   r      s   )Zdask.dataframeZ	dataframer   r   listr   ZcoreZflattenr(   r   Zfrom_delayed)r   r   r	   r   r%   r&   r   r   r   r   ZddZchunksr    filesr   )r   r   r*   r   r	   r   r   	read_jsonK   s*    ?

r.   c             C   sN   t | ||}|d tj|fddd|}|d k	rF|jrF|S |S d S )Nr   r   T)r   r	   )ioStringIOdecodeseekpdr.   empty)r)   r   r   r   r'   sr   r   r   r   r(      s    
r(   c          	   C   s(   | } t j| f||d|S Q R X d S )N)r   r	   )r3   r.   )r   r   r	   r   r   r   r   r+      s    r+   )r   NNTr   r   N)r   NNNr"   r   r   r#   )N)Z
__future__r   r/   Zpandasr3   Z
dask.bytesr   r   r   r!   r   r.   r(   r+   r   r   r   r   <module>   s     
;  
Y

