B
    x\%                 @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlmZmZ dd	 ZG d
d deZG dd deZG dd deZdddZdddZdS )z parquet compat     )LooseVersion)catch_warnings)string_types)AbstractMethodError)	DataFrame
get_option)get_filepath_or_buffer	is_s3_urlc             C   s   | dkrt d} | dkrXyt S  tk
r2   Y nX yt S  tk
rN   Y nX td| dkrhtd| dkrvt S | dkrt S dS )	z return our implementation autozio.parquet.enginez}Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
pyarrow or fastparquet is required for parquet support)pyarrowfastparquetz.engine must be one of 'pyarrow', 'fastparquet'r   r   N)r   PyArrowImplImportErrorFastParquetImpl
ValueError)engine r   0lib/python3.7/site-packages/pandas/io/parquet.py
get_engine   s$    r   c               @   s.   e Zd ZdZedd Zdd ZdddZdS )	BaseImplNc             C   sL   t | tstd| jjdkr&tdtdd | jjD }|sHtdd S )Nz+to_parquet only supports IO with DataFrames>   unicodestringz%parquet must have string column namesc             s   s    | ]}|d k	rt |tV  qd S )N)
isinstancer   ).0namer   r   r   	<genexpr>>   s   z.BaseImpl.validate_dataframe.<locals>.<genexpr>z!Index level names must be strings)r   r   r   columnsZinferred_typeallindexnames)dfZvalid_namesr   r   r   validate_dataframe2   s    
zBaseImpl.validate_dataframec             K   s   t | d S )N)r   )selfr    pathcompressionkwargsr   r   r   writeE   s    zBaseImpl.writec             K   s   t | d S )N)r   )r"   r#   r   r%   r   r   r   readH   s    zBaseImpl.read)N)__name__
__module____qualname__apistaticmethodr!   r&   r'   r   r   r   r   r   .   s   r   c               @   s(   e Zd Zdd Zd
ddZddd	ZdS )r   c             C   sR   ydd l }dd l}W n tk
r0   tdY nX t|jdk rHtd|| _d S )Nr   zpyarrow is required for parquet support

you can install via conda
conda install pyarrow -c conda-forge

or via pip
pip install -U pyarrow
z0.9.0zpyarrow >= 0.9.0 is required for parquet support

you can install via conda
conda install pyarrow -c conda-forge

or via pip
pip install -U pyarrow
)r   Zpyarrow.parquetr   r   __version__r+   )r"   r   r   r   r   __init__N   s    zPyArrowImpl.__init__snappymsNc             K   s   |  | t|dd\}}}}|d kr,i }	nd|i}	| jjj|f|	}
|d k	rp| jjj|
|f|||d| n| jjj|
|f||d| d S )Nwb)modeZpreserve_index)r$   coerce_timestampspartition_cols)r$   r3   )r!   r   r+   ZTableZfrom_pandasparquetZwrite_to_datasetZwrite_table)r"   r    r#   r$   r3   r   r4   r%   _Zfrom_pandas_kwargstabler   r   r   r&   g   s    
zPyArrowImpl.writec             K   sX   t |\}}}}d|d< | jjj|fd|i| }|rTy|  W n   Y nX |S )NTZuse_pandas_metadatar   )r   r+   r5   Z
read_table	to_pandasclose)r"   r#   r   r%   r6   should_closeresultr   r   r   r'   |   s    zPyArrowImpl.read)r/   r0   NN)N)r(   r)   r*   r.   r&   r'   r   r   r   r   r   L   s    
r   c               @   s(   e Zd Zdd Zd	ddZd
ddZdS )r   c             C   sJ   ydd l }W n tk
r(   tdY nX t|jdk r@td|| _d S )Nr   zfastparquet is required for parquet support

you can install via conda
conda install fastparquet -c conda-forge

or via pip
pip install -U fastparquetz0.2.1zfastparquet >= 0.2.1 is required for parquet support

you can install via conda
conda install fastparquet -c conda-forge

or via pip
pip install -U fastparquet)r   r   r   r-   r+   )r"   r   r   r   r   r.      s    zFastParquetImpl.__init__r/   Nc          	   K   s   |  | d|kr$|d k	r$tdnd|kr6|d}|d k	rFd|d< t|rpt|dd\}}}}dd |d	< nt|\}}}}td
d$ | jj||f|||d| W d Q R X d S )Npartition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning dataZhiveZfile_schemer1   )r2   c             S   s   | S )Nr   )r#   r6   r   r   r   <lambda>   s    z'FastParquetImpl.write.<locals>.<lambda>	open_withT)record)r$   Zwrite_indexr<   )r!   r   popr	   r   r   r+   r&   )r"   r    r#   r$   r   r4   r%   r6   r   r   r   r&      s    


zFastParquetImpl.writec             K   sn   t |r>t|\}}}}z| jj||jjd}W d |  X nt|\}}}}| j|}|jf d|i|S )N)r>   r   )r	   r   r+   ZParquetFiles3openr9   r8   )r"   r#   r   r%   rA   r6   r:   Zparquet_filer   r   r   r'      s    zFastParquetImpl.read)r/   NN)N)r(   r)   r*   r.   r&   r'   r   r   r   r   r      s    
r   r
   r/   Nc             K   s$   t |}|j| |f|||d|S )a  
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    path : str
        File path or Root Directory path. Will be used as Root Directory path
        while writing a partitioned dataset.

        .. versionchanged:: 0.24.0

    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
        Name of the compression to use. Use ``None`` for no compression.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file. If ``None``, the
        engine's default behavior will be used.

        .. versionadded 0.24.0

    partition_cols : list, optional, default None
        Column names by which to partition the dataset
        Columns are partitioned in the order they are given

        .. versionadded:: 0.24.0

    kwargs
        Additional keyword arguments passed to the engine
    )r$   r   r4   )r   r&   )r    r#   r   r$   r   r4   r%   implr   r   r   
to_parquet   s    $rD   c             K   s   t |}|j| fd|i|S )a  
    Load a parquet object from the file path, returning a DataFrame.

    .. versionadded 0.21.0

    Parameters
    ----------
    path : string
        File path
    columns : list, default=None
        If not None, only these columns will be read from the file.

        .. versionadded 0.21.1
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    kwargs are passed to the engine

    Returns
    -------
    DataFrame
    r   )r   r'   )r#   r   r   r%   rC   r   r   r   read_parquet   s    rE   )r
   r/   NN)r
   N)__doc__Zdistutils.versionr   warningsr   Zpandas.compatr   Zpandas.errorsr   Zpandasr   r   Zpandas.io.commonr   r	   r   objectr   r   r   rD   rE   r   r   r   r   <module>   s    ?K 
(