B
    ¥	ˆ\"  ã               @   sT   d Z ddlZddlZddlmZ ddlmZ ddd„ZdeiZedƒdd
d„ƒZ	dS )zBGlue code to load http://mlcomp.org data as a scikit.learn dataseté    N)Ú
load_files)Ú
deprecatedc             K   s*   |d k	rt j | |¡} t| | d¡f|ŽS )NZdescription)ÚosÚpathÚjoinr   Úget)Údataset_pathÚmetadataÚset_Úkwargs© r   ú6lib/python3.7/site-packages/sklearn/datasets/mlcomp.pyÚ_load_document_classification   s    r   ZDocumentClassificationz—since the http://mlcomp.org/ website will shut down in March 2017, the load_mlcomp function was deprecated in version 0.19 and will be removed in 0.21.Úrawc          
   K   sà  |dkr4yt jd }W n tk
r2   tdƒ‚Y nX t j |¡}t j |¡}t j |¡}t j |¡sptd| ƒ‚t	| t
jƒrt j |t| ƒ¡}n’d}d|  }xnt  |¡D ]`}t j ||d¡}t j |¡sÊq¨t|ƒ0}x(|D ] }	|	 ¡ |krÚt j ||¡}P qÚW W dQ R X q¨W |dkr"td| ƒ‚tƒ }
t j |d¡}t j |¡sPt|d ƒ‚t|ƒ@}x8|D ]0}	d	|	kr`|	 d	d
¡\}}| ¡ |
| ¡ < q`W W dQ R X |
 dd¡}t |¡}|dkrÊtd| ƒ‚|||
fd|i|—ŽS )aÆ  Load a datasets as downloaded from http://mlcomp.org

    Read more in the :ref:`User Guide <datasets>`.

    Parameters
    ----------

    name_or_id : int or str
        The integer id or the string name metadata of the MLComp
        dataset to load

    set\_ : str, default='raw'
        Select the portion to load: 'train', 'test' or 'raw'

    mlcomp_root : str, optional
        The filesystem path to the root folder where MLComp datasets
        are stored, if mlcomp_root is None, the MLCOMP_DATASETS_HOME
        environment variable is looked up instead.

    **kwargs : domain specific kwargs to be passed to the dataset loader.

    Returns
    -------

    data : Bunch
        Dictionary-like object, the interesting attributes are:
        'filenames', the files holding the raw to learn, 'target', the
        classification labels (integer index), 'target_names',
        the meaning of the labels, and 'DESCR', the full description of the
        dataset.

    Note on the lookup process: depending on the type of name_or_id,
    will choose between integer id lookup or metadata name lookup by
    looking at the unzipped archives and metadata file.

    TODO: implement zip dataset loading too
    NZMLCOMP_DATASETS_HOMEz.MLCOMP_DATASETS_HOME env variable is undefinedzCould not find folder: zname: r	   z+Could not find dataset with metadata line: z is not a valid MLComp datasetú:é   ÚformatZunknowz"No loader implemented for format: r
   )r   ÚenvironÚKeyErrorÚ
ValueErrorr   Ú
expanduserÚabspathÚnormpathÚexistsÚ
isinstanceÚnumbersZIntegralr   ÚstrÚlistdirÚopenÚstripÚdictÚsplitr   ÚLOADERS)Z
name_or_idr
   Zmlcomp_rootr   r   Zexpected_name_lineZdatasetZmetadata_fileÚfÚliner	   ÚkeyÚvaluer   Úloaderr   r   r   Úload_mlcomp   sP    *





 

r(   )N)r   N)
Ú__doc__r   r   Zsklearn.datasets.baser   Zsklearn.utilsr   r   r"   r(   r   r   r   r   Ú<module>   s   
