
\c           @   sv   d  Z  d d l Z d d l Z d d l m Z d d l m Z d d  Z i e d 6Z	 e d  d d d	   Z
 d S(
   sB   Glue code to load http://mlcomp.org data as a scikit.learn datasetiN(   t
   load_files(   t
   deprecatedc         K   s=   | d  k	 r$ t j j |  |  }  n  t |  | j d  |  S(   Nt   description(   t   Nonet   ost   patht   joinR    t   get(   t   dataset_patht   metadatat   set_t   kwargs(    (    s6   lib/python2.7/site-packages/sklearn/datasets/mlcomp.pyt   _load_document_classification   s    t   DocumentClassifications   since the http://mlcomp.org/ website will shut down in March 2017, the load_mlcomp function was deprecated in version 0.19 and will be removed in 0.21.t   rawc      	   K   s  | d k r@ y t j d } Wq@ t k
 r< t d   q@ Xn  t j j |  } t j j |  } t j j |  } t j j	 |  s t d |   n  t
 |  t j  r t j j | t |    } n d } d |  } x t j |  D] } t j j | | d  } t j j	 |  s!q n  t |  C } x9 | D]1 }	 |	 j   | k r7t j j | |  } Pq7q7WWd QXq W| d k rt d |   n  t   }
 t j j | d  } t j j	 |  st | d   n  t |  U } xK | D]C }	 d	 |	 k r|	 j d	 d
  \ } } | j   |
 | j   <qqWWd QX|
 j d d  } t j |  } | d k r{t d |   n  | | |
 d | | S(   s  Load a datasets as downloaded from http://mlcomp.org

    Read more in the :ref:`User Guide <datasets>`.

    Parameters
    ----------

    name_or_id : int or str
        The integer id or the string name metadata of the MLComp
        dataset to load

    set\_ : str, default='raw'
        Select the portion to load: 'train', 'test' or 'raw'

    mlcomp_root : str, optional
        The filesystem path to the root folder where MLComp datasets
        are stored, if mlcomp_root is None, the MLCOMP_DATASETS_HOME
        environment variable is looked up instead.

    **kwargs : domain specific kwargs to be passed to the dataset loader.

    Returns
    -------

    data : Bunch
        Dictionary-like object, the interesting attributes are:
        'filenames', the files holding the raw to learn, 'target', the
        classification labels (integer index), 'target_names',
        the meaning of the labels, and 'DESCR', the full description of the
        dataset.

    Note on the lookup process: depending on the type of name_or_id,
    will choose between integer id lookup or metadata name lookup by
    looking at the unzipped archives and metadata file.

    TODO: implement zip dataset loading too
    t   MLCOMP_DATASETS_HOMEs.   MLCOMP_DATASETS_HOME env variable is undefineds   Could not find folder: s   name: R	   Ns+   Could not find dataset with metadata line: s    is not a valid MLComp datasett   :i   t   formatt   unknows"   No loader implemented for format: R
   (   R   R   t   environt   KeyErrort
   ValueErrorR   t
   expandusert   abspatht   normpatht   existst
   isinstancet   numberst   IntegralR   t   strt   listdirt   opent   stript   dictt   splitR   t   LOADERS(   t
   name_or_idR
   t   mlcomp_rootR   R   t   expected_name_linet   datasett   metadata_filet   ft   lineR	   t   keyt   valueR   t   loader(    (    s6   lib/python2.7/site-packages/sklearn/datasets/mlcomp.pyt   load_mlcomp   sP    *
	#(   t   __doc__R   R   t   sklearn.datasets.baseR    t   sklearn.utilsR   R   R   R#   R.   (    (    (    s6   lib/python2.7/site-packages/sklearn/datasets/mlcomp.pyt   <module>   s   
	