ó
‡ˆ\c        	   @   s5  d  Z  d d l m Z m Z m Z d d l m Z m Z m Z m	 Z	 d d l
 Z
 d d l m Z d d l Z d d l m Z m Z m Z d d	 l m Z d d
 l m Z d d l m Z d d l m Z d d l m Z e
 j e ƒ Z e d d d d d d ƒ Z e d d d d d d ƒ Z e d d d d d d ƒ e d d d d d d ƒ e d d d d d d ƒ f Z  e d  ƒ d! „  ƒ Z! d e# e# d" „ Z$ d# „  Z% d e& d d$ d% „ Z' d e# d& d$ e& e( d' d( ƒ e( d) d* ƒ f e# e& d+ „ Z) d e& d d, „ Z* d- d e# d& e& e( d' d( ƒ e( d) d* ƒ f e# d. „ Z+ d S(/   sÞ   Labeled Faces in the Wild (LFW) dataset

This dataset is a collection of JPEG pictures of famous people collected
over the internet, all details are available on the official website:

    http://vis-www.cs.umass.edu/lfw/
iÿÿÿÿ(   t   listdirt   makedirst   remove(   t   dirnamet   joint   existst   isdirN(   t   LooseVersioni   (   t   get_data_homet   _fetch_remotet   RemoteFileMetadatai   (   t
   deprecated(   t   Bunch(   t   Memory(   t   b(   t   _joblibt   filenames   lfw.tgzt   urls.   https://ndownloader.figshare.com/files/5976018t   checksumt@   055f7d9c632d7370e6fb4afc7468d40f970c34a80d4c6f50ffec63f5a8d536c0s   lfw-funneled.tgzs.   https://ndownloader.figshare.com/files/5976015t@   b47c8422c8cded889dc5a13418c4bc2abbda121092b3533a83306f90d900100as   pairsDevTrain.txts.   https://ndownloader.figshare.com/files/5976012t@   1d454dada7dfeca0e7eab6f65dc4e97a6312d44cf142207be28d688be92aabfas   pairsDevTest.txts.   https://ndownloader.figshare.com/files/5976009t@   7cb06600ea8b2814ac26e946201cdb304296262aad67d046a16a7ec85d0ff87cs	   pairs.txts.   https://ndownloader.figshare.com/files/5976006t@   ea42330c62c92989f9d7c03237ed5d591365e89b3e649747777b70e692dc1592sI   This function was deprecated in version 0.20 and will be removed in 0.22.c         C   s$   |  |  j  ƒ  } | | j ƒ  :} | S(   s=  Scale back to 0-1 range in case of normalization for plotting.

    .. deprecated:: 0.20
    This function was deprecated in version 0.20 and will be removed in 0.22.


    Parameters
    ----------
    face : array_like
        The array to scale

    Returns
    -------
    array_like
        The scaled array
    (   t   mint   max(   t   facet   scaled(    (    s3   lib/python2.7/site-packages/sklearn/datasets/lfw.pyt
   scale_faceE   s    c   
      C   s  t  d |  ƒ }  t |  d ƒ } t | ƒ s7 t | ƒ n  xk t D]c } t | | j ƒ } t | ƒ s> | rŽ t j d | j ƒ t	 | d | ƒq¡ t
 d | ƒ ‚ q> q> W| rÃ t | d ƒ } t } n t | d ƒ } t } t | ƒ s†t | | j ƒ } t | ƒ sA| r.t j d | j ƒ t	 | d | ƒqAt
 d | ƒ ‚ n  d	 d
 l }	 t j d | ƒ |	 j | d ƒ j d | ƒ t | ƒ n  | | f S(   s0   Helper function to download any missing LFW datat	   data_homet   lfw_homes   Downloading LFW metadata: %sR   s   %s is missingt   lfw_funneledt   lfws!   Downloading LFW data (~200MB): %siÿÿÿÿNs$   Decompressing the data archive to %ss   r:gzt   path(   R   R   R   R   t   TARGETSR   t   loggert   infoR   R	   t   IOErrort   FUNNELED_ARCHIVEt   ARCHIVEt   tarfilet   debugt   opent
   extractallR   (
   R   t   funneledt   download_if_missingR   t   targett   target_filepatht   data_folder_patht   archivet   archive_pathR(   (    (    s3   lib/python2.7/site-packages/sklearn/datasets/lfw.pyt   _check_fetch_lfwb   s:    		
c         C   s+  d d l  m } m } t d d ƒ t d d ƒ f } | d k rI | } n t d „  t | | ƒ Dƒ ƒ } | \ } } | j | j | j	 p d }	 | j | j | j	 pª d }
 | d k	 ré t
 | ƒ } t | |	 ƒ }	 t | |
 ƒ }
 n  t |  ƒ } | st j | |	 |
 f d t j ƒ} n$ t j | |	 |
 d f d t j ƒ} xá t |  ƒ D]Ó \ } } | d	 d k r†t j d
 | d | ƒ n  | | ƒ } | j d k r´t d | ƒ ‚ n  t j | | d t j ƒ} | d :} | d k	 rø| | | ƒ } n  | s| j d d ƒ } n  | | | d f <qPW| S(   s   Internally used to load imagesi   (   t   imreadt   imresizei    iú   c         s   s!   |  ] \ } } | p | Vq d  S(   N(    (   t   .0t   st   ds(    (    s3   lib/python2.7/site-packages/sklearn/datasets/lfw.pys	   <genexpr>˜   s    i   t   dtypei   iè  s   Loading face #%05d / %05dsL   Failed to read the image file %s, Please make sure that libjpeg is installedg     ào@t   axis.N(   t   externals._pilutilR4   R5   t   slicet   Nonet   tuplet   zipt   stopt   startt   stept   floatt   intt   lent   npt   zerost   float32t	   enumerateR#   R)   t   ndimt   RuntimeErrort   asarrayt   mean(   t
   file_pathst   slice_t   colort   resizeR4   R5   t   default_slicet   h_slicet   w_slicet   ht   wt   n_facest   facest   it	   file_patht   imgR   (    (    s3   lib/python2.7/site-packages/sklearn/datasets/lfw.pyt
   _load_imgs   s>    	$$
i    c         C   sr  g  g  } } x· t  t |  ƒ ƒ D]£ } t |  | ƒ } t | ƒ sG q  n  g  t  t | ƒ ƒ D] }	 t | |	 ƒ ^ qZ }
 t |
 ƒ } | | k r  | j d d ƒ } | j | g | ƒ | j |
 ƒ q  q  Wt | ƒ } | d k rò t d | ƒ ‚ n  t j	 | ƒ } t j
 | | ƒ } t | | | | ƒ } t j | ƒ } t j j d ƒ j | ƒ | | | | } } | | | f S(   s~   Perform the actual data loading for the lfw people dataset

    This operation is meant to be cached by a joblib wrapper.
    t   _t    i    s*   min_faces_per_person=%d is too restrictivei*   (   t   sortedR    R   R   RE   t   replacet   extendt
   ValueErrorRF   t   uniquet   searchsortedR\   t   aranget   randomt   RandomStatet   shuffle(   R0   RO   RP   RQ   t   min_faces_per_persont   person_namesRN   t   person_namet   folder_patht   ft   pathst
   n_picturesRW   t   target_namesR.   RX   t   indices(    (    s3   lib/python2.7/site-packages/sklearn/datasets/lfw.pyt   _fetch_lfw_peopleÊ   s,    .g      à?iF   iÃ   iN   i¬   c         C   sG  t  d |  d | d | ƒ \ } }	 t j d | ƒ t t j ƒ t d ƒ k  rj t d | d d d	 d
 ƒ }
 n t d | d d d	 d
 ƒ }
 |
 j t ƒ } | |	 d | d | d | d | ƒ\ } } } | j	 t
 | ƒ d ƒ } t t ƒ } t t | d d ƒ ƒ  } | j ƒ  } Wd QX| r"| | f St d | d | d | d | d | ƒ S(   sr  Load the Labeled Faces in the Wild (LFW) people dataset (classification).

    Download it if necessary.

    =================   =======================
    Classes                                5749
    Samples total                         13233
    Dimensionality                         5828
    Features            real, between 0 and 255
    =================   =======================

    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    funneled : boolean, optional, default: True
        Download and use the funneled variant of the dataset.

    resize : float, optional, default 0.5
        Ratio used to resize the each face picture.

    min_faces_per_person : int, optional, default None
        The extracted dataset will only retain pictures of people that have at
        least `min_faces_per_person` different pictures.

    color : boolean, optional, default False
        Keep the 3 RGB channels instead of averaging them to a single
        gray level channel. If color is True the shape of the data has
        one more dimension than the shape with color = False.

    slice_ : optional
        Provide a custom 2D slice (height, width) to extract the
        'interesting' part of the jpeg files and avoid use statistical
        correlation from the background

    download_if_missing : optional, True by default
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    return_X_y : boolean, default=False.
        If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch
        object. See below for more information about the `dataset.data` and
        `dataset.target` object.

        .. versionadded:: 0.20

    Returns
    -------
    dataset : dict-like object with the following attributes:

    dataset.data : numpy array of shape (13233, 2914)
        Each row corresponds to a ravelled face image of original size 62 x 47
        pixels. Changing the ``slice_`` or resize parameters will change the
        shape of the output.

    dataset.images : numpy array of shape (13233, 62, 47)
        Each row is a face image corresponding to one of the 5749 people in
        the dataset. Changing the ``slice_`` or resize parameters will change
        the shape of the output.

    dataset.target : numpy array of shape (13233,)
        Labels associated to each face image. Those labels range from 0-5748
        and correspond to the person IDs.

    dataset.DESCR : string
        Description of the Labeled Faces in the Wild (LFW) dataset.

    (data, target) : tuple if ``return_X_y`` is True

        .. versionadded:: 0.20

    R   R,   R-   s    Loading LFW people faces from %ss   0.12t   cachedirt   compressi   t   verbosei    t   locationRQ   Ri   RP   RO   iÿÿÿÿt   descrs   lfw.rstNt   datat   imagesR.   Rp   t   DESCR(   R3   R#   R)   R   R   t   __version__R   t   cacheRr   t   reshapeRE   R   t   __file__R*   R   t   readR   (   R   R,   RQ   Ri   RP   RO   R-   t
   return_X_yR   R0   t   mt	   load_funcRX   R.   Rp   t   Xt   module_patht   rst_filet   fdescr(    (    s3   lib/python2.7/site-packages/sklearn/datasets/lfw.pyt   fetch_lfw_peopleó   s(    Q
c      	   C   s¡  t  |  d ƒ 5 } g  | D]! } | j ƒ  j t d ƒ ƒ ^ q } Wd QXg  | D] } t | ƒ d k rM | ^ qM }	 t |	 ƒ }
 t j |
 d t j ƒ} t ƒ  } x„t	 |	 ƒ D]v\ } } t | ƒ d k rd | | <| d t | d ƒ d f | d t | d ƒ d f f } nu t | ƒ d	 k rmd | | <| d t | d ƒ d f | d t | d ƒ d f f } n t
 d
 | d | f ƒ ‚ x— t	 | ƒ D]‰ \ } \ } } y t | | ƒ } Wn) t k
 rät | t | d ƒ ƒ } n Xt t t | ƒ ƒ ƒ } t | | | ƒ } | j | ƒ q”Wq« Wt | | | | ƒ } t | j ƒ } | j d ƒ } | j d d ƒ | j d | d ƒ | | _ | | t j d d g ƒ f S(   s}   Perform the actual data loading for the LFW pairs dataset

    This operation is meant to be cached by a joblib wrapper.
    t   rbs   	Ni   R9   i   i   i    i   s   invalid line %d: %rs   UTF-8s   Different personss   Same person(   R*   t   stript   splitR   RE   RF   RG   RD   t   listRI   Rb   R   t	   TypeErrort   strR_   R    t   appendR\   t   shapet   popt   insertt   array(   t   index_file_pathR0   RO   RP   RQ   t
   index_filet   lnt   split_linest   slt
   pair_specst   n_pairsR.   RN   RY   t
   componentst   pairt   jt   namet   idxt   person_foldert	   filenamesRZ   t   pairsR   RW   (    (    s3   lib/python2.7/site-packages/sklearn/datasets/lfw.pyt   _fetch_lfw_pairsk  s>    4+	
$
$	t   trainc         C   s–  t  d | d | d | ƒ \ } } t j d |  | ƒ t t j ƒ t d ƒ k  rm t d | d d d	 d
 ƒ }	 n t d | d d d	 d
 ƒ }	 |	 j t ƒ }
 i d d 6d d 6d d 6} |  | k ré t	 d |  t
 t | j ƒ  ƒ ƒ f ƒ ‚ n  t | | |  ƒ } |
 | | d | d | d | ƒ\ } } } t t ƒ } t t | d d ƒ ƒ  } | j ƒ  } Wd QXt d | j t | ƒ d ƒ d | d | d | d | ƒ S(   sˆ  Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).

    Download it if necessary.

    =================   =======================
    Classes                                5749
    Samples total                         13233
    Dimensionality                         5828
    Features            real, between 0 and 255
    =================   =======================

    In the official `README.txt`_ this task is described as the
    "Restricted" task.  As I am not sure as to implement the
    "Unrestricted" variant correctly, I left it as unsupported for now.

      .. _`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt

    The original images are 250 x 250 pixels, but the default slice and resize
    arguments reduce them to 62 x 47.

    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.

    Parameters
    ----------
    subset : optional, default: 'train'
        Select the dataset to load: 'train' for the development training
        set, 'test' for the development test set, and '10_folds' for the
        official evaluation set that is meant to be used with a 10-folds
        cross validation.

    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By
        default all scikit-learn data is stored in '~/scikit_learn_data'
        subfolders.

    funneled : boolean, optional, default: True
        Download and use the funneled variant of the dataset.

    resize : float, optional, default 0.5
        Ratio used to resize the each face picture.

    color : boolean, optional, default False
        Keep the 3 RGB channels instead of averaging them to a single
        gray level channel. If color is True the shape of the data has
        one more dimension than the shape with color = False.

    slice_ : optional
        Provide a custom 2D slice (height, width) to extract the
        'interesting' part of the jpeg files and avoid use statistical
        correlation from the background

    download_if_missing : optional, True by default
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    The data is returned as a Bunch object with the following attributes:

    data : numpy array of shape (2200, 5828). Shape depends on ``subset``.
        Each row corresponds to 2 ravel'd face images of original size 62 x 47
        pixels. Changing the ``slice_``, ``resize`` or ``subset`` parameters
        will change the shape of the output.

    pairs : numpy array of shape (2200, 2, 62, 47). Shape depends on ``subset``
        Each row has 2 face images corresponding to same or different person
        from the dataset containing 5749 people. Changing the ``slice_``,
        ``resize`` or ``subset`` parameters will change the shape of the
        output.

    target : numpy array of shape (2200,). Shape depends on ``subset``.
        Labels associated to each pair of images. The two label values being
        different persons or the same person.

    DESCR : string
        Description of the Labeled Faces in the Wild (LFW) dataset.

    R   R,   R-   s   Loading %s LFW pairs from %ss   0.12Rs   Rt   i   Ru   i    Rv   s   pairsDevTrain.txtR£   s   pairsDevTest.txtt   tests	   pairs.txtt   10_foldss+   subset='%s' is invalid: should be one of %rRQ   RP   RO   Rw   s   lfw.rstNRx   iÿÿÿÿR¡   R.   Rp   Rz   (   R3   R#   R)   R   R   R{   R   R|   R¢   Rb   R‹   R_   t   keysR   R   R~   R*   R   R   R}   RE   (   t   subsetR   R,   RQ   RP   RO   R-   R   R0   R   R‚   t   label_filenamesR“   R¡   R.   Rp   R„   R…   R†   (    (    s3   lib/python2.7/site-packages/sklearn/datasets/lfw.pyt   fetch_lfw_pairsž  s2    Q
%$(,   t   __doc__t   osR    R   R   t   os.pathR   R   R   R   t   loggingt   distutils.versionR   t   numpyRF   t   baseR   R	   R
   t   utilsR   R   t   utils._joblibR   t   externals.sixR   R   t	   getLoggert   __name__R#   R'   R&   R"   R   R=   t   TrueR3   R\   t   FalseRr   R<   R‡   R¢   R©   (    (    (    s3   lib/python2.7/site-packages/sklearn/datasets/lfw.pyt   <module>   s\   "		+	=	(	u2