B
    	\L              	   @   sv  d Z ddlmZmZmZ ddlmZmZmZm	Z	 ddl
Z
ddlmZ ddlZddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ e
eZeddddZeddddZeddddeddddeddddfZ eddd  Z!d5d"d#Z"d$d% Z#d6d'd(Z$dd!d)dd&e%d*d+e%d,d-fd!d&fd.d/Z&d7d0d1Z'd2dd!d)d&e%d*d+e%d,d-fd!fd3d4Z(dS )8zLabeled Faces in the Wild (LFW) dataset

This dataset is a collection of JPEG pictures of famous people collected
over the internet, all details are available on the official website:

    http://vis-www.cs.umass.edu/lfw/
    )listdirmakedirsremove)dirnamejoinexistsisdirN)LooseVersion   )get_data_home_fetch_remoteRemoteFileMetadata   )
deprecated)Bunch)Memory)b)_joblibzlfw.tgzz.https://ndownloader.figshare.com/files/5976018Z@055f7d9c632d7370e6fb4afc7468d40f970c34a80d4c6f50ffec63f5a8d536c0)filenameurlZchecksumzlfw-funneled.tgzz.https://ndownloader.figshare.com/files/5976015Z@b47c8422c8cded889dc5a13418c4bc2abbda121092b3533a83306f90d900100azpairsDevTrain.txtz.https://ndownloader.figshare.com/files/5976012Z@1d454dada7dfeca0e7eab6f65dc4e97a6312d44cf142207be28d688be92aabfazpairsDevTest.txtz.https://ndownloader.figshare.com/files/5976009Z@7cb06600ea8b2814ac26e946201cdb304296262aad67d046a16a7ec85d0ff87cz	pairs.txtz.https://ndownloader.figshare.com/files/5976006Z@ea42330c62c92989f9d7c03237ed5d591365e89b3e649747777b70e692dc1592zIThis function was deprecated in version 0.20 and will be removed in 0.22.c             C   s   | |    }||  }|S )a=  Scale back to 0-1 range in case of normalization for plotting.

    .. deprecated:: 0.20
    This function was deprecated in version 0.20 and will be removed in 0.22.


    Parameters
    ----------
    face : array_like
        The array to scale

    Returns
    -------
    array_like
        The scaled array
    )minmax)faceZscaled r   3lib/python3.7/site-packages/sklearn/datasets/lfw.py
scale_faceE   s    r   Tc       
      C   s  t | d} t| d}t|s$t| xLtD ]D}t||j}t|s*|rbtd|j t	||d q*t
d| q*W |rt|d}t}nt|d}t}t|st||j}t|s|rtd|j t	||d nt
d| d	d
l}	td| |	|dj|d t| ||fS )z0Helper function to download any missing LFW data)	data_homelfw_homezDownloading LFW metadata: %s)r   z%s is missingZlfw_funneledZlfwz!Downloading LFW data (~200MB): %sr   Nz$Decompressing the data archive to %szr:gz)path)r   r   r   r   TARGETSr   loggerinfor   r   IOErrorFUNNELED_ARCHIVEARCHIVEtarfiledebugopenZ
extractallr   )
r   funneleddownload_if_missingr   targetZtarget_filepathdata_folder_patharchiveZarchive_pathr%   r   r   r   _check_fetch_lfwb   s:    





r-   c             C   s  ddl m}m} tddtddf}|dkr2|}ntdd t||D }|\}}|j|j |jpdd }	|j|j |jpzd }
|dk	rt	|}t
||	 }	t
||
 }
t| }|stj||	|
ftjd	}ntj||	|
d
ftjd	}xt| D ]\}}|d dkrtd|d | ||}|jdkr4td| tj|| tjd	}|d }|dk	rd|||}|sv|jdd}|||df< qW |S )zInternally used to load imagesr   )imreadimresizer      Nc             s   s   | ]\}}|p|V  qd S )Nr   ).0sZdsr   r   r   	<genexpr>   s    z_load_imgs.<locals>.<genexpr>r
   )dtype   i  zLoading face #%05d / %05dzLFailed to read the image file %s, Please make sure that libjpeg is installedg     o@)Zaxis.)Zexternals._pilutilr.   r/   slicetuplezipstopstartstepfloatintlennpzerosZfloat32	enumerater    r&   ndimRuntimeErrorZasarrayZmean)
file_pathsslice_colorresizer.   r/   Zdefault_sliceZh_sliceZw_slicehwn_facesfacesi	file_pathZimgr   r   r   r   
_load_imgs   s>    

rN   Fc                s   g g  }}xxt t| D ]h}t| | t s0q fddt t D }t|}	|	|kr|dd}||g|	  || qW t|}
|
dkrtd| t	|}t
||}t||||}t|
}tjd| || ||  }}|||fS )z~Perform the actual data loading for the lfw people dataset

    This operation is meant to be cached by a joblib wrapper.
    c                s   g | ]}t  |qS r   )r   )r1   f)folder_pathr   r   
<listcomp>   s    z%_fetch_lfw_people.<locals>.<listcomp>_ r   z*min_faces_per_person=%d is too restrictive*   )sortedr   r   r   r>   replaceextend
ValueErrorr?   uniqueZsearchsortedrN   ZarangeZrandomZRandomStateZshuffle)r+   rE   rF   rG   min_faces_per_personZperson_namesrD   Zperson_namepathsZ
n_picturesrJ   target_namesr*   rK   indicesr   )rP   r   _fetch_lfw_people   s,    



r^   g      ?F      N      c          	   C   s   t | ||d\}}	td| ttjtdk r@t|ddd}
nt|ddd}
|
t}||	||||d\}}}|	t
|d	}tt}tt|d
d}| }W dQ R X |r||fS t|||||dS )ar  Load the Labeled Faces in the Wild (LFW) people dataset (classification).

    Download it if necessary.

    =================   =======================
    Classes                                5749
    Samples total                         13233
    Dimensionality                         5828
    Features            real, between 0 and 255
    =================   =======================

    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    funneled : boolean, optional, default: True
        Download and use the funneled variant of the dataset.

    resize : float, optional, default 0.5
        Ratio used to resize the each face picture.

    min_faces_per_person : int, optional, default None
        The extracted dataset will only retain pictures of people that have at
        least `min_faces_per_person` different pictures.

    color : boolean, optional, default False
        Keep the 3 RGB channels instead of averaging them to a single
        gray level channel. If color is True the shape of the data has
        one more dimension than the shape with color = False.

    slice_ : optional
        Provide a custom 2D slice (height, width) to extract the
        'interesting' part of the jpeg files and avoid use statistical
        correlation from the background

    download_if_missing : optional, True by default
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    return_X_y : boolean, default=False.
        If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch
        object. See below for more information about the `dataset.data` and
        `dataset.target` object.

        .. versionadded:: 0.20

    Returns
    -------
    dataset : dict-like object with the following attributes:

    dataset.data : numpy array of shape (13233, 2914)
        Each row corresponds to a ravelled face image of original size 62 x 47
        pixels. Changing the ``slice_`` or resize parameters will change the
        shape of the output.

    dataset.images : numpy array of shape (13233, 62, 47)
        Each row is a face image corresponding to one of the 5749 people in
        the dataset. Changing the ``slice_`` or resize parameters will change
        the shape of the output.

    dataset.target : numpy array of shape (13233,)
        Labels associated to each face image. Those labels range from 0-5748
        and correspond to the person IDs.

    dataset.DESCR : string
        Description of the Labeled Faces in the Wild (LFW) dataset.

    (data, target) : tuple if ``return_X_y`` is True

        .. versionadded:: 0.20

    )r   r(   r)   z Loading LFW people faces from %sz0.12   r   )cachedircompressverbose)locationre   rf   )rG   rZ   rF   rE   descrzlfw.rstN)dataZimagesr*   r\   DESCR)r-   r    r&   r	   r   __version__r   cacher^   reshaper>   r   __file__r'   r   readr   )r   r(   rG   rZ   rF   rE   r)   Z
return_X_yr   r+   m	load_funcrK   r*   r\   Xmodule_pathrst_filefdescrr   r   r   fetch_lfw_people   s(    Q
rw   c          
   C   s  t | d}dd |D }W dQ R X dd |D }t|}tj|tjd}	t }
x0t|D ]"\}}t|dkrd|	|< |d	 t|d d f|d	 t|d
 d ff}nZt|dkrd	|	|< |d	 t|d d f|d
 t|d d ff}ntd|d |f xxt|D ]l\}\}}yt||}W n& t	k
rP   t|t
|d}Y nX ttt|}t||| }|
| qW q\W t|
|||}t|j}|d	}|d	d
 |d	|d
  ||_||	tddgfS )z}Perform the actual data loading for the LFW pairs dataset

    This operation is meant to be cached by a joblib wrapper.
    rbc             S   s   g | ]}|  td qS )	)stripsplitr   )r1   Zlnr   r   r   rQ   t  s    z$_fetch_lfw_pairs.<locals>.<listcomp>Nc             S   s   g | ]}t |d kr|qS )r   )r>   )r1   Zslr   r   r   rQ   u  s    )r4   r5   r
   r   r      zinvalid line %d: %rzUTF-8zDifferent personszSame person)r'   r>   r?   r@   r=   listrA   rX   r   	TypeErrorstrrU   r   appendrN   shapepopinsertZarray)index_file_pathr+   rE   rF   rG   Z
index_fileZsplit_linesZ
pair_specsZn_pairsr*   rD   rL   Z
componentsZpairjnameidxZperson_folder	filenamesrM   pairsr   rJ   r   r   r   _fetch_lfw_pairsk  s>    

r   trainc          	   C   s   t |||d\}}td| | ttjtdk rBt|ddd}	nt|ddd}	|	t}
dd	d
d}| |krt	d| t
t| f t|||  }|
|||||d\}}}tt}tt|dd}| }W dQ R X t|t|d||||dS )a  Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).

    Download it if necessary.

    =================   =======================
    Classes                                5749
    Samples total                         13233
    Dimensionality                         5828
    Features            real, between 0 and 255
    =================   =======================

    In the official `README.txt`_ this task is described as the
    "Restricted" task.  As I am not sure as to implement the
    "Unrestricted" variant correctly, I left it as unsupported for now.

      .. _`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt

    The original images are 250 x 250 pixels, but the default slice and resize
    arguments reduce them to 62 x 47.

    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.

    Parameters
    ----------
    subset : optional, default: 'train'
        Select the dataset to load: 'train' for the development training
        set, 'test' for the development test set, and '10_folds' for the
        official evaluation set that is meant to be used with a 10-folds
        cross validation.

    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By
        default all scikit-learn data is stored in '~/scikit_learn_data'
        subfolders.

    funneled : boolean, optional, default: True
        Download and use the funneled variant of the dataset.

    resize : float, optional, default 0.5
        Ratio used to resize the each face picture.

    color : boolean, optional, default False
        Keep the 3 RGB channels instead of averaging them to a single
        gray level channel. If color is True the shape of the data has
        one more dimension than the shape with color = False.

    slice_ : optional
        Provide a custom 2D slice (height, width) to extract the
        'interesting' part of the jpeg files and avoid use statistical
        correlation from the background

    download_if_missing : optional, True by default
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    The data is returned as a Bunch object with the following attributes:

    data : numpy array of shape (2200, 5828). Shape depends on ``subset``.
        Each row corresponds to 2 ravel'd face images of original size 62 x 47
        pixels. Changing the ``slice_``, ``resize`` or ``subset`` parameters
        will change the shape of the output.

    pairs : numpy array of shape (2200, 2, 62, 47). Shape depends on ``subset``
        Each row has 2 face images corresponding to same or different person
        from the dataset containing 5749 people. Changing the ``slice_``,
        ``resize`` or ``subset`` parameters will change the shape of the
        output.

    target : numpy array of shape (2200,). Shape depends on ``subset``.
        Labels associated to each pair of images. The two label values being
        different persons or the same person.

    DESCR : string
        Description of the Labeled Faces in the Wild (LFW) dataset.

    )r   r(   r)   zLoading %s LFW pairs from %sz0.12rc   r   )rd   re   rf   )rg   re   rf   zpairsDevTrain.txtzpairsDevTest.txtz	pairs.txt)r   ZtestZ10_foldsz+subset='%s' is invalid: should be one of %r)rG   rF   rE   ri   zlfw.rstNrh   )rj   r   r*   r\   rk   )r-   r    r&   r	   r   rl   r   rm   r   rX   r}   rU   keysr   r   ro   r'   rp   r   rn   r>   )Zsubsetr   r(   rG   rF   rE   r)   r   r+   rq   rr   Zlabel_filenamesr   r   r*   r\   rt   ru   rv   r   r   r   fetch_lfw_pairs  s0    Q
r   )NTT)NFNr   )NFN))__doc__osr   r   r   os.pathr   r   r   r   ZloggingZdistutils.versionr	   Znumpyr?   baser   r   r   Zutilsr   r   Zutils._joblibr   Zexternals.sixr   r   Z	getLogger__name__r    r$   r#   r   r   r-   rN   r^   r6   rw   r   r   r   r   r   r   <module>   s\   


+= 
(u 
2