ó
\c           @   sü   d  Z  d d l m Z m Z m Z d d l m Z m Z d d l Z d d l	 Z
 d d l Z d d l m Z d d l m Z d d l m Z d d	 l m Z d
 d l m Z d
 d l m Z e d d d d d d  Z e j e  Z d e e d  Z d S(   s:  California housing dataset.

The original database is available from StatLib

    http://lib.stat.cmu.edu/datasets/

The data contains 20,640 observations on 9 variables.

This dataset contains the average house value as target variable
and the following input variables (features): average income,
housing average age, average rooms, average bedrooms, population,
average occupation, latitude, and longitude in that order.

References
----------

Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,
Statistics and Probability Letters, 33 (1997) 291-297.

i˙˙˙˙(   t   dirnamet   existst   join(   t   makedirst   removeNi   (   t   get_data_home(   t   _fetch_remote(   t   _pkl_filepath(   t   RemoteFileMetadatai   (   t   Bunch(   t   _joblibt   filenames   cal_housing.tgzt   urls.   https://ndownloader.figshare.com/files/5976036t   checksumt@   aaa5c9a6afe2225cc2aed2723682ae403280c4a3695a2ddda4ffb5d8215ea681c         C   s  t  d |   }  t |   s( t |   n  t |  d  } t |  s| sX t d   n  t j d j t j	 |    t
 t d |  } t j d d d |  r } t j | j d	  d
 d } d d d d d d d d d g	 } | d d  | f } t j | | d d Wd QXt |  n t j |  } d d d d d d d d g } | d d  d f | d d  d d  f }	 }
 |
 d d  d f c |
 d d  d f :<|
 d d  d f c |
 d d  d f :<|
 d d  d f |
 d d  d f |
 d d  d f <|	 d }	 t t  } t t | d  d!    } | j   } Wd QX| rd|
 |	 f St d" |
 d# |	 d$ | d% |  S(&   sV  Load the California housing dataset (regression).

    ==============     ==============
    Samples total               20640
    Dimensionality                  8
    Features                     real
    Target             real 0.15 - 5.
    ==============     ==============

    Read more in the :ref:`User Guide <california_housing_dataset>`.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : optional, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.


    return_X_y : boolean, default=False.
        If True, returns ``(data.data, data.target)`` instead of a Bunch
        object.

        .. versionadded:: 0.20

    Returns
    -------
    dataset : dict-like object with the following attributes:

    dataset.data : ndarray, shape [20640, 8]
        Each row corresponding to the 8 feature values in order.

    dataset.target : numpy array of shape (20640,)
        Each value corresponds to the average house value in units of 100,000.

    dataset.feature_names : array of length 8
        Array of ordered feature names used in the dataset.

    dataset.DESCR : string
        Description of the California housing dataset.

    (data, target) : tuple if ``return_X_y`` is True

        .. versionadded:: 0.20

    Notes
    ------

    This dataset consists of 20,640 samples and 9 features.
    t	   data_homes   cal_housing.pkzs1   Data not found and `download_if_missing` is Falses&   Downloading Cal. housing from {} to {}R    t   modes   r:gzt   names"   CaliforniaHousing/cal_housing.datat	   delimitert   ,i   i   i   i   i   i   i   i   i    Nt   compresst   MedInct   HouseAget   AveRoomst	   AveBedrmst
   Populationt   AveOccupt   Latitudet	   Longitudeg     jř@t   descrs   california_housing.rstt   datat   targett   feature_namest   DESCR(   R   R   R   R   t   IOErrort   loggert   infot   formatt   ARCHIVER   R   t   tarfilet   opent   npt   loadtxtt   extractfileR
   t   dumpR   t   loadR    t   __file__R   t   readR	   (   R   t   download_if_missingt
   return_X_yt   filepatht   archive_patht   ft   cal_housingt   columns_indexR    R   R   t   module_patht   dfileR   (    (    sB   lib/python2.7/site-packages/sklearn/datasets/california_housing.pyt   fetch_california_housing1   sF    7	!3,,:

(   t   __doc__t   os.pathR    R   R   t   osR   R   R'   t   numpyR)   t   loggingt   baseR   R   R   R   t   utilsR	   R
   R&   t	   getLoggert   __name__R#   t   Nonet   Truet   FalseR9   (    (    (    sB   lib/python2.7/site-packages/sklearn/datasets/california_housing.pyt   <module>   s$   	