ó
‡ˆ\c           @   sU  d  Z  d d l m Z d d l m Z m Z d d l m Z d d l Z d d l	 Z	 d d l
 Z d d l m Z d d l m Z d d	 l m Z d
 d l m Z d d l m Z d d l m Z e j d d k Z e d d d d d d ƒ Z e d d d d d d ƒ Z d Z e	 j e ƒ Z d e j d „ Z d „  Z  d „  Z! d e# d „ Z$ d S(   s½  
=============================
Species distribution dataset
=============================

This dataset represents the geographic distribution of species.
The dataset is provided by Phillips et. al. (2006).

The two species are:

 - `"Bradypus variegatus"
   <http://www.iucnredlist.org/details/3038/0>`_ ,
   the Brown-throated Sloth.

 - `"Microryzomys minutus"
   <http://www.iucnredlist.org/details/13408/0>`_ ,
   also known as the Forest Small Rice Rat, a rodent that lives in Peru,
   Colombia, Ecuador, Peru, and Venezuela.

References
----------

`"Maximum entropy modeling of species geographic distributions"
<http://rob.schapire.net/papers/ecolmod.pdf>`_ S. J. Phillips,
R. P. Anderson, R. E. Schapire - Ecological Modelling, 190:231-259, 2006.

Notes
-----

For an example of using this dataset, see
:ref:`examples/applications/plot_species_distribution_modeling.py
<sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.
iÿÿÿÿ(   t   BytesIO(   t   makedirst   remove(   t   existsNi   (   t   get_data_home(   t   _fetch_remote(   t   RemoteFileMetadatai   (   t   Bunch(   t   _pkl_filepath(   t   _joblibi    i   t   filenames   samples.zipt   urls.   https://ndownloader.figshare.com/files/5976075t   checksumt@   abb07ad284ac50d9e6d20f1c4211e0fd3c098f7f85955e89d321ee8efe37ac28s   coverages.zips.   https://ndownloader.figshare.com/files/5976078t@   4d862674d72e79d6cee77e63b98651ec7926043ba7d39dcb31329cf3f6073807s   species_coverage.pkzi   c   	      C   s•   g  t  | ƒ D] } |  j ƒ  ^ q } d „  } t g  | D] } | | ƒ ^ q8 ƒ } t j |  d | ƒ} t | d ƒ } | d k r‘ d | | <n  | S(   sj   Load a coverage file from an open file object.

    This will return a numpy array of the given dtype
    c         S   s$   |  j  ƒ  d t |  j  ƒ  d ƒ f S(   Ni    i   (   t   splitt   float(   t   t(    (    sE   lib/python2.7/site-packages/sklearn/datasets/species_distributions.pyt   <lambda>V   s    t   dtypet   NODATA_valueiñØÿÿ(   t   ranget   readlinet   dictt   npt   loadtxtt   int(	   t   Ft   header_lengthR   t   it   headert
   make_tuplet   linet   Mt   nodata(    (    sE   lib/python2.7/site-packages/sklearn/datasets/species_distributions.pyt   _load_coverageP   s    %	%c         C   sy   t  r- |  j ƒ  j d ƒ j ƒ  j d ƒ } n |  j ƒ  j ƒ  j d ƒ } t j |  d d d d d d ƒ} | | j _ | S(   sÃ   Load csv file.

    Parameters
    ----------
    F : file object
        CSV file open in byte mode.

    Returns
    -------
    rec : np.ndarray
        record array representing the data
    t   asciit   ,t   skiprowsi    t	   delimiterR   s	   a22,f4,f4(	   t   PY3_OR_LATERR   t   decodet   stripR   R   R   R   t   names(   R   R+   t   rec(    (    sE   lib/python2.7/site-packages/sklearn/datasets/species_distributions.pyt	   _load_csv`   s    '!c         C   s‚   |  j  |  j } | |  j |  j } |  j |  j } | |  j |  j } t j | | |  j ƒ } t j | | |  j ƒ } | | f S(   s%  Construct the map grid from the batch object

    Parameters
    ----------
    batch : Batch object
        The object returned by :func:`fetch_species_distributions`

    Returns
    -------
    (xgrid, ygrid) : 1-D arrays
        The grid corresponding to the values in batch.coverages
    (   t   x_left_lower_cornert	   grid_sizet   Nxt   y_left_lower_cornert   NyR   t   arange(   t   batcht   xmint   xmaxt   ymint   ymaxt   xgridt   ygrid(    (    sE   lib/python2.7/site-packages/sklearn/datasets/species_distributions.pyt   construct_gridsy   s    c         C   s-  t  |  ƒ }  t |  ƒ s% t |  ƒ n  t d d d d d d d d d	 d
 ƒ } t j } t |  t ƒ } t | ƒ s| s… t d ƒ ‚ n  t	 j
 d t j |  f ƒ t t d |  ƒ} t j | ƒ a } xW | j D]L } t | | ƒ } d | k rþ t | ƒ }	 n  d | k rÍ t | ƒ }
 qÍ qÍ WWd QXt | ƒ t	 j
 d t j |  f ƒ t t d |  ƒ} t j | ƒ o } g  } xJ | j D]? } t | | ƒ } t	 j d j | ƒ ƒ | j t | ƒ ƒ q{Wt j | d | ƒ} Wd QXt | ƒ t d | d |
 d |	 |  } t j | | d d ƒn t j | ƒ } | S(   sü	  Loader for species distribution dataset from Phillips et. al. (2006)

    Read more in the :ref:`User Guide <datasets>`.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : optional, True by default
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    --------
    The data is returned as a Bunch object with the following attributes:

    coverages : array, shape = [14, 1592, 1212]
        These represent the 14 features measured at each point of the map grid.
        The latitude/longitude values for the grid are discussed below.
        Missing data is represented by the value -9999.

    train : record array, shape = (1624,)
        The training points for the data.  Each point has three fields:

        - train['species'] is the species name
        - train['dd long'] is the longitude, in degrees
        - train['dd lat'] is the latitude, in degrees

    test : record array, shape = (620,)
        The test points for the data.  Same format as the training data.

    Nx, Ny : integers
        The number of longitudes (x) and latitudes (y) in the grid

    x_left_lower_corner, y_left_lower_corner : floats
        The (x,y) position of the lower-left corner, in degrees

    grid_size : float
        The spacing between points of the grid, in degrees

    References
    ----------

    * `"Maximum entropy modeling of species geographic distributions"
      <http://rob.schapire.net/papers/ecolmod.pdf>`_
      S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,
      190:231-259, 2006.

    Notes
    -----

    This dataset represents the geographic distribution of species.
    The dataset is provided by Phillips et. al. (2006).

    The two species are:

    - `"Bradypus variegatus"
      <http://www.iucnredlist.org/details/3038/0>`_ ,
      the Brown-throated Sloth.

    - `"Microryzomys minutus"
      <http://www.iucnredlist.org/details/13408/0>`_ ,
      also known as the Forest Small Rice Rat, a rodent that lives in Peru,
      Colombia, Ecuador, Peru, and Venezuela.

    - For an example of using this dataset with scikit-learn, see
      :ref:`examples/applications/plot_species_distribution_modeling.py
      <sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py>`.
    R.   g33333³WÀR0   i¼  R1   gfffffLÀR2   i8  R/   gš™™™™™©?s1   Data not found and `download_if_missing` is Falses&   Downloading species data from %s to %st   dirnamet   traint   testNs'   Downloading coverage data from %s to %ss    - converting {}R   t	   coveragest   compressi	   (   R   R   R   R   R   t   int16R   t   DATA_ARCHIVE_NAMEt   IOErrort   loggert   infot   SAMPLESR   R   t   loadt   filesR    R-   R   t	   COVERAGESt   debugt   formatt   appendR#   t   asarrayR   R	   t   dump(   t	   data_homet   download_if_missingt   extra_paramsR   t   archive_patht   samples_patht   Xt   ft   fhandleR=   R>   t   coverages_pathR?   t   bunch(    (    sE   lib/python2.7/site-packages/sklearn/datasets/species_distributions.pyt   fetch_species_distributions”   sT    I			
	
	(%   t   __doc__t   ioR    t   osR   R   t   os.pathR   t   syst   loggingt   numpyR   t   baseR   R   R   t   utilsR   t   sklearn.datasets.baseR   t   sklearn.utilsR	   t   version_infoR(   RF   RI   RB   t	   getLoggert   __name__RD   RA   R#   R-   R;   t   Nonet   TrueRY   (    (    (    sE   lib/python2.7/site-packages/sklearn/datasets/species_distributions.pyt   <module>!   s8   				