ó
‡ˆ\c           @   s±  d  Z  d d l Z d d l Z d d l Z d d l Z d d l Z d d l m Z d d l	 m
 Z
 d d l m Z m Z d d l m Z d d	 l m Z d d
 l m Z d d l m Z d d l m Z m Z m Z m Z m Z m Z m Z m Z m  Z  d d l m! Z! e j" Z" e j# Z# d Z$ e e$ ƒ Z% e% e j& ƒ Z& e% e j' ƒ Z' e% e j( ƒ Z( e% e j) ƒ Z) e% d e j* f d „  ƒ  Yƒ Z* e% d e j+ f d „  ƒ  Yƒ Z+ d d d d d d d d d d d d d d  d! d" d d d# d$ d% d& d' g Z, e j- ƒ  d( k Z. d) e j/ d* ƒ d+ k Z0 d, e1 f d- „  ƒ  YZ2 d. „  Z3 d/ „  Z4 d0 „  Z5 d1 „  Z6 d2 „  Z7 e8 d3 „ Z9 d4 d5 „ Z: d d6 „ Z< d7 „  Z= d8 „  Z> d d d9 „ Z? d: „  Z@ d S(;   s=   
The :mod:`sklearn.utils` module includes various utilities.
iÿÿÿÿN(   t   issparsei   (   t   murmurhash3_32(   t   compute_class_weightt   compute_sample_weight(   t   _joblibi   (   t   DataConversionWarning(   t	   _Sequence(   t
   deprecated(	   t   as_float_arrayt   assert_all_finitet   check_random_statet   column_or_1dt   check_arrayt   check_consistent_lengtht	   check_X_yt	   indexablet   check_symmetric(   t
   get_configs£   deprecated in version 0.20.1 to be removed in version 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib.t   Memoryc           B   s   e  Z RS(    (   t   __name__t
   __module__(    (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyR   0   s   t   Parallelc           B   s   e  Z RS(    (   R   R   (    (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyR   5   s   R   R   R	   R   R
   R   R   R   t   safe_indexingR   R   R   R   t   indices_to_maskR   t	   cpu_countt   delayedt   parallel_backendt   register_parallel_backendt   hasht   effective_n_jobst   PyPyi   t   Pi    t   Bunchc           B   s;   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   sò   Container object for datasets

    Dictionary-like object that exposes its keys as attributes.

    >>> b = Bunch(a=1, b=2)
    >>> b['b']
    2
    >>> b.b
    2
    >>> b.a = 3
    >>> b['a']
    3
    >>> b.c = 6
    >>> b['c']
    6

    c         K   s   t  t |  ƒ j | ƒ d  S(   N(   t   superR    t   __init__(   t   selft   kwargs(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyR"   [   s    c         C   s   | |  | <d  S(   N(    (   R#   t   keyt   value(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   __setattr__^   s    c         C   s
   |  j  ƒ  S(   N(   t   keys(   R#   (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   __dir__a   s    c         C   s0   y |  | SWn t  k
 r+ t | ƒ ‚ n Xd  S(   N(   t   KeyErrort   AttributeError(   R#   R%   (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   __getattr__d   s    c         C   s   d  S(   N(    (   R#   t   state(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   __setstate__j   s    	(   R   R   t   __doc__R"   R'   R)   R,   R.   (    (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyR    H   s   				c         C   sa   t  j | ƒ } t  j | j t  j ƒ r+ | St |  d ƒ r] t  j | j d ƒ } | | } n  | S(   së   Return a mask which is safe to use on X.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        Data on which to apply mask.

    mask : array
        Mask to be used on X.

    Returns
    -------
        mask
    t   toarrayi    (   t   npt   asarrayt
   issubdtypet   dtypet   signedintegert   hasattrt   aranget   shape(   t   Xt   maskt   ind(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt	   safe_maskv   s    c         C   sF   | d k r) |  t  |  | ƒ d d … f St j d d |  j d f ƒ S(   s)  
    This mask is safer than safe_mask since it returns an
    empty array, when a sparse matrix is sliced with a boolean mask
    with all False, instead of raising an unhelpful error in older
    versions of SciPy.

    See: https://github.com/scipy/scipy/issues/5361

    Also note that we can avoid doing the dot product by checking if
    the len_mask is not zero in _huber_loss_and_gradient but this
    is not going to be the bottleneck, since the number of outliers
    and non_outliers are typically non-zero and it makes the code
    tougher to follow.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        Data on which to apply mask.

    mask : array
        Mask to be used on X.

    len_mask : int
        The length of the mask.

    Returns
    -------
        mask
    i    NR8   i   (   R<   R1   t   zerosR8   (   R9   R:   t   len_mask(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   axis0_safe_slice   s    c         C   sí   t  |  d ƒ rq | j j r! | n	 | j ƒ  } y |  j | SWqé t k
 rm t j d t ƒ |  j ƒ  j | SXnx t  |  d ƒ rÎ t  |  d ƒ rÃ t  | d ƒ rÃ | j	 j
 d k rÃ |  j | d d ƒS|  | Sn g  | D] } |  | ^ qÕ Sd	 S(
   s  Return items or rows from X using indices.

    Allows simple indexing of lists or arrays.

    Parameters
    ----------
    X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series.
        Data from which to sample rows or items.
    indices : array-like of int
        Indices according to which X will be subsampled.

    Returns
    -------
    subset
        Subset of X on first axis

    Notes
    -----
    CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are
    not supported.
    t   ilocs$   Copying input dataframe for slicing.R8   t   takeR4   t   it   axisi    N(   R6   t   flagst	   writeablet   copyR@   t
   ValueErrort   warningst   warnR   R4   t   kindRA   (   R9   t   indicest   idx(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyR   ²   s    	c    
      O   s­  t  | j d d
 ƒ ƒ } | j d t ƒ } | j d d
 ƒ } | r[ t d | j ƒ  ƒ ‚ n  t |  ƒ d k rq d
 S|  d } t | d ƒ r— | j d n	 t | ƒ } | d
 k r¸ | } n, | | k rä | rä t d | | f ƒ ‚ n  t	 |  Œ  | r| j
 d | d | f ƒ} n& t j | ƒ } | j | ƒ | |  } g  |  D]$ } t | ƒ r]| j ƒ  n | ^ q?}  g  |  D] } t | | ƒ ^ qp}	 t |	 ƒ d	 k r¥|	 d S|	 Sd
 S(   s6	  Resample arrays or sparse matrices in a consistent way

    The default strategy implements one step of the bootstrapping
    procedure.

    Parameters
    ----------
    *arrays : sequence of indexable data-structures
        Indexable data-structures can be arrays, lists, dataframes or scipy
        sparse matrices with consistent first dimension.

    Other Parameters
    ----------------
    replace : boolean, True by default
        Implements resampling with replacement. If False, this will implement
        (sliced) random permutations.

    n_samples : int, None by default
        Number of samples to generate. If left to None this is
        automatically set to the first dimension of the arrays.
        If replace is False it should not be larger than the length of
        arrays.

    random_state : int, RandomState instance or None, optional (default=None)
        The seed of the pseudo random number generator to use when shuffling
        the data.  If int, random_state is the seed used by the random number
        generator; If RandomState instance, random_state is the random number
        generator; If None, the random number generator is the RandomState
        instance used by `np.random`.

    Returns
    -------
    resampled_arrays : sequence of indexable data-structures
        Sequence of resampled copies of the collections. The original arrays
        are not impacted.

    Examples
    --------
    It is possible to mix sparse and dense arrays in the same run::

      >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
      >>> y = np.array([0, 1, 2])

      >>> from scipy.sparse import coo_matrix
      >>> X_sparse = coo_matrix(X)

      >>> from sklearn.utils import resample
      >>> X, X_sparse, y = resample(X, X_sparse, y, random_state=0)
      >>> X
      array([[1., 0.],
             [2., 1.],
             [1., 0.]])

      >>> X_sparse                   # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
      <3x2 sparse matrix of type '<... 'numpy.float64'>'
          with 4 stored elements in Compressed Sparse Row format>

      >>> X_sparse.toarray()
      array([[1., 0.],
             [2., 1.],
             [1., 0.]])

      >>> y
      array([0, 1, 0])

      >>> resample(y, n_samples=2, random_state=0)
      array([0, 1])


    See also
    --------
    :func:`sklearn.utils.shuffle`
    t   random_statet   replacet	   n_sampless   Unexpected kw arguments: %ri    R8   s@   Cannot sample %d out of arrays with dim %d when replace is Falset   sizei   N(   R
   t   popt   Nonet   TrueRG   R(   t   lenR6   R8   R   t   randintR1   R7   t   shuffleR    t   tocsrR   (
   t   arrayst   optionsRM   RN   t   max_n_samplest   firstRO   RK   t   at   resampled_arrays(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   resampleß   s4    J
(	

1"c          O   s   t  | d <t |  | Ž  S(   sf  Shuffle arrays or sparse matrices in a consistent way

    This is a convenience alias to ``resample(*arrays, replace=False)`` to do
    random permutations of the collections.

    Parameters
    ----------
    *arrays : sequence of indexable data-structures
        Indexable data-structures can be arrays, lists, dataframes or scipy
        sparse matrices with consistent first dimension.

    Other Parameters
    ----------------
    random_state : int, RandomState instance or None, optional (default=None)
        The seed of the pseudo random number generator to use when shuffling
        the data.  If int, random_state is the seed used by the random number
        generator; If RandomState instance, random_state is the random number
        generator; If None, the random number generator is the RandomState
        instance used by `np.random`.

    n_samples : int, None by default
        Number of samples to generate. If left to None this is
        automatically set to the first dimension of the arrays.

    Returns
    -------
    shuffled_arrays : sequence of indexable data-structures
        Sequence of shuffled copies of the collections. The original arrays
        are not impacted.

    Examples
    --------
    It is possible to mix sparse and dense arrays in the same run::

      >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
      >>> y = np.array([0, 1, 2])

      >>> from scipy.sparse import coo_matrix
      >>> X_sparse = coo_matrix(X)

      >>> from sklearn.utils import shuffle
      >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)
      >>> X
      array([[0., 0.],
             [2., 1.],
             [1., 0.]])

      >>> X_sparse                   # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
      <3x2 sparse matrix of type '<... 'numpy.float64'>'
          with 3 stored elements in Compressed Sparse Row format>

      >>> X_sparse.toarray()
      array([[0., 0.],
             [2., 1.],
             [1., 0.]])

      >>> y
      array([2, 1, 0])

      >>> shuffle(y, n_samples=2, random_state=0)
      array([0, 1])

    See also
    --------
    :func:`sklearn.utils.resample`
    RN   (   t   FalseR^   (   RX   RY   (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyRV   O  s    C
c         C   su   t  |  d d d d g d t ƒ}  t |  ƒ rT | rB |  j ƒ  }  n  |  j d C_ n | rg |  d }  n
 |  d C}  |  S(   sd  Element wise squaring of array-likes and sparse matrices.

    Parameters
    ----------
    X : array like, matrix, sparse matrix

    copy : boolean, optional, default True
        Whether to create a copy of X and operate on it or to perform
        inplace computation (default behaviour).

    Returns
    -------
    X ** 2 : element wise square
    t   accept_sparset   csrt   csct   coot	   ensure_2di   (   R   R_   R    RF   t   data(   R9   RF   (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   safe_sqr–  s    !
i    c         c   s|   d } xR t  t |  | ƒ ƒ D]: } | | } | | |  k rC q n  t | | ƒ V| } q W| |  k  rx t | |  ƒ Vn  d S(   s”  Generator to create slices containing batch_size elements, from 0 to n.

    The last slice may contain less than batch_size elements, when batch_size
    does not divide n.

    Parameters
    ----------
    n : int
    batch_size : int
        Number of element in each batch
    min_batch_size : int, default=0
        Minimum batch size to produce.

    Yields
    ------
    slice of batch_size elements

    Examples
    --------
    >>> from sklearn.utils import gen_batches
    >>> list(gen_batches(7, 3))
    [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]
    >>> list(gen_batches(6, 3))
    [slice(0, 3, None), slice(3, 6, None)]
    >>> list(gen_batches(2, 3))
    [slice(0, 2, None)]
    >>> list(gen_batches(7, 3, min_batch_size=0))
    [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]
    >>> list(gen_batches(7, 3, min_batch_size=2))
    [slice(0, 3, None), slice(3, 7, None)]
    i    N(   t   ranget   intt   slice(   t   nt
   batch_sizet   min_batch_sizet   startt   _t   end(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   gen_batches²  s     

c         c   sµ   d } | d k  r% t  d | ƒ ‚ n  x‰ t | ƒ D]{ } |  | } | |  | k  r_ | d 7} n  | d k r2 | | } | d k	 r“ t | | ƒ } n  t | | d ƒ V| } q2 q2 Wd S(   s¹  Generator to create n_packs slices going up to n.

    Parameters
    ----------
    n : int
    n_packs : int
        Number of slices to generate.
    n_samples : int or None (default = None)
        Number of samples. Pass n_samples when the slices are to be used for
        sparse matrix indexing; slicing off-the-end raises an exception, while
        it works for NumPy arrays.

    Yields
    ------
    slice

    Examples
    --------
    >>> from sklearn.utils import gen_even_slices
    >>> list(gen_even_slices(10, 1))
    [slice(0, 10, None)]
    >>> list(gen_even_slices(10, 10))                     #doctest: +ELLIPSIS
    [slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]
    >>> list(gen_even_slices(10, 5))                      #doctest: +ELLIPSIS
    [slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]
    >>> list(gen_even_slices(10, 3))
    [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]
    i    i   s+   gen_even_slices got n_packs=%s, must be >=1N(   RG   Rg   RR   t   minRi   (   Rj   t   n_packsRO   Rm   t   pack_numt   this_nRo   (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   gen_even_slicesÝ  s    

c         C   s@   t  |  t j ƒ r t j |  ƒ St  |  t ƒ r2 |  St |  ƒ Sd S(   sp   Cast iterable x to a Sequence, avoiding a copy if possible.

    Parameters
    ----------
    x : iterable
    N(   t
   isinstanceR1   t   ndarrayR2   t   Sequencet   list(   t   x(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt
   tosequence
  s
    c         C   sJ   | t  j |  ƒ k r$ t d ƒ ‚ n  t  j | d t  j ƒ} t | |  <| S(   sR  Convert list of indices to boolean mask.

    Parameters
    ----------
    indices : list-like
        List of integers treated as indices.
    mask_length : int
        Length of boolean mask to be generated.
        This parameter must be greater than max(indices)

    Returns
    -------
    mask : 1d boolean nd-array
        Boolean array that is True where indices are present, else False.

    Examples
    --------
    >>> from sklearn.utils import indices_to_mask
    >>> indices = [1, 2 , 3, 4]
    >>> indices_to_mask(indices, 5)
    array([False,  True,  True,  True,  True])
    s-   mask_length must be greater than max(indices)R4   (   R1   t   maxRG   R=   t   boolRS   (   RK   t   mask_lengthR:   (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyR     s
    
c         C   s‹   | d k r t ƒ  d } n  t | d |  ƒ } | d k	 rN t | | ƒ } n  | d k  r‡ t j d | t j |  d	 ƒ f ƒ d } n  | S(
   s¢  Calculates how many rows can be processed within working_memory

    Parameters
    ----------
    row_bytes : int
        The expected number of bytes of memory that will be consumed
        during the processing of each row.
    max_n_rows : int, optional
        The maximum return value.
    working_memory : int or float, optional
        The number of rows to fit inside this number of MiB will be returned.
        When None (default), the value of
        ``sklearn.get_config()['working_memory']`` is used.

    Returns
    -------
    int or the value of n_samples

    Warns
    -----
    Issues a UserWarning if ``row_bytes`` exceeds ``working_memory`` MiB.
    t   working_memoryi   i   i   sO   Could not adhere to working_memory config. Currently %.0fMiB, %.0fMiB required.iìÿÿÿNi   g      °>(   RR   R   Rh   Rq   RH   RI   R1   t   ceil(   t	   row_bytest
   max_n_rowsR   t   chunk_n_rows(    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   get_chunk_n_rows9  s    		c         C   s.   t  t |  t j t j f ƒ o* t j |  ƒ ƒ S(   sç  Tests if x is NaN

    This function is meant to overcome the issue that np.isnan does not allow
    non-numerical types as input, and that np.nan is not np.float('nan').

    Parameters
    ----------
    x : any type

    Returns
    -------
    boolean

    Examples
    --------
    >>> is_scalar_nan(np.nan)
    True
    >>> is_scalar_nan(float("nan"))
    True
    >>> is_scalar_nan(None)
    False
    >>> is_scalar_nan("")
    False
    >>> is_scalar_nan([np.nan])
    False
    (   R}   Rv   t   numberst   RealR1   t   floatingt   isnan(   Rz   (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   is_scalar_nan`  s     (A   R/   R…   t   platformt   structRH   t   numpyR1   t   scipy.sparseR    t
   murmurhashR   t   class_weightR   R   t    R   t
   exceptionsR   t   fixesR   Rx   t   deprecationR   t
   validationR   R	   R
   R   R   R   R   R   R   R   R   R   t   msgt	   deprecateR   R   R   R   R   R   t   __all__t   python_implementationt   IS_PYPYt   calcsizet	   _IS_32BITt   dictR    R<   R?   R   R^   RV   RS   Rf   Rp   RR   Ru   R{   R   R„   R‰   (    (    (    s5   lib/python2.7/site-packages/sklearn/utils/__init__.pyt   <module>   sf   @				.		#	-	p	G+-		 &