ó
‡ˆ\c           @   si  d  d l  Z d  d l m Z d  d l m Z d  d l m Z d  d l m Z d  d l m Z d  d l m	 Z	 d  d l m
 Z
 d  d	 l m Z d  d
 l m Z d  d l m Z d  d l m Z d  d l m Z e
 d „  ƒ Z e
 d „  ƒ Z e
 d „  ƒ Z d „  Z d „  Z e
 d „  ƒ Z e
 d „  ƒ Z e
 d „  ƒ Z e
 d „  ƒ Z e
 d „  ƒ Z e
 d „  ƒ Z d S(   iÿÿÿÿN(   t   sparse(   t   assert_equal(   t   assert_array_equal(   t   assert_array_almost_equal(   t   assert_raises(   t   assert_false(   t   ignore_warnings(   t   Imputer(   t   Pipeline(   t   GridSearchCV(   t   tree(   t   sparse_random_matrixc   	      C   sæ  d | | f } t  } |  j j d k s: | j j d k rC t } n  t | d | d d ƒ} | j |  ƒ j |  j ƒ  ƒ } | | j | d | j	 d t
 ƒ ƒ| | | d | j	 d t
 ƒ ƒt | d | d d ƒ} | j |  j ƒ  ƒ t j | ƒ j ƒ  rt t | j |  j ƒ  j ƒ  ƒ n@ | j |  j ƒ  j ƒ  ƒ } | | | j ƒ  d | j	 d t
 ƒ ƒt | d | d d ƒ} | j t j |  ƒ ƒ | j t j |  j ƒ  ƒ ƒ } t j | ƒ rÆ| j ƒ  } n  | | j | d | j	 d t ƒ ƒ| | | d | j	 d t ƒ ƒt | d | d d ƒ} | j t j |  j ƒ  ƒ ƒ t j | ƒ j ƒ  r{t t | j t j |  j ƒ  j ƒ  ƒ ƒ ng | j t j |  j ƒ  j ƒ  ƒ ƒ } t j | ƒ r½| j ƒ  } n  | | | j ƒ  d | j	 d t ƒ ƒd S(	   s
  Utility function for testing imputation for a given strategy.

    Test:
        - along the two axes
        - with dense and sparse arrays

    Check that:
        - the statistics (mean, median, mode) are correct
        - the missing values are imputed correctlysH   Parameters: strategy = %s, missing_values = %s, axis = {0}, sparse = {1}t   ft   strategyt   axisi    t   err_msgi   N(   R   t   dtypet   kindR   R   t   fitt	   transformt   copyt   statistics_t   formatt   Falset	   transposet   npt   isnant   anyR   t
   ValueErrorR    t
   csc_matrixt   issparset   toarrayt   True(	   t   Xt   X_trueR   t
   statisticst   missing_valuesR   t	   assert_aet   imputert   X_trans(    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt   _check_statistics   sF    $	"$c          C   sŸ   t  j j d d ƒ }  t  j |  d  d  d … <xm d d d g D]\ } t d | ƒ } | j |  ƒ } t | j d ƒ | j t j	 |  ƒ ƒ } t | j d ƒ q; Wd  S(	   Ni
   i   t   meant   mediant   most_frequentR   (   i
   i   (   i
   i   (
   R   t   randomt   randnt   nanR   t   fit_transformR   t   shapeR    t
   csr_matrix(   R!   R   R&   t	   X_imputed(    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt   test_imputation_shapeT   s    c       	   C   sT  t  j t  j d d d d g t  j d d t  j d g t  j d d d d g t  j d d d d g g ƒ }  t  j d d g d d g d d g d d g g ƒ } t  j d t  j t  j d g } |  d  d  … d d d d	 g f } t  j d d g d d g d d g d d g g ƒ } t  j d t  j d g } t |  | d
 | d ƒ t | | d | d ƒ d  S(   Ni    i   i   i   i   i   i   i   i   R)   R*   (   R   t   arrayR.   R(   (   R!   t   X_imputed_meant   statistics_meant   X_for_mediant   X_imputed_mediant   statistics_median(    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt%   test_imputation_mean_median_only_zerob   s*    			"			c         O   sJ   t  |  d ƒ r |  j n	 t |  ƒ } | d k r7 t j St j |  | | Ž S(   Nt   sizei    (   t   hasattrR;   t   lenR   R.   R*   (   t   arrt   argst   kwargst   length(    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt   safe_median…   s    $c         O   sJ   t  |  d ƒ r |  j n	 t |  ƒ } | d k r7 t j St j |  | | Ž S(   NR;   i    (   R<   R;   R=   R   R.   R)   (   R>   R?   R@   RA   (    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt	   safe_mean‹   s    $c          C   sQ  t  j j d ƒ }  d } d } | | | | f } t  j | d ƒ } t  j d | d d ƒ } | d d  d … | d d  d … <d d d „  f d d d	 „  f d
 d d „  f d
 d d „  f g } xˆ| D]€\ } } }	 t  j | ƒ }
 t  j | ƒ } t  j | d ƒ } xÊt | d ƒ D]¸} | | d d k | | d | | d } t | d | | | | | | d ƒ } | d | | } | |  } t  j | | ƒ } | |  j	 t
 | ƒ ƒ |  } |	 | | | ƒ | | <t  j | | | f ƒ |
 d  d  … | f <d | k rHt  j | t  j | | | | ƒ f ƒ | d  d  … | f <n8 t  j | | t  j | | | ƒ f ƒ | d  d  … | f <t  j j | ƒ j |
 d  d  … | f ƒ t  j j | ƒ j | d  d  … | f ƒ qW| d
 k rt  j | ƒ j d d ƒ } n t  j | ƒ j d d ƒ } | d  d  … | f } t |
 | | | | ƒ qÉ Wd  S(   Ni    i
   i   i   i   R)   t   NaNc         S   s   t  t j |  | f ƒ ƒ S(   N(   RC   R   t   hstack(   t   zt   vt   p(    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt   <lambda>Ÿ   s    c         S   s   t  j | ƒ S(   N(   R   R)   (   RF   RG   RH   (    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyRI       s    R*   c         S   s   t  t j |  | f ƒ ƒ S(   N(   RB   R   RE   (   RF   RG   RH   (    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyRI   ¡   s    c         S   s   t  j | ƒ S(   N(   R   R*   (   RF   RG   RH   (    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyRI   ¢   s    R   (   R   R,   t   RandomStatet   zerost   aranget   emptyt   ranget   maxt   repeatt   permutationR=   RE   t   shuffleR   R   t   allR(   (   t   rngt   dimt   decR0   RK   t   valuest   testsR   t   test_missing_valuest   true_value_funR!   R"   t   true_statisticst   jt   nb_zerost   nb_missing_valuest	   nb_valuesRF   RH   RG   t   cols_to_keep(    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt   test_imputation_mean_median‘   sT    !,(
(	&	)-c          C   s@  t  j d t  j t  j g d t  j t  j g d d t  j g d d t  j g d d t  j g d d t  j g d d t  j g d d t  j g g ƒ j ƒ  }  t  j d d d g d d d g d d d g d d d g d d d	 g d d d
 g d d d g d d d g g ƒ j ƒ  } d d d d d	 d
 d d g } t |  | d | d ƒ d  S(   Ni    i   iûÿÿÿi   iüÿÿÿiÿÿÿÿi   g      Àg      @g      @g      Àg      à?R*   RD   (   R   R4   R.   R   R(   (   R!   R8   R9   (    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt$   test_imputation_median_special_casesØ   s*    c          C   s³   t  j d d d d g d d d d g d d d d g d d d d g g ƒ }  t  j d d d g d d d g d d d g d d d g g ƒ } t |  | d t  j d d d g d ƒ d  S(	   Niÿÿÿÿi    i   i   i   i   i   R+   (   R   R4   R(   R.   (   R!   R"   (    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt   test_imputation_most_frequentö   s    c          C   s¯   t  d t d d ƒ f d t j d d ƒ f g ƒ }  i d d d g d	 6d d
 g d 6} d } t | | d d ƒ} t | d
 d d ƒj ƒ  } t |  | ƒ } | j | | ƒ d  S(   NR&   R$   i    R
   t   random_stateR)   R*   R+   t   imputer__strategyi   t   imputer__axisid   t   densitygš™™™™™¹?(   R   R   R
   t   DecisionTreeRegressorR   R   R	   R   (   t   pipelinet
   parameterst   lR!   t   Yt   gs(    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt$   test_imputation_pipeline_grid_search  s    c          C   s´   d d  l  }  d } t | | d d ƒ} x† d d d g D]u } t d d	 d
 | ƒ } | j | ƒ |  j |  j | ƒ ƒ } t | j | j ƒ  ƒ | j | j ƒ  ƒ d d | ƒq7 Wd  S(   Niÿÿÿÿid   Rg   gš™™™™™¹?R)   R*   R+   R$   i    R   R   s9   Fail to transform the data after pickling (strategy = %s)(	   t   pickleR   R   R   t   loadst   dumpsR   R   R   (   Ro   Rk   R!   R   R&   t   imputer_pickled(    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt   test_imputation_pickle   s    c       	   C   s  t  d d d d d d ƒ}  |  j ƒ  j ƒ  } t d d d d d	 t ƒ } | j | ƒ j | ƒ } d
 | d <t t j	 | | k ƒ ƒ |  j ƒ  } t d | j
 d d d d	 t ƒ } | j | ƒ j | ƒ } d
 | j
 d <t t j	 | j
 | j
 k ƒ ƒ |  j ƒ  j ƒ  } t d d d d d	 t ƒ } | j | ƒ j | ƒ } d
 | d <t | | ƒ |  j ƒ  } t d | j
 d d d d	 t d d ƒ } | j | ƒ j | ƒ } d
 | j
 d <t | j
 | j
 ƒ |  j ƒ  j ƒ  } t d | j
 d d d d	 t d d ƒ } | j | ƒ j | ƒ } d
 | j
 d <t | j
 | j
 ƒ |  j ƒ  } t d | j
 d d d d	 t d d ƒ } | j | ƒ j | ƒ } d
 | j
 d <t t j	 | j
 | j
 k ƒ ƒ |  j ƒ  j ƒ  } t d | j
 d d d d	 t d d ƒ } | j | ƒ j | ƒ } d
 | j
 d <t t j	 | j
 | j
 k ƒ ƒ |  j ƒ  } t d d d d d	 t d d ƒ } | j | ƒ j | ƒ } t t j | ƒ ƒ d  S(   Ni   Rg   g      è?Rd   i    R$   R   R)   R   iÿÿÿÿR   i   (   i    i    (   i    i    (   R   R   R   R   R    R   R   R   R   RS   t   dataR   R   t   tocscR    R   (   t   X_origR!   R&   t   Xt(    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt   test_imputation_copy6  sZ    
"
(    t   numpyR   t   scipyR    t   sklearn.utils.testingR   R   R   R   R   R   t    sklearn.preprocessing.imputationR   t   sklearn.pipelineR   t   sklearn.model_selectionR	   t   sklearnR
   t   sklearn.random_projectionR   R(   R3   R:   RB   RC   Ra   Rb   Rc   Rn   Rs   Rx   (    (    (    sJ   lib/python2.7/site-packages/sklearn/preprocessing/tests/test_imputation.pyt   <module>   s.   A#		G