ó
‡ˆ\c           @   sL  d  Z  d d l Z d d l Z d d l m Z d d l m Z d d l m Z d d l m	 Z	 d d l m
 Z
 d d l m Z d d	 l m Z d d
 l m Z d d l m Z d d l m Z d d l m Z d d l m Z d d l m Z d d l m Z d d l m Z m Z d d l m Z d d l m Z d d l m Z m  Z  e d ƒ Z! e ƒ  Z" e! j# e" j$ j% ƒ Z& e" j' e& e" _' e" j$ e& e" _$ e ƒ  Z( e! j# e( j$ j% ƒ Z& e( j' e& e( _' e( j$ e& e( _$ e j) j* d ƒ d „  ƒ Z+ e j) j* d ƒ e j) j* d ƒ e j) j* d ƒ d „  ƒ ƒ ƒ Z, e j) j* d ƒ e j) j* d ƒ e j) j* d ƒ d „  ƒ ƒ ƒ Z- e j) j* d ƒ e j) j* d ƒ d „  ƒ ƒ Z. e j) j* d ƒ e j) j* d ƒ d „  ƒ ƒ Z/ e j) j* d ƒ e j) j* d ƒ e j) j* d ƒ d „  ƒ ƒ ƒ Z0 e j) j* d ƒ e j) j* d ƒ d „  ƒ ƒ Z1 e j) j* d ƒ d  „  ƒ Z2 e j) j* d ƒ e j) j* d ƒ d! „  ƒ ƒ Z3 e j) j* d ƒ e j) j* d ƒ e j) j* d ƒ d" „  ƒ ƒ ƒ Z4 d# „  Z5 e j) j* d ƒ e j) j* d ƒ d$ „  ƒ ƒ Z6 e j) j* d ƒ e j) j* d ƒ d% „  ƒ ƒ Z7 e j) j* d ƒ e j) j* d ƒ d& „  ƒ ƒ Z8 d S('   sD   
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
iÿÿÿÿN(   t   euler_gamma(   t   assert_almost_equal(   t   assert_array_equal(   t   assert_array_almost_equal(   t   assert_raises(   t   assert_raises_regex(   t   assert_warns_message(   t   assert_equal(   t   assert_greater(   t   ignore_warnings(   t   ParameterGrid(   t   IsolationForest(   t   _average_path_length(   t   train_test_split(   t   load_bostont	   load_iris(   t   check_random_state(   t   roc_auc_score(   t
   csc_matrixt
   csr_matrixi    s   ignore:threshold_ attributec       	   C   s¼   t  j d d g d d g g ƒ }  t  j d d g d d g g ƒ } t i d g d 6d d d g d 6t t g d	 6ƒ } t ƒ  8 x0 | D]( } t d
 t |  j |  ƒ j	 | ƒ q† WWd QXd S(   s6   Check Isolation Forest for various parameter settings.i    i   i   i   t   n_estimatorsg      à?g      ð?t   max_samplest	   bootstrapt   random_stateN(
   t   npt   arrayR
   t   Truet   FalseR	   R   t   rngt   fitt   predict(   t   X_traint   X_testt   gridt   params(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_iforest3   s    !!
	s   ignore:default contaminations   ignore:behaviour="old"c          C   s  t  d ƒ }  t t j d  t j d  d |  ƒ\ } } } } t i d d g d 6t t g d 6ƒ } xª t t	 g D]œ } | | ƒ } | | ƒ } x{ | D]s }	 t
 d d	 d d
 |	  j | ƒ }
 |
 j | ƒ } t
 d d	 d d
 |	  j | ƒ } | j | ƒ } t | | ƒ q“ Wqn Wd S(   s=   Check IForest for various parameter settings on sparse input.i    i2   R   g      à?g      ð?R   R   R   i
   i   N(   R   R   t   bostont   datat   targetR
   R   R   R   R   R   R   R   R   (   R   R   R    t   y_traint   y_testR!   t   sparse_formatt   X_train_sparset   X_test_sparseR"   t   sparse_classifiert   sparse_resultst   dense_classifiert   dense_results(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_iforest_sparseC   s"    c       	   C   sý  t  j }  t t t d d ƒ j |  ƒ t t t d d ƒ j |  ƒ t t t d d ƒ j |  ƒ t t d t d d ƒ j |  ƒ t j	 d ƒ  } t d d ƒ j |  ƒ Wd QXg  | D] } t | j t ƒ r± | ^ q± } t | ƒ d	 k sí t ‚ t j	 d ƒ & } t d t j d
 ƒ ƒ j |  ƒ Wd QXg  | D] } t | j t ƒ r+| ^ q+} t | ƒ d	 k sgt ‚ t t t d d ƒ j |  ƒ t t t d d ƒ j |  ƒ t t t ƒ  j |  ƒ j |  d d … d d … f ƒ d } t t | t t d d ƒ d ƒ d S(   s7   Test that it gives proper exception on deficient input.R   iÿÿÿÿg        g       @s3   max_samples will be set to n_samples for estimationiè  t   autoNi    i   t   foobarg      ø?i   s;   threshold_ attribute does not exist when behaviour != 'old't	   behaviourt   newt
   threshold_(   t   irisR%   R   t
   ValueErrorR   R   R   t   UserWarningt   pytestt   warnst   Nonet
   issubclasst   categoryt   lent   AssertionErrorR   t   int64R   R   t   AttributeErrort   getattr(   t   Xt   recordt   eacht   user_warningst   msg(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_iforest_errora   s4    	%5c          C   sb   t  j }  t ƒ  j |  ƒ } x@ | j D]5 } t | j t t j	 t j
 |  j d ƒ ƒ ƒ ƒ q% Wd S(   sD   Check max_depth recalculation when max_samples is reset to n_samplesi    N(   R6   R%   R   R   t   estimators_R   t	   max_deptht   intR   t   ceilt   log2t   shape(   RC   t   clft   est(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_recalculate_max_depth   s    	c          C   s¥   t  j }  t ƒ  j |  ƒ } t | j |  j d ƒ t d d ƒ } t t d | j |  ƒ t | j |  j d ƒ t d d ƒ j |  ƒ } t | j d |  j d ƒ d  S(   Ni    R   iô  s3   max_samples will be set to n_samples for estimationgš™™™™™Ù?(	   R6   R%   R   R   R   t   max_samples_RN   R   R8   (   RC   RO   (    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_max_samples_attribute™   s    	c    	      C   sÚ   t  d ƒ }  t t j t j d |  ƒ\ } } } } t d d d d ƒ j | ƒ } | j d d ƒ | j | ƒ } | j d d ƒ | j | ƒ } t	 | | ƒ t d d d d ƒ j | ƒ } | j | ƒ } t	 | | ƒ d S(   s   Check parallel regression.i    R   t   n_jobsi   i   i   N(
   R   R   R$   R%   R&   R   R   t
   set_paramsR   R   (	   R   R   R    R'   R(   t   ensemblet   y1t   y2t   y3(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt    test_iforest_parallel_regressionª   s    		c          C   så   t  d ƒ }  d |  j d d ƒ } t j | d | d f } | d  } |  j d d d d d	 d ƒ } t j | d | f } t j d g d
 d g d
 ƒ } t d d d |  ƒ j | ƒ } | j | ƒ } t	 t
 | | ƒ d ƒ d S(   s#   Test Isolation Forest performs welli   g333333Ó?ix   id   t   lowiüÿÿÿt   highi   t   sizei   i    i   R   R   g\Âõ(\ï?N(   i   i   (   R   t   randnR   t   r_t   uniformR   R   R   t   decision_functionR   R   (   R   RC   R   t
   X_outliersR    R(   RO   t   y_pred(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_iforest_performanceÅ   s    
!c       	   C   sö   d d g d d g d d g d d g d d g d d g d d g d d g g }  x¡ d	 d
 g D]“ } t  d d d t d | ƒ } | j |  ƒ | j |  ƒ } | j |  ƒ } t t j | d ƒ t j | d  ƒ ƒ t	 | d d g d d g ƒ q[ Wd  S(   Niþÿÿÿiÿÿÿÿi   i   i   i   iüÿÿÿi   g      Ð?R1   R3   R4   R   t   contamination(
   R   R   R   Ra   R   R   R   t   mint   maxR   (   RC   Re   RO   t   decision_funct   pred(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_iforest_worksß   s    N	'c          C   s2   t  j }  t ƒ  j |  ƒ } t | j | j ƒ d  S(   N(   R6   R%   R   R   R   RR   t   _max_samples(   RC   RO   (    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_max_samples_consistencyð   s    	c          C   sk   t  d ƒ }  t t j d  t j d  d |  ƒ\ } } } } t d d ƒ } | j | | ƒ | j | ƒ d  S(   Ni    i2   R   t   max_featuresgš™™™™™é?(   R   R   R$   R%   R&   R   R   R   (   R   R   R    R'   R(   RO   (    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt    test_iforest_subsampled_featuresù   s    c          C   sÁ   d t  j d ƒ t d d }  d t  j d ƒ t d d } t t d ƒ d d d	 ƒt t d
 ƒ |  d d	 ƒt t d ƒ | d d	 ƒt t t  j d d
 d g ƒ ƒ d |  | g d d	 ƒd  S(   Ng       @g      @g      @g     0@g     8@i   g      ð?t   decimali
   i   iç  g       @g     0Ÿ@(   R   t   logR    R   R   R   R   (   t
   result_onet
   result_two(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt    test_iforest_average_path_length  s    c          C   sð   d d g d d g d d g g }  t  d d ƒ j |  ƒ } t  ƒ  j |  ƒ } t | j d d g g ƒ | j d d g g ƒ | j ƒ t | j d d g g ƒ | j d d g g ƒ | j ƒ t | j d d g g ƒ | j d d g g ƒ ƒ d  S(   Ni   i   Re   gš™™™™™¹?g       @(   R   R   R   t   score_samplesRa   t   offset_(   R   t   clf1t   clf2(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_score_samples  s    !  c          C   ss   d g d g g }  t  ƒ  } t t d | j |  ƒ t t d | j |  ƒ t  ƒ  j |  ƒ } t t d t | d ƒ d  S(   Ng        g      ð?sI   default contamination parameter 0.1 will change in version 0.22 to "auto"sA   behaviour="old" is deprecated and will be removed in version 0.22sG   threshold_ attribute is deprecated in 0.20 and will be removed in 0.22.R5   (   R   R   t   FutureWarningR   t   DeprecationWarningRB   (   RC   RO   (    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_deprecation"  s    	c          C   sŒ   d d g d d g d d g g }  t  d d ƒ j |  ƒ } t  d d d d ƒ j |  ƒ } t | j d d g g ƒ | j d d g g ƒ ƒ d  S(	   Ni   i   R3   t   oldR4   Re   R1   g       @(   R   R   R   Ra   (   R   Rv   Rw   (    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   test_behaviour_param9  s
    !(9   t   __doc__R9   t   numpyR   t   sklearn.utils.fixesR    t   sklearn.utils.testingR   R   R   R   R   R   R   R   R	   t   sklearn.model_selectionR
   t   sklearn.ensembleR   t   sklearn.ensemble.iforestR   R   t   sklearn.datasetsR   R   t   sklearn.utilsR   t   sklearn.metricsR   t   scipy.sparseR   R   R   R6   t   permutationR&   R]   t   permR%   R$   t   markt   filterwarningsR#   R0   RH   RQ   RS   RZ   Rd   Rj   Rl   Rn   Rs   Rx   R{   R}   (    (    (    sB   lib/python2.7/site-packages/sklearn/ensemble/tests/test_iforest.pyt   <module>   st   		!!,	!!	