ó
áp7]c           @   s³   d  Z  d d l m Z m Z d d l Z d d l m Z d e f d „  ƒ  YZ	 d e f d „  ƒ  YZ
 d	 e f d
 „  ƒ  YZ d e f d „  ƒ  YZ d „  Z d e f d „  ƒ  YZ d S(   s!  
Utilities for cross validation.

taken from scikits.learn

# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>,
#         Gael Varoquaux    <gael.varoquaux@normalesup.org>
# License: BSD Style.
# $Id$

changes to code by josef-pktd:
 - docstring formatting: underlines of headers

iÿÿÿÿ(   t   ranget   lrangeN(   t   combinationst   LeaveOneOutc           B   s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   ss   
    Leave-One-Out cross validation iterator:
    Provides train/test indexes to split data in train test sets
    c         C   s   | |  _  d S(   s9  
        Leave-One-Out cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4]]
        >>> y = [1, 2]
        >>> loo = cross_val.LeaveOneOut(2)
        >>> for train_index, test_index in loo:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        ...    print X_train, X_test, y_train, y_test
        TRAIN: [False  True] TEST: [ True False]
        [[3 4]] [[1 2]] [2] [1]
        TRAIN: [ True False] TEST: [False  True]
        [[1 2]] [[3 4]] [1] [2]
        N(   t   n(   t   selfR   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyt   __init__   s    c         c   s`   |  j  } xP t | ƒ D]B } t j | d t j ƒ} t | | <t j | ƒ } | | f Vq Wd  S(   Nt   dtype(   R   R    t   npt   zerost   boolt   Truet   logical_not(   R   R   t   it
   test_indext   train_index(    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyt   __iter__8   s    	
c         C   s    d |  j  j |  j  j |  j f S(   Ns   %s.%s(n=%i)(   t	   __class__t
   __module__t   __name__R   (   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyt   __repr__A   s    	(   R   R   t   __doc__R   R   R   (    (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR      s   			t	   LeavePOutc           B   s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   sr   
    Leave-P-Out cross validation iterator:
    Provides train/test indexes to split data in train test sets

    c         C   s   | |  _  | |  _ d S(   sV  
        Leave-P-Out cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements
        p: int
            Size test sets

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]]
        >>> y = [1, 2, 3, 4]
        >>> lpo = cross_val.LeavePOut(4, 2)
        >>> for train_index, test_index in lpo:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        TRAIN: [False False  True  True] TEST: [ True  True False False]
        TRAIN: [False  True False  True] TEST: [ True False  True False]
        TRAIN: [False  True  True False] TEST: [ True False False  True]
        TRAIN: [ True False False  True] TEST: [False  True  True False]
        TRAIN: [ True False  True False] TEST: [False  True False  True]
        TRAIN: [ True  True False False] TEST: [False False  True  True]
        N(   R   t   p(   R   R   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   Q   s    	c         c   s   |  j  } |  j } t t | ƒ | ƒ } xS | D]K } t j | d t j ƒ} t | t j | ƒ <t j	 | ƒ } | | f Vq. Wd  S(   NR   (
   R   R   R   R   R   R	   R
   R   t   arrayR   (   R   R   R   t   combt   idxR   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   q   s    		c         C   s&   d |  j  j |  j  j |  j |  j f S(   Ns   %s.%s(n=%i, p=%i)(   R   R   R   R   R   (   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   |   s
    		(   R   R   R   R   R   R   (    (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   J   s   	 	t   KFoldc           B   s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   sm   
    K-Folds cross validation iterator:
    Provides train/test indexes to split data in train test sets
    c         C   s\   | d k s t  t d ƒ ƒ ‚ | | k  sF t  t d | | f ƒ ƒ ‚ | |  _ | |  _ d S(   s—  
        K-Folds cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements
        k: int
            number of folds

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4], [1, 2], [3, 4]]
        >>> y = [1, 2, 3, 4]
        >>> kf = cross_val.KFold(4, k=2)
        >>> for train_index, test_index in kf:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        TRAIN: [False False  True  True] TEST: [ True  True False False]
        TRAIN: [ True  True False False] TEST: [False False  True  True]

        Notes
        -----
        All the folds have size trunc(n/k), the last one has the complementary
        i    s   cannot have k below 1s    cannot have k=%d greater than %dN(   t   AssertionErrort
   ValueErrorR   t   k(   R   R   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   Œ   s    (	c         c   s²   |  j  } |  j } t t j | | ƒ ƒ } x€ t | ƒ D]r } t j | d t j ƒ} | | d k  r‚ t | | | | d | +n t | | | )t j	 | ƒ } | | f Vq8 Wd  S(   NR   i   (
   R   R   t   intR   t   ceilR    R	   R
   R   R   (   R   R   R   t   jR   R   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   ®   s    		c         C   s&   d |  j  j |  j  j |  j |  j f S(   Ns   %s.%s(n=%i, k=%i)(   R   R   R   R   R   (   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   ½   s
    		(   R   R   R   R   R   R   (    (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   †   s   	"	t   LeaveOneLabelOutc           B   s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   sy   
    Leave-One-Label_Out cross-validation iterator:
    Provides train/test indexes to split data in train test sets
    c         C   s   | |  _  d S(   sö  
        Leave-One-Label_Out cross validation:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        labels : list
                List of labels

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]]
        >>> y = [1, 2, 1, 2]
        >>> labels = [1, 1, 2, 2]
        >>> lol = cross_val.LeaveOneLabelOut(labels)
        >>> for train_index, test_index in lol:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index,             test_index, X, y)
        ...    print X_train, X_test, y_train, y_test
        TRAIN: [False False  True  True] TEST: [ True  True False False]
        [[5 6]
        [7 8]] [[1 2]
        [3 4]] [1 2] [1 2]
        TRAIN: [ True  True False False] TEST: [False False  True  True]
        [[1 2]
        [3 4]] [[5 6]
        [7 8]] [1 2] [1 2]

        N(   t   labels(   R   R#   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   Í   s     c         c   s~   t  j |  j d t ƒ} x_ t  j | ƒ D]N } t  j t | ƒ d t  j ƒ} t | | | k <t  j | ƒ } | | f Vq( Wd  S(   Nt   copyR   (	   R   R   R#   R   t   uniqueR	   t   lenR
   R   (   R   R#   R   R   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   ð   s    c         C   s    d |  j  j |  j  j |  j f S(   Ns   %s.%s(labels=%s)(   R   R   R   R#   (   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   ú   s    		(   R   R   R   R   R   R   (    (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR"   Ç   s   	#	
c         G   sX   g  } xK | D]C } t  j | ƒ } | |  } | | } | j | ƒ | j | ƒ q W| S(   sx   
    For each arg return a train and test subsets defined by indexes provided
    in train_indexes and test_indexes
    (   R   t
   asanyarrayt   append(   t   train_indexest   test_indexest   argst   rett   argt	   arg_traint   arg_test(    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyt   split  s    

t
   KStepAheadc           B   s5   e  Z d  Z d d e e d „ Z d „  Z d „  Z RS(   sn   
    KStepAhead cross validation iterator:
    Provides fit/test indexes to split data in sequential sets
    i   c         C   sY   | |  _  | |  _ | d k r: t t j | d ƒ ƒ } n  | |  _ | |  _ | |  _ d S(   s?  
        KStepAhead cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements
        k : int
            number of steps ahead
        start : int
            initial size of data for fitting
        kall : boolean
            if true. all values for up to k-step ahead are included in the test index.
            If false, then only the k-th step ahead value is returnd


        Notes
        -----
        I don't think this is really useful, because it can be done with
        a very simple loop instead.
        Useful as a plugin, but it could return slices instead for faster array access.

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4]]
        >>> y = [1, 2]
        >>> loo = cross_val.LeaveOneOut(2)
        >>> for train_index, test_index in loo:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        ...    print X_train, X_test, y_train, y_test
        TRAIN: [False  True] TEST: [ True False]
        [[3 4]] [[1 2]] [2] [1]
        TRAIN: [ True False] TEST: [False  True]
        [[1 2]] [[3 4]] [1] [2]
        g      Ð?N(	   R   R   t   NoneR   R   t   trunct   startt   kallt   return_slice(   R   R   R   R4   R5   R6   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   "  s    '				c   	      c   s9  |  j  } |  j } |  j } |  j rœ xt | | | ƒ D]] } t d  | d  ƒ } |  j ro t | | | ƒ } n t | | d | | ƒ } | | f Vq8 Wn™ x– t | | | ƒ D] } t j	 | d t j
 ƒ} t | | *t j	 | d t j
 ƒ} |  j rt | | | | +n t | | | d | | +| | f Vq° Wd  S(   Ni   R   (   R   R   R4   R6   R    t   sliceR2   R5   R   R	   R
   R   (	   R   R   R   R4   R   t   train_slicet
   test_sliceR   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   R  s$    					
	c         C   s    d |  j  j |  j  j |  j f S(   Ns   %s.%s(n=%i)(   R   R   R   R   (   R   (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR   m  s    	N(   R   R   R   R2   R   R   R   R   (    (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyR1     s   0	(   R   t   statsmodels.compat.pythonR    R   t   numpyR   t	   itertoolsR   t   objectR   R   R   R"   R0   R1   (    (    (    sB   lib/python2.7/site-packages/statsmodels/sandbox/tools/cross_val.pyt   <module>   s   4<A;	