ó
‡ˆ\c           @   s  d  Z  d d l Z d d l Z d d l j Z d d l m Z d d l	 m
 Z
 d d l	 m Z d d l m Z m Z m Z d d l m Z d d	 l m Z d d
 l m Z m Z d e
 e f d „  ƒ  YZ d e
 e f d „  ƒ  YZ d e
 e f d „  ƒ  YZ d e
 e f d „  ƒ  YZ d S(   s   
The :mod:`sklearn.kernel_approximation` module implements several
approximate kernel feature maps base on Fourier transforms.
iÿÿÿÿN(   t   svdi   (   t   BaseEstimator(   t   TransformerMixin(   t   check_arrayt   check_random_statet   as_float_array(   t   safe_sparse_dot(   t   check_is_fitted(   t   pairwise_kernelst   KERNEL_PARAMSt
   RBFSamplerc           B   s5   e  Z d  Z d d d d „ Z d d „ Z d „  Z RS(   s  Approximates feature map of an RBF kernel by Monte Carlo approximation
    of its Fourier transform.

    It implements a variant of Random Kitchen Sinks.[1]

    Read more in the :ref:`User Guide <rbf_kernel_approx>`.

    Parameters
    ----------
    gamma : float
        Parameter of RBF kernel: exp(-gamma * x^2)

    n_components : int
        Number of Monte Carlo samples per original feature.
        Equals the dimensionality of the computed feature space.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Examples
    --------
    >>> from sklearn.kernel_approximation import RBFSampler
    >>> from sklearn.linear_model import SGDClassifier
    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    >>> y = [0, 0, 1, 1]
    >>> rbf_feature = RBFSampler(gamma=1, random_state=1)
    >>> X_features = rbf_feature.fit_transform(X)
    >>> clf = SGDClassifier(max_iter=5, tol=1e-3)
    >>> clf.fit(X_features, y)
    ... # doctest: +NORMALIZE_WHITESPACE
    SGDClassifier(alpha=0.0001, average=False, class_weight=None,
           early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
           l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=5,
           n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
           power_t=0.5, random_state=None, shuffle=True, tol=0.001,
           validation_fraction=0.1, verbose=0, warm_start=False)
    >>> clf.score(X_features, y)
    1.0

    Notes
    -----
    See "Random Features for Large-Scale Kernel Machines" by A. Rahimi and
    Benjamin Recht.

    [1] "Weighted Sums of Random Kitchen Sinks: Replacing
    minimization with randomization in learning" by A. Rahimi and
    Benjamin Recht.
    (http://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)
    g      ð?id   c         C   s   | |  _  | |  _ | |  _ d  S(   N(   t   gammat   n_componentst   random_state(   t   selfR   R   R   (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyt   __init__N   s    		c         C   s‰   t  | d d ƒ} t |  j ƒ } | j d } t j d |  j ƒ | j d | |  j f ƒ |  _	 | j
 d d t j d |  j ƒ|  _ |  S(   sž  Fit the model with X.

        Samples random projection according to n_features.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training data, where n_samples in the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the transformer.
        t   accept_sparset   csri   i   t   sizei    (   R   R   R   t   shapet   npt   sqrtR   t   normalR   t   random_weights_t   uniformt   pit   random_offset_(   R   t   Xt   yR   t
   n_features(    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyt   fitS   s    c         C   su   t  |  d ƒ t | d d ƒ} t | |  j ƒ } | |  j 7} t j | | ƒ | t j d ƒ t j |  j ƒ 9} | S(   sp  Apply the approximate feature map to X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            New data, where n_samples in the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        R   R   R   g       @(	   R   R   R   R   R   R   t   cosR   R   (   R   R   t
   projection(    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyt	   transformo   s    #N(   t   __name__t
   __module__t   __doc__t   NoneR   R   R!   (    (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR
      s   4t   SkewedChi2Samplerc           B   s5   e  Z d  Z d d d d „ Z d d „ Z d „  Z RS(   s’  Approximates feature map of the "skewed chi-squared" kernel by Monte
    Carlo approximation of its Fourier transform.

    Read more in the :ref:`User Guide <skewed_chi_kernel_approx>`.

    Parameters
    ----------
    skewedness : float
        "skewedness" parameter of the kernel. Needs to be cross-validated.

    n_components : int
        number of Monte Carlo samples per original feature.
        Equals the dimensionality of the computed feature space.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Examples
    --------
    >>> from sklearn.kernel_approximation import SkewedChi2Sampler
    >>> from sklearn.linear_model import SGDClassifier
    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    >>> y = [0, 0, 1, 1]
    >>> chi2_feature = SkewedChi2Sampler(skewedness=.01,
    ...                                  n_components=10,
    ...                                  random_state=0)
    >>> X_features = chi2_feature.fit_transform(X, y)
    >>> clf = SGDClassifier(max_iter=10, tol=1e-3)
    >>> clf.fit(X_features, y)
    SGDClassifier(alpha=0.0001, average=False, class_weight=None,
           early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
           l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=10,
           n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
           power_t=0.5, random_state=None, shuffle=True, tol=0.001,
           validation_fraction=0.1, verbose=0, warm_start=False)
    >>> clf.score(X_features, y)
    1.0

    References
    ----------
    See "Random Fourier Approximations for Skewed Multiplicative Histogram
    Kernels" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.

    See also
    --------
    AdditiveChi2Sampler : A different approach for approximating an additive
        variant of the chi squared kernel.

    sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.
    g      ð?id   c         C   s   | |  _  | |  _ | |  _ d  S(   N(   t
   skewednessR   R   (   R   R'   R   R   (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR   ½   s    		c         C   s   t  | ƒ } t |  j ƒ } | j d } | j d | |  j f ƒ } d t j t j t j	 t j d | ƒ ƒ |  _
 | j d d t j d |  j ƒ|  _ |  S(   s  Fit the model with X.

        Samples random projection according to n_features.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples in the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the transformer.
        i   R   g      ð?g       @i    i   (   R   R   R   R   R   R   R   R   t   logt   tanR   R   (   R   R   R   R   R   R   (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR   Â   s    1c         C   sÉ   t  |  d ƒ t | d t ƒ} t | d t ƒ} | |  j k j ƒ  rV t d ƒ ‚ n  | |  j 7} t j	 | | ƒ t
 | |  j ƒ } | |  j 7} t j | | ƒ | t j d ƒ t j |  j ƒ 9} | S(   s¨  Apply the approximate feature map to X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples in the number of samples
            and n_features is the number of features. All values of X must be
            strictly greater than "-skewedness".

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        R   t   copys3   X may not contain entries smaller than -skewedness.g       @(   R   R   t   TrueR   t   FalseR'   t   anyt
   ValueErrorR   R(   R   R   R   R   R   R   (   R   R   R    (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR!   Þ   s    #N(   R"   R#   R$   R%   R   R   R!   (    (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR&   †   s   5t   AdditiveChi2Samplerc           B   sD   e  Z d  Z d d d „ Z d d „ Z d „  Z d „  Z d „  Z RS(   s2
  Approximate feature map for additive chi2 kernel.

    Uses sampling the fourier transform of the kernel characteristic
    at regular intervals.

    Since the kernel that is to be approximated is additive, the components of
    the input vectors can be treated separately.  Each entry in the original
    space is transformed into 2*sample_steps+1 features, where sample_steps is
    a parameter of the method. Typical values of sample_steps include 1, 2 and
    3.

    Optimal choices for the sampling interval for certain data ranges can be
    computed (see the reference). The default values should be reasonable.

    Read more in the :ref:`User Guide <additive_chi_kernel_approx>`.

    Parameters
    ----------
    sample_steps : int, optional
        Gives the number of (complex) sampling points.
    sample_interval : float, optional
        Sampling interval. Must be specified when sample_steps not in {1,2,3}.

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.linear_model import SGDClassifier
    >>> from sklearn.kernel_approximation import AdditiveChi2Sampler
    >>> X, y = load_digits(return_X_y=True)
    >>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)
    >>> X_transformed = chi2sampler.fit_transform(X, y)
    >>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)
    >>> clf.fit(X_transformed, y)
    SGDClassifier(alpha=0.0001, average=False, class_weight=None,
           early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
           l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=5,
           n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
           power_t=0.5, random_state=0, shuffle=True, tol=0.001,
           validation_fraction=0.1, verbose=0, warm_start=False)
    >>> clf.score(X_transformed, y) # doctest: +ELLIPSIS
    0.9543...

    Notes
    -----
    This estimator approximates a slightly different version of the additive
    chi squared kernel then ``metric.additive_chi2`` computes.

    See also
    --------
    SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of
        the chi squared kernel.

    sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.

    sklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi
        squared kernel.

    References
    ----------
    See `"Efficient additive kernels via explicit feature maps"
    <http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>`_
    A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,
    2011
    i   c         C   s   | |  _  | |  _ d  S(   N(   t   sample_stepst   sample_interval(   R   R0   R1   (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR   ?  s    	c         C   s‘   t  | d d ƒ} |  j d
 k r |  j d k r< d |  _ q |  j d k rW d |  _ q |  j d k rr d |  _ q t d	 ƒ ‚ n |  j |  _ |  S(   sN  Set the parameters

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples in the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the transformer.
        R   R   i   gš™™™™™é?i   g      à?i   gš™™™™™Ù?sH   If sample_steps is not in [1, 2, 3], you need to provide sample_intervalN(   R   R1   R%   R0   t   sample_interval_R.   (   R   R   R   (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR   C  s    c         C   sŒ   d } t  |  d d | ƒt | d d ƒ} t j | ƒ } | rI | j n | d k  j ƒ  rj t d ƒ ‚ n  | ry |  j n |  j } | | ƒ S(   s  Apply approximate feature map to X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = (n_samples, n_features)

        Returns
        -------
        X_new : {array, sparse matrix},                shape = (n_samples, n_features * (2*sample_steps + 1))
            Whether the return value is an array of sparse matrix depends on
            the type of the input X.
        sO   %(name)s is not fitted. Call fit to set the parameters before calling transformR2   t   msgR   R   i    s"   Entries of X must be non-negative.(	   R   R   t   spt   issparset   dataR-   R.   t   _transform_sparset   _transform_dense(   R   R   R3   t   sparset   transf(    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR!   a  s    !c   
      C   s1  | d k } | | } t  j | ƒ } t  j | |  j ƒ | | <| g } |  j t  j | ƒ } d | |  j } x² t d |  j ƒ D]ž } t  j | t  j t  j | |  j ƒ ƒ }	 t  j | ƒ } |	 t  j	 | | ƒ | | <| j
 | ƒ t  j | ƒ } |	 t  j | | ƒ | | <| j
 | ƒ q‚ Wt  j | ƒ S(   Ng        i   i   (   R   t
   zeros_likeR   R2   R(   t   rangeR0   t   coshR   R   t   appendt   sint   hstack(
   R   R   t   non_zerot   X_nzt   X_stept   X_newt   log_step_nzt   step_nzt   jt	   factor_nz(    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR8   €  s"    
		!c      	   C   s™  | j  j ƒ  } | j j ƒ  } t j | j |  j ƒ } t j | | | f d | j	 d | j
 d t ƒ} | g } |  j t j | j ƒ } d | j |  j } xì t d |  j ƒ D]Ø }	 t j | t j t j |	 |  j ƒ ƒ }
 |
 t j |	 | ƒ } t j | | | f d | j	 d | j
 d t ƒ} | j | ƒ |
 t j |	 | ƒ } t j | | | f d | j	 d | j
 d t ƒ} | j | ƒ q° Wt j | ƒ S(   NR   t   dtypeR*   i   i   (   t   indicesR*   t   indptrR   R   R6   R2   R4   t
   csr_matrixR   RI   R,   R(   R<   R0   R=   R   R   R>   R?   R@   (   R   R   RJ   RK   t	   data_stepRC   RD   RE   RF   RG   RH   (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR7   š  s(    		!N(	   R"   R#   R$   R%   R   R   R!   R8   R7   (    (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR/   ý   s   @		t   Nystroemc           B   sJ   e  Z d  Z d d d d d d d d „ Z d d „ Z d „  Z d „  Z RS(   s  Approximate a kernel map using a subset of the training data.

    Constructs an approximate feature map for an arbitrary kernel
    using a subset of the data as basis.

    Read more in the :ref:`User Guide <nystroem_kernel_approx>`.

    Parameters
    ----------
    kernel : string or callable, default="rbf"
        Kernel map to be approximated. A callable should accept two arguments
        and the keyword arguments passed to this object as kernel_params, and
        should return a floating point number.

    gamma : float, default=None
        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
        and sigmoid kernels. Interpretation of the default value is left to
        the kernel; see the documentation for sklearn.metrics.pairwise.
        Ignored by other kernels.

    coef0 : float, default=None
        Zero coefficient for polynomial and sigmoid kernels.
        Ignored by other kernels.

    degree : float, default=None
        Degree of the polynomial kernel. Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Additional parameters (keyword arguments) for kernel function passed
        as callable object.

    n_components : int
        Number of features to construct.
        How many data points will be used to construct the mapping.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Attributes
    ----------
    components_ : array, shape (n_components, n_features)
        Subset of training points used to construct the feature map.

    component_indices_ : array, shape (n_components)
        Indices of ``components_`` in the training set.

    normalization_ : array, shape (n_components, n_components)
        Normalization matrix needed for embedding.
        Square root of the kernel matrix on ``components_``.

    Examples
    --------
    >>> from sklearn import datasets, svm
    >>> from sklearn.kernel_approximation import Nystroem
    >>> digits = datasets.load_digits(n_class=9)
    >>> data = digits.data / 16.
    >>> clf = svm.LinearSVC()
    >>> feature_map_nystroem = Nystroem(gamma=.2,
    ...                                 random_state=1,
    ...                                 n_components=300)
    >>> data_transformed = feature_map_nystroem.fit_transform(data)
    >>> clf.fit(data_transformed, digits.target)
    ... # doctest: +NORMALIZE_WHITESPACE
    LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
         intercept_scaling=1, loss='squared_hinge', max_iter=1000,
         multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
         verbose=0)
    >>> clf.score(data_transformed, digits.target) # doctest: +ELLIPSIS
    0.9987...

    References
    ----------
    * Williams, C.K.I. and Seeger, M.
      "Using the Nystroem method to speed up kernel machines",
      Advances in neural information processing systems 2001

    * T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou
      "Nystroem Method vs Random Fourier Features: A Theoretical and Empirical
      Comparison",
      Advances in Neural Information Processing Systems 2012


    See also
    --------
    RBFSampler : An approximation to the RBF kernel using random Fourier
                 features.

    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.
    t   rbfid   c         C   sC   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ d  S(   N(   t   kernelR   t   coef0t   degreet   kernel_paramsR   R   (   R   RP   R   RQ   RR   RS   R   R   (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR     s    						c         C   s  t  | d d ƒ} t |  j ƒ } | j d } |  j | k rS | } t j d ƒ n	 |  j } t | | ƒ } | j | ƒ } | |  } | | } t	 | d |  j
 d t |  j ƒ  }	 t |	 ƒ \ }
 } } t j | d ƒ } t j |
 t j | ƒ | ƒ |  _ | |  _ | |  _ |  S(   s  Fit estimator to data.

        Samples a subset of training points, computes kernel
        on these and computes normalization matrix.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_feature)
            Training data.
        R   R   i    sŽ   n_components > n_samples. This is not possible.
n_components was set to n_samples, which results in inefficient evaluation of the full kernel.t   metrict   filter_paramsgê-™—q=(   R   R   R   R   R   t   warningst   warnt   mint   permutationR   RP   R+   t   _get_kernel_paramsR    R   t   maximumt   dotR   t   normalization_t   components_t   component_indices_(   R   R   R   t   rndt	   n_samplesR   t   indst
   basis_indst   basist   basis_kernelt   Ut   St   V(    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR     s(    	

"		c         C   se   t  |  d ƒ t | d d ƒ} |  j ƒ  } t | |  j d |  j d t | } t j | |  j	 j
 ƒ S(   s„  Apply feature map to X.

        Computes an approximate feature map using the kernel
        between some training points and X.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Data to transform.

        Returns
        -------
        X_transformed : array, shape=(n_samples, n_components)
            Transformed data.
        R^   R   R   RT   RU   (   R   R   RZ   R   R^   RP   R+   R   R\   R]   t   T(   R   R   RS   t   embedded(    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyR!   H  s    		c         C   s·   |  j  } | d  k r i  } n  t |  j ƒ ss xƒ t |  j D]1 } t |  | ƒ d  k	 r; t |  | ƒ | | <q; q; Wn@ |  j d  k	 s  |  j d  k	 s  |  j d  k	 r³ t	 j
 d t ƒ n  | S(   Ns¶   Passing gamma, coef0 or degree to Nystroem when using a callable kernel is deprecated in version 0.19 and will raise an error in 0.21, as they are ignored. Use kernel_params instead.(   RS   R%   t   callableRP   R	   t   getattrR   RQ   RR   RV   RW   t   DeprecationWarning(   R   t   paramst   param(    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyRZ   b  s    		
N(   R"   R#   R$   R%   R   R   R!   RZ   (    (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyRN   ·  s   \	*	(   R$   RV   t   numpyR   t   scipy.sparseR9   R4   t   scipy.linalgR    t   baseR   R   t   utilsR   R   R   t   utils.extmathR   t   utils.validationR   t   metrics.pairwiseR   R	   R
   R&   R/   RN   (    (    (    s;   lib/python2.7/site-packages/sklearn/kernel_approximation.pyt   <module>   s   nwº