ó
î&]\c           @` s-  d  d l  m Z m Z m Z d  d l Z d  d l m Z m Z d  d l m	 Z	 m
 Z
 d  d l m Z d  d l m Z d  d l m Z m Z m Z m Z m Z m Z m Z m Z m Z m Z m Z m Z m Z m Z m Z d  d l Z d  d l  m! Z! m" Z" d	 d
 l# m$ Z$ d g Z% d e& f d „  ƒ  YZ' d S(   i    (   t   divisiont   print_functiont   absolute_importN(   t   callablet   string_types(   t   linalgt   special(   t	   logsumexp(   t   cov(   t
   atleast_2dt   reshapet   zerost   newaxist   dott   expt   pit   sqrtt   ravelt   powert
   atleast_1dt   squeezet   sumt	   transposet   ones(   t   choicet   multivariate_normali   (   t   mvnt   gaussian_kdec           B` sÅ   e  Z d  Z d d d „ Z d „  Z e Z d „  Z d „  Z d d „ Z	 d „  Z
 d d „ Z d „  Z d	 „  Z e Z d
 e _ d d „ Z d „  Z d „  Z d „  Z e d „  ƒ Z e d „  ƒ Z RS(   s´  Representation of a kernel-density estimate using Gaussian kernels.

    Kernel density estimation is a way to estimate the probability density
    function (PDF) of a random variable in a non-parametric way.
    `gaussian_kde` works for both uni-variate and multi-variate data.   It
    includes automatic bandwidth determination.  The estimation works best for
    a unimodal distribution; bimodal or multi-modal distributions tend to be
    oversmoothed.

    Parameters
    ----------
    dataset : array_like
        Datapoints to estimate from. In case of univariate data this is a 1-D
        array, otherwise a 2-D array with shape (# of dims, # of data).
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth.  This can be
        'scott', 'silverman', a scalar constant or a callable.  If a scalar,
        this will be used directly as `kde.factor`.  If a callable, it should
        take a `gaussian_kde` instance as only parameter and return a scalar.
        If None (default), 'scott' is used.  See Notes for more details.
    weights : array_like, optional
        weights of datapoints. This must be the same shape as dataset.
        If None (default), the samples are assumed to be equally weighted

    Attributes
    ----------
    dataset : ndarray
        The dataset with which `gaussian_kde` was initialized.
    d : int
        Number of dimensions.
    n : int
        Number of datapoints.
    neff : int
        Effective number of datapoints.

        .. versionadded:: 1.2.0
    factor : float
        The bandwidth factor, obtained from `kde.covariance_factor`, with which
        the covariance matrix is multiplied.
    covariance : ndarray
        The covariance matrix of `dataset`, scaled by the calculated bandwidth
        (`kde.factor`).
    inv_cov : ndarray
        The inverse of `covariance`.

    Methods
    -------
    evaluate
    __call__
    integrate_gaussian
    integrate_box_1d
    integrate_box
    integrate_kde
    pdf
    logpdf
    resample
    set_bandwidth
    covariance_factor

    Notes
    -----
    Bandwidth selection strongly influences the estimate obtained from the KDE
    (much more so than the actual shape of the kernel).  Bandwidth selection
    can be done by a "rule of thumb", by cross-validation, by "plug-in
    methods" or by other means; see [3]_, [4]_ for reviews.  `gaussian_kde`
    uses a rule of thumb, the default is Scott's Rule.

    Scott's Rule [1]_, implemented as `scotts_factor`, is::

        n**(-1./(d+4)),

    with ``n`` the number of data points and ``d`` the number of dimensions.
    In the case of unequally weighted points, `scotts_factor` becomes::

        neff**(-1./(d+4)),

    with ``neff`` the effective number of datapoints.
    Silverman's Rule [2]_, implemented as `silverman_factor`, is::

        (n * (d + 2) / 4.)**(-1. / (d + 4)).

    or in the case of unequally weighted points::

        (neff * (d + 2) / 4.)**(-1. / (d + 4)).

    Good general descriptions of kernel density estimation can be found in [1]_
    and [2]_, the mathematics for this multi-dimensional implementation can be
    found in [1]_.

    With a set of weighted samples, the effective number of datapoints ``neff``
    is defined by::

        neff = sum(weights)^2 / sum(weights^2)

    as detailed in [5]_.

    References
    ----------
    .. [1] D.W. Scott, "Multivariate Density Estimation: Theory, Practice, and
           Visualization", John Wiley & Sons, New York, Chicester, 1992.
    .. [2] B.W. Silverman, "Density Estimation for Statistics and Data
           Analysis", Vol. 26, Monographs on Statistics and Applied Probability,
           Chapman and Hall, London, 1986.
    .. [3] B.A. Turlach, "Bandwidth Selection in Kernel Density Estimation: A
           Review", CORE and Institut de Statistique, Vol. 19, pp. 1-33, 1993.
    .. [4] D.M. Bashtannyk and R.J. Hyndman, "Bandwidth selection for kernel
           conditional density estimation", Computational Statistics & Data
           Analysis, Vol. 36, pp. 279-298, 2001.
    .. [5] Gray P. G., 1969, Journal of the Royal Statistical Society.
           Series A (General), 132, 272

    Examples
    --------
    Generate some random two-dimensional data:

    >>> from scipy import stats
    >>> def measure(n):
    ...     "Measurement model, return two coupled measurements."
    ...     m1 = np.random.normal(size=n)
    ...     m2 = np.random.normal(scale=0.5, size=n)
    ...     return m1+m2, m1-m2

    >>> m1, m2 = measure(2000)
    >>> xmin = m1.min()
    >>> xmax = m1.max()
    >>> ymin = m2.min()
    >>> ymax = m2.max()

    Perform a kernel density estimate on the data:

    >>> X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    >>> positions = np.vstack([X.ravel(), Y.ravel()])
    >>> values = np.vstack([m1, m2])
    >>> kernel = stats.gaussian_kde(values)
    >>> Z = np.reshape(kernel(positions).T, X.shape)

    Plot the results:

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
    ...           extent=[xmin, xmax, ymin, ymax])
    >>> ax.plot(m1, m2, 'k.', markersize=2)
    >>> ax.set_xlim([xmin, xmax])
    >>> ax.set_ylim([ymin, ymax])
    >>> plt.show()

    c         C` sý   t  | ƒ |  _ |  j j d k s0 t d ƒ ‚ n  |  j j \ |  _ |  _ | d  k	 ré t | ƒ j	 t
 ƒ |  _ |  j t |  j ƒ _ |  j j d k r¥ t d ƒ ‚ n  t |  j ƒ |  j k rÌ t d ƒ ‚ n  d t |  j d ƒ |  _ n  |  j d | ƒ d  S(   Ni   s.   `dataset` input should have multiple elements.s*   `weights` input should be one-dimensional.s%   `weights` input should be of length ni   t	   bw_method(   R	   t   datasett   sizet
   ValueErrort   shapet   dt   nt   NoneR   t   astypet   floatt   _weightsR   t   weightst   ndimt   lent   _nefft   set_bandwidth(   t   selfR   R   R'   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   __init__À   s    c         C` sÙ  t  | ƒ } | j \ } } | |  j k rˆ | d k rf | |  j k rf t | |  j d f ƒ } d } qˆ d | |  j f } t | ƒ ‚ n  t | f d t ƒ} t j |  j	 ƒ } t
 | |  j ƒ } t
 | | ƒ } | |  j k rOxß t |  j ƒ D]Y }	 | d d … |	 t f | }
 t |
 |
 d d ƒd } | |  j |	 t | ƒ 7} qï Wnr xo t | ƒ D]a }	 | | d d … |	 t f }
 t |
 |
 d d ƒd } t t | ƒ |  j d d ƒ| |	 <q\W| |  j |  j } | S(   s  Evaluate the estimated pdf on a set of points.

        Parameters
        ----------
        points : (# of dimensions, # of points)-array
            Alternatively, a (# of dimensions,) vector can be passed in and
            treated as a single point.

        Returns
        -------
        values : (# of points,)-array
            The values at each point.

        Raises
        ------
        ValueError : if the dimensionality of the input points is different than
                     the dimensionality of the KDE.

        i   s2   points have dimension %s, dataset has dimension %st   dtypeNt   axisi    g       @(   R	   R    R!   R
   R   R   R%   R   t   choleskyt   inv_covR   R   R"   t   rangeR   R   R'   R   t   _norm_factor(   R,   t   pointsR!   t   mt   msgt   resultt	   whiteningt   scaled_datasett   scaled_pointst   it   difft   energy(    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   evaluateÒ   s0    	#(c         C` sK  t  t | ƒ ƒ } t | ƒ } | j |  j f k rI t d |  j ƒ ‚ n  | j |  j |  j f k rz t d |  j ƒ ‚ n  | d d … t f } |  j | } t j	 | ƒ } |  j
 | } t j | | ƒ } t j t j | d ƒ ƒ } t d t | j d d ƒ | } t | | d d ƒd }	 t t |	 ƒ |  j d d ƒ| }
 |
 S(   sW  
        Multiply estimated density by a multivariate Gaussian and integrate
        over the whole space.

        Parameters
        ----------
        mean : aray_like
            A 1-D array, specifying the mean of the Gaussian.
        cov : array_like
            A 2-D array, specifying the covariance matrix of the Gaussian.

        Returns
        -------
        result : scalar
            The value of the integral.

        Raises
        ------
        ValueError
            If the mean or covariance of the input Gaussian differs from
            the KDE's dimensionality.

        s   mean does not have dimension %ss%   covariance does not have dimension %sNi    i   g       @R/   (   R   R   R	   R    R!   R   R   t
   covarianceR   t
   cho_factorR   t	   cho_solvet   npt   prodt   diagonalR   R   R   R   R'   (   R,   t   meanR   t   sum_covt   sum_cov_cholR<   t   tdifft   sqrt_dett
   norm_constt   energiesR7   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   integrate_gaussian  s     "$c         C` s•   |  j  d k r t d ƒ ‚ n  t t |  j ƒ ƒ d } t | |  j | ƒ } t | |  j | ƒ } t j |  j t	 j
 | ƒ t	 j
 | ƒ ƒ } | S(   s´  
        Computes the integral of a 1D pdf between two bounds.

        Parameters
        ----------
        low : scalar
            Lower bound of integration.
        high : scalar
            Upper bound of integration.

        Returns
        -------
        value : scalar
            The result of the integral.

        Raises
        ------
        ValueError
            If the KDE is over more than one dimension.

        i   s'   integrate_box_1d() only handles 1D pdfsi    (   R!   R   R   R   R?   R   RB   R   R'   R   t   ndtr(   R,   t   lowt   hight   stdevt   normalized_lowt   normalized_hight   value(    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   integrate_box_1dA  s    c         C` sz   | d k	 r i | d 6} n i  } t j | | |  j |  j |  j |  \ } } | rv d |  j d } t j | ƒ n  | S(   sõ  Computes the integral of a pdf over a rectangular interval.

        Parameters
        ----------
        low_bounds : array_like
            A 1-D array containing the lower bounds of integration.
        high_bounds : array_like
            A 1-D array containing the upper bounds of integration.
        maxpts : int, optional
            The maximum number of points to use for integration.

        Returns
        -------
        value : scalar
            The result of the integral.

        t   maxptss5   An integral in mvn.mvnun requires more points than %siè  N(	   R#   R   t   mvnun_weightedR   R'   R?   R!   t   warningst   warn(   R,   t
   low_boundst   high_boundsRU   t
   extra_kwdsRS   t   informR6   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   integrate_boxd  s    c         C` s]  | j  |  j  k r! t d ƒ ‚ n  | j |  j k  rB | } |  } n |  } | } | j | j } t j | ƒ } d } x› t | j ƒ D]Š } | j d d … | t f } | j | }	 t j	 | |	 ƒ }
 t
 |	 |
 d d ƒd } | t
 t | ƒ | j d d ƒ| j | 7} qƒ Wt j t j | d ƒ ƒ } t d t | j d d ƒ | } | | } | S(   sŸ  
        Computes the integral of the product of this  kernel density estimate
        with another.

        Parameters
        ----------
        other : gaussian_kde instance
            The other kde.

        Returns
        -------
        value : scalar
            The result of the integral.

        Raises
        ------
        ValueError
            If the KDEs have different dimensionality.

        s$   KDEs are not the same dimensionalityg        NR/   i    g       @i   (   R!   R   R"   R?   R   R@   R2   R   R   RA   R   R   R'   RB   RC   RD   R   R   R    (   R,   t   othert   smallt   largeRF   RG   R7   R;   RE   R<   RH   RK   RI   RJ   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   integrate_kde…  s(    	3"
c         C` sŠ   | d k r t |  j ƒ } n  t t t |  j f t ƒ |  j d | ƒƒ } t	 |  j
 d | d |  j ƒ} |  j d d … | f } | | S(   s£  
        Randomly sample a dataset from the estimated pdf.

        Parameters
        ----------
        size : int, optional
            The number of samples to draw.  If not provided, then the size is
            the same as the effective number of samples in the underlying
            dataset.

        Returns
        -------
        resample : (self.d, `size`) ndarray
            The sampled dataset.

        R   t   pN(   R#   t   intt   neffR   R   R   R!   R%   R?   R   R"   R'   R   (   R,   R   t   normt   indicest   means(    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   resample·  s    c         C` s   t  |  j d |  j d ƒ S(   Ng      ð¿i   (   R   Rd   R!   (   R,   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   scotts_factorÒ  s    c         C` s*   t  |  j |  j d d d |  j d ƒ S(   Ng       @g      @g      ð¿i   (   R   Rd   R!   (   R,   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   silverman_factorÕ  s    s0  Computes the coefficient (`kde.factor`) that
        multiplies the data covariance matrix to obtain the kernel covariance
        matrix. The default is `scotts_factor`.  A subclass can overwrite this
        method to provide a different method, or set it through a call to
        `kde.set_bandwidth`.c         ` sÌ   ˆ  d k r n¯ ˆ  d k r* ˆ j ˆ _ n” ˆ  d k rE ˆ j ˆ _ ny t j ˆ  ƒ r‚ t ˆ  t ƒ r‚ d ˆ _ ‡  f d †  ˆ _ n< t	 ˆ  ƒ r¬ ˆ  ˆ _ ‡ f d †  ˆ _ n d } t
 | ƒ ‚ ˆ j ƒ  d S(   s6  Compute the estimator bandwidth with given method.

        The new bandwidth calculated after a call to `set_bandwidth` is used
        for subsequent evaluations of the estimated density.

        Parameters
        ----------
        bw_method : str, scalar or callable, optional
            The method used to calculate the estimator bandwidth.  This can be
            'scott', 'silverman', a scalar constant or a callable.  If a
            scalar, this will be used directly as `kde.factor`.  If a callable,
            it should take a `gaussian_kde` instance as only parameter and
            return a scalar.  If None (default), nothing happens; the current
            `kde.covariance_factor` method is kept.

        Notes
        -----
        .. versionadded:: 0.11

        Examples
        --------
        >>> import scipy.stats as stats
        >>> x1 = np.array([-7, -5, 1, 4, 5.])
        >>> kde = stats.gaussian_kde(x1)
        >>> xs = np.linspace(-10, 10, num=50)
        >>> y1 = kde(xs)
        >>> kde.set_bandwidth(bw_method='silverman')
        >>> y2 = kde(xs)
        >>> kde.set_bandwidth(bw_method=kde.factor / 3.)
        >>> y3 = kde(xs)

        >>> import matplotlib.pyplot as plt
        >>> fig, ax = plt.subplots()
        >>> ax.plot(x1, np.ones(x1.shape) / (4. * x1.size), 'bo',
        ...         label='Data points (rescaled)')
        >>> ax.plot(xs, y1, label='Scott (default)')
        >>> ax.plot(xs, y2, label='Silverman')
        >>> ax.plot(xs, y3, label='Const (1/3 * Silverman)')
        >>> ax.legend()
        >>> plt.show()

        t   scottt	   silvermans   use constantc           ` s   ˆ  S(   N(    (    (   R   (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   <lambda>  s    c           ` s   ˆ  j  ˆ  ƒ S(   N(   t
   _bw_method(    (   R,   (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyRm     s    sC   `bw_method` should be 'scott', 'silverman', a scalar or a callable.N(   R#   Ri   t   covariance_factorRj   RB   t   isscalart
   isinstanceR   Rn   R   R   t   _compute_covariance(   R,   R   R6   (    (   R   R,   s.   lib/python2.7/site-packages/scipy/stats/kde.pyR+   à  s    +		c      	   C` s¿   |  j  ƒ  |  _ t |  d ƒ sc t t |  j d d d t d |  j ƒƒ |  _ t	 j
 |  j ƒ |  _ n  |  j |  j d |  _ |  j |  j d |  _ t t	 j d t |  j ƒ ƒ |  j |  _ d S(   sc   Computes the covariance matrix for each Gaussian kernel using
        covariance_factor().
        t   _data_inv_covt   rowvari   t   biast   aweightsi   N(   Ro   t   factort   hasattrR	   R   R   t   FalseR'   t   _data_covarianceR   t   invRs   R?   R1   R   t   detR   R"   R3   (   R,   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyRr     s    c         C` s   |  j  | ƒ S(   s×   
        Evaluate the estimated pdf on a provided set of points.

        Notes
        -----
        This is an alias for `gaussian_kde.evaluate`.  See the ``evaluate``
        docstring for more details.

        (   R>   (   R,   t   x(    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   pdf.  s    
c         C` sñ  t  | ƒ } | j \ } } | |  j k rˆ | d k rf | |  j k rf t | |  j d f ƒ } d } qˆ d | |  j f } t | ƒ ‚ n  t | f d t ƒ} | |  j k rbt |  j | f d t ƒ} xg t |  j ƒ D]V } |  j	 d d … | t
 f | }	 t |  j |	 ƒ }
 t |	 |
 d d ƒd | | <q× Wt | d |  j | |  j |  j d d ƒ} n‹ xˆ t | ƒ D]z } |  j	 | d d … | t
 f }	 t |  j |	 ƒ }
 t |	 |
 d d ƒd } t | d |  j |  j |  j ƒ| | <qoW| S(	   sT   
        Evaluate the log of the estimated pdf on a provided set of points.
        i   s2   points have dimension %s, dataset has dimension %sR.   NR/   i    g       @t   b(   R	   R    R!   R
   R   R   R%   R"   R2   R   R   R   R1   R   R   R'   R3   (   R,   R}   R4   R!   R5   R6   R7   R=   R;   R<   RH   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   logpdf:  s4    	 "
 
"c         C` s@   y |  j  SWn. t k
 r; t |  j ƒ |  j |  _  |  j  SXd  S(   N(   R&   t   AttributeErrorR   R"   (   R,   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyR'   c  s
    c         C` sA   y |  j  SWn/ t k
 r< d t |  j d ƒ |  _  |  j  SXd  S(   Ni   i   (   R*   R   R   R'   (   R,   (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyRd   k  s
    N(   t   __name__t
   __module__t   __doc__R#   R-   R>   t   __call__RL   RT   R]   Ra   Rh   Ri   Rj   Ro   R+   Rr   R~   R€   t   propertyR'   Rd   (    (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyR   +   s&   ”	8	5	#!	2			>			)((   t
   __future__R    R   R   RW   t   scipy._lib.sixR   R   t   scipyR   R   t   scipy.specialR   t   scipy._lib._numpy_compatR   t   numpyR	   R
   R   R   R   R   R   R   R   R   R   R   R   R   R   RB   t   numpy.randomR   R   t    R   t   __all__t   objectR   (    (    (    s.   lib/python2.7/site-packages/scipy/stats/kde.pyt   <module>   s   d	