ó
áp7]c           @   s$  d  Z  d d l Z d d „ Z d d „ Z d d „ Z d d „ Z d d „ Z d d	 „ Z d d
 „ Z	 d d d „ Z
 d d d „ Z d d „ Z d „  Z d „  Z d „  Z d „  Z e d „ Z e d „ Z e d „ Z e d „ Z e e e e	 e e e e
 e e e e e e e e e e g Z d S(   s“   some measures for evaluation of prediction, tests and model selection

Created on Tue Nov 08 15:23:20 2011

Author: Josef Perktold
License: BSD-3

iÿÿÿÿNi    c         C   s9   t  j |  ƒ }  t  j | ƒ } t  j |  | d d | ƒS(   s†  mean squared error

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    mse : ndarray or float
       mean squared error along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.
    This uses ``numpy.asanyarray`` to convert the input. Whether this is the
    desired result or not depends on the array subclass, for example
    numpy matrices will silently produce an incorrect result.

    i   t   axis(   t   npt
   asanyarrayt   mean(   t   x1t   x2R    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   mse   s    c         C   s:   t  j |  ƒ }  t  j | ƒ } t  j t |  | d | ƒƒ S(   s‘  root mean squared error

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    rmse : ndarray or float
       root mean squared error along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.
    This uses ``numpy.asanyarray`` to convert the input. Whether this is the
    desired result or not depends on the array subclass, for example
    numpy matrices will silently produce an incorrect result.

    R    (   R   R   t   sqrtR   (   R   R   R    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   rmse*   s    c         C   s>   t  j |  ƒ }  t  j | ƒ } t  j t  j |  | ƒ d | ƒS(   sL  maximum absolute error

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    maxabs : ndarray or float
       maximum absolute difference along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.
    This uses ``numpy.asanyarray`` to convert the input. Whether this is the
    desired result or not depends on the array subclass.

    R    (   R   R   t   maxt   abs(   R   R   R    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   maxabsG   s    c         C   s>   t  j |  ƒ }  t  j | ƒ } t  j t  j |  | ƒ d | ƒS(   sG  mean absolute error

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    meanabs : ndarray or float
       mean absolute difference along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.
    This uses ``numpy.asanyarray`` to convert the input. Whether this is the
    desired result or not depends on the array subclass.

    R    (   R   R   R   R
   (   R   R   R    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   meanabsc   s    c         C   s>   t  j |  ƒ }  t  j | ƒ } t  j t  j |  | ƒ d | ƒS(   sM  median absolute error

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    medianabs : ndarray or float
       median absolute difference along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.
    This uses ``numpy.asanyarray`` to convert the input. Whether this is the
    desired result or not depends on the array subclass.

    R    (   R   R   t   medianR
   (   R   R   R    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt	   medianabs   s    c         C   s5   t  j |  ƒ }  t  j | ƒ } t  j |  | d | ƒS(   sA  bias, mean error

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    bias : ndarray or float
       bias, or mean difference along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.
    This uses ``numpy.asanyarray`` to convert the input. Whether this is the
    desired result or not depends on the array subclass.

    R    (   R   R   R   (   R   R   R    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   bias›   s    c         C   s5   t  j |  ƒ }  t  j | ƒ } t  j |  | d | ƒS(   sY  median bias, median error

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    medianbias : ndarray or float
       median bias, or median difference along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.
    This uses ``numpy.asanyarray`` to convert the input. Whether this is the
    desired result or not depends on the array subclass.

    R    (   R   R   R   (   R   R   R    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt
   medianbias·   s    c         C   s;   t  j |  ƒ }  t  j | ƒ } t  j |  | d | d | ƒS(   s@  variance of error

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    vare : ndarray or float
       variance of difference along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.
    This uses ``numpy.asanyarray`` to convert the input. Whether this is the
    desired result or not depends on the array subclass.

    t   ddofR    (   R   R   t   var(   R   R   R   R    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   vareÓ   s    c         C   s;   t  j |  ƒ }  t  j | ƒ } t  j |  | d | d | ƒS(   sT  standard deviation of error

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    stde : ndarray or float
       standard deviation of difference along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.
    This uses ``numpy.asanyarray`` to convert the input. Whether this is the
    desired result or not depends on the array subclass.

    R   R    (   R   R   t   std(   R   R   R   R    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   stdeï   s    c         C   sò   t  j |  ƒ }  t  j | ƒ } | d k rQ t  j |  ƒ }  t  j | ƒ } d } n  t  j |  | ƒ } |  j | } t  j | d t  j d d g ƒ ƒ j t	 ƒ } t
 d ƒ g | j } | | | <t  j | t | ƒ d | ƒ} t  j | ƒ } | S(   s‡  interquartile range of error

    rounded index, no interpolations

    this could use newer numpy function instead

    Parameters
    ----------
    x1, x2 : array_like
       The performance measure depends on the difference between these two
       arrays.
    axis : int
       axis along which the summary statistic is calculated

    Returns
    -------
    mse : ndarray or float
       mean squared error along given axis.

    Notes
    -----
    If ``x1`` and ``x2`` have different shapes, then they need to broadcast.

    This uses ``numpy.asarray`` to convert the input, in contrast to the other
    functions in this category.

    i    i   g      Ð?g      è?R    N(   R   t   asarrayt   Nonet   ravelt   sortt   shapet   roundt   arrayt   astypet   intt   slicet   ndimt   difft   tuplet   squeeze(   R   R   R    t   xdifft   nobst   idxt   slt   iqr(    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyR(     s    	/
c         C   s   d |  d | S(   s‹  Akaike information criterion

    Parameters
    ----------
    llf : float
        value of the loglikelihood
    nobs : int
        number of observations
    df_modelwc : int
        number of parameters including constant

    Returns
    -------
    aic : float
        information criterion

    References
    ----------
    http://en.wikipedia.org/wiki/Akaike_information_criterion

    g       Àg       @(    (   t   llfR%   t
   df_modelwc(    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   aic:  s    c         C   s    d |  d | | | | d S(   s´  Akaike information criterion (AIC) with small sample correction

    Parameters
    ----------
    llf : float
        value of the loglikelihood
    nobs : int
        number of observations
    df_modelwc : int
        number of parameters including constant

    Returns
    -------
    aicc : float
        information criterion

    References
    ----------
    http://en.wikipedia.org/wiki/Akaike_information_criterion#AICc

    g       Àg       @g      ð?(    (   R)   R%   R*   (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   aiccS  s    c         C   s   d |  t  j | ƒ | S(   sª  Bayesian information criterion (BIC) or Schwarz criterion

    Parameters
    ----------
    llf : float
        value of the loglikelihood
    nobs : int
        number of observations
    df_modelwc : int
        number of parameters including constant

    Returns
    -------
    bic : float
        information criterion

    References
    ----------
    http://en.wikipedia.org/wiki/Bayesian_information_criterion

    g       À(   R   t   log(   R)   R%   R*   (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   bicl  s    c         C   s&   d |  d t  j t  j | ƒ ƒ | S(   sy  Hannan-Quinn information criterion (HQC)

    Parameters
    ----------
    llf : float
        value of the loglikelihood
    nobs : int
        number of observations
    df_modelwc : int
        number of parameters including constant

    Returns
    -------
    hqic : float
        information criterion

    References
    ----------
    Wikipedia doesn't say much

    g       Ài   (   R   R-   (   R)   R%   R*   (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   hqic…  s    c         C   s0   | s t  j |  ƒ }  n  |  t d | | ƒ | S(   sÔ  Akaike information criterion

    Parameters
    ----------
    sigma2 : float
        estimate of the residual variance or determinant of Sigma_hat in the
        multivariate case. If islog is true, then it is assumed that sigma
        is already log-ed, for example logdetSigma.
    nobs : int
        number of observations
    df_modelwc : int
        number of parameters including constant

    Returns
    -------
    aic : float
        information criterion

    Notes
    -----
    A constant has been dropped in comparison to the loglikelihood base
    information criteria. The information criteria should be used to compare
    only comparable models.

    For example, AIC is defined in terms of the loglikelihood as

    :math:`-2 llf + 2 k`

    in terms of :math:`\hat{\sigma}^2`

    :math:`log(\hat{\sigma}^2) + 2 k / n`

    in terms of the determinant of :math:`\hat{\Sigma}`

    :math:`log(\|\hat{\Sigma}\|) + 2 k / n`

    Note: In our definition we do not divide by n in the log-likelihood
    version.

    TODO: Latex math

    reference for example lecture notes by Herman Bierens

    See Also
    --------

    References
    ----------
    http://en.wikipedia.org/wiki/Akaike_information_criterion

    i    (   R   R-   R+   (   t   sigma2R%   R*   t   islog(    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt	   aic_sigma   s    4c         C   s0   | s t  j |  ƒ }  n  |  t d | | ƒ | S(   s  Akaike information criterion (AIC) with small sample correction

    Parameters
    ----------
    sigma2 : float
        estimate of the residual variance or determinant of Sigma_hat in the
        multivariate case. If islog is true, then it is assumed that sigma
        is already log-ed, for example logdetSigma.
    nobs : int
        number of observations
    df_modelwc : int
        number of parameters including constant

    Returns
    -------
    aicc : float
        information criterion

    Notes
    -----
    A constant has been dropped in comparison to the loglikelihood base
    information criteria. These should be used to compare for comparable
    models.

    References
    ----------
    http://en.wikipedia.org/wiki/Akaike_information_criterion#AICc

    i    (   R   R-   R,   (   R0   R%   R*   R1   (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt
   aicc_sigmaÙ  s    c         C   s0   | s t  j |  ƒ }  n  |  t d | | ƒ | S(   s  Bayesian information criterion (BIC) or Schwarz criterion

    Parameters
    ----------
    sigma2 : float
        estimate of the residual variance or determinant of Sigma_hat in the
        multivariate case. If islog is true, then it is assumed that sigma
        is already log-ed, for example logdetSigma.
    nobs : int
        number of observations
    df_modelwc : int
        number of parameters including constant

    Returns
    -------
    bic : float
        information criterion

    Notes
    -----
    A constant has been dropped in comparison to the loglikelihood base
    information criteria. These should be used to compare for comparable
    models.

    References
    ----------
    http://en.wikipedia.org/wiki/Bayesian_information_criterion

    i    (   R   R-   R.   (   R0   R%   R*   R1   (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt	   bic_sigmaü  s    c         C   s0   | s t  j |  ƒ }  n  |  t d | | ƒ | S(   sÀ  Hannan-Quinn information criterion (HQC)

    Parameters
    ----------
    sigma2 : float
        estimate of the residual variance or determinant of Sigma_hat in the
        multivariate case. If islog is true, then it is assumed that sigma
        is already log-ed, for example logdetSigma.
    nobs : int
        number of observations
    df_modelwc : int
        number of parameters including constant

    Returns
    -------
    hqic : float
        information criterion

    Notes
    -----
    A constant has been dropped in comparison to the loglikelihood base
    information criteria. These should be used to compare for comparable
    models.

    References
    ----------
    xxx

    i    (   R   R-   R/   (   R0   R%   R*   R1   (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt
   hqic_sigma  s    (   t   __doc__t   numpyR   R   R   R   R   R   R   R   R   R   R(   R+   R,   R.   R/   t   FalseR2   R3   R4   R5   t   __all__(    (    (    s>   lib/python2.7/site-packages/statsmodels/tools/eval_measures.pyt   <module>	   s,   /				9##(