ó
‡ˆ\c           @   sÑ   d  Z  d d l m Z d d l Z d d l m Z m Z d d l m Z d d l	 m
 Z
 d d	 d
 d d d g Z d „  Z d d d „ Z d d d „ Z d d d „ Z d „  Z d d d „ Z d d d „ Z d S(   sï   Metrics to assess performance on regression task

Functions named as ``*_score`` return a scalar value to maximize: the higher
the better

Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
the lower the better
iÿÿÿÿ(   t   divisionNi   (   t   check_arrayt   check_consistent_length(   t   column_or_1d(   t   string_typest   mean_absolute_errort   mean_squared_errort   mean_squared_log_errort   median_absolute_errort   r2_scoret   explained_variance_scorec         C   s—  t  |  | ƒ t |  d t ƒ}  t | d t ƒ} |  j d k rR |  j d ƒ }  n  | j d k rs | j d ƒ } n  |  j d | j d k r¶ t d j |  j d | j d ƒ ƒ ‚ n  |  j d } d } t | t	 ƒ r| | k rot d j | | ƒ ƒ ‚ qonm | d k	 rot | d t ƒ} | d k r;t d	 ƒ ‚ qo| t | ƒ k rot d
 t | ƒ | f ƒ ‚ qon  | d k rd n d } | |  | | f S(   sí  Check that y_true and y_pred belong to the same regression task

    Parameters
    ----------
    y_true : array-like

    y_pred : array-like

    multioutput : array-like or string in ['raw_values', uniform_average',
        'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().

    Returns
    -------
    type_true : one of {'continuous', continuous-multioutput'}
        The type of the true target data, as output by
        'utils.multiclass.type_of_target'

    y_true : array-like of shape = (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples, n_outputs)
        Estimated target values.

    multioutput : array-like of shape = (n_outputs) or string in ['raw_values',
        uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.

    t	   ensure_2di   iÿÿÿÿs<   y_true and y_pred have different number of output ({0}!={1})t
   raw_valuest   uniform_averaget   variance_weightedsI   Allowed 'multioutput' string values are {}. You provided multioutput={!r}s5   Custom weights are useful only in multi-output cases.s?   There must be equally many custom weights (%d) as outputs (%d).t
   continuouss   continuous-multioutput(   iÿÿÿÿi   (   iÿÿÿÿi   (   R   R   R   N(   R   R   t   Falset   ndimt   reshapet   shapet
   ValueErrort   formatt
   isinstanceR   t   Nonet   len(   t   y_truet   y_predt   multioutputt	   n_outputst   allowed_multioutput_strt   y_type(    (    s9   lib/python2.7/site-packages/sklearn/metrics/regression.pyt   _check_reg_targets+   s6     	  	R   c         C   s    t  |  | | ƒ \ } }  } } t |  | | ƒ t j t j | |  ƒ d | d d ƒ} t | t ƒ r | d k ru | S| d k r d } q n  t j | d | ƒS(   s  Mean absolute error regression loss

    Read more in the :ref:`User Guide <mean_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average']
        or array-like of shape (n_outputs)
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.


    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then mean absolute error is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        MAE output is non-negative floating point. The best value is 0.0.

    Examples
    --------
    >>> from sklearn.metrics import mean_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_absolute_error(y_true, y_pred)
    0.75
    >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([0.5, 1. ])
    >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    ... # doctest: +ELLIPSIS
    0.85...
    t   weightst   axisi    R   R   N(   R   R   t   npt   averaget   absR   R   R   (   R   R   t   sample_weightR   R   t   output_errors(    (    s9   lib/python2.7/site-packages/sklearn/metrics/regression.pyR   p   s    9c         C   s›   t  |  | | ƒ \ } }  } } t |  | | ƒ t j |  | d d d d | ƒ} t | t ƒ rˆ | d k rp | S| d k rˆ d } qˆ n  t j | d | ƒS(   s•  Mean squared error regression loss

    Read more in the :ref:`User Guide <mean_squared_error>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average']
        or array-like of shape (n_outputs)
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import mean_squared_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_squared_error(y_true, y_pred)
    0.375
    >>> y_true = [[0.5, 1],[-1, 1],[7, -6]]
    >>> y_pred = [[0, 2],[-1, 2],[8, -5]]
    >>> mean_squared_error(y_true, y_pred)  # doctest: +ELLIPSIS
    0.708...
    >>> mean_squared_error(y_true, y_pred, multioutput='raw_values')
    ... # doctest: +ELLIPSIS
    array([0.41666667, 1.        ])
    >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])
    ... # doctest: +ELLIPSIS
    0.825...

    i   R!   i    R    R   R   N(   R   R   R"   R#   R   R   R   (   R   R   R%   R   R   R&   (    (    s9   lib/python2.7/site-packages/sklearn/metrics/regression.pyR   ¸   s    6	c         C   s†   t  |  | | ƒ \ } }  } } t |  | | ƒ |  d k  j ƒ  sR | d k  j ƒ  ra t d ƒ ‚ n  t t j |  ƒ t j | ƒ | | ƒ S(   sñ  Mean squared logarithmic error regression loss

    Read more in the :ref:`User Guide <mean_squared_log_error>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average']             or array-like of shape = (n_outputs)

        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors when the input is of multioutput
            format.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import mean_squared_log_error
    >>> y_true = [3, 5, 2.5, 7]
    >>> y_pred = [2.5, 5, 4, 8]
    >>> mean_squared_log_error(y_true, y_pred)  # doctest: +ELLIPSIS
    0.039...
    >>> y_true = [[0.5, 1], [1, 2], [7, 6]]
    >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
    >>> mean_squared_log_error(y_true, y_pred)  # doctest: +ELLIPSIS
    0.044...
    >>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
    ... # doctest: +ELLIPSIS
    array([0.00462428, 0.08377444])
    >>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    ... # doctest: +ELLIPSIS
    0.060...

    i    sS   Mean Squared Logarithmic Error cannot be used when targets contain negative values.(   R   R   t   anyR   R   R"   t   log1p(   R   R   R%   R   R   (    (    s9   lib/python2.7/site-packages/sklearn/metrics/regression.pyR   ý   s    8$c         C   sS   t  |  | d ƒ \ } }  } } | d k r9 t d ƒ ‚ n  t j t j | |  ƒ ƒ S(   sr  Median absolute error regression loss

    Read more in the :ref:`User Guide <median_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples)
        Estimated target values.

    Returns
    -------
    loss : float
        A positive floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import median_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> median_absolute_error(y_true, y_pred)
    0.5

    R   s   continuous-multioutputs2   Multioutput not supported in median_absolute_error(   R   R   R"   t   medianR$   (   R   R   R   t   _(    (    s9   lib/python2.7/site-packages/sklearn/metrics/regression.pyR   A  s
    	c         C   sx  t  |  | | ƒ \ } }  } } t |  | | ƒ t j |  | d | d d ƒ} t j |  | | d d | d d ƒ} t j |  d | d d ƒ} t j |  | d d | d d ƒ} | d k }	 | d k }
 |	 |
 @} t j |  j d ƒ } d | | | | | | <d | |	 |
 @<t | t ƒ r_| d k r2| S| d k rGd
 } qe| d	 k re| } qen | } t j | d | ƒS(   sâ  Explained variance regression score function

    Best possible score is 1.0, lower values are worse.

    Read more in the :ref:`User Guide <explained_variance_score>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average',                 'variance_weighted'] or array-like of shape (n_outputs)
        Defines aggregating of multiple output scores.
        Array-like value defines weights used to average scores.

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

    Returns
    -------
    score : float or ndarray of floats
        The explained variance or ndarray if 'multioutput' is 'raw_values'.

    Notes
    -----
    This is not a symmetric function.

    Examples
    --------
    >>> from sklearn.metrics import explained_variance_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> explained_variance_score(y_true, y_pred)  # doctest: +ELLIPSIS
    0.957...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')
    ... # doctest: +ELLIPSIS
    0.983...

    R    R!   i    i   i   g        R   R   R   N(	   R   R   R"   R#   t   onesR   R   R   R   (   R   R   R%   R   R   t
   y_diff_avgt	   numeratort
   y_true_avgt   denominatort   nonzero_numeratort   nonzero_denominatort   valid_scoret   output_scorest   avg_weights(    (    s9   lib/python2.7/site-packages/sklearn/metrics/regression.pyR
   c  s2    :

	c         C   s½  t  |  | | ƒ \ } }  } } t |  | | ƒ | d k	 rb t | ƒ } | d d … t j f } n d } | |  | d j d d d t j ƒ } | |  t j |  d d d | ƒd j d d d t j ƒ } | d k } | d k }	 | |	 @}
 t j	 |  j
 d g ƒ } d | |
 | |
 | |
 <d	 | |	 | @<t | t ƒ r¤| d
 k rN| S| d k rcd } qª| d k rª| } t j | ƒ s¡t j |	 ƒ s—d Sd	 Sq¡qªn | } t j | d | ƒS(   s3
  R^2 (coefficient of determination) regression score function.

    Best possible score is 1.0 and it can be negative (because the
    model can be arbitrarily worse). A constant model that always
    predicts the expected value of y, disregarding the input features,
    would get a R^2 score of 0.0.

    Read more in the :ref:`User Guide <r2_score>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average', 'variance_weighted'] or None or array-like of shape (n_outputs)

        Defines aggregating of multiple output scores.
        Array-like value defines weights used to average scores.
        Default is "uniform_average".

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

        .. versionchanged:: 0.19
            Default value of multioutput is 'uniform_average'.

    Returns
    -------
    z : float or ndarray of floats
        The R^2 score or ndarray of scores if 'multioutput' is
        'raw_values'.

    Notes
    -----
    This is not a symmetric function.

    Unlike most other scores, R^2 score may be negative (it need not actually
    be the square of a quantity R).

    References
    ----------
    .. [1] `Wikipedia entry on the Coefficient of determination
            <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_

    Examples
    --------
    >>> from sklearn.metrics import r2_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> r2_score(y_true, y_pred)  # doctest: +ELLIPSIS
    0.948...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> r2_score(y_true, y_pred,
    ...          multioutput='variance_weighted') # doctest: +ELLIPSIS
    0.938...
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [2, 2, 2]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [3, 2, 1]
    >>> r2_score(y_true, y_pred)
    -3.0
    Ng      ð?i   R!   i    t   dtypeR    i   g        R   R   R   (   R   R   R   R   R"   t   newaxist   sumt   float64R#   R+   R   R   R   R'   (   R   R   R%   R   R   t   weightR-   R/   R1   R0   R2   R3   R4   (    (    s9   lib/python2.7/site-packages/sklearn/metrics/regression.pyR	   À  s@    U$

	(   t   __doc__t
   __future__R    t   numpyR"   t   utils.validationR   R   R   t   externals.sixR   t   __ALL__R   R   R   R   R   R   R
   R	   (    (    (    s9   lib/python2.7/site-packages/sklearn/metrics/regression.pyt   <module>   s.   		FGDB	#[