B
    ¤ŠãZ‡   ã               @   sˆ   d Z ddlmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ e d¡dejfd	d
„ZG dd„ deƒZeƒ ZG dd„ deƒZeƒ ZdS )zò
Support and standalone functions for Robust Linear Models

References
----------
PJ Huber.  'Robust Statistics' John Wiley and Sons, Inc., New York, 1981.

R Venables, B Ripley. 'Modern Applied Statistics in S'
    Springer, New York, 2002.
é    )ÚcallableÚrangeN)Únormé   )Únorms)Útoolsg      è?c             C   s<   t  | ¡} t|ƒr t  || |¡}t jt  | | ¡| |dS )a¤  
    The Median Absolute Deviation along given axis of an array

    Parameters
    ----------
    a : array-like
        Input array.
    c : float, optional
        The normalization constant.  Defined as scipy.stats.norm.ppf(3/4.),
        which is approximately .6745.
    axis : int, optional
        The defaul is 0. Can also be None.
    center : callable or float
        If a callable is provided, such as the default `np.median` then it
        is expected to be called center(a). The axis argument will be applied
        via np.apply_over_axes. Otherwise, provide a float.

    Returns
    -------
    mad : float
        `mad` = median(abs(`a` - center))/`c`
    )Úaxis)ÚnpÚasarrayr   Zapply_over_axesÚmedianÚfabs)ÚaÚcr   Úcenter© r   ú7lib/python3.7/site-packages/statsmodels/robust/scale.pyÚmad   s    
r   c               @   s,   e Zd ZdZddd„Zdd	d
„Zdd„ ZdS )ÚHubera.  
    Huber's proposal 2 for estimating location and scale jointly.

    Parameters
    ----------
    c : float, optional
        Threshold used in threshold for chi=psi**2.  Default value is 1.5.
    tol : float, optional
        Tolerance for convergence.  Default value is 1e-08.
    maxiter : int, optional0
        Maximum number of iterations.  Default value is 30.
    norm : statsmodels.robust.norms.RobustNorm, optional
        A robust norm used in M estimator of location. If None,
        the location estimator defaults to a one-step
        fixed point version of the M-estimator using Huber's T.

    call
        Return joint estimates of Huber's scale and location.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> chem_data = np.array([2.20, 2.20, 2.4, 2.4, 2.5, 2.7, 2.8, 2.9, 3.03,
    ...        3.03, 3.10, 3.37, 3.4, 3.4, 3.4, 3.5, 3.6, 3.7, 3.7, 3.7, 3.7,
    ...        3.77, 5.28, 28.95])
    >>> sm.robust.scale.huber(chem_data)
    (array(3.2054980819923693), array(0.67365260010478967))
    ç      ø?ç:Œ0âŽyE>é   Nc             C   sV   || _ || _|| _|| _dt |¡ d }||d d|   d| t |¡  | _d S )Né   r   )r   ÚmaxiterÚtolr   ÚGaussianÚcdfZpdfÚgamma)Úselfr   r   r   r   Ztmpr   r   r   Ú__init__N   s    zHuber.__init__r   c             C   s”   t  |¡}|dkr4|jd d }t j||d}d}n|jd }|}d}|dkr\t||d}n|}t |||j¡}t |||j¡}|  ||||||¡S )a0  
        Compute Huber's proposal 2 estimate of scale, using an optional
        initial value of scale and an optional estimate of mu. If mu
        is supplied, it is not reestimated.

        Parameters
        ----------
        a : array
            1d array
        mu : float or None, optional
            If the location mu is supplied then it is not reestimated.
            Default is None, which means that it is estimated.
        initscale : float or None, optional
            A first guess on scale.  If initscale is None then the standardized
            median absolute deviation of a is used.

        Notes
        -----
        `Huber` minimizes the function

        sum(psi((a[i]-mu)/scale)**2)

        as a function of (mu, scale), where

        psi(x) = np.clip(x, -self.c, self.c)
        Nr   r   )r   TF)r	   r
   Úshaper   r   r   Ú	unsqueezeÚ_estimate_both)r   r   ÚmuZ	initscaler   ÚnÚest_muÚscaler   r   r   Ú__call__V   s    

zHuber.__call__c          
   C   s‚  xlt | jƒD ]\}|rt| jdkrVt ||| j|  || j|  ¡ |¡|j|  }q|t 	||| j||| j| j
¡}n| ¡ }t |||j¡}t t || | ¡| j¡}	|	 |¡}
t t |	|| d  |¡|| j |j| |
 | jd    ¡}t |||j¡}t t t || ¡|| j
 ¡¡}t t t || ¡|| j
 ¡¡}|rR|s\|}|}q| ¡ | ¡ fS qW td| j ƒ‚dS )ad  
        Estimate scale and location simultaneously with the following
        pseudo_loop:

        while not_converged:
            mu, scale = estimate_location(a, scale, mu), estimate_scale(a, scale, mu)

        where estimate_location is an M-estimator and estimate_scale implements
        the check used in Section 5.5 of Venables & Ripley
        Nr   zJjoint estimation of location and scale failed to converge in %d iterations)r   r   r   r	   Zclipr   Úsumr   r   Zestimate_locationr   Zsqueezer   r    Z
less_equalr   Úsqrtr   ZalltrueÚ
ValueError)r   r   r%   r"   r   r$   r#   Ú_ZnmuÚsubsetZcardÚnscaleZtest1Ztest2r   r   r   r!   ƒ   s,    
"
&" zHuber._estimate_both)r   r   r   N)NNr   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r&   r!   r   r   r   r   r   /   s   

-r   c               @   s"   e Zd ZdZd
dd„Zdd„ Zd	S )Ú
HuberScalea¤  
    Huber's scaling for fitting robust linear models.

    Huber's scale is intended to be used as the scale estimate in the
    IRLS algorithm and is slightly different than the `Huber` class.

    Parameters
    ----------
    d : float, optional
        d is the tuning constant for Huber's scale.  Default is 2.5
    tol : float, optional
        The convergence tolerance
    maxiter : int, optiona
        The maximum number of iterations.  The default is 30.

    Methods
    -------
    call
        Return's Huber's scale computed as below

    Notes
    --------
    Huber's scale is the iterative solution to

    scale_(i+1)**2 = 1/(n*h)*sum(chi(r/sigma_i)*sigma_i**2

    where the Huber function is

    chi(x) = (x**2)/2       for \|x\| < d
    chi(x) = (d**2)/2       for \|x\| >= d

    and the Huber constant h = (n-p)/n*(d**2 + (1-d**2)*            scipy.stats.norm.cdf(d) - .5 - d*sqrt(2*pi)*exp(-0.5*d**2)
    ç      @ç:Œ0âŽyE>r   c             C   s   || _ || _|| _d S )N)Údr   r   )r   r4   r   r   r   r   r   r   Ô   s    zHuberScale.__init__c       
         s  || ˆj d dˆj d  t ˆj ¡  d ˆj t dtj ¡ t dˆj d  ¡   }tˆ ƒ}‡ ‡fdd„‰‡ ‡‡fdd„}tj|g}d}xpt 	||d  ||  ¡ˆj
krþ|ˆjk rþt d||  t ||d ƒ¡ |d d  ¡}	| |	¡ |d7 }qW |d S )	Nr   r   g      à?g      à¿c                s   t  t  ˆ |  ¡ˆj¡S )N)r	   Zlessr   r4   )Úx)Úresidr   r   r   Ú<lambda>Þ   s    z%HuberScale.__call__.<locals>.<lambda>c                s2   ˆ| ƒˆ |  d  d dˆ| ƒ ˆj d d   S )Nr   r   )r4   )Ús)r6   r   r+   r   r   r7   ß   s    éÿÿÿÿ)r4   r   r   r	   r(   ZpiZexpr   ÚinfÚabsr   r   r'   Úappend)
r   Zdf_residZnobsr6   Úhr8   ZchiZ	scalehistZniterr,   r   )r6   r   r+   r   r&   Ù   s    :
"
 
zHuberScale.__call__N)r2   r3   r   )r-   r.   r/   r0   r   r&   r   r   r   r   r1   ±   s   "
r1   )r0   Zstatsmodels.compat.pythonr   r   Znumpyr	   Zscipy.statsr   r   Ú r   Zstatsmodels.toolsr   Zppfr   r   Úobjectr   Zhuberr1   Zhubers_scaler   r   r   r   Ú<module>
   s    ;