B
    &]\m:              
   @   s$  d Z ddlmZmZmZ ddddddd	d
ddg
ZddlZddlmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZmZmZmZ edddgddfddZd%ddZedddgdfddZd&ddZdddgdfdd	Zdddgddfd d
Zd'd!dZd(d"dZd)d#dZd*d$dZ dS )+zB
Additional statistics functions with support for masked arrays.

    )divisionprint_functionabsolute_importcompare_medians_mshdquantileshdmedianhdquantiles_sdidealfourthsmedian_cihsmjcimquantiles_cimjrshtrimmed_mean_ciN)float_int_ndarray)MaskedArray   )mstats_basic)normbetatbinomg      ?g      ?g      ?Fc             C   s   dd }t j| dtd} tj|ddd}|dks:| jdkrH|| ||}n*| jdkr`td	| j t ||| ||}t j|dd
S )a  
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    See Also
    --------
    hdquantiles_sd

    c             S   sN  t t |  t}|j}t dt|ft	}|dk rTt j
|_|rL|S |d S t |d t| }tj}xt|D ]t\}}	|||d |	 |d d|	  }
|
dd |
dd  }t ||}||d|f< t ||| d |d|f< qzW |d |d|dkf< |d |d|dkf< |rFt j
 |d|dkf< |d|dkf< |S |d S )zGComputes the HD quantiles for a 1D array. Returns nan for invalid data.   r   r   N)npsqueezesort
compressedviewr   sizeemptylenr   nanflatarangefloatr   cdf	enumeratedot)dataprobvarxsortednZhdvbetacdfip_wwZhd_mean r5   8lib/python3.7/site-packages/scipy/stats/mstats_extras.py_hd_1D>   s,      "zhdquantiles.<locals>._hd_1DF)copydtyper   )r8   ndminNr   zDArray 'data' must be at most two dimensional, but got data.ndim = %d)r8   )maarrayr   r   ndim
ValueErrorapply_along_axisfix_invalid)r*   r+   axisr,   r7   r2   resultr5   r5   r6   r      s    

r   c             C   s   t | dg||d}| S )a9  
    Returns the Harrell-Davis estimate of the median along the given axis.

    Parameters
    ----------
    data : ndarray
        Data array.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdmedian : MaskedArray
        The median values.  If ``var=True``, the variance is returned inside
        the masked array.  E.g. for a 1-D array the shape change from (1,) to
        (2,).

    g      ?)rA   r,   )r   r   )r*   rA   r,   rB   r5   r5   r6   r   j   s    c             C   sv   dd }t j| dtd} tj|ddd}|dkr<|| |}n(| jdkrTtd	| j t ||| |}t j|dd
 S )a  
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    c       
         s  t |  t t t|t} dk r6t j|_t  t	 d  }t
j}xt|D ]\}}|| d |  d d|  }|dd |dd  t j fddt D td}t j| ddd	  t	 d  }	t	 d t t |	 t	   ||< q\W |S )
z%Computes the std error for 1D arrays.r   r   Nr   c                sD   g | ]<}t t jttd |tt|d  f t qS )r   r   )r   r)   Zr_listrangeastyper   ).0k)r.   r4   r-   r5   r6   
<listcomp>   s   z4hdquantiles_sd.<locals>._hdsd_1D.<locals>.<listcomp>)r9   F)r8   r:   )r   r   r   r"   r!   r   r#   r$   r%   r&   r   r'   r(   ZfromiterrD   r<   r,   sqrtZdiagZdiagonal)
r*   r+   ZhdsdZvvr0   r1   r2   r3   Zmx_Zmx_varr5   )r.   r4   r-   r6   _hdsd_1D   s     $0z hdquantiles_sd.<locals>._hdsd_1DF)r8   r9   r   )r8   r:   Nr   zDArray 'data' must be at most two dimensional, but got data.ndim = %d)r8   )	r;   r<   r   r   r=   r>   r?   r@   Zravel)r*   r+   rA   rJ   r2   rB   r5   r5   r6   r      s    

g?g?TT皙?c       
      C   s|   t j| dd} tj| |||d}||}tj| |||d}||d }td|d  |}	t	||	|  ||	|  fS )a  
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked data, as floats between 0. and 1. If ``n``
        is the number of unmasked data before trimming, then
        (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    F)r8   )limits	inclusiverA   r   g       @)
r;   r<   mstatsZtrimrZmeanZtrimmed_stdecountr   ppfr   )
r*   rN   rO   alpharA   ZtrimmedZtmeanZtstdeZdfZtppfr5   r5   r6   r      s    *
c             C   sd   dd }t j| dd} | jdkr.td| j tj|ddd}|d	krP|| |S t ||| |S d	S )
a  
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    c             S   s   t |  } | j}t || d t}tj}t 	t
|t}t jd|d td| }|d|  }xnt|D ]b\}}	|||	d ||	 |||	d ||	  }
t |
| }t |
| d }t ||d  ||< qpW |S )Ng      ?r   )r9   g      ?r   )r   r   r   r    r<   rE   r   r   r'   r!   r"   r   r%   r(   r)   rI   )r*   r2   r.   r+   r0   Zmjxyr1   mWZC1ZC2r5   r5   r6   _mjci_1D  s    (zmjci.<locals>._mjci_1DF)r8   r   zDArray 'data' must be at most two dimensional, but got data.ndim = %dr   )r8   r:   N)r;   r<   r=   r>   r   r?   )r*   r+   rA   rX   r2   r5   r5   r6   r      s    


c             C   sZ   t |d| }td|d  }tj| |dd|d}t| ||d}|||  |||  fS )a  
    Computes the alpha confidence interval for the selected quantiles of the
    data, with Maritz-Jarrett estimators.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles.
        If None, use a flattened array.

    Returns
    -------
    ci_lower : ndarray
        The lower boundaries of the confidence interval.  Of the same length as
        `prob`.
    ci_upper : ndarray
        The upper boundaries of the confidence interval.  Of the same length as
        `prob`.

    r   g       @r   )ZalphapZbetaprA   )rA   )minr   rR   rP   Z
mquantilesr   )r*   r+   rS   rA   zZxqZsmjr5   r5   r6   r   !  s
    c             C   sV   dd }t j| dd} |dkr*|| |}n(| jdkrBtd| j t ||| |}|S )aA  
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs
        Alpha level confidence interval.

    c       	      S   s>  t |  } t| }t|d| }tt|d |d}t|| |dt|d |d }|d| k r|d8 }t|| |dt|d |d }t|| d |dt||d }|d | ||  }|| | t	||d|  |   }|| |  d| | |d    || || d   d| | ||    f}|S )Nr   g       @g      ?r   )
r   r   r   r"   rY   intr   Z_ppfr'   r&   )	r*   rS   r.   rG   ZgkZgkkIZlambdZlimsr5   r5   r6   _cihs_1DZ  s    $$$$*zmedian_cihs.<locals>._cihs_1DF)r8   Nr   zDArray 'data' must be at most two dimensional, but got data.ndim = %d)r;   r<   r=   r>   r?   )r*   rS   rA   r]   rB   r5   r5   r6   r
   C  s    

c             C   sn   t j| |dt j||d }}tj| |dtj||d }}t|| t |d |d   }dt| S )a+  
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.  Has to be of size >=7.
    group_2 : array_like
        Second dataset.  Has to be of size >=7.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    )rA   r   r   )	r;   ZmedianrP   Zstde_medianr   absrI   r   r'   )Zgroup_1Zgroup_2rA   Zmed_1Zmed_2Zstd_1Zstd_2rW   r5   r5   r6   r   v  s
    $c             C   s>   dd }t j| |dt} |dkr,|| S t ||| S dS )aC  
    Returns an estimate of the lower and upper quartiles.

    Uses the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        Returns the two internal values that divide `data` into four parts
        using the ideal fourths algorithm either along the flattened array
        (if `axis` is None) or along `axis` of `data`.

    c             S   s   |   }t|}|dk r$tjtjgS t|d d d\}}t|}d| ||d   |||   }|| }d| ||  |||d    }||gS )N   g      @g?r   )r   r"   r   r#   divmodr[   )r*   rT   r.   jhZqlorG   Zqupr5   r5   r6   _idf  s      zidealfourths.<locals>._idf)rA   N)r;   r   r   r   r?   )r*   rA   rc   r5   r5   r6   r	     s
    c             C   s   t j| dd} |dkr| }ntj|ddd}| jdkr>td|  }t| dd}d|d	 |d
   |d  }| dddf |dddf | kd
}| dddf |dddf | k d
}|| d| |  S )a  
    Evaluates Rosenblatt's shifted histogram estimators for each data point.

    Rosenblatt's estimator is a centered finite-difference approximation to the
    derivative of the empirical cumulative distribution function.

    Parameters
    ----------
    data : sequence
        Input data, should be 1-D. Masked values are ignored.
    points : sequence or None, optional
        Sequence of points where to evaluate Rosenblatt shifted histogram.
        If None, use the data.

    F)r8   Nr   )r8   r:   z#The input array should be 1D only !)rA   g333333?r   r   g?g       @)r;   r<   r   r=   AttributeErrorrQ   r	   sum)r*   Zpointsr.   rrb   ZnhiZnlor5   r5   r6   r     s    
**)r   F)rK   rL   rM   N)rM   N)N)N)N)!__doc__Z
__future__r   r   r   __all__Znumpyr   r   r   r   Znumpy.mar;   r    r   rP   Zscipy.stats.distributionsr   r   r   r   rC   r   r   r   r   r   r   r
   r   r	   r   r5   r5   r5   r6   <module>   s0   K
= 
2-"
3
!
(