B
    x\Q                 @   s   d dl mZ d dlZd dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d*ddZdd Zd+ddZe
dddd,ddZd-ddZe
ddde
ddd d!d.d#d$Zd/d&d'Zd0d(d)ZdS )1    )divisionN)lmaplrangerangezip)deprecate_kwarg)notna)pprint_thing)_get_standard_colors)_set_ticks_props	_subplots      ?Fhist.皙?c
       #      K   s6  |   }|jj}|| }t|||dd\}}|jddd t|}t|}|pPi }|pXi }|
dd g }x^|jD ]T}|| j|| j }t	
|t	| }}|| |	 d }||| || f qrW x|tt||jD ]f\}}xZtt||jD ]D\}}|||f }||kr|| j|| j }|dkrJ|j|f| nN|d	krdd
lm} |}||}t	|
 | d}|j|||f| |||  nX|| || @ j}|j|| | || | f||d|
 |||  |||  || || |dkr*|jd ||d kr|jd qW qW t|jdkr |d }|d d j } | |d | k| |d k@  } | |d  |d |d   }!|d d  }"|!|"d |"d   |"d  }!|d d j |! t	!| | "t#kr| "t#} |d d j$|  t%|ddddd |S )a  
    Draw a matrix of scatter plots.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        amount of transparency applied
    figsize : (float,float), optional
        a tuple (width, height) in inches
    ax : Matplotlib axis object, optional
    grid : bool, optional
        setting this to True will show the grid
    diagonal : {'hist', 'kde'}
        pick between 'kde' and 'hist' for
        either Kernel Density Estimation or Histogram
        plot in the diagonal
    marker : str, optional
        Matplotlib marker type, default '.'
    hist_kwds : other plotting keyword arguments
        To be passed to hist function
    density_kwds : other plotting keyword arguments
        To be passed to kernel density estimate plot
    range_padding : float, optional
        relative extension of axis range in x and y
        with respect to (x_max - x_min) or (y_max - y_min),
        default 0.05
    kwds : other plotting keyword arguments
        To be passed to scatter function

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
    >>> scatter_matrix(df, alpha=0.2)
    F)naxesfigsizeaxZsqueezer   )ZwspaceZhspaceZ
edgecolorsnoneg       @r   )ZkdeZdensity)gaussian_kdei  )markeralpha      Z   )Z
xlabelsizeZxrotZ
ylabelsizeZyrot)&Z_get_numeric_datacolumnssizer   Zsubplots_adjustr   _get_marker_compat
setdefaultvaluesnpminmaxappendr   r   r   Zscipy.statsr   linspaceplotZevaluateset_xlimscatterZset_ylim
set_xlabel
set_ylabelZyaxisZset_visibleZxaxislenZget_majorticklocsZget_ylimZ	set_ticksallZastypeintZset_ticklabelsr   )#framer   r   r   gridZdiagonalr   Zdensity_kwdsZ	hist_kwdsZrange_paddingkwdsdfnr   figaxesmaskZboundaries_listar   Zrmin_Zrmax_Z
rdelta_extijbr   yZgkdeZindcommonZlim1ZlocsZadjZlim0 r;   4lib/python3.7/site-packages/pandas/plotting/_misc.pyscatter_matrix   sn    '






r=   c             C   s   dd l m} | |jkrdS | S )Nr   o)Zmatplotlib.lineslinesZlineMarkers)r   Zmlinesr;   r;   r<   r      s    
r   c          	      s  ddl m} ddlm} dd }t| }	| |  }
| | }| j|dd|}|dkrn|jddgddgd}i }t	t|
|d	|d
}x|
D ]}g g g||< qW t| j
d  tdd  fddt D D }xt|	D ]x}|j| j}tjtj|ddddd}|| jdd|  }|j| }|| d |d  || d |d  qW xHt|
D ]<\}}|j|| d || d f|| t|d| qdW |  ||jdddd x@t||j
D ].\}}||j|ddd |d dk r6|d dk r6|j|d d |d d |dddd n|d dk rz|d dkrz|j|d d |d d |dddd n|d dkr|d dk r|j|d d |d d |dddd nB|d dkr|d dkr|j|d d |d d |dddd qW |d |S )as	  
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as a evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz allow to project a N-dimensional data set into a 2D space where the
    influence of each dimension can be interpreted as a balance between the
    influence of all dimensions.

    More info available at the `original article
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.889>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Pandas object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information.
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    kwds : optional
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    axes : :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization.

    Examples
    --------
    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6,
        ...                         6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2,
        ...                        3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4,
        ...                         5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2,
        ...                        2.1, 0.2],
        ...         'Category': ['virginica', 'virginica', 'setosa',
        ...                      'virginica', 'virginica', 'versicolor',
        ...                      'versicolor', 'setosa', 'virginica',
        ...                      'setosa']
        ...     })
        >>> rad_viz = pd.plotting.radviz(df, 'Category')  # doctest: +SKIP
    r   Nc             S   s    t | }t| }| | ||  S )N)r!   r"   )seriesr5   r8   r;   r;   r<   	normalize   s    zradviz.<locals>.normalizer   )axis)xlimylimrandom)
num_colorscolormap
color_typecolorc             S   s    g | ]}t |t |fqS r;   )r    cossin).0tr;   r;   r<   
<listcomp>   s   zradviz.<locals>.<listcomp>c                s"   g | ]}d t j |t   qS )g       @)r    pifloat)rM   r6   )mr;   r<   rO      s      )rJ   label)g        g        g      ?r   )ZradiusZ	facecolorg?Zgrayg        righttopZsmall)ZhaZvar   ZbottomleftZequal)matplotlib.pyplotpyplotZmatplotlib.patchespatchesr*   drop_duplicatesdropZapplygcar
   r   r    arrayr   ilocr   repeatZexpand_dimssumiatr#   	enumerater'   r	   legendZ	add_patchZCircler   textrB   )r-   class_columnr   rJ   rH   r/   pltrZ   rA   r1   classes	class_colr0   Zto_plotcolorsklssr6   rowZrow_r9   Zxynamer;   )rR   r<   radviz   s\    >





 
ro   datar-   )old_arg_namenew_arg_name   c                sL  ddl m m} ddlm}  fdd}	t| }
| | }| |  }| j|dd}t	| ||}t
 }tt||d|d	}tt||}|dkr|j| |fd
}xt|
D ]}|j| j}|	|}||}|j| }t|}||kr|| |j||f|| |d| q|j||fd|| i| qW |jdd |  |S )a>  
    Generates a matplotlib plot of Andrews curves, for visualising clusters of
    multivariate data.

    Andrews curves have the functional form:

    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
           x_4 sin(2t) + x_5 cos(2t) + ...

    Where x coefficients correspond to the values of each dimension and t is
    linearly spaced between -pi and +pi. Each row of frame then corresponds to
    a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0)
    class_column : Name of the column containing class names
    ax : matplotlib axes object, default None
    samples : Number of points to plot in each curve
    color : list or tuple, optional
        Colors to use for the different classes
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If string, load colormap with that name
        from matplotlib.
    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax : Matplotlib axis object

    r   )sqrtrP   Nc                s    fdd}|S )Nc                s    d }|d }t t  d}|t|jd d d t d|jd d }t || }|t j	|d d dt j
f t | |d d dt j
f t |  dd7 }|S )Nr   g       @r   rS   )rB   )r    deletecopyZresizer,   r   arangeshapeZouterra   ZnewaxisrL   rK   )rN   Zx1resultZcoeffsZ	harmonicsZ	trig_args)
amplitudesrt   r;   r<   f8  s    "
z+andrews_curves.<locals>.function.<locals>.fr;   )rz   r{   )rt   )rz   r<   function7  s    z andrews_curves.<locals>.functionr   )rB   rF   )rG   rH   rI   rJ   )rD   )rJ   rT   rJ   zupper right)loc)Zmathrt   rP   rX   rY   r*   r[   r\   r    r$   setr
   dictr   r]   r   r_   r   rb   r	   addr%   rd   r.   )r-   rf   r   samplesrJ   rH   r/   rP   rg   r|   r1   ri   rh   r0   rN   used_legendscolor_valuesrj   r6   rm   r{   r9   rk   rT   r;   )rt   r<   andrews_curves  s8    $


  r   2     c                s  ddl ddlm} t| j  fddt|D }tdd |D }tdd |D }tdd |D }	|dkr| }t	|}
g }|
dd	d
}|d || |j|
|f| |
dd	d}|d || |j|
|f| |
dd	d	}|d || |j|
|	f| |
dd	d}|d || |j|f| |
dd	d}|d || |j|f| |
dd	d}|d || |j|	f| x2|D ]*}|j| dd |j| dd qW |S )a  
    Bootstrap plot on mean, median and mid-range statistics.

    The bootstrap plot is used to estimate the uncertainty of a statistic
    by relaying on random sampling with replacement [1]_. This function will
    generate bootstrapping plots for mean, median and mid-range statistics
    for the given number of samples of the given size.

    .. [1] "Bootstrapping (statistics)" in     https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Parameters
    ----------
    series : pandas.Series
        Pandas Series from where to get the samplings for the bootstrapping.
    fig : matplotlib.figure.Figure, default None
        If given, it will use the `fig` reference for plotting instead of
        creating a new one with default parameters.
    size : int, default 50
        Number of data points to consider during each sampling. It must be
        greater or equal than the length of the `series`.
    samples : int, default 500
        Number of times the bootstrap procedure is performed.
    **kwds :
        Options to pass to matplotlib plotting method.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Matplotlib figure

    See Also
    --------
    pandas.DataFrame.plot : Basic plotting for DataFrame objects.
    pandas.Series.plot : Basic plotting for Series objects.

    Examples
    --------

    .. plot::
            :context: close-figs

            >>> s = pd.Series(np.random.uniform(size=100))
            >>> fig = pd.plotting.bootstrap_plot(s)  # doctest: +SKIP
    r   Nc                s   g | ]}  qS r;   )Zsample)rM   _)rp   rF   r   r;   r<   rO     s    z"bootstrap_plot.<locals>.<listcomp>c             S   s   g | ]}t |qS r;   )r    mean)rM   samplingr;   r;   r<   rO     s    c             S   s   g | ]}t |qS r;   )r    Zmedian)rM   r   r;   r;   r<   rO     s    c             S   s    g | ]}t |t| d  qS )g      ?)r!   r"   )rM   r   r;   r;   r<   rO     s   rS      r   ZSample   ZMean   ZMedian   ZMidranger   )Zfontsize)rF   rX   rY   listr   r   r    r^   Zfigurer   Zadd_subplotr(   r#   r%   r   ZsetpZget_xticklabelsZget_yticklabels)r@   r2   r   r   r/   rg   Z	samplingsZmeansZmediansZ	midrangesxr3   Zax1Zax2Zax3Zax4Zax5Zax6rB   r;   )rp   rF   r   r<   bootstrap_plotk  sP    .















r   rj   rJ   r   )rq   rr   
stacklevelTc             K   s  |	dkrddd}	ddl m} t| }| |  }| | }|dkrR| j|dd}n| | }t }t|j}|dkrtt	t
|jstd|j}nD|dk	rtt	|std	nt||krtd
|}nt|}|dkr| }tt||d|d}|
rt|}t|}tt||}xt|D ]t}|j| j}|j| }t|}||kr|| |j||f|| |d| n|j||fd|| i| q.W |rx|D ]}|j|f|	 qW || ||j ||d |d  |jdd |  |S )a  Parallel coordinates plotting.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names
    cols : list, optional
        A list of column names to use
    ax : matplotlib.axis, optional
        matplotlib axis object
    color : list or tuple, optional
        Colors to use for the different classes
    use_columns : bool, optional
        If true, columns will be used as xticks
    xticks : list or tuple, optional
        A list of values to use for xticks
    colormap : str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines : bool, optional
        If true, vertical lines will be added at each xtick
    axvlines_kwds : keywords, optional
        Options to be passed to axvline method for vertical lines
    sort_labels : bool, False
        Sort class_column labels, useful when assigning colors

        .. versionadded:: 0.20.0

    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax: matplotlib axis object

    Examples
    --------
    >>> from matplotlib import pyplot as plt
    >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master'
                        '/pandas/tests/data/iris.csv')
    >>> pd.plotting.parallel_coordinates(
            df, 'Name',
            color=('#556270', '#4ECDC4', '#C7F464'))
    >>> plt.show()
    Nr   black)Z	linewidthrJ   r   )rB   Tz,Columns must be numeric to be used as xticksz xticks specified must be numericz-Length of xticks must match number of columnsrF   )rG   rH   rI   rJ   )rJ   rT   rJ   rC   zupper right)r}   )rX   rY   r*   r[   r\   r~   r   r    r+   Zisrealr   
ValueErrorr   r]   r
   sortedr   r   r   r_   r   rb   r	   r   r%   ZaxvlineZ
set_xticksZset_xticklabelsr&   rd   r.   )r-   rf   Zcolsr   rJ   Zuse_columnsZxticksrH   ZaxvlinesZaxvlines_kwdsZsort_labelsr/   rg   r1   rh   ri   r0   r   Zncolsr   r   rj   r6   r9   rk   rT   r;   r;   r<   parallel_coordinates  s`    3





 "

r   r   c             K   s~   ddl m} |d|jd  | j}|d|  }||d }|dkrN| }|d |dj|d |j	||f| |S )a&  Lag plot for time series.

    Parameters
    ----------
    series : Time series
    lag : lag of the scatter plot, default 1
    ax : Matplotlib axis object, optional
    kwds : Matplotlib scatter method keyword arguments, optional

    Returns
    -------
    ax: Matplotlib axis object
    r   Nczpatch.facecolorzy(t)zy(t + {lag}))lag)
rX   rY   r   ZrcParamsr   r]   r(   r)   formatr'   )r@   r   r   r/   rg   rp   Zy1Zy2r;   r;   r<   lag_plot:  s    
r   c       	         sF  ddl m} t| t| |dkr8|jdfdd}tt d t   fdd}t	d }t
||}d	}d
}|j|t ddd |j|t dd |jddd |j| t dd |j| t ddd |d |d |j||f| d|kr:|  |  |S )a  Autocorrelation plot for time series.

    Parameters:
    -----------
    series: Time series
    ax: Matplotlib axis object, optional
    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns:
    -----------
    ax: Matplotlib axis object
    r   Nr   )g      g      ?)rD   rE   rS   c                s4   d |    | d      t   S )N)ra   rQ   )h)c0rp   r   r1   r;   r<   rn  s    zautocorrelation_plot.<locals>.rg1\?g`dL@z--Zgrey)r9   Z	linestylerJ   )r9   rJ   g        r   ZLagZAutocorrelationrT   )rX   rY   r*   r    Zasarrayr]   r   ra   rQ   rw   r   Zaxhlinert   r(   r)   r%   rd   r.   )	r@   r   r/   rg   r   r   r9   Zz95Zz99r;   )r   rp   r   r1   r<   autocorrelation_plotX  s0    





r   )	r   NNFr   r   NNr   )NNN)Nrs   NN)Nr   r   )	NNNFNNTNF)r   N)N)Z
__future__r   Znumpyr    Zpandas.compatr   r   r   r   Zpandas.util._decoratorsr   Zpandas.core.dtypes.missingr   Zpandas.io.formats.printingr	   Zpandas.plotting._styler
   Zpandas.plotting._toolsr   r   r=   r   ro   r   r   r   r   r   r;   r;   r;   r<   <module>   s.     
x
~
 Y
[
  p
