ó
àYc           @@  sb  d  Z  d d l m Z d d l Z d d l Z d d l m Z d d l	 m
 Z
 d d l m Z d d l m Z m Z m Z m Z m Z m Z m Z m Z m Z d d	 l m Z m Z m Z d
 e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ  d e f d „  ƒ  YZ! d e f d „  ƒ  YZ" d e f d „  ƒ  YZ# d e f d „  ƒ  YZ$ d e f d „  ƒ  YZ% d  e f d! „  ƒ  YZ& d" e f d# „  ƒ  YZ' d$ e' f d% „  ƒ  YZ( d& e' f d' „  ƒ  YZ) d d d d d( „ Z+ i	 e d) 6e d* 6e d+ 6e d, 6e  d- 6e! d. 6e" d/ 6e# d0 6e$ d1 6Z, d S(2   sá   Statistical methods used to define or modify position of glyphs.

References:
    Wilkinson L. The Grammer of Graphics, sections 7, 7.1

Method Types:
    - Bin: Partitions a space before statistical calculation
    - Summary: Produces a single value comprising a statistical summary
    - Region: Produces two values bounding an interval.
    - Smooth: Produces values representing smoothed versions of the input data.
    - Link: Produces edges from pairs of nodes in a graph.

i    (   t   absolute_importN(   t   string_types(   t   ColumnDataSource(   t   HasProps(	   t   Boolt   Datet   Datetimet   Eithert   Floatt   Instancet   Intt   Listt   Stringi   (   t   Columnt   ColumnLabelt   EitherColumnt   Statc           B@  sÅ   e  Z d  Z e d d ƒ Z e e d d ƒZ e e	 e
 ƒ e	 e ƒ e	 e ƒ e	 e ƒ e	 e ƒ e	 e ƒ d d d d ƒZ e
 d d ƒ Z d „  Z d „  Z d d	 „ Z d d
 „ Z d „  Z d „  Z RS(   s³   Represents a statistical operation to summarize a column of data.

    Can be computed from either a ColumnLabel with a ColumnDataSource, *or*, a
    discrete column of data.
    t   helpsf   A column to use for the stat calculation. Required
        when providing a ColumnDataSource as input.sF   One option for providing the data
        source for stat calculation.t   defaults…   
                  Second option for providing values for stat calculation is by
                  passing the actual column of data.s{   The value calculated for the stat. Some stats could use
        multiple properties to provide the calculation if required.c         K@  sp   | j  d d  ƒ } | d  k	 rL t | t j ƒ r? t | ƒ } n  | | d <n  t t |  ƒ j |   |  j	 ƒ  d  S(   Nt   source(
   t   popt   Nonet
   isinstancet   pdt	   DataFrameR   t   superR   t   __init__t   _refresh(   t   selft
   propertiesR   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   /   s    c         C@  s-   |  j  ƒ  d k	 r) |  j ƒ  |  j ƒ  n  d S(   s;   Lazy update of properties, used for initial transform init.N(   t   get_dataR   t   updatet	   calculate(   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   9   s    
c         C@  su   t  | t j ƒ r! t | ƒ } n  t  | t ƒ rT | |  _ | d k	 r] | |  _ q] n	 | |  _ |  j ƒ  |  j	 ƒ  d S(   s8   Set data properties and update all dependent properties.N(
   R   R   R   R   R   R   t   columnt   valuesR   R    (   R   t   dataR!   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyt   set_data?   s    		
c         C@  s´   |  j  d k	 r_ |  j d k	 s* | d k	 r_ | d k	 r? | } n	 |  j } t j |  j  j | ƒ S|  j d k r– |  j  d k	 r– t j |  j  j ƒ  j ƒ S|  j d k	 r¬ |  j Sd Sd S(   sA   Returns the available columnlabel/source values or column values.N(	   R   R   R!   R   t   SeriesR#   R"   t   to_dft   index(   R   R!   t   col(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   N   s    *		c         C@  s   t  d ƒ ‚ d S(   sF   Return transformed value from column label/source or column-like data.s;   You must implement the calculate method for each stat type.N(   t   NotImplementedError(   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    ^   s    c         C@  s   d S(   sD   Perform any initial work before the actual calculation is performed.N(    (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   c   s    N(   t   __name__t
   __module__t   __doc__R   R!   R	   R   R   R   R   R   R
   R   R   R   R   R   R"   t   valueR   R   R$   R   R    R   (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR      s    			$			
		t   Sumc           B@  s   e  Z d  „  Z RS(   c         C@  s   |  j  ƒ  j ƒ  |  _ d  S(   N(   R   t   sumR-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    i   s    (   R*   R+   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR.   h   s   t   Meanc           B@  s   e  Z d  „  Z RS(   c         C@  s   |  j  ƒ  j ƒ  |  _ d  S(   N(   R   t   meanR-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    n   s    (   R*   R+   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR0   m   s   t   Countc           B@  s   e  Z d  „  Z RS(   c         C@  s   |  j  ƒ  j ƒ  |  _ d  S(   N(   R   t   countR-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    s   s    (   R*   R+   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR2   r   s   t   CountDistinctc           B@  s   e  Z d  „  Z RS(   c         C@  s   |  j  ƒ  j ƒ  |  _ d  S(   N(   R   t   nuniqueR-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    x   s    (   R*   R+   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR4   w   s   t   Medianc           B@  s   e  Z d  „  Z RS(   c         C@  s   |  j  ƒ  j ƒ  |  _ d  S(   N(   R   t   medianR-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    }   s    (   R*   R+   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR6   |   s   t   StdDeviationc           B@  s   e  Z d  „  Z RS(   c         C@  s   |  j  ƒ  j ƒ  |  _ d  S(   N(   R   t   stdR-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    ‚   s    (   R*   R+   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR8      s   t   Minc           B@  s   e  Z d  „  Z RS(   c         C@  s   |  j  ƒ  j ƒ  |  _ d  S(   N(   R   t   minR-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    ‡   s    (   R*   R+   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR:   †   s   t   Maxc           B@  s   e  Z d  „  Z RS(   c         C@  s   |  j  ƒ  j ƒ  |  _ d  S(   N(   R   t   maxR-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    Œ   s    (   R*   R+   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR<   ‹   s   t   Quantilec           B@  s&   e  Z d  Z e d d ƒ Z d „  Z RS(   sÖ   Produces the cutpoint that divides the input data by the interval.

    Quartiles are a special case of quartiles that divide a dataset into four
    equal-size groups. (https://en.wikipedia.org/wiki/Quantile)
    R   g      à?c         C@  s   |  j  ƒ  j |  j ƒ |  _ d  S(   N(   R   t   quantilet   intervalR-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    ˜   s    (   R*   R+   R,   R   R@   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR>      s   t   Binc           B@  sË   e  Z d  Z e e e e ƒ ƒ Z e e e e ƒ ƒ Z e e e e ƒ ƒ Z	 e ƒ  Z
 e ƒ  Z e e e e ƒ ƒ Z e e d e ƒ  ƒZ e ƒ  Z d d d „ Z e d „  ƒ Z d „  Z d „  Z d „  Z RS(   sA   Represents a single bin of data values and attributes of the bin.R   c         K@  s<  t  | t ƒ r t | ƒ } n* t  | t ƒ r9 | g } n t | ƒ g } | | d <|  j | ƒ } t | Œ  \ } } g  t | | ƒ D] \ } }	 | |	 d ^ qƒ }
 t | ƒ d k rÖ | d } | d } |
 d }
 n$ t | ƒ } t | ƒ } t |
 ƒ }
 | | d <| | d <|
 | d <| | d <t t	 |  ƒ j
 |   d  S(	   Nt   labelg       @i   i    t   startt   stopt   centerR"   (   R   t   tuplet   listR   t   strt   process_boundst   zipt   lenR   RA   R   (   R   t	   bin_labelR"   R   R   t   boundst   startst   stopsRC   RD   t   centers(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   ª   s*    
0





c         C@  s‰   |  j  d ƒ } g  | D]< } | j d d ƒ j d d ƒ j d d ƒ j d d ƒ ^ q } g  | D] } t | ƒ ^ q_ } | d | d f S(	   s.   Produce a consistent display of a bin of data.t   ,t   [t    t   ]t   (t   )i    i   (   t   splitt   replacet   float(   t   binst   value_chunkst   valR-   t
   bin_values(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyt   binstr_to_listÇ   s    Ic         C@  sC   t  | t ƒ r/ g  | D] } |  j | ƒ ^ q S|  j | ƒ g Sd  S(   N(   R   RG   R^   (   R   RL   t   dim(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyRI   Ð   s     c         C@  s   |  j  j |  j ƒ d  S(   N(   t   statR$   R"   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   Ö   s    c         C@  s   |  j  j |  _ d  S(   N(   R`   R-   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    Ù   s    N(   R*   R+   R,   R   R   R   RB   R   RC   RD   t   start_labelt
   stop_labelRE   R	   R   R2   R`   t   widthR   R   t   staticmethodR^   RI   R   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyRA   œ   s   						t   BinStatsc           B@  s›   e  Z d  Z e e e e e ƒ d d d d ƒZ e d d d d ƒ Z	 e
 d d ƒ Z e
 d d ƒ Z e e ƒ Z d d d „ Z d	 „  Z d
 „  Z d „  Z RS(   sŽ   A set of statistical calculations for binning values.

    Bin counts using: https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
    R   R   s  
    If bins is an int, it defines the number of equal-width bins in the
    given range. If bins is a sequence, it defines the
    bin edges, including the rightmost edge, allowing for non-uniform
    bin widths.

    (default: None, use Freedman-Diaconis rule)
    s#   Use Freedman-Diaconis rule if None.R@   g      Ð?g      è?c         K@  s4   | | d <| p d | d <t  t |  ƒ j |   d  S(   NR"   R!   (   R   Re   R   (   R   R"   R!   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   ï   s    
c         C@  sO   |  j  ƒ  } |  j j | ƒ |  j j | ƒ |  j d  k rK |  j | ƒ n  d  S(   N(   R   t   q1R$   t   q3RZ   R   t   calc_num_bins(   R   R"   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   õ   s
    c         C@  s¨   |  j  j |  j j } | d k r: t j | j ƒ |  _ n  d | t | ƒ d d |  _ t t j	 | j
 ƒ  | j ƒ  |  j ƒ ƒ |  _ |  j d k r¤ d |  _ n  d S(   s¤   Calculate optimal number of bins using IQR.

        From: http://stats.stackexchange.com/questions/114490/optimal-bin-width-for-two-dimensional-histogram

        i    i   g      ð?g      @i   i   N(   Rg   R-   Rf   t   npt   sqrtt   sizet	   bin_widthRK   t   intt   ceilR=   R;   RZ   (   R   R"   t   iqr(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyRh   ü   s     /c         C@  s   d  S(   N(    (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR      s    N(   R*   R+   R,   R   R
   R   R   R   RZ   Rl   R>   Rf   Rg   R   t   labelsR   R   Rh   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyRe   Ý   s   			t
   BinnedStatc           B@  s¶   e  Z d  Z e e d d ƒZ e e e ƒ d d ƒZ e e	 d e
 ƒ  d d ƒZ e ƒ  Z e ƒ  Z e d e ƒ Z e d e ƒ Z e ƒ  Z d
 d
 d
 d d
 d „ Z d „  Z d	 „  Z RS(   sc    Base class for shared functionality accross bins and aggregates
    dimensions for plotting.

    R   sW   
        A mapping between each dimension and associated binning calculations.
        sø   
        A list of the `Bin` instances that were produced as result of the inputs.
        Iterating over `Bins` will iterate over this list. Each `Bin` can be inspected
        for metadata about the bin and the values associated with it.
        R   sQ   
        The statistical operation to be used on the values in each bin.
        R3   c         K@  sp   t  | t ƒ r t | ƒ  } n  | p( d | d <| | d <| | d <| | d <| |  _ t t |  ƒ j |   d  S(   Nt   valsR!   R`   R"   R   (   R   RH   t   statst   _binsR   Rq   R   (   R   R"   R!   RZ   R`   R   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   .  s    


	c         C@  sh   i  } |  j  d  k	 r2 |  j  | d <|  j | d <n |  j d  k	 rQ |  j | d <n  |  j | d <t |   S(   NR   R!   R"   RZ   (   R   R   R!   R"   Rt   Re   (   R   t   stat_kwargs(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyt	   _get_stat<  s    c         C@  s    |  j  ƒ  |  _ |  j j ƒ  d  S(   N(   Rv   t   bin_statR   (   R   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR   J  s    N(   R*   R+   R,   R	   Re   Rw   R   RA   RZ   R   R2   R`   R   t
   bin_columnt   centers_columnR   t   Truet	   aggregatet   FalseR]   R   Rl   R   R   Rv   R   (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyRq     s    									t   Binsc           B@  s5   e  Z d  Z d „  Z d „  Z d „  Z e d „ Z RS(   sð   Bins and aggregates dimensions for plotting.

    Takes the inputs and produces a list of bins that can be iterated over and
    inspected for their metadata. The bins provide easy access to consistent labeling,
    bounds, and values.
    c      	   C@  sb  d } |  j  | |  _ g  } |  j j ƒ  } |  j j } | j d k  rU t d ƒ ‚ n  | j d k rÂ | j ƒ  d k rÂ d t	 t
 | d ƒ ƒ p“ d } t j | d | | d | | d ƒ } n  t j | | d t d t d	 d ƒ\ } } t j | d | d d ƒ |  _ |  j d  k	 rM|  j j | j ƒ  d
 |  j ƒ|  j j ƒ  } n& t j i |  j |  j  6| |  j 6ƒ } xL | j |  j ƒ D]8 \ }	 }
 | j t d |	 d |
 |  j  d |  j ƒ ƒ q†W| |  _ | j ƒ  } | j t ƒ } x' |  j D] } | j  | | | j! k <qðW|  j  d |  _" |  j d  k	 rQ|  j j | j ƒ  d
 |  j" ƒn | | |  j" <d  S(   Nt   _bini   s/   Histogram data must have at least two elements.i   i    g{®Gáz„?t   retbinst   include_lowestt	   precisiont   nameRL   R"   R`   t   _center(#   R!   Rx   Rw   R   RZ   Rk   t
   ValueErrort   ndimR9   t   absRY   Ri   t   linspaceR   t   cutRz   t   roundRl   R   R   t   addt   tolistR&   R   R"   t   groupbyt   appendRA   R`   t   copyt   astypeRH   RE   RB   Ry   (   R   t   bin_strt
   bin_modelsR#   RZ   t   margint   binnedt
   bin_boundst   dfR‚   t   groupRP   t   bin(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    W  s:    ! ,!&	"c         C@  s   |  j  | S(   N(   RZ   (   R   t   item(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyt   __getitem__…  s    c         C@  s   |  j  | j ƒ |  j j ƒ  S(   N(   R$   R   R&   (   R   R#   (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyt   applyˆ  s    c         C@  s>   |  j  d  k	 r: t t |  j  d d „  d | ƒƒ |  _  n  d  S(   Nt   keyc         S@  s   |  j  S(   N(   RE   (   t   x(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyt   <lambda>Ž  s    t   reverse(   RZ   R   RG   t   sorted(   R   t	   ascending(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyt   sortŒ  s    (   R*   R+   R,   R    R™   Rš   Rz   R¡   (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR}   O  s
   	.		t	   Histogramc           B@  s)   e  Z d  Z e e d d ƒZ d „  Z RS(   sð   Bins and aggregates dimensions for plotting.

    Takes the inputs and produces a list of bins that can be iterated over and
    inspected for their metadata. The bins provide easy access to consistent labeling,
    bounds, and values.
    R   sf  
    Whether to normalize the histogram.

    If True, the result is the value of the probability *density* function
    at the bin, normalized such that the *integral* over the range is 1. If
    False, the result will contain the number of samples in each bin.

    For more info check ``numpy.histogram`` function documentation.

    (default: False)
    c   
      C@  s;  d } |  j  | |  _ |  j j ƒ  } |  j j } t j t j | ƒ d |  j d | ƒ\ } } t j	 | d | d d ƒ |  _
 g  |  _ x¬ t | ƒ D]ž \ } } | | d | | } | d k râ d | | | | d f }	 n d | | | | d f }	 |  j j t d	 |	 d
 | | g d t ƒ  d | ƒ ƒ q• Wd  S(   NR~   t   densityRZ   i   i   i    s   [%f, %f]s   (%f, %f]RL   R"   R`   Rc   (   R!   Rx   Rw   R   RZ   Ri   t	   histogramt   arrayR£   R‰   Rl   t	   enumerateR   RA   R<   (
   R   R   R#   RZ   R“   R”   t   it   bRc   t   lbl(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR    ¦  s    '!	+(   R*   R+   R,   R   R|   R£   R    (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyR¢   ’  s   	
	c         K@  s@   t  |  t ƒ r |  } d } n d } t d | d | d | |  S(   s8   Specify binning or bins to be used for column or values.R"   R!   RZ   N(   R   RH   R   R}   (   R#   R"   R!   RZ   Rp   t   kwargs(    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyRZ   ¾  s
    	R/   R1   R3   R5   R7   t   stddevR;   R=   R?   (-   R,   t
   __future__R    t   numpyRi   t   pandasR   t   sixR   t   bokeh.models.sourcesR   t   bokeh.core.has_propsR   t   bokeh.core.propertiesR   R   R   R   R   R	   R
   R   R   R   R   R   R   R   R.   R0   R2   R4   R6   R8   R:   R<   R>   RA   Re   Rq   R}   R¢   R   RZ   Rs   (    (    (    s-   lib/python2.7/site-packages/bkcharts/stats.pyt   <module>   sD   @NA5=C,