B
    T\	                 @   sX   d Z ddlmZmZmZ ddlZddlZddl	m
Z
 dd Zdd	 Zd
d Zdd ZdS )uc  Implementation of HyperLogLog

This implements the HyperLogLog algorithm for cardinality estimation, found
in

    Philippe Flajolet, Éric Fusy, Olivier Gandouet and Frédéric Meunier.
        "HyperLogLog: the analysis of a near-optimal cardinality estimation
        algorithm". 2007 Conference on Analysis of Algorithms. Nice, France
        (2007)

    )absolute_importdivisionprint_functionN   )hash_pandas_objectc             C   s<   t j| dt d> }|jddt j}d|jdd S )zGCompute the position of the first nonzero bit for each int in an array.r       )axis!   )npZbitwise_andZouterarangeZcumsumastypeboolsum)abits r   9lib/python3.7/site-packages/dask/dataframe/hyperloglog.pycompute_first_bit   s    r   c       	      C   s   d|  krdksn t dd| }d|> }t| dd}t|tjrL|j}|tj}||? }t	|}t
||d}|d	 d
 }|jt|ddjtjS )N      zb should be between 8 and 16r   r   F)index)j	first_bitr   r   r   )Z
fill_value)
ValueErrorr   
isinstancepdZSeriesZ_valuesr   r
   Zuint32r   Z	DataFramegroupbymaxZreindexr   valuesZuint8)	objbZnum_bits_discardedmZhashesr   r   ZdfZseriesr   r   r   compute_hll_array   s    r"   c             C   s(   d|> }|  t| | |} | jddS )Nr   r   )r   )Zreshapelenr   )Msr    r!   r   r   r   reduce_state8   s    r%   c             C   s   d|> }t | |}ddd|   }|| d|d    | }|d| k rp|dk }|rp|t||  S |dkrd	t| d
  S |S )Nr   gZӼ?g$C?g       @Zf8g      @r   gAl     l        )r%   r   r   r
   logZlog1p)r$   r    r!   MZalphaEVr   r   r   estimate_countA   s    
 r*   )__doc__Z
__future__r   r   r   Znumpyr
   Zpandasr   Zhashingr   r   r"   r%   r*   r   r   r   r   <module>   s   	