B
    >?ð[Ü	  ã               @   sL   d Z ddlmZ dd„ ZG dd„ deƒZG dd„ deƒZG d	d
„ d
eƒZdS )z…Smoothing algorithms for language modeling.

According to Chen & Goodman 1995 these should work with both Backoff and
Interpolation.
é    )Ú	Smoothingc             C   s   t dd„ |  ¡ D ƒƒS )Nc             s   s   | ]}|d krdV  qdS )r   g      ð?N© )Ú.0Úcr   r   ú0lib/python3.7/site-packages/nltk/lm/smoothing.pyú	<genexpr>   s    z'_count_non_zero_vals.<locals>.<genexpr>)ÚsumÚvalues)Z
dictionaryr   r   r   Ú_count_non_zero_vals   s    r
   c                   sB   e Zd ZdZd‡ fdd„	Zdd„ Zdd„ Zd	d
„ Zdd„ Z‡  Z	S )Ú
WittenBellzWitten-Bell smoothing.çš™™™™™¹?c                s"   t t| ƒj||f|žŽ  || _d S )N)Úsuperr   Ú__init__Úcounts)ÚselfÚ
vocabularyÚcounterÚdiscountÚkwargs)Ú	__class__r   r   r      s    zWittenBell.__init__c             C   s"   |   |¡}d| |  ||¡ |fS )Ng      ð?)ÚgammaÚalpha)r   ÚwordÚcontextr   r   r   r   Úalpha_gamma   s    
zWittenBell.alpha_gammac             C   s   | j j |¡S )N)r   ZunigramsÚfreq)r   r   r   r   r   Úunigram_score    s    zWittenBell.unigram_scorec             C   s   | j |  |¡S )N)r   r   )r   r   r   r   r   r   r   #   s    zWittenBell.alphac             C   s,   t | j| ƒ}||| jt|ƒd   ¡   S )Né   )r
   r   ÚlenÚN)r   r   Zn_plusr   r   r   r   &   s    zWittenBell.gamma)r   )
Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   Ú__classcell__r   r   )r   r   r      s   r   c                   sB   e Zd ZdZd‡ fdd„	Zdd„ Zdd„ Zd	d
„ Zdd„ Z‡  Z	S )Ú	KneserNeyzKneser-Ney Smoothing.çš™™™™™¹?c                s(   t t| ƒj||f|žŽ  || _|| _d S )N)r   r%   r   r   r   )r   r   r   r   r   )r   r   r   r   .   s    zKneserNey.__init__c             C   s   dt | jƒ S )Ng      ð?)r   r   )r   r   r   r   r   r   3   s    zKneserNey.unigram_scorec             C   s    | j | }|  ||¡|  |¡fS )N)r   r   r   )r   r   r   Úprefix_countsr   r   r   r   6   s    
zKneserNey.alpha_gammac             C   s   t || | j dƒ| ¡  S )Ng        )Úmaxr   r   )r   r   r'   r   r   r   r   :   s    zKneserNey.alphac             C   s   | j t|ƒ | ¡  S )N)r   r
   r   )r   r'   r   r   r   r   =   s    zKneserNey.gamma)r&   )
r    r!   r"   r#   r   r   r   r   r   r$   r   r   )r   r   r%   +   s   r%   c                   s(   e Zd ZdZ‡ fdd„Zdd„ Z‡  ZS )Ú
GoodTuringzGood-Turing Smoothingc                s(   t t| ƒj||f|žŽ  || _|| _d S )N)r   r)   r   r   r   )r   r   r   r   )r   r   r   r   C   s    zGoodTuring.__init__c             C   sL   | j | }d}x.| j  ¡ D ] }t| ¡ ƒ|d kr|d7 }qW |t| jƒ S )Ng        r   )r   Úkeysr   Úsplitr   )r   r   Z
word_countZcount_plus_1ZeveryContextr   r   r   r   H   s    
zGoodTuring.unigram_score)r    r!   r"   r#   r   r   r$   r   r   )r   r   r)   A   s   r)   N)r#   Znltk.lm.apir   r
   r   r%   r)   r   r   r   r   Ú<module>   s
   