ó
ù`]c           @  s7  d  Z  d d l m Z m Z d d l Z d d l m Z m Z d d l m Z d d l	 m
 Z
 d d l m Z d d l m Z d d	 l m Z y d d
 l m Z Wn, e k
 rÐ d d l Z e j d „ Z n Xe
 e ƒ d e f d „  ƒ  Yƒ Z d „  Z d „  Z d d „ Z e
 e ƒ d e f d „  ƒ  Yƒ Z d S(   u   Language Model Interface.iÿÿÿÿ(   t   divisiont   unicode_literalsN(   t   ABCMetat   abstractmethod(   t   bisect(   t   add_metaclass(   t   NgramCounter(   t	   log_base2(   t
   Vocabulary(   t
   accumulatec         c  s_   t  |  ƒ } y t | ƒ } Wn t k
 r0 d SX| Vx" | D] } | | | ƒ } | Vq= Wd S(   u   Return running totalsN(   t   itert   nextt   StopIteration(   t   iterablet   funct   itt   totalt   element(    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyR	      s    t	   Smoothingc           B  s5   e  Z d  Z d „  Z e d „  ƒ Z e d „  ƒ Z RS(   uë   Ngram Smoothing Interface

    Implements Chen & Goodman 1995's idea that all smoothing algorithms have
    certain features in common. This should ideally allow smoothing algoritms to
    work both with Backoff and Interpolation.
    c         C  s   | |  _  | |  _ d S(   uä   
        :param vocabulary: The Ngram vocabulary object.
        :type vocabulary: nltk.lm.vocab.Vocabulary
        :param counter: The counts of the vocabulary items.
        :type counter: nltk.lm.counter.NgramCounter
        N(   t   vocabt   counts(   t   selft
   vocabularyt   counter(    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   __init__2   s    	c         C  s   t  ƒ  ‚ d  S(   N(   t   NotImplementedError(   R   t   word(    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   unigram_score<   s    c         C  s   t  ƒ  ‚ d  S(   N(   R   (   R   R   t   context(    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   alpha_gamma@   s    (   t   __name__t
   __module__t   __doc__R   R   R   R   (    (    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyR   )   s   	
c         C  s   t  |  ƒ t |  ƒ S(   u0   Return average (aka mean) for sequence of items.(   t   sumt   len(   t   items(    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   _meanE   s    c         C  s#   t  |  t j ƒ r |  St j |  ƒ S(   N(   t
   isinstancet   randomt   Random(   t   seed_or_generator(    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   _random_generatorJ   s    c         C  sy   |  s t  d ƒ ‚ n  t |  ƒ t | ƒ k r< t  d ƒ ‚ n  t t | ƒ ƒ } | d } | j ƒ  } |  t | | | ƒ S(   u`   Like random.choice, but with weights.

    Heavily inspired by python 3.6 `random.choices`.
    u"   Can't choose from empty populationu3   The number of weights does not match the populationiÿÿÿÿ(   t
   ValueErrorR"   t   listR	   R&   R   (   t
   populationt   weightst   random_generatort   cum_weightsR   t	   threshold(    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   _weighted_choiceP   s    
t   LanguageModelc           B  s€   e  Z d  Z d d d „ Z d d „ Z d d „ Z e d d „ ƒ Z d d „ Z	 d „  Z
 d „  Z d „  Z d	 d d d
 „ Z RS(   uK   ABC for Language Models.

    Cannot be directly instantiated itself.

    c         C  sI   | |  _  | d k r t ƒ  n | |  _ | d k r< t ƒ  n | |  _ d S(   u}  Creates new LanguageModel.

        :param vocabulary: If provided, this vocabulary will be used instead
        of creating a new one when training.
        :type vocabulary: `nltk.lm.Vocabulary` or None
        :param counter: If provided, use this object to count ngrams.
        :type vocabulary: `nltk.lm.NgramCounter` or None
        :param ngrams_fn: If given, defines how sentences in training text are turned to ngram
                          sequences.
        :type ngrams_fn: function or None
        :param pad_fn: If given, defines how senteces in training text are padded.
        :type pad_fn: function or None

        N(   t   ordert   NoneR   R   R   R   (   R   R3   R   R   (    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyR   g   s    	c           s[   ˆ  j  s7 | d k r$ t d ƒ ‚ n  ˆ  j  j | ƒ n  ˆ  j j ‡  f d †  | Dƒ ƒ d S(   ue   Trains the model on a text.

        :param text: Training text as a sequence of sentences.

        u:   Cannot fit without a vocabulary or text to create it from.c         3  s!   |  ] } ˆ  j  j | ƒ Vq d  S(   N(   R   t   lookup(   t   .0t   sent(   R   (    s*   lib/python2.7/site-packages/nltk/lm/api.pys	   <genexpr>†   s    N(   R   R4   R*   t   updateR   (   R   t   textt   vocabulary_text(    (   R   s*   lib/python2.7/site-packages/nltk/lm/api.pyt   fitz   s    	c         C  s4   |  j  |  j j | ƒ | r- |  j j | ƒ n d ƒ S(   u©   Masks out of vocab (OOV) words and computes their model score.

        For model-specific logic of calculating scores, see the `unmasked_score`
        method.
        N(   t   unmasked_scoreR   R5   R4   (   R   R   R   (    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   scoreˆ   s    c         C  s   t  ƒ  ‚ d S(   uÏ  Score a word given some optional context.

        Concrete models are expected to provide an implementation.
        Note that this method does not mask its arguments with the OOV label.
        Use the `score` method for that.

        :param str word: Word for which we want the score
        :param tuple(str) context: Context the word is in.
        If `None`, compute unigram score.
        :param context: tuple(str) or None
        :rtype: float

        N(   R   (   R   R   R   (    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyR<   ’   s    c         C  s   t  |  j | | ƒ ƒ S(   u‡   Evaluate the log score of this word in this context.

        The arguments are the same as for `score` and `unmasked_score`.

        (   R   R=   (   R   R   R   (    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   logscore£   s    c         C  s)   | r |  j  t | ƒ d | S|  j  j S(   u²   Helper method for retrieving counts for a given context.

        Assumes context has been checked and oov words in it masked.
        :type context: tuple(str) or None

        i   (   R   R"   t   unigrams(   R   R   (    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   context_counts«   s    c         C  s5   d t  g  | D]  } |  j | d | d  ƒ ^ q ƒ S(   u©   Calculate cross-entropy of model for given evaluation text.

        :param Iterable(tuple(str)) text_ngrams: A sequence of ngram tuples.
        :rtype: float

        iÿÿÿÿ(   R$   R>   (   R   t   text_ngramst   ngram(    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   entropy¶   s    c         C  s   t  d |  j | ƒ ƒ S(   uŽ   Calculates the perplexity of the given text.

        This is simply 2 ** cross-entropy for the text, so the arguments are the same.

        g       @(   t   powRC   (   R   RA   (    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt
   perplexityÁ   s    i   c      	     sN  | d k r g  n	 t | ƒ } t | ƒ } | d k rt | ƒ ˆ j k r] | ˆ j d n | ‰  ˆ j ˆ j j ˆ  ƒ ƒ } xN ˆ  rÎ | rÎ t ˆ  ƒ d k rª ˆ  d n g  ‰  ˆ j ˆ j j ˆ  ƒ ƒ } q Wt | ƒ } t	 | t
 ‡  ‡ f d †  | Dƒ ƒ | ƒ Sg  } x= t | ƒ D]/ } | j ˆ j d d d | | d | ƒ ƒ qW| S(   uÜ  Generate words from the model.

        :param int num_words: How many words to generate. By default 1.
        :param text_seed: Generation can be conditioned on preceding context.
        :param random_seed: A random seed or an instance of `random.Random`. If provided,
        makes the random sampling part of generation reproducible.
        :return: One (str) word or a list of words generated from model.

        Examples:

        >>> from nltk.lm import MLE
        >>> lm = MLE(2)
        >>> lm.fit([[("a", "b"), ("b", "c")]], vocabulary_text=['a', 'b', 'c'])
        >>> lm.fit([[("a",), ("b",), ("c",)]])
        >>> lm.generate(random_seed=3)
        'a'
        >>> lm.generate(text_seed=['a'])
        'b'

        i   c         3  s!   |  ] } ˆ j  | ˆ  ƒ Vq d  S(   N(   R=   (   R6   t   w(   R   R   (    s*   lib/python2.7/site-packages/nltk/lm/api.pys	   <genexpr>ð   s    t	   num_wordst	   text_seedt   random_seedN(   R4   R+   R)   R"   R3   R@   R   R5   t   sortedR1   t   tuplet   ranget   appendt   generate(   R   RG   RH   RI   R.   t   samplest	   generatedt   _(    (   R   R   s*   lib/python2.7/site-packages/nltk/lm/api.pyRN   É   s(    '"&	
N(   R   R   R    R4   R   R;   R=   R   R<   R>   R@   RC   RE   RN   (    (    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyR2   _   s   
			(   R    t
   __future__R    R   R&   t   abcR   R   R   t   sixR   t   nltk.lm.counterR   t   nltk.lm.utilR   t   nltk.lm.vocabularyR   t	   itertoolsR	   t   ImportErrort   operatort   addt   objectR   R$   R)   R4   R1   R2   (    (    (    s*   lib/python2.7/site-packages/nltk/lm/api.pyt   <module>   s(   		