ó
ù`]c           @  sÊ   d  Z  d d l m Z m Z d d l Z d d l Z d d l m Z d d „ Z
 d „  Z d „  Z d „  Z d	 e f d
 „  ƒ  YZ d „  Z d „  Z e d „ Z e d „ Z i  a d d „ Z d „  Z d S(   s0   
Utility functions and classes for classifiers.
iÿÿÿÿ(   t   print_functiont   divisionN(   t   LazyMapc           sa   | d k r. | o( t | d t t f ƒ } n  | rP ‡  f d †  } t | | ƒ St ˆ  | ƒ Sd S(   sÖ  
    Use the ``LazyMap`` class to construct a lazy list-like
    object that is analogous to ``map(feature_func, toks)``.  In
    particular, if ``labeled=False``, then the returned list-like
    object's values are equal to::

        [feature_func(tok) for tok in toks]

    If ``labeled=True``, then the returned list-like object's values
    are equal to::

        [(feature_func(tok), label) for (tok, label) in toks]

    The primary purpose of this function is to avoid the memory
    overhead involved in storing all the featuresets for every token
    in a corpus.  Instead, these featuresets are constructed lazily,
    as-needed.  The reduction in memory overhead can be especially
    significant when the underlying list of tokens is itself lazy (as
    is the case with many corpus readers).

    :param feature_func: The function that will be applied to each
        token.  It should return a featureset -- i.e., a dict
        mapping feature names to feature values.
    :param toks: The list of tokens to which ``feature_func`` should be
        applied.  If ``labeled=True``, then the list elements will be
        passed directly to ``feature_func()``.  If ``labeled=False``,
        then the list elements should be tuples ``(tok,label)``, and
        ``tok`` will be passed to ``feature_func()``.
    :param labeled: If true, then ``toks`` contains labeled tokens --
        i.e., tuples of the form ``(tok, label)``.  (Default:
        auto-detect based on types.)
    i    c           s   ˆ  |  d ƒ |  d f S(   Ni    i   (    (   t   labeled_token(   t   feature_func(    s1   lib/python2.7/site-packages/nltk/classify/util.pyt	   lazy_funcA   s    N(   t   Nonet
   isinstancet   tuplet   listR   (   R   t   tokst   labeledR   (    (   R   s1   lib/python2.7/site-packages/nltk/classify/util.pyt   apply_features   s    !"c         C  s   t  t d „  |  Dƒ ƒ ƒ S(   s!  
    :return: A list of all labels that are attested in the given list
        of tokens.
    :rtype: list of (immutable)
    :param tokens: The list of classified tokens from which to extract
        labels.  A classified token has the form ``(token, label)``.
    :type tokens: list
    c         s  s   |  ] \ } } | Vq d  S(   N(    (   t   .0t   tokt   label(    (    s1   lib/python2.7/site-packages/nltk/classify/util.pys	   <genexpr>R   s    (   R   t   set(   t   tokens(    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   attested_labelsI   s    	c         C  s|   |  j  g  | D] \ } } | ^ q ƒ } g  t | | ƒ D]! \ \ } } } | j | ƒ ^ q8 } t j t | ƒ t | ƒ ƒ S(   N(   t   prob_classify_manyt   zipt   probt   matht   logt   sumt   len(   t
   classifiert   goldt   fst   lt   resultst   pdistt   ll(    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   log_likelihoodU   s    (7c         C  s~   |  j  g  | D] \ } } | ^ q ƒ } g  t | | ƒ D] \ \ } } } | | k ^ q8 } | rv t | ƒ t | ƒ Sd Sd  S(   Ni    (   t   classify_manyR   R   R   (   R   R   R   R   R   t   rt   correct(    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   accuracy[   s
    (4t   CutoffCheckerc           B  s    e  Z d  Z d „  Z d „  Z RS(   sÉ   
    A helper class that implements cutoff checks based on number of
    iterations and log likelihood.

    Accuracy cutoffs are also implemented, but they're almost never
    a good idea to use.
    c         C  su   | j  ƒ  |  _ d | k r3 t | d ƒ | d <n  d | k rV t | d ƒ | d <n  d  |  _ d  |  _ d |  _ d  S(   Nt   min_llt   min_lldeltai   (   t   copyt   cutoffst   absR   R    t   acct   iter(   t   selfR*   (    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   __init__m   s    		c         C  sw  |  j  } |  j d 7_ d | k r; |  j | d k r; t St j j j | | ƒ } t j | ƒ rf t Sd | k s~ d | k rà d | k rž | | d k rž t Sd | k rÔ |  j	 rÔ | |  j	 t
 | d ƒ k rÔ t S| |  _	 n  d | k sø d | k rst j j j | | ƒ } d | k r0| | d k r0t Sd | k rf|  j rf| |  j t
 | d ƒ k rft S| |  _ t Sd  S(   Ni   t   max_iterR'   R(   t   max_acct   min_accdelta(   R*   R-   t   Truet   nltkt   classifyt   utilR!   R   t   isnanR    R+   R,   t   False(   R.   R   t
   train_toksR*   t   new_llt   new_acc(    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   checkw   s2    				(   t   __name__t
   __module__t   __doc__R/   R<   (    (    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyR&   d   s   	
c         C  s„   i  } t  | d <|  d j ƒ  | d <|  d j ƒ  | d <xE d D]= } |  j ƒ  j | ƒ | d | <| |  j ƒ  k | d | <q? W| S(	   Nt   alwaysoni    t
   startswithiÿÿÿÿt   endswitht   abcdefghijklmnopqrstuvwxyzs	   count(%s)s   has(%s)(   R3   t   lowert   count(   t   namet   featurest   letter(    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   names_demo_features    s    
c         C  sÌ   i  } t  | d <|  d j ƒ  d k | d <|  d j ƒ  d k | d <x d D]y } |  j ƒ  j | ƒ | d | <| |  j ƒ  k | d	 | <| |  d j ƒ  k | d
 | <| |  d j ƒ  k | d | <qK W| S(   NR@   i    t   aeiouys   startswith(vowel)iÿÿÿÿs   endswith(vowel)RC   s	   count(%s)s   has(%s)s   startswith(%s)s   endswith(%s)(   R3   RD   RE   (   RF   RG   RH   (    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   binary_names_demo_features«   s    
"c         C  sT  d d l  m } d d  l } g  | j d ƒ D] } | d f ^ q, g  | j d ƒ D] } | d f ^ qQ } | j d ƒ | j | ƒ | d  } | d d	 !} t d
 ƒ |  g  | D] \ } }	 | | ƒ |	 f ^ q¯ ƒ }
 t d ƒ t |
 g  | D] \ } }	 | | ƒ |	 f ^ qí ƒ } t d | ƒ yg  | D] \ } }	 | | ƒ ^ q,} |
 j | ƒ } g  t	 | | ƒ D]! \ \ } } } | j
 | ƒ ^ qi} t d t | ƒ t | ƒ ƒ t ƒ  t d d d ƒ xq t t	 | | ƒ ƒ d  D]V \ \ } } } | d k rd } n d } t | | | j d ƒ | j d ƒ f ƒ qáWWn t k
 rOn X|
 S(   Niÿÿÿÿ(   t   namess   male.txtt   males
   female.txtt   femalei@â iˆ  i|  s   Training classifier...s   Testing classifier...s   Accuracy: %6.4fs   Avg. log likelihood: %6.4fs%   Unseen Names      P(Male)  P(Female)
t   -i(   i   s     %-15s *%6.4f   %6.4fs     %-15s  %6.4f  *%6.4f(   t   nltk.corpusRL   t   randomt   wordst   seedt   shufflet   printR%   R   R   t   logprobR   R   R	   R   t   NotImplementedError(   t   trainerRG   RL   RQ   RF   t   namelistt   traint   testt   nt   gR   R,   t   test_featuresetst   pdistsR   R   R    t   gendert   fmt(    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt
   names_demo¸   s8    %)

1
4%7,	1c         C  sz  d d l  m } d d  l } | j d ƒ } | j d ƒ } | j d ƒ | j | ƒ | j | ƒ t | | d  ƒ } t | | d d !| d  ƒ } g  | d d	 !D] } | t f ^ q  g  | d d
 !D] } | t f ^ qÃ }	 | j |	 ƒ t	 d ƒ |  | | ƒ }
 t	 d ƒ t
 |
 g  |	 D] \ } } | | ƒ | f ^ qƒ } t	 d | ƒ yg  |	 D] \ } } | | ƒ ^ qX} |
 j | ƒ } g  t |	 | ƒ D]! \ \ } } } | j | ƒ ^ q•} t	 d t | ƒ t |	 ƒ ƒ t	 ƒ  t	 d d d ƒ xk t |	 | ƒ d  D]V \ \ } } } | t k r.d } n d } t	 | | | j t ƒ | j t ƒ f ƒ qWWn t k
 run X|
 S(   Niÿÿÿÿ(   RL   s   male.txts
   female.txtiñû	 iÐ  iÄ	  iô  i¾
  iî  s   Training classifier...s   Testing classifier...s   Accuracy: %6.4fs   Avg. log likelihood: %6.4fs%   Unseen Names      P(Male)  P(Female)
RO   i(   i   s     %-15s *%6.4f   %6.4fs     %-15s  %6.4f  *%6.4f(   RP   RL   RQ   RR   RS   RT   t   mapR3   R8   RU   R%   R   R   RV   R   R   R   RW   (   RX   RG   RL   RQ   t
   male_namest   female_namest   positivet	   unlabeledRF   R[   R   R\   t   mR,   R^   R_   R   R   R    t   is_maleRa   (    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   partial_names_demoæ   s@    #'

4%7&	1iè  c         C  sB  d d l  m } d d  l } t d ƒ | t k rh g  | j | ƒ D] } | | j d f ^ qB t | <n  t | } | t | ƒ k r” t | ƒ } n  t t	 d „  | Dƒ ƒ ƒ } t d d j
 | ƒ ƒ t d ƒ | j d	 ƒ | j | ƒ | t d
 | ƒ  }	 | t d
 | ƒ | !}
 t d ƒ |  g  |	 D] \ } } | | ƒ | f ^ q*ƒ } t d ƒ t | g  |
 D] \ } } | | ƒ | f ^ qhƒ } t d | ƒ y g  |
 D] \ } } | | ƒ ^ q§} | j | ƒ } g  t |
 | ƒ D]! \ \ } } } | j | ƒ ^ qä} t d t | ƒ t |
 ƒ ƒ Wn t k
 r=n X| S(   Niÿÿÿÿ(   t   sensevals   Reading data...i    c         s  s   |  ] \ } } | Vq d  S(   N(    (   R   t   iR   (    (    s1   lib/python2.7/site-packages/nltk/classify/util.pys	   <genexpr>,  s    s
     Senses: t    s   Splitting into test & train...i@â gš™™™™™é?s   Training classifier...s   Testing classifier...s   Accuracy: %6.4fs   Avg. log likelihood: %6.4f(   RP   Rk   RQ   RU   t   _inst_cachet	   instancest   sensesR   R	   R   t   joinRS   RT   t   intR%   R   R   RV   R   RW   (   RX   t   wordRG   R\   Rk   RQ   Rl   Ro   Rp   RZ   R[   R   R   R,   R^   R_   RF   R   R   R    (    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   wsd_demo   s8    
6

1
4%7"c          C  s8   y t  Wn) t k
 r3 t d ƒ }  t |  ƒ ‚ n Xd S(   s8   
    Checks whether the MEGAM binary is configured.
    s\   Please configure your megam binary first, e.g.
>>> nltk.config_megam('/usr/bin/local/megam')N(   t
   _megam_bint	   NameErrort   str(   t   err_msg(    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   check_megam_configM  s    	(   R?   t
   __future__R    R   R   t   nltk.classify.utilR4   t	   nltk.utilR   R   R   R   R!   R%   t   objectR&   RI   RK   Rb   Rj   Rn   Rt   Ry   (    (    (    s1   lib/python2.7/site-packages/nltk/classify/util.pyt   <module>   s    
-				<		.7-