B
    >?[}/                 @   s   d Z ddlmZmZ ddlZddlZddlmZ dddZ	dd Z
d	d
 Zdd ZG dd deZdd Zdd ZefddZefddZi adddZdd ZdS )z0
Utility functions and classes for classifiers.
    )print_functiondivisionN)LazyMapc                sF   |dkr|ot |d ttf}|r8 fdd}t||S t |S dS )a  
    Use the ``LazyMap`` class to construct a lazy list-like
    object that is analogous to ``map(feature_func, toks)``.  In
    particular, if ``labeled=False``, then the returned list-like
    object's values are equal to::

        [feature_func(tok) for tok in toks]

    If ``labeled=True``, then the returned list-like object's values
    are equal to::

        [(feature_func(tok), label) for (tok, label) in toks]

    The primary purpose of this function is to avoid the memory
    overhead involved in storing all the featuresets for every token
    in a corpus.  Instead, these featuresets are constructed lazily,
    as-needed.  The reduction in memory overhead can be especially
    significant when the underlying list of tokens is itself lazy (as
    is the case with many corpus readers).

    :param feature_func: The function that will be applied to each
        token.  It should return a featureset -- i.e., a dict
        mapping feature names to feature values.
    :param toks: The list of tokens to which ``feature_func`` should be
        applied.  If ``labeled=True``, then the list elements will be
        passed directly to ``feature_func()``.  If ``labeled=False``,
        then the list elements should be tuples ``(tok,label)``, and
        ``tok`` will be passed to ``feature_func()``.
    :param labeled: If true, then ``toks`` contains labeled tokens --
        i.e., tuples of the form ``(tok, label)``.  (Default:
        auto-detect based on types.)
    Nr   c                s    | d | d fS )Nr       )Zlabeled_token)feature_funcr   1lib/python3.7/site-packages/nltk/classify/util.py	lazy_funcA   s    z!apply_features.<locals>.lazy_func)
isinstancetuplelistr   )r   ZtoksZlabeledr	   r   )r   r   apply_features   s    !
r   c             C   s   t tdd | D S )a!  
    :return: A list of all labels that are attested in the given list
        of tokens.
    :rtype: list of (immutable)
    :param tokens: The list of classified tokens from which to extract
        labels.  A classified token has the form ``(token, label)``.
    :type tokens: list
    c             s   s   | ]\}}|V  qd S )Nr   ).0tokZlabelr   r   r   	<genexpr>R   s    z"attested_labels.<locals>.<genexpr>)r   set)tokensr   r   r   attested_labelsI   s    	r   c             C   s>   |  dd |D }dd t||D }tt|t| S )Nc             S   s   g | ]\}}|qS r   r   )r   fslr   r   r   
<listcomp>V   s    z"log_likelihood.<locals>.<listcomp>c             S   s   g | ]\\}}}| |qS r   )prob)r   r   r   pdistr   r   r   r   W   s    )prob_classify_manyzipmathlogsumlen)
classifiergoldresultsllr   r   r   log_likelihoodU   s    r#   c             C   sD   |  dd |D }dd t||D }|r<t|t| S dS d S )Nc             S   s   g | ]\}}|qS r   r   )r   r   r   r   r   r   r   \   s    zaccuracy.<locals>.<listcomp>c             S   s   g | ]\\}}}||kqS r   r   )r   r   r   rr   r   r   r   ]   s    r   )Zclassify_manyr   r   r   )r   r    r!   Zcorrectr   r   r   accuracy[   s
    r%   c               @   s    e Zd ZdZdd Zdd ZdS )CutoffCheckerz
    A helper class that implements cutoff checks based on number of
    iterations and log likelihood.

    Accuracy cutoffs are also implemented, but they're almost never
    a good idea to use.
    c             C   sR   |  | _d|kr$t|d  |d< d|kr<t|d |d< d | _d | _d| _d S )Nmin_llmin_lldeltar   )copycutoffsabsr"   acciter)selfr*   r   r   r   __init__m   s    
zCutoffChecker.__init__c             C   s  | j }|  jd7  _d|kr.| j|d kr.dS tjj||}t|rLdS d|ks\d|krd|krt||d krtdS d|kr| jr|| j t	|d krdS || _d|ksd|krtjj||}d|kr||d krdS d|kr
| j
r
|| j
 t	|d kr
dS || _
dS d S )	Nr   Zmax_iterTr'   r(   Zmax_accZmin_accdeltaF)r*   r-   nltkZclassifyutilr#   r   Zisnanr"   r+   r,   )r.   r   Z
train_toksr*   Znew_llZnew_accr   r   r   checkw   s2    

zCutoffChecker.checkN)__name__
__module____qualname____doc__r/   r2   r   r   r   r   r&   d   s   
r&   c             C   sh   i }d|d< | d   |d< | d   |d< x6dD ].}|   ||d| < ||   k|d	| < q2W |S )
NTalwaysonr   
startswithendswithabcdefghijklmnopqrstuvwxyzz	count(%s)zhas(%s))lowercount)namefeaturesletterr   r   r   names_demo_features   s    
rA   c             C   s   i }d|d< | d   dk|d< | d   dk|d< xfdD ]^}|   ||d	| < ||   k|d
| < || d   k|d| < || d   k|d| < q:W |S )NTr7   r   Zaeiouyzstartswith(vowel)r9   zendswith(vowel)r;   z	count(%s)zhas(%s)zstartswith(%s)zendswith(%s))r<   r=   )r>   r?   r@   r   r   r   binary_names_demo_features   s    
rB   c                s  ddl m} dd l}dd |dD dd |dD  }|d || |d d	 }|d	d
 }td |  fdd|D }td t| fdd|D }td|  y fdd|D }	||	}
dd t	||
D }tdt
|t|   t  td xZtt	||
d d D ]@\\}}}|dkr>d}nd}t|||d|df  q"W W n tk
r~   Y nX |S )Nr   )namesc             S   s   g | ]}|d fqS )maler   )r   r>   r   r   r   r      s    znames_demo.<locals>.<listcomp>zmale.txtc             S   s   g | ]}|d fqS )femaler   )r   r>   r   r   r   r      s    z
female.txti@ i  i|  zTraining classifier...c                s   g | ]\}} ||fqS r   r   )r   ng)r?   r   r   r      s    zTesting classifier...c                s   g | ]\}} ||fqS r   r   )r   rF   rG   )r?   r   r   r      s    zAccuracy: %6.4fc                s   g | ]\}} |qS r   r   )r   rF   rG   )r?   r   r   r      s    c             S   s   g | ]\\}}}| |qS r   )logprob)r   r>   r    r   r   r   r   r      s    zAvg. log likelihood: %6.4fzMUnseen Names      P(Male)  P(Female)
----------------------------------------   rD   z  %-15s *%6.4f   %6.4fz  %-15s  %6.4f  *%6.4frE   )nltk.corpusrC   randomwordsseedshuffleprintr%   r   r   r   r   r   r   NotImplementedError)trainerr?   rC   rK   Znamelisttraintestr   r,   test_featuresetspdistsr"   r>   Zgenderr   fmtr   )r?   r   
names_demo   s8    


$
(rW   c                s  ddl m} dd l}|d}|d}|d || || t |d d }t |dd |d d  }d	d
 |dd D dd
 |dd D  }|| td | ||}	td t|	 fdd
|D }
td|
  y fdd
|D }|		|}dd
 t
||D }tdt|t|   t  td xVt
||d d D ]@\\}}}|dkrrd}nd}t|||d|df  qVW W n tk
r   Y nX |	S )Nr   )rC   zmale.txtz
female.txti	 i  i	  i  c             S   s   g | ]}|d fqS )Tr   )r   r>   r   r   r   r      s    z&partial_names_demo.<locals>.<listcomp>i
  c             S   s   g | ]}|d fqS )Fr   )r   r>   r   r   r   r      s    i  zTraining classifier...zTesting classifier...c                s   g | ]\}} ||fqS r   r   )r   rF   m)r?   r   r   r     s    zAccuracy: %6.4fc                s   g | ]\}} |qS r   r   )r   rF   rX   )r?   r   r   r   
  s    c             S   s   g | ]\\}}}| |qS r   )rH   )r   r>   r    r   r   r   r   r     s    zAvg. log likelihood: %6.4fzMUnseen Names      P(Male)  P(Female)
----------------------------------------rI   Tz  %-15s *%6.4f   %6.4fz  %-15s  %6.4f  *%6.4fF)rJ   rC   rK   rL   rM   rN   maprO   r%   r   r   r   r   r   rP   )rQ   r?   rC   rK   Z
male_namesZfemale_namesZpositiveZ	unlabeledrS   r   r,   rT   rU   r"   r>   Zis_maler   rV   r   )r?   r   partial_names_demo   s@    







 
(rZ     c                s~  ddl m} dd l}td |tkr<dd ||D t|< t| d d  }|t|kr`t|}ttdd |D }tdd		|  td
 |
d || |d td|  }|td| | }	td |  fdd|D }
td t|
 fdd|	D }td|  yL fdd|	D }|
|}dd t|	|D }tdt|t|	   W n tk
rx   Y nX |
S )Nr   )sensevalzReading data...c             S   s   g | ]}||j d  fqS )r   )senses)r   ir   r   r   r   (  s    zwsd_demo.<locals>.<listcomp>c             s   s   | ]\}}|V  qd S )Nr   )r   r^   r   r   r   r   r   ,  s    zwsd_demo.<locals>.<genexpr>z
  Senses:  zSplitting into test & train...i@ g?zTraining classifier...c                s   g | ]\}} ||fqS r   r   )r   r^   r   )r?   r   r   r   8  s    zTesting classifier...c                s   g | ]\}} ||fqS r   r   )r   r^   r   )r?   r   r   r   <  s    zAccuracy: %6.4fc                s   g | ]\}} |qS r   r   )r   r^   rF   )r?   r   r   r   B  s    c             S   s   g | ]\\}}}| |qS r   )rH   )r   r>   r    r   r   r   r   r   D  s    zAvg. log likelihood: %6.4f)rJ   r\   rK   rO   _inst_cache	instancesr   r   r   joinrM   rN   intr%   r   r   r   rP   )rQ   Zwordr?   rF   r\   rK   ra   r]   rR   rS   r   r,   rT   rU   r"   r   )r?   r   wsd_demo   s8    


rd   c              C   s2   yt  W n$ tk
r,   td} t| Y nX dS )z8
    Checks whether the MEGAM binary is configured.
    z\Please configure your megam binary first, e.g.
>>> nltk.config_megam('/usr/bin/local/megam')N)Z
_megam_bin	NameErrorstr)Zerr_msgr   r   r   check_megam_configM  s    rg   )N)r[   )r6   Z
__future__r   r   r   Znltk.classify.utilr0   Z	nltk.utilr   r   r   r#   r%   objectr&   rA   rB   rW   rZ   r`   rd   rg   r   r   r   r   <module>   s    

-	<.7
-