B
    >?[                 @   s   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 yddl
Z
W n ek
r`   dZ
Y nX dadddZdd
dZdddZdd Zdd ZdS )aO  
A set of functions used to interface with the external megam_ maxent
optimization package. Before megam can be used, you should tell NLTK where it
can find the megam binary, using the ``config_megam()`` function. Typical
usage:

    >>> from nltk.classify import megam
    >>> megam.config_megam() # pass path to megam if not found in PATH # doctest: +SKIP
    [Found megam: ...]

Use with MaxentClassifier. Example below, see MaxentClassifier documentation
for details.

    nltk.classify.MaxentClassifier.train(corpus, 'megam')

.. _megam: http://www.umiacs.umd.edu/~hal/megam/index.html
    )print_functionN)string_types)compat)find_binaryc             C   s    t d| dgddddgddadS )	aA  
    Configure NLTK's interface to the ``megam`` maxent optimization
    package.

    :param bin: The full path to the ``megam`` binary.  If not specified,
        then nltk will search the system for a ``megam`` binary; and if
        one is not found, it will raise a ``LookupError`` exception.
    :type bin: str
    ZmegamZMEGAMz	megam.optZ	megam_686zmegam_i686.optz/http://www.umiacs.umd.edu/~hal/megam/index.html)Zenv_varsZbinary_namesZurlN)r   
_megam_bin)bin r   2lib/python3.7/site-packages/nltk/classify/megam.pyconfig_megam.   s    
r
   Tc                s      }tdd t|D }x| D ]\t drZ|d fdd|D  n|d|   |st || n,x*|D ]"}|d t ||| qW |d q$W d	S )
a  
    Generate an input file for ``megam`` based on the given corpus of
    classified tokens.

    :type train_toks: list(tuple(dict, str))
    :param train_toks: Training data, represented as a list of
        pairs, the first member of which is a feature dictionary,
        and the second of which is a classification label.

    :type encoding: MaxentFeatureEncodingI
    :param encoding: A feature encoding, used to convert featuresets
        into feature vectors. May optionally implement a cost() method
        in order to assign different costs to different class predictions.

    :type stream: stream
    :param stream: The stream to which the megam input file should be
        written.

    :param bernoulli: If true, then use the 'bernoulli' format.  I.e.,
        all joint features have binary values, and are listed iff they
        are true.  Otherwise, list feature values explicitly.  If
        ``bernoulli=False``, then you must call ``megam`` with the
        ``-fvals`` option.

    :param explicit: If true, then use the 'explicit' format.  I.e.,
        list the features that would fire for any of the possible
        labels, for each token.  If ``explicit=True``, then you must
        call ``megam`` with the ``-explicit`` option.
    c             s   s   | ]\}}||fV  qd S )Nr   ).0ilabelr   r   r	   	<genexpr>g   s    z#write_megam_file.<locals>.<genexpr>cost:c             3   s    | ]}t  |V  qd S )N)strr   )r   l)encoding
featuresetr   r   r	   r   n   s    z%dz #
N)labelsdict	enumeratehasattrwritejoin_write_megam_featuresencode)Z
train_toksr   stream	bernoulliexplicitr   Zlabelnumr   r   )r   r   r   r	   write_megam_fileG   s    
 

r!   c             C   sl   t dkrtd|std|  d}t |d}x0|D ](}| r<| \}}t||t|< q<W |S )z
    Given the stdout output generated by ``megam`` when training a
    model, return a ``numpy`` array containing the corresponding weight
    vector.  This function does not currently handle bias features.
    Nz.This function requires that numpy be installedznon-explicit not supported yetr   d)numpy
ValueErrorAssertionErrorstripsplitZzerosfloatint)sZfeatures_countr    linesZweightslinefidZweightr   r   r	   parse_megam_weights   s    
r.   c             C   sb   | st dxP| D ]H\}}|rH|dkr6|d|  qZ|dkrZt dq|d||f  qW d S )Nz:MEGAM classifier requires the use of an always-on feature.   z %sr   z3If bernoulli=True, then allfeatures must be binary.z %s %s)r$   r   )Zvectorr   r   r-   Zfvalr   r   r	   r      s    r   c             C   s   t | trtdtdkr t  tg|  }tj|tjd}| \}}|j	dkrft
  t
| tdt |trt|S |dS dS )z=
    Call the ``megam`` binary with the given arguments.
    z args should be a list of stringsN)stdoutr   zmegam command failed!zutf-8)
isinstancer   	TypeErrorr   r
   
subprocessPopenPIPEZcommunicate
returncodeprintOSErrordecode)argscmdpr0   stderrr   r   r	   
call_megam   s    



r>   )N)TT)T)__doc__Z
__future__r   r3   Zsixr   Znltkr   Znltk.internalsr   r#   ImportErrorr   r
   r!   r.   r   r>   r   r   r   r	   <module>   s   


<
