ó
ù`]c           @  s¿   d  Z  d d l m Z d d l Z d d l m Z d d l m Z d d l m	 Z	 y d d l
 Z
 Wn e k
 r{ d Z
 n Xd a d d „ Z e e d „ Z e d	 „ Z d
 „  Z d „  Z d S(   sO  
A set of functions used to interface with the external megam_ maxent
optimization package. Before megam can be used, you should tell NLTK where it
can find the megam binary, using the ``config_megam()`` function. Typical
usage:

    >>> from nltk.classify import megam
    >>> megam.config_megam() # pass path to megam if not found in PATH # doctest: +SKIP
    [Found megam: ...]

Use with MaxentClassifier. Example below, see MaxentClassifier documentation
for details.

    nltk.classify.MaxentClassifier.train(corpus, 'megam')

.. _megam: http://www.umiacs.umd.edu/~hal/megam/index.html
iÿÿÿÿ(   t   print_functionN(   t   string_types(   t   compat(   t   find_binaryc      
   C  s4   t  d |  d d g d d d d d g d d	 ƒa d
 S(   sA  
    Configure NLTK's interface to the ``megam`` maxent optimization
    package.

    :param bin: The full path to the ``megam`` binary.  If not specified,
        then nltk will search the system for a ``megam`` binary; and if
        one is not found, it will raise a ``LookupError`` exception.
    :type bin: str
    t   megamt   env_varst   MEGAMt   binary_namess	   megam.optt	   megam_686s   megam_i686.optt   urls/   http://www.umiacs.umd.edu/~hal/megam/index.htmlN(   R   t
   _megam_bin(   t   bin(    (    s2   lib/python2.7/site-packages/nltk/classify/megam.pyt   config_megam.   s    	c           s  ˆ  j  ƒ  } t d „  t | ƒ Dƒ ƒ } xÓ |  D]Ë \ ‰ ‰ t ˆ  d ƒ ry | j d j ‡  ‡ ‡ f d †  | Dƒ ƒ ƒ n | j d | ˆ ƒ | s³ t ˆ  j ˆ ˆ ƒ | | ƒ n: x7 | D]/ } | j d ƒ t ˆ  j ˆ | ƒ | | ƒ qº W| j d ƒ q/ Wd S(	   sò  
    Generate an input file for ``megam`` based on the given corpus of
    classified tokens.

    :type train_toks: list(tuple(dict, str))
    :param train_toks: Training data, represented as a list of
        pairs, the first member of which is a feature dictionary,
        and the second of which is a classification label.

    :type encoding: MaxentFeatureEncodingI
    :param encoding: A feature encoding, used to convert featuresets
        into feature vectors. May optionally implement a cost() method
        in order to assign different costs to different class predictions.

    :type stream: stream
    :param stream: The stream to which the megam input file should be
        written.

    :param bernoulli: If true, then use the 'bernoulli' format.  I.e.,
        all joint features have binary values, and are listed iff they
        are true.  Otherwise, list feature values explicitly.  If
        ``bernoulli=False``, then you must call ``megam`` with the
        ``-fvals`` option.

    :param explicit: If true, then use the 'explicit' format.  I.e.,
        list the features that would fire for any of the possible
        labels, for each token.  If ``explicit=True``, then you must
        call ``megam`` with the ``-explicit`` option.
    c         s  s!   |  ] \ } } | | f Vq d  S(   N(    (   t   .0t   it   label(    (    s2   lib/python2.7/site-packages/nltk/classify/megam.pys	   <genexpr>g   s    t   costt   :c         3  s*   |  ]  } t  ˆ  j ˆ ˆ | ƒ ƒ Vq d  S(   N(   t   strR   (   R   t   l(   t   encodingt
   featuresetR   (    s2   lib/python2.7/site-packages/nltk/classify/megam.pys	   <genexpr>n   s    s   %ds    #s   
N(   t   labelst   dictt	   enumeratet   hasattrt   writet   joint   _write_megam_featurest   encode(   t
   train_toksR   t   streamt	   bernoullit   explicitR   t   labelnumR   (    (   R   R   R   s2   lib/python2.7/site-packages/nltk/classify/megam.pyt   write_megam_fileG   s    ) c         C  s    t  d k r t d ƒ ‚ n  | s- t d ƒ ‚ |  j ƒ  j d ƒ } t  j | d ƒ } xE | D]= } | j ƒ  r[ | j ƒ  \ } } t | ƒ | t | ƒ <q[ q[ W| S(   sÔ   
    Given the stdout output generated by ``megam`` when training a
    model, return a ``numpy`` array containing the corresponding weight
    vector.  This function does not currently handle bias features.
    s.   This function requires that numpy be installeds   non-explicit not supported yets   
t   dN(	   t   numpyt   Nonet
   ValueErrort   AssertionErrort   stript   splitt   zerost   floatt   int(   t   st   features_countR!   t   linest   weightst   linet   fidt   weight(    (    s2   lib/python2.7/site-packages/nltk/classify/megam.pyt   parse_megam_weightsƒ   s    c         C  s‹   |  s t  d ƒ ‚ n  xo |  D]g \ } } | rl | d k rN | j d | ƒ qƒ | d k rƒ t  d ƒ ‚ qƒ q | j d | | f ƒ q Wd  S(   Ns:   MEGAM classifier requires the use of an always-on feature.i   s    %si    s3   If bernoulli=True, then allfeatures must be binary.s    %s %s(   R'   R   (   t   vectorR   R    R3   t   fval(    (    s2   lib/python2.7/site-packages/nltk/classify/megam.pyR   •   s    c         C  s¾   t  |  t ƒ r t d ƒ ‚ n  t d k r4 t ƒ  n  t g |  } t j | d t j ƒ} | j	 ƒ  \ } } | j
 d k rš t ƒ  t | ƒ t d ƒ ‚ n  t  | t ƒ r­ | S| j d ƒ Sd S(   s=   
    Call the ``megam`` binary with the given arguments.
    s    args should be a list of stringst   stdouti    s   megam command failed!s   utf-8N(   t
   isinstanceR   t	   TypeErrorR
   R&   R   t
   subprocesst   Popent   PIPEt   communicatet
   returncodet   printt   OSErrort   decode(   t   argst   cmdt   pR8   t   stderr(    (    s2   lib/python2.7/site-packages/nltk/classify/megam.pyt
   call_megam¦   s    

(   t   __doc__t
   __future__R    R;   t   sixR   t   nltkR   t   nltk.internalsR   R%   t   ImportErrorR&   R
   R   t   TrueR#   R5   R   RG   (    (    (    s2   lib/python2.7/site-packages/nltk/classify/megam.pyt   <module>   s   
<	