ó
ù`]c           @  sÄ   d  d l  m Z m Z d  d l Z d  d l m Z d  d l m Z e d e f d „  ƒ  Yƒ Z	 e d e
 f d „  ƒ  Yƒ Z d	 „  Z d
 „  Z d „  Z e d d d g ƒ Z d e f d „  ƒ  YZ d S(   iÿÿÿÿ(   t   print_functiont   unicode_literalsN(   t
   namedtuple(   t   python_2_unicode_compatiblet   AlignedSentc           B  s†   e  Z d  Z d d „ Z e d „  ƒ Z e d „  ƒ Z d „  Z d „  Z	 e e e	 ƒ Z
 d „  Z d „  Z d „  Z d	 „  Z d
 „  Z RS(   u#  
    Return an aligned sentence object, which encapsulates two sentences
    along with an ``Alignment`` between them.

    Typically used in machine translation to represent a sentence and
    its translation.

        >>> from nltk.translate import AlignedSent, Alignment
        >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
        >>> algnsent.words
        ['klein', 'ist', 'das', 'Haus']
        >>> algnsent.mots
        ['the', 'house', 'is', 'small']
        >>> algnsent.alignment
        Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
        >>> from nltk.corpus import comtrans
        >>> print(comtrans.aligned_sents()[54])
        <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
        >>> print(comtrans.aligned_sents()[54].alignment)
        0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13

    :param words: Words in the target language sentence
    :type words: list(str)
    :param mots: Words in the source language sentence
    :type mots: list(str)
    :param alignment: Word-level alignments between ``words`` and ``mots``.
        Each alignment is represented as a 2-tuple (words_index, mots_index).
    :type alignment: Alignment
    c         C  sU   | |  _  | |  _ | d  k r0 t g  ƒ |  _ n! t | ƒ t k sH t ‚ | |  _ d  S(   N(   t   _wordst   _motst   Nonet	   Alignmentt	   alignmentt   typet   AssertionError(   t   selft   wordst   motsR	   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   __init__3   s    		c         C  s   |  j  S(   N(   R   (   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR   <   s    c         C  s   |  j  S(   N(   R   (   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR   @   s    c         C  s   |  j  S(   N(   t
   _alignment(   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   _get_alignmentD   s    c         C  s/   t  t |  j ƒ t |  j ƒ | ƒ | |  _ d  S(   N(   t   _check_alignmentt   lenR   R   R   (   R   R	   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   _set_alignmentG   s    "c         C  sT   d d j  d „  |  j Dƒ ƒ } d d j  d „  |  j Dƒ ƒ } d | | |  j f S(   u_   
        Return a string representation for this ``AlignedSent``.

        :rtype: str
        u   [%s]u   , c         s  s   |  ] } d  | Vq d S(   u   '%s'N(    (   t   .0t   w(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>S   s    c         s  s   |  ] } d  | Vq d S(   u   '%s'N(    (   R   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>T   s    u   AlignedSent(%s, %s, %r)(   t   joinR   R   R   (   R   R   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   __repr__M   s      c         C  s†  d } | d 7} x% |  j  D] } | d | | f 7} q Wx% |  j D] } | d | | f 7} qB Wx9 |  j D]. \ } } | d |  j  | |  j | f 7} qj WxG t t |  j  ƒ d ƒ D], } | d |  j  | |  j  | d f 7} q¶ WxG t t |  j ƒ d ƒ D], } | d |  j | |  j | d f 7} q W| d	 d
 j d „  |  j  Dƒ ƒ 7} | d	 d
 j d „  |  j Dƒ ƒ 7} | d 7} | S(   u<   
        Dot representation of the aligned sentence
        u   graph align {
u   node[shape=plaintext]
u   "%s_source" [label="%s"] 
u   "%s_target" [label="%s"] 
u   "%s_source" -- "%s_target" 
i   u)   "%s_source" -- "%s_source" [style=invis]
u)   "%s_target" -- "%s_target" [style=invis]
u   {rank = same; %s}
u    c         s  s   |  ] } d  | Vq d S(   u   "%s_source"N(    (   R   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>y   s    c         s  s   |  ] } d  | Vq d S(   u   "%s_target"N(    (   R   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>z   s    u   }(   R   R   R   t   rangeR   R   (   R   t   sR   t   ut   vt   i(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   _to_dotX   s(    
& 
 
$$
c         C  s•   |  j  ƒ  j d ƒ } d } y8 t j d d | g d t j d t j d t j ƒ} Wn t k
 rr t d ƒ ‚ n X| j | ƒ \ } } | j d ƒ S(	   uR   
        Ipython magic : show SVG representation of this ``AlignedSent``.
        u   utf8u   svgu   dotu   -T%st   stdint   stdoutt   stderru0   Cannot find the dot binary from Graphviz package(	   R   t   encodet
   subprocesst   Popent   PIPEt   OSErrort	   Exceptiont   communicatet   decode(   R   t
   dot_stringt   output_formatt   processt   outt   err(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt
   _repr_svg_€   s    		c         C  sB   d j  |  j ƒ d  d } d j  |  j ƒ d  d } d | | f S(   un   
        Return a human-readable string representation for this ``AlignedSent``.

        :rtype: str
        u    i   u   ...u   <AlignedSent: '%s' -> '%s'>(   R   R   R   (   R   t   sourcet   target(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   __str__“   s    c         C  s   t  |  j |  j |  j j ƒ  ƒ S(   um   
        Return the aligned sentence pair, reversing the directionality

        :rtype: AlignedSent
        (   R   R   R   R   t   invert(   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR3      s    N(   t   __name__t
   __module__t   __doc__R   R   t   propertyR   R   R   R   R	   R   R   R/   R2   R3   (    (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR      s   					(		
R   c           B  s_   e  Z d  Z d „  Z e d „  ƒ Z d „  Z d „  Z d	 d „ Z	 d „  Z
 d „  Z d „  Z RS(
   uc  
    A storage class for representing alignment between two sequences, s1, s2.
    In general, an alignment is a set of tuples of the form (i, j, ...)
    representing an alignment between the i-th element of s1 and the
    j-th element of s2.  Tuples are extensible (they might contain
    additional data, such as a boolean to indicate sure vs possible alignments).

        >>> from nltk.translate import Alignment
        >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
        >>> a.invert()
        Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
        >>> print(a.invert())
        0-0 1-0 2-1 2-2
        >>> a[0]
        [(0, 1), (0, 0)]
        >>> a.invert()[2]
        [(2, 1), (2, 2)]
        >>> b = Alignment([(0, 0), (0, 1)])
        >>> b.issubset(a)
        True
        >>> c = Alignment.fromstring('0-0 0-1')
        >>> b == c
        True
    c         C  sP   t  j |  | ƒ } | t  g  ƒ k r: t d „  | Dƒ ƒ n d | _ d  | _ | S(   Nc         s  s   |  ] } | d  Vq d S(   i    N(    (   R   t   p(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>Ã   s    i    (   t	   frozensett   __new__t   maxt   _lenR   t   _index(   t   clst   pairsR   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR:   Á   s    1	c         C  s)   t  g  | j ƒ  D] } t | ƒ ^ q ƒ S(   u  
        Read a giza-formatted string and return an Alignment object.

            >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5')
            Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])

        :type s: str
        :param s: the positional alignments in giza format
        :rtype: Alignment
        :return: An Alignment object corresponding to the string representation ``s``.
        (   R   t   splitt
   _giza2pair(   R>   R   t   a(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt
   fromstringÇ   s    c         C  s&   |  j  s |  j ƒ  n  |  j  j | ƒ S(   uN   
        Look up the alignments that map from a given index or slice.
        (   R=   t   _build_indext   __getitem__(   R   t   key(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyRE   ×   s    	c         C  s   t  d „  |  Dƒ ƒ S(   uI   
        Return an Alignment object, being the inverted mapping.
        c         s  s+   |  ]! } | d  | d f | d Vq d S(   i   i    i   N(    (   R   R8   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>ã   s    (   R   (   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR3   ß   s    c         C  s|   t  ƒ  } |  j s |  j ƒ  n  | sC t t t |  j ƒ ƒ ƒ } n  x, | D]$ } | j d „  |  j | Dƒ ƒ qJ Wt | ƒ S(   u   
        Work out the range of the mapping from the given positions.
        If no positions are specified, compute the range of the entire mapping.
        c         s  s   |  ] \ } } | Vq d  S(   N(    (   R   t   _t   f(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>ð   s    (   t   setR=   RD   t   listR   R   t   updatet   sorted(   R   t	   positionst   imageR8   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR   å   s    		"c         C  s   d t  |  ƒ S(   uM   
        Produce a Giza-formatted string representing the alignment.
        u   Alignment(%r)(   RL   (   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR   ó   s    c         C  s   d j  d „  t |  ƒ Dƒ ƒ S(   uM   
        Produce a Giza-formatted string representing the alignment.
        u    c         s  s   |  ] } d  | d  Vq d S(   u   %d-%di   N(    (   R   R8   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>ý   s    (   R   RL   (   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR2   ù   s    c         C  sV   g  t  |  j d ƒ D] } g  ^ q |  _ x& |  D] } |  j | d j | ƒ q0 Wd S(   u€   
        Build a list self._index such that self._index[i] is a list
        of the alignments originating from word i.
        i   i    N(   R   R<   R=   t   append(   R   RG   R8   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyRD   ÿ   s    )N(   R4   R5   R6   R:   t   classmethodRC   RE   R3   R   R   R   R2   RD   (    (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR   ¦   s   					c         C  s+   |  j  d ƒ \ } } t | ƒ t | ƒ f S(   Nu   -(   R@   t   int(   t   pair_stringR   t   j(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyRA   	  s    c         C  s.   |  j  d ƒ \ } } } t | ƒ t | ƒ f S(   Nu   -(   R@   RQ   (   RR   R   RS   R8   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   _naacl2pair  s    c           sr   t  | ƒ t k s t ‚ t ‡ f d †  | Dƒ ƒ sC t d ƒ ‚ n  t ‡  f d †  | Dƒ ƒ sn t d ƒ ‚ n  d S(   ub  
    Check whether the alignments are legal.

    :param num_words: the number of source language words
    :type num_words: int
    :param num_mots: the number of target language words
    :type num_mots: int
    :param alignment: alignment to be checked
    :type alignment: Alignment
    :raise IndexError: if alignment falls outside the sentence
    c         3  s/   |  ]% } d  | d  k o$ ˆ  k  n Vq d S(   i    N(    (   R   t   pair(   t	   num_words(    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>"  s    u&   Alignment is outside boundary of wordsc         3  s?   |  ]5 } | d  d k p6 d | d  k o4 ˆ  k  n Vq d S(   i   i    N(   R   (   R   RU   (   t   num_mots(    s1   lib/python2.7/site-packages/nltk/translate/api.pys	   <genexpr>$  s    u%   Alignment is outside boundary of motsN(   R
   R   R   t   allt
   IndexError(   RV   RW   R	   (    (   RW   RV   s1   lib/python2.7/site-packages/nltk/translate/api.pyR     s
    u   PhraseTableEntryu
   trg_phraseu   log_probt   PhraseTablec           B  s2   e  Z d  Z d „  Z d „  Z d „  Z d „  Z RS(   us   
    In-memory store of translations for a given phrase, and the log
    probability of the those translations
    c         C  s   t  ƒ  |  _ d  S(   N(   t   dictt   src_phrases(   R   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyR   1  s    c         C  s   |  j  | S(   uÅ  
        Get the translations for a source language phrase

        :param src_phrase: Source language phrase of interest
        :type src_phrase: tuple(str)

        :return: A list of target language phrases that are translations
            of ``src_phrase``, ordered in decreasing order of
            likelihood. Each list element is a tuple of the target
            phrase and its log probability.
        :rtype: list(PhraseTableEntry)
        (   R\   (   R   t
   src_phrase(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   translations_for4  s    c         C  sl   t  d | d | ƒ } | |  j k r4 g  |  j | <n  |  j | j | ƒ |  j | j d d „  d t ƒ d S(   uä   
        :type src_phrase: tuple(str)
        :type trg_phrase: tuple(str)

        :param log_prob: Log probability that given ``src_phrase``,
            ``trg_phrase`` is its translation
        :type log_prob: float
        t
   trg_phraset   log_probRF   c         S  s   |  j  S(   N(   R`   (   t   e(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   <lambda>P  t    t   reverseN(   t   PhraseTableEntryR\   RO   t   sortt   True(   R   R]   R_   R`   t   entry(    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   addC  s
    	c         C  s   | |  j  k S(   N(   R\   (   R   R]   (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   __contains__R  s    (   R4   R5   R6   R   R^   Ri   Rj   (    (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyRZ   +  s
   			(   t
   __future__R    R   R#   t   collectionsR   t   nltk.compatR   t   objectR   R9   R   RA   RT   R   Re   RZ   (    (    (    s1   lib/python2.7/site-packages/nltk/translate/api.pyt   <module>   s   “b			