B
    >?[*                 @   s   d dl mZmZ d dlZd dlmZ d dlmZ eG dd deZ	eG dd de
Zd	d
 Zdd Zdd ZedddgZG dd deZdS )    )print_functionunicode_literalsN)
namedtuple)python_2_unicode_compatiblec               @   st   e Zd ZdZdddZedd Zedd Zd	d
 Zdd Z	eee	Z
dd Zdd Zdd Zdd Zdd ZdS )AlignedSenta#  
    Return an aligned sentence object, which encapsulates two sentences
    along with an ``Alignment`` between them.

    Typically used in machine translation to represent a sentence and
    its translation.

        >>> from nltk.translate import AlignedSent, Alignment
        >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
        >>> algnsent.words
        ['klein', 'ist', 'das', 'Haus']
        >>> algnsent.mots
        ['the', 'house', 'is', 'small']
        >>> algnsent.alignment
        Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
        >>> from nltk.corpus import comtrans
        >>> print(comtrans.aligned_sents()[54])
        <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
        >>> print(comtrans.aligned_sents()[54].alignment)
        0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13

    :param words: Words in the target language sentence
    :type words: list(str)
    :param mots: Words in the source language sentence
    :type mots: list(str)
    :param alignment: Word-level alignments between ``words`` and ``mots``.
        Each alignment is represented as a 2-tuple (words_index, mots_index).
    :type alignment: Alignment
    Nc             C   s:   || _ || _|d kr tg | _nt|tks0t|| _d S )N)_words_mots	Alignment	alignmenttypeAssertionError)selfwordsmotsr
    r   1lib/python3.7/site-packages/nltk/translate/api.py__init__3   s    zAlignedSent.__init__c             C   s   | j S )N)r   )r   r   r   r   r   <   s    zAlignedSent.wordsc             C   s   | j S )N)r   )r   r   r   r   r   @   s    zAlignedSent.motsc             C   s   | j S )N)
_alignment)r   r   r   r   _get_alignmentD   s    zAlignedSent._get_alignmentc             C   s"   t t| jt| j| || _d S )N)_check_alignmentlenr   r   r   )r   r
   r   r   r   _set_alignmentG   s    zAlignedSent._set_alignmentc             C   sD   dd dd | jD  }dd dd | jD  }d||| jf S )z_
        Return a string representation for this ``AlignedSent``.

        :rtype: str
        z[%s]z, c             s   s   | ]}d | V  qdS )z'%s'Nr   ).0wr   r   r   	<genexpr>S   s    z'AlignedSent.__repr__.<locals>.<genexpr>c             s   s   | ]}d | V  qdS )z'%s'Nr   )r   r   r   r   r   r   T   s    zAlignedSent(%s, %s, %r))joinr   r   r   )r   r   r   r   r   r   __repr__M   s    zAlignedSent.__repr__c             C   s<  d}|d7 }x| j D ]}|d||f 7 }qW x| jD ]}|d||f 7 }q4W x.| jD ]$\}}|d| j | | j| f 7 }qTW x:tt| j d D ]$}|d| j | | j |d  f 7 }qW x:tt| jd D ]$}|d| j| | j|d  f 7 }qW |d	d
dd | j D  7 }|d	d
dd | jD  7 }|d7 }|S )z<
        Dot representation of the aligned sentence
        zgraph align {
znode[shape=plaintext]
z"%s_source" [label="%s"] 
z"%s_target" [label="%s"] 
z"%s_source" -- "%s_target" 
   z)"%s_source" -- "%s_source" [style=invis]
z)"%s_target" -- "%s_target" [style=invis]
z{rank = same; %s}
 c             s   s   | ]}d | V  qdS )z"%s_source"Nr   )r   r   r   r   r   r   y   s    z&AlignedSent._to_dot.<locals>.<genexpr>c             s   s   | ]}d | V  qdS )z"%s_target"Nr   )r   r   r   r   r   r   z   s    })r   r   r   ranger   r   )r   sr   uvir   r   r   _to_dotX   s(     zAlignedSent._to_dotc             C   sl   |   d}d}y$tjdd| gtjtjtjd}W n tk
rR   tdY nX ||\}}|dS )zR
        Ipython magic : show SVG representation of this ``AlignedSent``.
        utf8Zsvgdotz-T%s)stdinstdoutstderrz0Cannot find the dot binary from Graphviz package)	r%   encode
subprocessPopenPIPEOSError	ExceptionZcommunicatedecode)r   Z
dot_stringZoutput_formatZprocessouterrr   r   r   
_repr_svg_   s    
zAlignedSent._repr_svg_c             C   s<   d | jdd d }d | jdd d }d||f S )zn
        Return a human-readable string representation for this ``AlignedSent``.

        :rtype: str
        r   N   z...z<AlignedSent: '%s' -> '%s'>)r   r   r   )r   sourcetargetr   r   r   __str__   s    zAlignedSent.__str__c             C   s   t | j| j| j S )zm
        Return the aligned sentence pair, reversing the directionality

        :rtype: AlignedSent
        )r   r   r   r   invert)r   r   r   r   r9      s    zAlignedSent.invert)N)__name__
__module____qualname____doc__r   propertyr   r   r   r   r
   r   r%   r4   r8   r9   r   r   r   r   r      s   
	
(
r   c               @   sV   e Zd ZdZdd Zedd Zdd Zdd	 ZdddZ	dd Z
dd Zdd Zd
S )r	   ac  
    A storage class for representing alignment between two sequences, s1, s2.
    In general, an alignment is a set of tuples of the form (i, j, ...)
    representing an alignment between the i-th element of s1 and the
    j-th element of s2.  Tuples are extensible (they might contain
    additional data, such as a boolean to indicate sure vs possible alignments).

        >>> from nltk.translate import Alignment
        >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
        >>> a.invert()
        Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
        >>> print(a.invert())
        0-0 1-0 2-1 2-2
        >>> a[0]
        [(0, 1), (0, 0)]
        >>> a.invert()[2]
        [(2, 1), (2, 2)]
        >>> b = Alignment([(0, 0), (0, 1)])
        >>> b.issubset(a)
        True
        >>> c = Alignment.fromstring('0-0 0-1')
        >>> b == c
        True
    c             C   s:   t | |}|t g kr*tdd |D nd|_d |_|S )Nc             s   s   | ]}|d  V  qdS )r   Nr   )r   pr   r   r   r      s    z$Alignment.__new__.<locals>.<genexpr>r   )	frozenset__new__max_len_index)clsZpairsr   r   r   r   rA      s    $zAlignment.__new__c             C   s   t dd | D S )a  
        Read a giza-formatted string and return an Alignment object.

            >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5')
            Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])

        :type s: str
        :param s: the positional alignments in giza format
        :rtype: Alignment
        :return: An Alignment object corresponding to the string representation ``s``.
        c             S   s   g | ]}t |qS r   )
_giza2pair)r   ar   r   r   
<listcomp>   s    z(Alignment.fromstring.<locals>.<listcomp>)r	   split)rE   r!   r   r   r   
fromstring   s    zAlignment.fromstringc             C   s   | j s|   | j |S )zN
        Look up the alignments that map from a given index or slice.
        )rD   _build_index__getitem__)r   keyr   r   r   rL      s    zAlignment.__getitem__c             C   s   t dd | D S )zI
        Return an Alignment object, being the inverted mapping.
        c             s   s*   | ]"}|d  |d f|dd  V  qdS )r   r      Nr   )r   r?   r   r   r   r      s    z#Alignment.invert.<locals>.<genexpr>)r	   )r   r   r   r   r9      s    zAlignment.invertNc             C   sZ   t  }| js|   |s*ttt| j}x&|D ]}|dd | j| D  q0W t|S )z
        Work out the range of the mapping from the given positions.
        If no positions are specified, compute the range of the entire mapping.
        c             s   s   | ]\}}|V  qd S )Nr   )r   _fr   r   r   r      s    z"Alignment.range.<locals>.<genexpr>)setrD   rK   listr    r   updatesorted)r   Z	positionsZimager?   r   r   r   r       s    
zAlignment.rangec             C   s   dt |  S )zM
        Produce a Giza-formatted string representing the alignment.
        zAlignment(%r))rT   )r   r   r   r   r      s    zAlignment.__repr__c             C   s   d dd t| D S )zM
        Produce a Giza-formatted string representing the alignment.
        r   c             s   s   | ]}d |dd  V  qdS )z%d-%dNrN   r   )r   r?   r   r   r   r      s    z$Alignment.__str__.<locals>.<genexpr>)r   rT   )r   r   r   r   r8      s    zAlignment.__str__c             C   s@   dd t | jd D | _x | D ]}| j|d  | q W dS )z
        Build a list self._index such that self._index[i] is a list
        of the alignments originating from word i.
        c             S   s   g | ]}g qS r   r   )r   rO   r   r   r   rH     s    z*Alignment._build_index.<locals>.<listcomp>r   r   N)r    rC   rD   append)r   r?   r   r   r   rK      s    
zAlignment._build_index)N)r:   r;   r<   r=   rA   classmethodrJ   rL   r9   r    r   r8   rK   r   r   r   r   r	      s   
r	   c             C   s   |  d\}}t|t|fS )N-)rI   int)pair_stringr$   jr   r   r   rF   	  s    rF   c             C   s    |  d\}}}t|t|fS )NrW   )rI   rX   )rY   r$   rZ   r?   r   r   r   _naacl2pair  s    r[   c                sP   t |tksttfdd|D s.tdt fdd|D sLtddS )ab  
    Check whether the alignments are legal.

    :param num_words: the number of source language words
    :type num_words: int
    :param num_mots: the number of target language words
    :type num_mots: int
    :param alignment: alignment to be checked
    :type alignment: Alignment
    :raise IndexError: if alignment falls outside the sentence
    c             3   s*   | ]"}d |d    ko k n  V  qdS )r   Nr   )r   pair)	num_wordsr   r   r   "  s    z#_check_alignment.<locals>.<genexpr>z&Alignment is outside boundary of wordsc             3   s6   | ].}|d  dkp,d|d    ko( k n  V  qdS )r   Nr   r   )r   r\   )num_motsr   r   r   $  s    z%Alignment is outside boundary of motsN)r   r	   r   all
IndexError)r]   r^   r
   r   )r^   r]   r   r     s
    r   PhraseTableEntry
trg_phraselog_probc               @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )PhraseTablezs
    In-memory store of translations for a given phrase, and the log
    probability of the those translations
    c             C   s   t  | _d S )N)dictsrc_phrases)r   r   r   r   r   1  s    zPhraseTable.__init__c             C   s
   | j | S )a  
        Get the translations for a source language phrase

        :param src_phrase: Source language phrase of interest
        :type src_phrase: tuple(str)

        :return: A list of target language phrases that are translations
            of ``src_phrase``, ordered in decreasing order of
            likelihood. Each list element is a tuple of the target
            phrase and its log probability.
        :rtype: list(PhraseTableEntry)
        )rf   )r   
src_phraser   r   r   translations_for4  s    zPhraseTable.translations_forc             C   sL   t ||d}|| jkr g | j|< | j| | | j| jdd dd dS )z
        :type src_phrase: tuple(str)
        :type trg_phrase: tuple(str)

        :param log_prob: Log probability that given ``src_phrase``,
            ``trg_phrase`` is its translation
        :type log_prob: float
        )rb   rc   c             S   s   | j S )N)rc   )er   r   r   <lambda>P  s    z!PhraseTable.add.<locals>.<lambda>T)rM   reverseN)ra   rf   rU   sort)r   rg   rb   rc   entryr   r   r   addC  s
    	

zPhraseTable.addc             C   s
   || j kS )N)rf   )r   rg   r   r   r   __contains__R  s    zPhraseTable.__contains__N)r:   r;   r<   r=   r   rh   rn   ro   r   r   r   r   rd   +  s
   rd   )Z
__future__r   r   r,   collectionsr   Znltk.compatr   objectr   r@   r	   rF   r[   r   ra   rd   r   r   r   r   <module>   s    b