B
    P?[ F                 @   s  d Z ddlmZmZ ddlmZ ddlmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZ G dd deZG dd	 d	eeZG d
d deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$G dd de Z%G dd de!Z&G dd deZ'e e e  e! e" gZ(e e e e gZ)e e e e gZ*e e e e gZ+e e e# gZ,G dd deZ-G dd de-Z.G d d! d!e-Z/G d"d# d#e-Z0G d$d% d%e-Z1G d&d' d'e-Z2e e e% e& e' gZ3e e e e$ gZ4e e e e$ gZ5e e e e$ gZ6G d(d) d)e-eZ7G d*d+ d+e7Z8G d,d- d-e7Z9G d.d/ d/e7Z:G d0d1 d1e7Z;d;d7d8Z<e=d9kre<  d:S )<a  
Data classes and parser implementations for *incremental* chart
parsers, which use dynamic programming to efficiently parse a text.
A "chart parser" derives parse trees for a text by iteratively adding
"edges" to a "chart".  Each "edge" represents a hypothesis about the tree
structure for a subsequence of the text.  The "chart" is a
"blackboard" for composing and combining these hypotheses.

A parser is "incremental", if it guarantees that for all i, j where i < j,
all edges ending at i are built before any edges ending at j.
This is appealing for, say, speech recognizer hypothesis filtering.

The main parser class is ``EarleyChartParser``, which is a top-down
algorithm, originally formulated by Jay Earley (1970).
    )print_functiondivision)range)ChartChartParserEdgeILeafEdgeLeafInitRuleBottomUpPredictRuleBottomUpPredictCombineRuleTopDownInitRuleSingleEdgeFundamentalRuleEmptyPredictRuleCachedTopDownPredictRule!FilteredSingleEdgeFundamentalRule"FilteredBottomUpPredictCombineRule)FeatureChartFeatureChartParserFeatureTopDownInitRuleFeatureTopDownPredictRuleFeatureEmptyPredictRuleFeatureBottomUpPredictRule!FeatureBottomUpPredictCombineRule FeatureSingleEdgeFundamentalRulec               @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )IncrementalChartc             C   s(   t dd |  D | _i | _i | _d S )Nc             s   s   | ]
}g V  qd S )N ).0xr   r   5lib/python3.7/site-packages/nltk/parse/earleychart.py	<genexpr>B   s    z.IncrementalChart.initialize.<locals>.<genexpr>)tuple
_positions
_edgelistsZ_edge_to_cpls_indexes)selfr   r   r   
initialize@   s    zIncrementalChart.initializec             C   s   t |  S )N)list	iteredges)r$   r   r   r   edgesK   s    zIncrementalChart.edgesc             C   s   dd | j D S )Nc             s   s   | ]}|D ]
}|V  q
qd S )Nr   )r   edgelistedger   r   r   r   O   s    z-IncrementalChart.iteredges.<locals>.<genexpr>)r"   )r$   r   r   r   r'   N   s    zIncrementalChart.iteredgesc                sr   | j | } i krt|S t  }t|}|| jkrB| | t fdd|D }t| j| | |g S )Nc             3   s   | ]} | V  qd S )Nr   )r   key)restrictionsr   r   r   `   s    z*IncrementalChart.select.<locals>.<genexpr>)r"   itersortedkeysr    r#   
_add_indexget)r$   endr,   r)   
restr_keysvalsr   )r,   r   selectQ   s    


zIncrementalChart.selectc                s   x"|D ]}t t|std| qW tdd |  D  }| j|< xTt| jD ]F\}}|| }x4|D ], t fdd|D }||g 	  qfW qPW d S )NzBad restriction: %sc             s   s   | ]
}i V  qd S )Nr   )r   r   r   r   r   r   j   s    z.IncrementalChart._add_index.<locals>.<genexpr>c             3   s   | ]}t  | V  qd S )N)getattr)r   r+   )r*   r   r   r   p   s    )
hasattrr   
ValueErrorr    r!   r#   	enumerater"   
setdefaultappend)r$   r3   r+   indexr2   r)   
this_indexr4   r   )r*   r   r0   c   s    

 
zIncrementalChart._add_indexc                sP      }xB| j D ]4\}}t fdd|D }|| |g   qW d S )Nc             3   s   | ]}t  | V  qd S )N)r6   )r   r+   )r*   r   r   r   v   s    z:IncrementalChart._register_with_indexes.<locals>.<genexpr>)r2   r#   itemsr    r:   r;   )r$   r*   r2   r3   r<   r4   r   )r*   r   _register_with_indexess   s    z'IncrementalChart._register_with_indexesc             C   s   | j |  | d S )N)r"   r2   r;   )r$   r*   r   r   r   _append_edgey   s    zIncrementalChart._append_edgec             C   s   t |  d S )N   )r   
num_leaves)r$   r   r   r   r!   |   s    zIncrementalChart._positionsN)__name__
__module____qualname__r%   r(   r'   r5   r0   r?   r@   r!   r   r   r   r   r   ?   s   r   c               @   s$   e Zd Zdd Zdd Zdd ZdS )FeatureIncrementalChartc                st   j | } i krt|S t  }t|}|jkrB| t fdd|D }tj| | |g S )Nc             3   s   | ]}  | V  qd S )N)_get_type_if_possible)r   r+   )r,   r$   r   r   r      s    z1FeatureIncrementalChart.select.<locals>.<genexpr>)r"   r-   r.   r/   r    r#   r0   r1   )r$   r2   r,   r)   r3   r4   r   )r,   r$   r   r5      s    


zFeatureIncrementalChart.selectc                s   x"|D ]}t t|std| qW tdd  D  }j|< xVtjD ]H\}}|| }x6|D ]. t fdd|D }||g 	  qfW qPW d S )NzBad restriction: %sc             s   s   | ]
}i V  qd S )Nr   )r   r   r   r   r   r      s    z5FeatureIncrementalChart._add_index.<locals>.<genexpr>c             3   s    | ]} t | V  qd S )N)rG   r6   )r   r+   )r*   r$   r   r   r      s   )
r7   r   r8   r    r!   r#   r9   r"   r:   r;   )r$   r3   r+   r<   r2   r)   r=   r4   r   )r*   r$   r   r0      s    

 

z"FeatureIncrementalChart._add_indexc                sR      }xDj D ]6\}}t fdd|D }|| |g   qW d S )Nc             3   s    | ]} t | V  qd S )N)rG   r6   )r   r+   )r*   r$   r   r   r      s    zAFeatureIncrementalChart._register_with_indexes.<locals>.<genexpr>)r2   r#   r>   r    r:   r;   )r$   r*   r2   r3   r<   r4   r   )r*   r$   r   r?      s
    z.FeatureIncrementalChart._register_with_indexesN)rC   rD   rE   r5   r0   r?   r   r   r   r   rF      s   rF   c               @   s   e Zd Zdd ZdS )CompleteFundamentalRulec             c   sN   |  }x@|j||d| dD ]&}||  }||||r |V  q W d S )NT)startr2   is_completelhs)r2   r5   nextsymZmove_dot_forwardZinsert_with_backpointer)r$   chartgrammar	left_edger2   
right_edgenew_edger   r   r   _apply_incomplete   s    z)CompleteFundamentalRule._apply_incompleteN)rC   rD   rE   rR   r   r   r   r   rH      s   rH   c               @   s   e Zd Ze Zdd ZdS )CompleterRulec             c   s.   t |ts*x| j|||D ]
}|V  qW d S )N)
isinstancer   _fundamental_ruleapply)r$   rM   rN   r*   rQ   r   r   r   rV      s    
zCompleterRule.applyN)rC   rD   rE   rH   rU   rV   r   r   r   r   rS      s   rS   c               @   s   e Zd Ze Zdd ZdS )ScannerRulec             c   s.   t |tr*x| j|||D ]
}|V  qW d S )N)rT   r   rU   rV   )r$   rM   rN   r*   rQ   r   r   r   rV      s    
zScannerRule.applyN)rC   rD   rE   rH   rU   rV   r   r   r   r   rW      s   rW   c               @   s   e Zd ZdS )PredictorRuleN)rC   rD   rE   r   r   r   r   rX      s   rX   c               @   s   e Zd Zdd ZdS )FilteredCompleteFundamentalRulec             c   s*   |  r&x| |||D ]
}|V  qW d S )N)rJ   Z_apply_complete)r$   rM   rN   r*   rQ   r   r   r   rV      s    z%FilteredCompleteFundamentalRule.applyN)rC   rD   rE   rV   r   r   r   r   rY      s   rY   c               @   s   e Zd Zdd ZdS )FeatureCompleteFundamentalRulec             c   sR   | j }| }x>|j||d| dD ]$}x|||||D ]
}|V  q<W q&W d S )NT)rI   r2   rJ   rK   )rU   r2   r5   rL   rV   )r$   rM   rN   rO   frr2   rP   rQ   r   r   r   rR      s    z0FeatureCompleteFundamentalRule._apply_incompleteN)rC   rD   rE   rR   r   r   r   r   rZ      s   rZ   c               @   s   e Zd Ze ZdS )FeatureCompleterRuleN)rC   rD   rE   rZ   rU   r   r   r   r   r\      s   r\   c               @   s   e Zd Ze ZdS )FeatureScannerRuleN)rC   rD   rE   rZ   rU   r   r   r   r   r]      s   r]   c               @   s   e Zd ZdS )FeaturePredictorRuleN)rC   rD   rE   r   r   r   r   r^      s   r^   c               @   s,   e Zd ZdZeddefddZd	ddZdS )
IncrementalChartParsera  
    An *incremental* chart parser implementing Jay Earley's
    parsing algorithm:

    | For each index end in [0, 1, ..., N]:
    |   For each edge such that edge.end = end:
    |     If edge is incomplete and edge.next is not a part of speech:
    |       Apply PredictorRule to edge
    |     If edge is incomplete and edge.next is a part of speech:
    |       Apply ScannerRule to edge
    |     If edge is complete:
    |       Apply CompleterRule to edge
    | Return any complete parses in the chart
    r   2   c             C   sn   || _ || _|| _|| _g | _g | _xD|D ]<}|jdkrF| j| q*|jdkr^| j| q*tdq*W dS )a  
        Create a new Earley chart parser, that uses ``grammar`` to
        parse texts.

        :type grammar: CFG
        :param grammar: The grammar used to parse texts.
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            and higher numbers will produce more verbose tracing
            output.
        :type trace_chart_width: int
        :param trace_chart_width: The default total width reserved for
            the chart in trace output.  The remainder of each line will
            be used to display edges.
        :param chart_class: The class that should be used to create
            the charts used by this parser.
        r   rA   z9Incremental inference rules must have NUM_EDGES == 0 or 1N)	_grammar_trace_trace_chart_width_chart_class_axioms_inference_rulesZ	NUM_EDGESr;   r8   )r$   rN   strategytracetrace_chart_widthchart_classruler   r   r   __init__5  s    


zIncrementalChartParser.__init__Nc             C   s@  |d kr| j }| j}t|}| j| | |}| j}| j| d  }|r\t|	| x.| j
D ]$}t|||}|||||| qdW | j}	xt| d D ]}
|dkrtd|
d t|j|
d}xj|r6| }xV|	D ]N}t||||}|||||| x&|D ]}| |
kr|| qW qW qW qW |S )NrA   z
* Processing queue:
)r2   )rb   Z_trace_new_edgesr&   ra   Zcheck_coveragerd   rc   rB   printZpretty_format_leavesre   rV   rf   r   r5   popr2   r;   )r$   tokensrh   Ztrace_new_edgesrM   rN   Ztrace_edge_widthZaxiomZ	new_edgesZinference_rulesr2   Zagendar*   rk   rQ   r   r   r   chart_parse`  s6    


z"IncrementalChartParser.chart_parse)N)rC   rD   rE   __doc__BU_LC_INCREMENTAL_STRATEGYr   rl   rq   r   r   r   r   r_   %  s   %r_   c               @   s   e Zd Zdd ZdS )EarleyChartParserc             K   s   t j| |tf| d S )N)r_   rl   EARLEY_STRATEGY)r$   rN   parser_argsr   r   r   rl     s    zEarleyChartParser.__init__N)rC   rD   rE   rl   r   r   r   r   rt     s   rt   c               @   s   e Zd Zdd ZdS )IncrementalTopDownChartParserc             K   s   t j| |tf| d S )N)r_   rl   TD_INCREMENTAL_STRATEGY)r$   rN   rv   r   r   r   rl     s    z&IncrementalTopDownChartParser.__init__N)rC   rD   rE   rl   r   r   r   r   rw     s   rw   c               @   s   e Zd Zdd ZdS )IncrementalBottomUpChartParserc             K   s   t j| |tf| d S )N)r_   rl   BU_INCREMENTAL_STRATEGY)r$   rN   rv   r   r   r   rl     s    z'IncrementalBottomUpChartParser.__init__N)rC   rD   rE   rl   r   r   r   r   ry     s   ry   c               @   s   e Zd Zdd ZdS )(IncrementalBottomUpLeftCornerChartParserc             K   s   t j| |tf| d S )N)r_   rl   rs   )r$   rN   rv   r   r   r   rl     s    z1IncrementalBottomUpLeftCornerChartParser.__init__N)rC   rD   rE   rl   r   r   r   r   r{     s   r{   c               @   s   e Zd Zdd ZdS ) IncrementalLeftCornerChartParserc             K   s&   |  stdtj| |tf| d S )NzNIncrementalLeftCornerParser only works for grammars without empty productions.)Zis_nonemptyr8   r_   rl   LC_INCREMENTAL_STRATEGY)r$   rN   rv   r   r   r   rl     s
    z)IncrementalLeftCornerChartParser.__init__N)rC   rD   rE   rl   r   r   r   r   r|     s   r|   c               @   s   e Zd ZedefddZdS )FeatureIncrementalChartParser   c             K   s    t j| |f|||d| d S )N)rg   ri   rj   )r_   rl   )r$   rN   rg   ri   rj   rv   r   r   r   rl     s    z&FeatureIncrementalChartParser.__init__N)rC   rD   rE   "BU_LC_INCREMENTAL_FEATURE_STRATEGYrF   rl   r   r   r   r   r~     s   r~   c               @   s   e Zd Zdd ZdS )FeatureEarleyChartParserc             K   s   t j| |tf| d S )N)r~   rl   EARLEY_FEATURE_STRATEGY)r$   rN   rv   r   r   r   rl     s    z!FeatureEarleyChartParser.__init__N)rC   rD   rE   rl   r   r   r   r   r     s   r   c               @   s   e Zd Zdd ZdS )$FeatureIncrementalTopDownChartParserc             K   s   t j| |tf| d S )N)r~   rl   TD_INCREMENTAL_FEATURE_STRATEGY)r$   rN   rv   r   r   r   rl     s    z-FeatureIncrementalTopDownChartParser.__init__N)rC   rD   rE   rl   r   r   r   r   r     s   r   c               @   s   e Zd Zdd ZdS )%FeatureIncrementalBottomUpChartParserc             K   s   t j| |tf| d S )N)r~   rl   BU_INCREMENTAL_FEATURE_STRATEGY)r$   rN   rv   r   r   r   rl     s    z.FeatureIncrementalBottomUpChartParser.__init__N)rC   rD   rE   rl   r   r   r   r   r     s   r   c               @   s   e Zd Zdd ZdS )/FeatureIncrementalBottomUpLeftCornerChartParserc             K   s   t j| |tf| d S )N)r~   rl   r   )r$   rN   rv   r   r   r   rl     s    z8FeatureIncrementalBottomUpLeftCornerChartParser.__init__N)rC   rD   rE   rl   r   r   r   r   r     s   r   TF   $I saw John with a dog with my cookie   c             C   s   ddl }ddl}ddlm} | }	|r6td t|	 td t| | }
t|
 t  t|	|d}| }||
}t	|
|	 }| | }|rt||kstd|rx$|D ]}t| qW ntdt| | rtd	| dS )
z0
    A demonstration of the Earley parsers.
    r   N)demo_grammarz	* Grammarz* Sentence:)rh   zNot all parses foundz	Nr trees:zTime:)systimenltk.parse.chartr   rn   splitrt   Zclockrq   r&   parsesrI   lenAssertionError)Zprint_timesZprint_grammarZprint_treesrh   ZsentZ	numparsesr   r   r   rN   rp   ZearleytrM   r   Ztreer   r   r   demo  s0    

r   __main__N)TFTr   r   r   )>rr   Z
__future__r   r   Z	six.movesr   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   Znltk.parse.featurechartr   r   r   r   r   r   r   r   r   rF   rH   rS   rW   rX   rY   rZ   r\   r]   r^   ru   rx   rz   rs   r}   r_   rt   rw   ry   r{   r|   r   r   r   r   r~   r   r   r   r   r   rC   r   r   r   r   <module>   s   <(A6			_     
(
