B
    P?[*!                 @   s   d Z ddlmZ ddlmZmZmZ ddlmZ ddl	m
Z
mZ ddlmZ ddlmZmZ dd	d
Zdd Zdd ZG dd deZdddZde_dS )z 
Utility functions for parsers.
    )print_function)CFGFeatureGrammarPCFG)load)ChartChartParser)InsideChartParser)FeatureChartFeatureChartParserNc             K   s   t | f|}t|tstdt|trB|dkr4t}||||dS t|trr|dkrXt}|dkrdt}||||dS |dkr~t	}|dkrt
}||||dS dS )a  
    Load a grammar from a file, and build a parser based on that grammar.
    The parser depends on the grammar format, and might also depend
    on properties of the grammar itself.

    The following grammar formats are currently supported:
      - ``'cfg'``  (CFGs: ``CFG``)
      - ``'pcfg'`` (probabilistic CFGs: ``PCFG``)
      - ``'fcfg'`` (feature-based CFGs: ``FeatureGrammar``)

    :type grammar_url: str
    :param grammar_url: A URL specifying where the grammar is located.
        The default protocol is ``"nltk:"``, which searches for the file
        in the the NLTK data package.
    :type trace: int
    :param trace: The level of tracing that should be used when
        parsing a text.  ``0`` will generate no tracing output;
        and higher numbers will produce more verbose tracing output.
    :param parser: The class used for parsing; should be ``ChartParser``
        or a subclass.
        If None, the class depends on the grammar format.
    :param chart_class: The class used for storing the chart;
        should be ``Chart`` or a subclass.
        Only used for CFGs and feature CFGs.
        If None, the chart class depends on the grammar format.
    :type beam_size: int
    :param beam_size: The maximum length for the parser's edge queue.
        Only used for probabilistic CFGs.
    :param load_args: Keyword parameters used when loading the grammar.
        See ``data.load`` for more information.
    z1The grammar must be a CFG, or a subclass thereof.N)trace	beam_size)r   chart_class)r   
isinstancer   
ValueErrorr   r	   r   r   r
   r   r   )Zgrammar_urlr   parserr   r   Z	load_argsgrammar r   .lib/python3.7/site-packages/nltk/parse/util.pyload_parser   s$    "


r   c             c   sR   xLt | ddD ]<\}\}}t||d||dddddg
}d|d }|V  qW dS )	a  
    A module to convert a single POS tagged sentence into CONLL format.

    >>> from nltk import word_tokenize, pos_tag
    >>> text = "This is a foobar sentence."
    >>> for line in taggedsent_to_conll(pos_tag(word_tokenize(text))):
    ... 	print(line, end="")
        1	This	_	DT	DT	_	0	a	_	_
        2	is	_	VBZ	VBZ	_	0	a	_	_
        3	a	_	DT	DT	_	0	a	_	_
        4	foobar	_	JJ	JJ	_	0	a	_	_
        5	sentence	_	NN	NN	_	0	a	_	_
        6	.		_	.	.	_	0	a	_	_

    :param sentence: A single input sentence to parse
    :type sentence: list(tuple(str, str))
    :rtype: iter(str)
    :return: a generator yielding a single sentence in CONLL format.
       )start_0a	
N)	enumeratestrjoin)sentenceiZwordtag	input_strr   r   r   taggedsent_to_conllP   s    r$   c             c   s0   x*| D ]"}xt |D ]
}|V  qW dV  qW dS )a5  
    A module to convert the a POS tagged document stream
    (i.e. list of list of tuples, a list of sentences) and yield lines
    in CONLL format. This module yields one line per word and two newlines
    for end of sentence.

    >>> from nltk import word_tokenize, sent_tokenize, pos_tag
    >>> text = "This is a foobar sentence. Is that right?"
    >>> sentences = [pos_tag(word_tokenize(sent)) for sent in sent_tokenize(text)]
    >>> for line in taggedsents_to_conll(sentences):
    ...     if line:
    ...         print(line, end="")
    1	This	_	DT	DT	_	0	a	_	_
    2	is	_	VBZ	VBZ	_	0	a	_	_
    3	a	_	DT	DT	_	0	a	_	_
    4	foobar	_	JJ	JJ	_	0	a	_	_
    5	sentence	_	NN	NN	_	0	a	_	_
    6	.		_	.	.	_	0	a	_	_
    <BLANKLINE>
    <BLANKLINE>
    1	Is	_	VBZ	VBZ	_	0	a	_	_
    2	that	_	IN	IN	_	0	a	_	_
    3	right	_	NN	NN	_	0	a	_	_
    4	?	_	.	.	_	0	a	_	_
    <BLANKLINE>
    <BLANKLINE>

    :param sentences: Input sentences to parse
    :type sentence: list(list(tuple(str, str)))
    :rtype: iter(str)
    :return: a generator yielding sentences in CONLL format.
    z

N)r$   )	sentencesr    r#   r   r   r   taggedsents_to_conllj   s    !

r&   c               @   s$   e Zd ZdZdddZd	ddZdS )
TestGrammarz
    Unit tests for  CFG.
    Nc             C   s*   || _ t|dd| _|| _|| _|| _d S )Nr   )r   )Ztest_grammarr   cpsuiteZ_acceptZ_reject)selfr   r)   acceptrejectr   r   r   __init__   s
    zTestGrammar.__init__Fc       
      C   s   x| j D ]}t|d d dd xdD ]}x|| D ]}| }t| j|}|r||r|t  t| x|D ]}t| qlW |dkr|g krtd| qd}q4|rtd	| q4d}	q4W q&W |r|	rtd
 qW dS )a|  
        Sentences in the test suite are divided into two classes:
         - grammatical (``accept``) and
         - ungrammatical (``reject``).
        If a sentence should parse accordng to the grammar, the value of
        ``trees`` will be a non-empty list. If a sentence should be rejected
        according to the grammar, then the value of ``trees`` will be None.
        doc: )end)r+   r,   r+   zSentence '%s' failed to parse'TzSentence '%s' received a parse'zAll tests passed!N)r)   printsplitlistr(   parser   )
r*   Z
show_treesZtestkeyZsenttokensZtreesZtreeZacceptedZrejectedr   r   r   run   s(    	

zTestGrammar.run)NN)F)__name__
__module____qualname____doc__r-   r8   r   r   r   r   r'      s   
r'   #%;c             C   s   |dk	r|  |} g }x| dD ]}|dks"|d |kr<q"|dd}d}t|dkr|d dkrz|d d	k}|d }nt|d }|d }| }|g krq"|||fg7 }q"W |S )
a  
    Parses a string with one test sentence per line.
    Lines can optionally begin with:
      - a bool, saying if the sentence is grammatical or not, or
      - an int, giving the number of parse trees is should have,
    The result information is followed by a colon, and then the sentence.
    Empty lines and lines beginning with a comment char are ignored.

    :return: a list of tuple of sentences and expected results,
        where a sentence is a list of str,
        and a result is None, or bool, or int

    :param comment_chars: ``str`` of possible comment characters.
    :param encoding: the encoding of the string, if it is binary
    Nr    r   r/   r      )TruetrueFalseZfalse)r@   rA   )decoder3   lenint)stringZcomment_charsencodingr%   r    Z
split_inforesultr7   r   r   r   extract_test_sentences   s&    

rI   F)r   NNr   )r=   N)r<   Z
__future__r   Znltk.grammarr   r   r   Z	nltk.datar   Znltk.parse.chartr   r   Znltk.parse.pchartr	   Znltk.parse.featurechartr
   r   r   r$   r&   objectr'   rI   Z__test__r   r   r   r   <module>   s   
8,/
'