B
    >?[                 @   s   d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	m
Z
mZmZmZmZmZmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddl m!Z!m"Z" ddl#m$Z$ G dd de%Z&dd Z'e(dkre'  dgZ)dS )zl
A graphical tool for exploring the regular expression based chunk
parser ``nltk.chunk.RegexpChunkParser``.
    )divisionN)
ButtonCanvasCheckbuttonFrameIntVarLabelMenu	ScrollbarTextTk)askopenfilenameasksaveasfilename)Font)Tree)in_idle)ShowText)	conll2000treebank_chunk)
ChunkScoreRegexpChunkParser)RegexpChunkRulec            .   @   s  e Zd ZdZdddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/-Zd0d1d2d3gZd4ed5d6fd7ed8d6fd9ed:d;fd<ed=d>fd?ed=d>fd@edAdAdBfdCedDdEdBfdFedGd6fdHedId6fdJedKd6fg
ZdLZdMZ	dNZ
dOZdPZedQdRdSdSdLdTdUdVdWZedMdMdSdSdXdLdTdUdVdY	ZedZd[d\d\dLdTdUdVd]d^	Zed_dTdUd`ZedadbdcZedddUdUdedfZed\d\dLdTdUdgdhdiZedddddddjZdkZdSZedSd;Zedkd;ZdlZdmdn ZddsdtZdudv Zdwdx Zdydz Zd{d| Zd}Z d~Z!dd Z"d~Z#dd Z$dd Z%dd Z&d~Z'dd Z(dd Z)dd Z*dd Z+dd Z,dd Z-dd Z.dd Z/dd Z0dddZ1dd Z2dd Z3dd Z4dd Z5dddZ6dd Z7dd Z8dd Z9dZ:dddZ;dddZ<dddZ=dd Z>dddZ?dddZ@dd ZAdpS )RegexpChunkAppz
    A graphical tool for exploring the regular expression based chunk
    parser ``nltk.chunk.RegexpChunkParser``.

    See ``HELP`` for instructional text.
    zCoordinating conjunctionzPossessive pronounzCardinal numberZAdverbZ
DeterminerzAdverb, comparativezExistential therezAdverb, superlativezForeign wordZParticleZ	AdjectivetozAdjective, comparativeZInterjectionzAdjective, superlativezVerb, base formzList item markerzVerb, past tenseZModalzNoun, pluralzNoun, singular or maspszVerb, past participlezVerb,3rd ps. sing. presentzProper noun, singularzProper noun pluralzwh-determinerZPredeterminerz
wh-pronounzPossessive endingzPossessive wh-pronounzPersonal pronounz	wh-adverbzopen parenthesiszclose parenthesisz
open quoteZcommazclose quoteZperiodzpound sign (currency marker)zdollar sign (currency marker)zPreposition/subord. conjunctionz#Symbol (mathematical or scientific)zVerb, gerund/present participlezVerb, non-3rd ps. sing. presentZcolon)-ZCCzPRP$ZCDZRBZDTZRBRZEXZRBSZFWZRPZJJZTOZJJRZUHZJJSZVBZLSZVBDZMDZNNSZNNZVBNZVBZZNNPZNNPSZWDTZPDTZWPZPOSzWP$ZPRPZWRB()z``,z''.#$INZSYMZVBGZVBP:)HelpZ20a-  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their harmonic mean (the 'f-score'), are displayed in the status bar at the bottom of the window.)ZRulesZ10a  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Chink rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
)ZRegexpsz10 60aX  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
	<regexp><\#><CD> # This is a comment...</regexp>
		Matches <match>"#/# 100/CD"</match>
</hangindent>)ZTagsz10 60zB<h1>Part of Speech Tags:</h1>
<hangindent><<TAGSET>></hangindent>
Zredz#a00)
foregroundZgreenz#080Z	highlightz#ddd)
background	underlineT)r%   Zh1indent   )lmargin1lmargin2
hangindentr   <   varz#88fZregexpz#ba7matchz#6a6      g?g{Gz?g{Gz?(      z#efeZgroove   word)widthheightr$   highlightbackgroundhighlightthicknessreliefborderwrapz#555)	r4   r5   r$   r6   r#   r7   r8   r9   r:   F   
   z#eef)   )	r4   r5   r$   r6   r7   r8   r9   r:   tabsz#9bb)r$   r8   r9   	helveticai)familysizez#777   )r$   padxpadyr9   i,  i  )r$   r6   r7   r8   r9   r4   r5   )r$   Zactivebackgroundr6   z#aba   c             C   sD   t dd|}t dd|}t dd|}| }t dd|}|S )	Nz((\\.|[^#])*)(#.*)?z\1z + z
\s+
z	([^\\])\$z\1\\$)resubstrip)selfgrammar rM   7lib/python3.7/site-packages/nltk/app/chunkparser_app.pynormalize_grammar6  s    z RegexpChunkApp.normalize_grammarr   N NPc             C   s`  || _ |dkr| j}|| _|dkrT|dkr6td}n|dkrHt }ntd| d| _|| _d| _	d| _
|| _|| _d| _d| _g | _d| _d| _d| _d| _t|d| _t  }| _|d	 |d
 |d| j t|| _| jd | | |  | | !| | "| | j#$  |rJ| j#%d|d  | j#&dd | 'd | (  dS )a  
        :param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        :param devset: A list of chunked sentences
        :param grammar: The initial grammar to display.
        :param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to ``self.TAGSET``.
        Nr   z	train.txtZtreebankzUnknown development set %sr   )chunk_labelz+50+50zRegexp Chunk Parser Appz<Control-q>d   endrG   insertz1.0))_chunk_labelTAGSETtagsetr   Zchunked_sentsr   
ValueErrorchunkerrL   normalized_grammargrammar_changeddevsetdevset_namedevset_index_last_keypress_history_history_index_eval_grammar_eval_normalized_grammar_eval_indexr   _eval_scorer   topZgeometrytitlebinddestroyr   _devset_sizeset_init_fonts_init_widgets_init_bindings_init_menubar
grammarboxZfocusrV   mark_setshow_devsetupdate)rK   r_   r^   rL   rS   rY   rh   rM   rM   rN   __init__A  sP    









zRegexpChunkApp.__init__c                s   | d j | d j | d j | d j | d fdd | d fd	d  j d j  j d j  j d j  j d
 j d S )Nz<Control-n>z<Control-p>z<Control-t>z
<KeyPress>z<Control-s>c                s      S )N)save_grammar)e)rK   rM   rN   <lambda>  s    z/RegexpChunkApp._init_bindings.<locals>.<lambda>z<Control-o>c                s      S )N)load_grammar)rx   )rK   rM   rN   ry     s    z<Configure>)rj   _devset_next_devset_prevtoggle_show_traceru   rr   evalbox
_eval_plot)rK   rh   rM   )rK   rN   rp     s    zRegexpChunkApp._init_bindingsc             C   sR   t || _| jd td| j  d| _tdt| j d d  d| _d S )Nr'   r?   )r@   rA      )r   _sizerm   r   get_fontint
_smallfont)rK   rh   rM   rM   rN   rn     s
    
zRegexpChunkApp._init_fontsc             C   s  t |}t |dd}|jdd| jd |jddd| jd |jddd	| jd |jd
d| jd |jdd| jdd |jdd|d t |dd}|jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jdd|d t |dd}|jd| jd| jd  |jd!| jd"| jd  |jd#| jd$| jd  |jd%| jd&| jd  |jd'd|d t |dd}|jd(d| jd |jd)d|d |j|d* d S )+Nr   )ZtearoffzReset Application)labelr%   commandzSave Current GrammarzCtrl-s)r   r%   acceleratorr   zLoad GrammarzCtrl-ozSave Grammar History   ZExitr.   zCtrl-q)r   r%   r   r   ZFile)r   r%   menuZTinyr<   )r   variabler%   valuer   ZSmall   ZMediumr'   ZLarge   ZHuge"   ZViewz50 sentences2   )r   r   r   r   z100 sentencesrT   z200 sentences   z500 sentencesi  zDevelopment-SetZAboutr"   )r   )r	   Zadd_commandresetrw   rz   save_historyrk   Zadd_cascadeZadd_radiobuttonr   resizerl   set_devset_sizeaboutconfig)rK   parentZmenubarZfilemenuZviewmenuZ
devsetmenuZhelpmenurM   rM   rN   rq     s    










zRegexpChunkApp._init_menubarc             G   s   | j r|   n|   dS )Nbreak)_showing_tracert   
show_trace)rK   rx   rM   rM   rN   r}   %  s    
z RegexpChunkApp.toggle_show_trace   Fc             O   st  | d| j }| d| j }| jd | jjd|d d dddd	}| j|d d
 |d  }}| jj||| d  |d dddd}d| j|d d  }}	| jd }
| j| jj	dd|d d|
|
d | j| jj	d|	d dd|
|
d | j
  rt| jdkrd }}d }}xbtdtt| j| jd D ]B}| j|  \}}}}t||}t||}t||}t||}qHW t|d d}t|d d}t|d d}t|d d}nd }}d }}xtdD ]}||| |d | ||    }|	|	| |d | ||    }||  k r>|k rXn n| jj||||	dd ||  k rp|	k rn n| jj||||dd qW | j||||	 | j||	||	 | jj|d |	dddd|  d	 | jj|d |dddd|  d	 | jj||	d dddd|  d	 | jj||	d dddd|  d	 d  }}x(t| jD ]\}\}}}}||| || ||    }|	|	| || ||    }|| jkr| jj|d |d |d |d d d!d d"|d  d#|d   d$|d   | jd%< n0| j| jj|d |d |d |d d&d'd |d k	r`| j  r`| j| jj||||d'd || }}qRW d S )(Nr4   r5   allr<   r2   leftwZ	Precision)justifyanchortextr   sZRecallcenter)r   r   r   r.   r$   r   i  )fillZoutlineg{Gz?   g      $@z#888)r   rB   rightZsez%d%%rT   neZnwz#0f0z#000zPrecision: %.2f%%	zRecall: %.2f%%	zF-score: %.2f%%r   z#afaz#8c8)r   r~   Zwinfo_widthZwinfo_heightdeleteZcreate_textZbbox_EVALBOX_PARAMSlowerZcreate_rectangle
_autoscalelenrb   rangemin_SCALE_NmaxZcreate_line	enumeraterc   Zcreate_ovalstatus_eval_lines)rK   rx   r   r4   r5   tagr   r   rh   ZbotbgZmax_precisionZ
max_recallZmin_precisionZ
min_recallirL   	precisionrecallZfmeasurexyZprev_xZprev_y_fscorerM   rM   rN   r   /  s    
"


 ",$zRegexpChunkApp._eval_plotc       	      C   s,  | j d krd S | jd kr"d| _d S t }t | j | jk rj| j| jkrjd| _| j t	| j
d | jS | j| jkrxb| jD ]X\}}}}| j| |kr~| j||||f t| jd | _|   d| _d | _d S q~W d| _t| jd| _| j| _| j| _| j dkrd| _d S xJ| j| jt| j| j | j  D ]"}| | }| j || q>W |  j| j7  _| j| j kr| j| j| j! | j" | j# f t| jd | _|   d| _d | _nTd| j | j  }d	| | j$d
< d| _| %t |  | j t	| j
d | j d S )NFTi  r.   r   )rS   rP   rT   z$Evaluating on Development Set (%d%%)r   )&rh   r[   _eval_demon_runningtimera   _EVAL_DELAYr\   re   afterr   
_EVAL_FREQ_eval_demonrb   rO   appendr   rc   r   rf   r   rW   rg   rL   rd   rJ   r^   r   _EVAL_CHUNKrl   r   _chunkparseleavesZscorer   r   Z	f_measurer   _adaptively_modify_eval_chunk)	rK   Zt0gprfZgoldZguessZprogressrM   rM   rN   r     s^    

zRegexpChunkApp._eval_demonc             C   s   || j krD| jdkrDt| jd tt| j| j |  | jd | _n8|| jk r|t| jd tt| j| j|  | jd | _dS )z
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

        :param t: The amount of time that the eval demon took.
        r   r.   r<   N)_EVAL_DEMON_MAXr   r   r   r   _EVAL_DEMON_MIN)rK   trM   rM   rN   r     s    
z,RegexpChunkApp._adaptively_modify_eval_chunkc                s  t |f j}|jddd |jddd |jddd |jddd t|fd ji j _t| jd	d
 jd d _	 j	j
dddd  jj
dddd t| jjd}|j
dddd  jj|jd  jd }t ||d}|j
dddd t|fd jd jjdd t|fd jd jjdd t|fd ji j _ jj
dddd i  _ jd }t ||d}|j
dddd xt jD ]z\}\}}	}
t|| jd}|j
|d ddd |d|f fdd	 | j|< t |d j|dj
|d d dd  qW  j jd d  j jd!  jjd"d#d$ x( jD ]\}} jjd%| f| qRW   jd d  t| jjd} jj|jd |j
dddd t | jd d}t|fd ji j  _! j!jd#d&d' t| jd(d) j d d* _" j"j
dddd |j
dddd t| j#d _$ j$j
dddd t| j!j%d+d, _& j&j j!d-<  j&jd.d/d0  jd }t ||d}|j
dd1dd t|fd2 j'd jjdd t|fd3 j(d jjdd t|fd4 j)d5d6 j _* j*jd)d t|fd7 j+d j _, j,jd)d t-|f j. _/t| jd8d) j.d d*}|j
dddd  j/j
ddddd9  jd }t ||d}|j
dd1dd t0 j1 _2 j2d: t3|f j2 j4d;d< jjdd t0 j1 _5 j5d: t3|f j5 j4d=d< jjdd t|fd>d?i jjd)d t|fd ji j6 _7 j7j
dd@dAddddB d5 jdC< d5 j!dC<  jd }t |dDd|dj
ddd  t |ddD|dj
ddd  t |dEd|dj
ddFd  |jd&d#dG  j!jdHdIdJdK  j!jdLdJdMdN  j!jdOdPd  j!jdQdRdSdT  j!jdUdVdSdW  j!jdXdMdY  jjdXdZd  jjd[d\dY  jjd]d^dY  jjd_d`dY  jjdaddbdc d S )dNr      )ZweightrB   r2   r.   r   fontzGrammar:Zblackr$   )r   r   Zhighlightcolorr$   ZSW)columnrowstickyZNEWS)r   ZNWS)Zyscrollcommand)r$   ZEWzPrev Grammar)r   r   r   )sidezNext Grammar)r   r   Sz<ButtonPress>c                s
     |S )N)	show_help)rx   tab)rK   rM   rN   ry   P  s    z.RegexpChunkApp._init_widgets.<locals>.<lambda>)r5   r4   r$   )r   r   )r   elideT)r   ztag-%sZboth)expandr   zDevelopment Set:r   )r   r   r   r$   Zhoriz)r   ZorientZxscrollcommandZbottomr   )r   r      zPrev Example (Ctrl-p)zNext Example (Ctrl-n)zShow exampledisabled)r   r   statez
Show tracezEvaluation:)r   r   r   
columnspanFZZoom)r   r   r   ZLinesr   ZHistory	   ZNEW)r   r   r   rC   rD   r   r   r<   rE      )r   r   ztrue-posz#afaTrue)r$   r%   z	false-negz#800)r%   r#   z	false-posz#faatracez#666Znone)r#   r:   
wrapindentr=   )r)   r:   error)r#   z#feccommentz#840anglez#00fbracez#0a0r*   r0   )r(   r)   )8r   _FRAME_PARAMSZgrid_columnconfigureZgrid_rowconfigurer   r   _GRAMMARBOX_PARAMSrr   r   grammarlabelZgridr
   Zyviewr   rm   r   _history_prev_BUTTON_PARAMSZpack_history_nextr   _HELPBOX_PARAMShelpboxhelptabsr   HELPrj   _HELPTAB_SPACER	configureZ
tag_configHELP_AUTOTAGr   _DEVSETBOX_PARAMS	devsetboxdevsetlabel_devset_scrolldevset_scrollZxviewdevset_xscrollr|   r{   rt   devset_buttonr   trace_buttonr   r   r~   r   rh   r   r   r   r   _STATUS_PARAMSr   )rK   rh   Zframe0Zgrammar_scrollbarr   Zframe3Zhelptab_framer   r   tabstopsr   r   r   paramsZhelp_scrollbarZframe4Zframe1Zframe2rM   )rK   rN   ro     s&   













zRegexpChunkApp._init_widgetsc                s  d| _ d| jd< d| jd< d| jd< | jdd d| jd | j f | jd	< | j	d kr|| j
dd
 | jddd d S | j| j }| j	 }d}dg x6t| D ]&\}\}}|d| 7 } t| qW t fddtt|d D | _tdd tt|d D | _x,tt|d D ]}|dkr\| j
dd | jddd n*| j
dd||d    | jddd | j
d|d  | jddd t|d | }	| | }
| |}| |
}x"||D ]}| ||d qW x || D ]}| ||d qW x || D ]}| ||d q*W q,W | j
dd | jddd | jd| jjdd d S )NTr   r   normalz1.0rU   zDevelopment Set (%d/%d)r.   r   z#Trace: waiting for a valid grammar.r   	z%s c             3   s0   | ](}t t D ]}||f | fV  qqd S )N)r   r   ).0r   j)charnumrM   rN   	<genexpr>  s   z,RegexpChunkApp.show_trace.<locals>.<genexpr>c             s   s   | ]}||d  d  fV  qdS )r2   NrM   )r  r   rM   rM   rN   r    s    r   zStart:
r   zend -2c linestartzend -2cz
Apply %s:
rG   r   ztrue-posz	false-negz	false-posz
Finished.
rT   g333333?)r   r   r   r   r   r`   rl   r   r   r[   rV   tag_addr^   rulesr   r   r   r   dictr   r  linenumr   r   _chunksintersection_color_chunkrh   r   r   rm   )rK   rx   	gold_treer  Ztagseqwordnumr3   posr   r[   	test_treegold_chunkstest_chunkschunkrM   )r  rN   r     sV    





 


zRegexpChunkApp.show_tracec       
      C   sp  d| j d< | j dd xF| jD ]:\}}}||krJ|dddd tt| j d	d
 dD }| j	| j
f | j | j j
|d | j d|d  d}x| jD ]\}}d||f }xt||D ]t}	| j d||	d ||	d  | j d| ||	d ||	d  | j d||	d ||	d  qW qW q"| j	| j
f | j q"W d| j d< d S )Nr   r   z1.0rU   z
<<TAGSET>>rG   c             s   s   | ]}d | V  qdS )z	%s	%sNrM   )r  itemrM   rM   rN   r  "  s   z+RegexpChunkApp.show_help.<locals>.<genexpr>c             S   s(   t d| d rd| d fp&d| d fS )Nz\w+r   r.   )rH   r-   )Zt_wrM   rM   rN   ry   %  s   z*RegexpChunkApp.show_help.<locals>.<lambda>)key)r>   z



















z1.0 + %d charsz(?s)(<%s>)(.*?)(</%s>)r   r.   ztag-%sr2   rB   r   )r   r   r   replacejoinsortedlistrY   itemsr   r   _HELPTAB_FG_PARAMSrV   r   rH   finditerr  startrU   _HELPTAB_BG_PARAMS)
rK   r   namer   r   Cr   r   patternmrM   rM   rN   r     s0    

$".zRegexpChunkApp.show_helpc             G   s   |  | jd  dS )Nr.   r   )_view_historyrc   )rK   rx   rM   rM   rN   r   =  s    zRegexpChunkApp._history_prevc             G   s   |  | jd  dS )Nr.   r   )r#  rc   )rK   rx   rM   rM   rN   r   A  s    zRegexpChunkApp._history_nextc             C   s.  t dtt| jd |}| js$d S || jkr2d S d| jd< | jdd | jd| j| d  | jdd || _| 	| j| d  | 
| j| d | _| jrdd	 | jd
D }ng }t|| _|   |   | jr|   | jt| jd k r d| jd t| jf | jd< n
d| jd< d S )Nr   r.   r   r   z1.0rU   rV   c             S   s   g | ]}t |qS rM   )r   
fromstring)r  linerM   rM   rN   
<listcomp>Y  s   z0RegexpChunkApp._view_history.<locals>.<listcomp>rG   zGrammar %s/%s:r   zGrammar:)r   r   r   rb   rc   rr   r   rV   rs   _syntax_highlight_grammarrO   r\   splitr   r[   r   _highlight_devsetr   r   r   )rK   indexr  rM   rM   rN   r#  E  s4    


zRegexpChunkApp._view_historyc             G   s   |  ddd dS )Nscrollr.   pager   )r   )rK   rx   rM   rM   rN   r{   n  s    zRegexpChunkApp._devset_nextc             G   s   |  ddd dS )Nr+  rR   r,  r   )r   )rK   rx   rM   rM   rN   r|   r  s    zRegexpChunkApp._devset_prevc             G   s"   | j d krd S | j   d | _ d S )N)rh   rk   )rK   rx   rM   rM   rN   rk   v  s    

zRegexpChunkApp.destroyc             G   s   d}| j }|dkr:|d dr:| | jt|d   nr|dkrn|d drn| | j|t|d    n>|dkr| tt|d | j   ndstd||f |r| 	  d S )Nr.   r+  Zunitr   r,  Zmovetozbad scroll command %s %s)
r   
startswithrt   r`   r   floatrl   r   AssertionErrorr   )rK   r   argsNZshowing_tracerM   rM   rN   r   |  s    "zRegexpChunkApp._devset_scrollc             C   s  |d kr| j }ttd|| j d }|| j kr<| js<d S || _ d| _d| jd< d| jd< d| jd< d| jd< | j	d	d
 d| j d | j f | j
d< | j| j | j d  }i | _ddi| _xt|D ]|\}}d}xTt| D ]D\}\}}t|| j||f< |d||f 7 }t|| j||d f< qW | jd
|d d d  qW | jd k	r`|   d| jd< | j | j  }	| j d | j  }
| j|	|
 d S )Nr   r.   Fr   r   r   r3   r:   z1.0rU   zDevelopment Set (%d/%d)r   rP   z%s/%s rR   z

r2   )r`   r   r   rl   r   r   r   r   r   r   r   r^   r  r	  r   r   r   rV   r[   r)  r   rm   )rK   r*  samplesentnumZsentZlinestrr  r3   r  firstZlastrM   rM   rN   rt     s<    





zRegexpChunkApp.show_devsetc             C   s`   t  }d}xP|D ]H}t|trP| | jkrB|||t| f |t|7 }q|d7 }qW |S )Nr   r.   )rm   
isinstancer   r   rW   addr   )rK   ZtreeZchunksr  ZchildrM   rM   rN   r
    s    

zRegexpChunkApp._chunksc             C   s^  | j d krd S | jddd | jddd | jddd | jddd xt|dD ]\}}| srq`td|}d }|	d	r|
d	}d
|d |
d	f }d
|d |d	f }| jd|| xtd|D ]r}|d k	r|
 |krP d
|d |
 f }d
|d | f }|	 dkrB| jd|| q| jd|| qW q`W d S )Nr   z1.0rU   r   r   r*   rG   z(\\.|[^#])*(#.*)?r2   z%d.%dr.   z[<>{}]z<>)rh   rr   
tag_remover  r   r(  rJ   rH   r-   groupr  rU   r  )rK   rL   linenor%  r"  Zcomment_startr   rx   rM   rM   rN   r'    s0    


z(RegexpChunkApp._syntax_highlight_grammarc             C   s   | j d krd S | jddd g | _xt|dD ]t\}}tdd|}| }|r4yt	
| W q4 tk
r } z$| jdd|d  d	|d   W d d }~X Y q4X q4W d
| jd< d S )Nr   z1.0rU   rG   z((\\.|[^#])*)(#.*)?z\1z%s.0r.   z%s.0 lineendrP   r   )rh   rr   r7  Z_grammarcheck_errsr   r(  rH   rI   rJ   r   r$  rZ   r  r   )rK   rL   r9  r%  rx   rM   rM   rN   _grammarcheck  s    
0zRegexpChunkApp._grammarcheckc          
   G   s  |rt   | _| jdd | _}| |}|| jkr:d S || _| jt| j	d k r^d| j
d< | | y"|rdd |dD }ng }W n2 tk
r } z| | d | _d S d }~X Y nX t|| _| jd	dd t   | _| jr|   n|   | js
|   d S )
Nz1.0rU   r.   zGrammar:r   c             S   s   g | ]}t |qS rM   )r   r$  )r  r%  rM   rM   rN   r&    s   z)RegexpChunkApp.update.<locals>.<listcomp>rG   r   )r   ra   rr   r   rL   rO   r\   rc   r   rb   r   r'  r(  rZ   r:  r[   r   r7  r]   r   r   r)  r   r   )rK   ZeventrL   r\   r  rx   rM   rM   rN   ru     s6    








zRegexpChunkApp.updatec             C   s   |d kr| j | j| jd  }| jddd | jddd | jddd xt|D ]\}}| | }| |}| |}x ||D ]}| 	||d qW x|| D ]}| 	||d qW x|| D ]}| 	||d qW qXW d S )Nr.   ztrue-posz1.0rU   z	false-negz	false-pos)
r^   r`   r   r7  r   r   r   r
  r  r  )rK   r2  r3  r  r  r  r  r  rM   rM   rN   r)  "  s    

z RegexpChunkApp._highlight_devsetc          
   C   sH   y| j |S  ttfk
rB } z| jddd |S d }~X Y nX d S )Nr   z1.0rU   )r[   parserZ   
IndexErrorrr   r  )rK   Zwordsrx   rM   rM   rN   r   9  s
    zRegexpChunkApp._chunkparsec          	   C   sP   |\}}| j |d| j| | j||f f d| j| | j||f d f  d S )Nz%s.%sr.   )r   r  r	  r  )rK   r3  r  r   r  rU   rM   rM   rN   r  D  s
    zRegexpChunkApp._color_chunkc             C   sH   d | _ d | _d | _d| _g | _d| _| jdd | d | 	  d S )Nr   z1.0rU   )
r[   rL   r\   r]   rb   rc   rr   r   rt   ru   )rK   rM   rM   rN   r   L  s    
zRegexpChunkApp.resetz# Regexp Chunk Parsing Grammar
# Saved %(date)s
#
# Development set: %(devset)s
#   Precision: %(precision)s
#   Recall:    %(recall)s
#   F-score:   %(fscore)s

%(grammar)s
c             C   s   |s ddg}t |dd}|s d S | jrd| j| | jd d krddd | jd d	d  D \}}}n$| jd kr|d
 } }}nd } }}t|d2}|| jtt	
 | j|||| j d  W d Q R X d S )N)zChunk Gramamrz.chunk)z	All files*z.chunk)	filetypesdefaultextensionrR   r   c             S   s   g | ]}d d|  qS )z%.2f%%rT   rM   )r  vrM   rM   rN   r&  o  s    z/RegexpChunkApp.save_grammar.<locals>.<listcomp>r.   zGrammar not well formedzNot finished evaluation yetr   )Zdater^   r   r   r   rL   )r   rb   r\   rO   r[   openwriteSAVE_GRAMMAR_TEMPLATEr  r   ctimer_   rL   rJ   )rK   filenameftypesr   r   r   outfilerM   rM   rN   rw   e  s*    $
zRegexpChunkApp.save_grammarc          	   C   s   |s ddg}t |dd}|s d S | jdd |   t|d}| }W d Q R X tdd	| }| j	d| |   d S )
N)zChunk Gramamrz.chunk)z	All filesr=  z.chunk)r>  r?  z1.0rU   r   z2^\# Regexp Chunk Parsing Grammar[\s\S]*F-score:.*
rP   )
r   rr   r   ru   rA  readrH   rI   lstriprV   )rK   rE  rF  ZinfilerL   rM   rM   rN   rz     s    zRegexpChunkApp.load_grammarc       
   	   C   sH  |s ddg}t |dd}|s d S t|d}|d |dt   |d| j  xvt| jD ]h\}\}}}}d	|d
 t| j|d |d |d f }	|d|	  |d	dd |
  D  qfW | jr| j| | jd d ks:| jd kr|d n
|d |d	dd | j
  D  W d Q R X d S )N)zChunk Gramamr Historyz.txt)z	All filesr=  z.txt)r>  r?  r   z'# Regexp Chunk Parsing Grammar History
z# Saved %s
z# Development set: %s
z>Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, fscore=%.2f%%)r.   rT   z
%s
rP   c             s   s   | ]}d | V  qdS )z  %s
NrM   )r  r%  rM   rM   rN   r    s    z.RegexpChunkApp.save_history.<locals>.<genexpr>rR   r   z#
Current Grammar (not well-formed)
z!
Current Grammar (not evaluated)
c             s   s   | ]}d | V  qdS )z  %s
NrM   )r  r%  rM   rM   rN   r    s    )r   rA  rB  r   rD  r_   r   rb   r   r  rJ   r(  r\   rO   r[   rL   )
rK   rE  rF  rG  r   r   r   r   r   ZhdrrM   rM   rN   r     s,    
&&
zRegexpChunkApp.save_historyc             G   sH   d}d}y ddl m} |||d  W n   t| j|| Y nX d S )Nz<NLTK RegExp Chunk Parser Application
Written by Edward Loperz2About: Regular Expression Chunk Parser Applicationr   )Message)messageri   )Zsix.moves.tkinter_messageboxrJ  Zshowr   rh   )rK   rx   ZABOUTZTITLErJ  rM   rM   rN   r     s    zRegexpChunkApp.aboutc             C   sJ   |d k	r| j | | j tt| j| j   | d | d d S )Nr.   r   )rl   rm   r   r   r^   r   rt   )rK   rA   rM   rM   rN   r     s
    
zRegexpChunkApp.set_devset_sizec             C   sX   |d k	r| j | | j  }| jjt| d | jjtdt| d d d d S )N)rA   ir   r'   )r   rm   r   r   r   absr   r   )rK   rA   rM   rM   rN   r     s
    
zRegexpChunkApp.resizec             O   s   t  r
dS | jj|| dS )z
        Enter the Tkinter mainloop.  This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a secript); otherwise, the demo will close as soon as
        the script completes.
        N)r   rh   mainloop)rK   r0  kwargsrM   rM   rN   rM    s    zRegexpChunkApp.mainloop)r   NrP   rQ   N)N)N)N)N)N)N)N)B__name__
__module____qualname____doc__rX   r   r  r   r   r   r   r   r   r   r   r   r   Z_FONT_PARAMSr   r   r   Z_HELPTAB_BG_COLORZ_HELPTAB_FG_COLORr  r  r   rO   rv   rp   rn   rq   r}   r   Z_DRAW_LINESr   r   r   r   ro   r   r   r   r   r   r#  r{   r|   rk   r   rt   r
  r'  r:  ru   r)  r   r  r   rC  rw   rz   r   r   r   r   rM  rM   rM   rM   rN   r   /   s<  
'%


    
e	b K >?$)
-3




!


r   c               C   s   t    d S )N)r   rM  rM   rM   rM   rN   app  s    rS  __main__)*rR  Z
__future__r   r   textwraprH   ZrandomZsix.moves.tkinterr   r   r   r   r   r   r	   r
   r   r   Zsix.moves.tkinter_tkfiledialogr   r   Zsix.moves.tkinter_fontr   Z	nltk.treer   Z	nltk.utilr   Znltk.draw.utilr   Znltk.corpusr   r   Z
nltk.chunkr   r   Znltk.chunk.regexpr   objectr   rS  rO  __all__rM   rM   rM   rN   <module>   s<   0           5