
`]c           @   s`  d  Z  d d l m Z d d l Z d d l Z d d l Z d d l Z d d l m Z m	 Z	 m
 Z
 m Z m Z m Z m Z m Z m Z m Z d d l m Z m Z d d l m Z d d l m Z d d l m Z d d	 l m Z d d
 l m Z m Z d d l  m! Z! m" Z" d d l# m$ Z$ d e% f d     YZ& d   Z' e( d k rSe'   n  d g Z) d S(   sl   
A graphical tool for exploring the regular expression based chunk
parser ``nltk.chunk.RegexpChunkParser``.
i(   t   divisionN(
   t   Buttont   Canvast   Checkbuttont   Framet   IntVart   Labelt   Menut	   Scrollbart   Textt   Tk(   t   askopenfilenamet   asksaveasfilename(   t   Font(   t   Tree(   t   in_idle(   t   ShowText(   t	   conll2000t   treebank_chunk(   t
   ChunkScoret   RegexpChunkParser(   t   RegexpChunkRulet   RegexpChunkAppc           B   s]  e  Z d  Z i- d d 6d d 6d d 6d d 6d	 d
 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d  6d! d" 6d# d$ 6d% d& 6d' d( 6d) d* 6d+ d, 6d- d. 6d/ d0 6d1 d2 6d3 d4 6d5 d6 6d7 d8 6d9 d: 6d; d< 6d= d> 6d? d@ 6dA dB 6dC dD 6dE dF 6dG dH 6dI dJ 6dK dL 6dM dN 6dO dP 6dQ dR 6dS dT 6dU dV 6dW dX 6dY dZ 6Z d d d dd db de df dg dh f g Z di e dj dk  f dl e dj dm  f dn e do dp  f dq e dq e  f dr e dq e  f ds e dt du dv du  f dw e dt dx dv dy  f dz e dj d{  f d| e dj d}  f d~ e dj d  f g
 Z d Z d Z	 d Z
 d Z d Z e d d d d do d d d d d d d d d d d  Z e d d d d do d d d dj d d d d d d d d d  	Z e d d d d do d d d d d d d d d d d d d  	Z e do d d d d d  Z e d d d d  Z e do d d d d d d d  Z e do d d d d d d d d d d d d d  Z e do d d d d d  Z d Z d Z e do d  Z e do d  Z d Z d   Z d d d d d d  Z d   Z d   Z d   Z d   Z  d Z! e" Z# d   Z$ e" Z% d   Z& d   Z' d   Z( e" Z) d   Z* d   Z+ d   Z, d   Z- d   Z. d   Z/ d   Z0 d   Z1 d   Z2 d d  Z3 d   Z4 d   Z5 d   Z6 d   Z7 d d  Z8 d   Z9 d   Z: d   Z; d Z< d d  Z= d d  Z> d d  Z? d   Z@ d d  ZA d d  ZB d   ZC RS(   s   
    A graphical tool for exploring the regular expression based chunk
    parser ``nltk.chunk.RegexpChunkParser``.

    See ``HELP`` for instructional text.
    s   Coordinating conjunctiont   CCs   Possessive pronouns   PRP$s   Cardinal numbert   CDt   Adverbt   RBt
   Determinert   DTs   Adverb, comparativet   RBRs   Existential theret   EXs   Adverb, superlativet   RBSs   Foreign wordt   FWt   Particlet   RPt	   Adjectivet   JJt   tot   TOs   Adjective, comparativet   JJRt   Interjectiont   UHs   Adjective, superlativet   JJSs   Verb, base formt   VBs   List item markert   LSs   Verb, past tenset   VBDt   Modalt   MDs   Noun, pluralt   NNSs   Noun, singular or maspst   NNs   Verb, past participlet   VBNs   Verb,3rd ps. sing. presentt   VBZs   Proper noun, singulart   NNPs   Proper noun pluralt   NNPSs   wh-determinert   WDTt   Predeterminert   PDTs
   wh-pronount   WPs   Possessive endingt   POSs   Possessive wh-pronouns   WP$s   Personal pronount   PRPs	   wh-adverbt   WRBs   open parenthesist   (s   close parenthesist   )s
   open quotes   ``t   commat   ,s   close quotes   ''t   periodt   .s   pound sign (currency marker)t   #s   dollar sign (currency marker)t   $s   Preposition/subord. conjunctiont   INs#   Symbol (mathematical or scientific)t   SYMs   Verb, gerund/present participlet   VBGs   Verb, non-3rd ps. sing. presentt   VBPt   colont   :t   Helpt   20s-  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their harmonic mean (the 'f-score'), are displayed in the status bar at the bottom of the window.t   Rulest   10s  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Chink rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
t   Regexpss   10 60sX  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
	<regexp><\#><CD> # This is a comment...</regexp>
		Matches <match>"#/# 100/CD"</match>
</hangindent>t   Tagss   <h1>Part of Speech Tags:</h1>
s   <hangindent>s
   <<TAGSET>>s   </hangindent>
t   redt
   foregrounds   #a00t   greens   #080t	   highlightt
   backgrounds   #dddt	   underlinet   h1t   indentt   lmargin1i   t   lmargin2t
   hangindenti    i<   t   vars   #88ft   regexps   #ba7t   matchs   #6a6i   i   g?g{Gz?g{Gz?t   widthi(   t   heighti   s   #efet   highlightbackgroundt   highlightthicknesst   relieft   groovet   borderi   t   wrapt   words   #555iF   i
   s   #eeft   tabsi   s   #9bbt   familyt	   helveticat   sizeis   #777t   padxt   padyi   i,  i  t   activebackgrounds   #abai   c         C   sd   t  j d d |  } t  j d d |  } t  j d d |  } | j   } t  j d d |  } | S(	   Ns   ((\\.|[^#])*)(#.*)?s   \1s    +t    s   
\s+s   
s	   ([^\\])\$s   \1\\$(   t   ret   subt   strip(   t   selft   grammar(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyt   normalize_grammar6  s    R   t    t   NPc         C   s  | |  _  | d k r! |  j } n  | |  _ | d k r | d k rT t j d  } q | d k ro t j   } q t d |   n  d |  _ | |  _	 d |  _
 d |  _ | |  _ | |  _ d |  _ d |  _ g  |  _ d |  _ d |  _ d |  _ d |  _ t d |  |  _ t   } |  _ | j d  | j d	  | j d
 |  j  t |  |  _ |  j j d  |  j  |  |  j! |  |  j" |  |  j# |  |  j$ j%   | r|  j$ j& d | d  |  j$ j' d d  n  |  j( d  |  j)   d S(   s  
        :param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        :param devset: A list of chunked sentences
        :param grammar: The initial grammar to display.
        :param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to ``self.TAGSET``.
        R   s	   train.txtt   treebanks   Unknown development set %si    it   chunk_labels   +50+50s   Regexp Chunk Parser Apps   <Control-q>id   t   ends   
t   inserts   1.0N(*   t   _chunk_labelt   Nonet   TAGSETt   tagsetR   t   chunked_sentsR   t
   ValueErrort   chunkerRt   t   normalized_grammart   grammar_changedt   devsett   devset_namet   devset_indext   _last_keypresst   _historyt   _history_indext   _eval_grammart   _eval_normalized_grammart   _eval_indexR   t   _eval_scoreR
   t   topt   geometryt   titlet   bindt   destroyR   t   _devset_sizet   sett   _init_fontst   _init_widgetst   _init_bindingst   _init_menubart
   grammarboxt   focusR{   t   mark_sett   show_devsett   update(   Rs   R   R   Rt   Ry   R   R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyt   __init__A  sP    															c            s   | j  d   j  | j  d   j  | j  d   j  | j  d   j  | j  d   f d    | j  d   f d      j j  d   j    j j  d   j    j j  d   j    j j  d	   j  d  S(
   Ns   <Control-n>s   <Control-p>s   <Control-t>s
   <KeyPress>s   <Control-s>c            s
     j    S(   N(   t   save_grammar(   t   e(   Rs   (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyt   <lambda>  Rv   s   <Control-o>c            s
     j    S(   N(   t   load_grammar(   R   (   Rs   (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     Rv   s   <Configure>(   R   t   _devset_nextt   _devset_prevt   toggle_show_traceR   R   t   evalboxt
   _eval_plot(   Rs   R   (    (   Rs   s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s    c         C   su   t  |  |  _ |  j j d  t d d d |  j j    |  _ t d d d t |  j j   d d   |  _ d  S(   Ni   Ri   Rj   Rk   i   (   R   t   _sizeR   R   t   gett   _fontt   intt
   _smallfont(   Rs   R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s
    "c         C   s*  t  |  } t  | d d } | j d d d d d |  j  | j d d d d d d	 d |  j  | j d d
 d d d d d |  j  | j d d d d d |  j  | j d d d d d |  j d d  | j d d d d d |  t  | d d } | j d d d |  j	 d d d d d |  j
  | j d d d |  j	 d d d d d |  j
  | j d d d |  j	 d d d d d |  j
  | j d d d |  j	 d d d d d |  j
  | j d d d |  j	 d d d d d |  j
  | j d d d d d |  t  | d d } | j d d  d |  j d d! d |  j  | j d d" d |  j d d# d |  j  | j d d$ d |  j d d% d |  j  | j d d& d |  j d d' d |  j  | j d d( d d d |  t  | d d } | j d d) d d d |  j  | j d d* d d d |  | j d |  d  S(+   Nt   tearoffi    t   labels   Reset ApplicationRV   t   commands   Save Current Grammart   accelerators   Ctrl-ss   Load Grammars   Ctrl-os   Save Grammar Historyi   t   Exiti   s   Ctrl-qt   Filet   menut   Tinyt   variablet   valuei
   t   Smalli   t   Mediumi   t   Largei   t   Hugei"   t   Views   50 sentencesi2   s   100 sentencesid   s   200 sentencesi   s   500 sentencesi  s   Development-Sett   AboutRK   (   R   t   add_commandt   resetR   R   t   save_historyR   t   add_cascadet   add_radiobuttonR   t   resizeR   t   set_devset_sizet   aboutt   config(   Rs   t   parentt   menubart   filemenut   viewmenut
   devsetmenut   helpmenu(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s    	
	
				
		
		
		
		
		
		
		
		
c         G   s$   |  j  r |  j   n
 |  j   d S(   Nt   break(   t   _showing_traceR   t
   show_trace(   Rs   R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   %  s    	
i   c         O   s  | j  d |  j j    } | j  d |  j j    } |  j j d  |  j j d | d d d d d d	 d
 d } |  j j |  d d | d } } |  j j | | | d | d d d d
 d d d } d |  j j |  d d } }	 |  j d }
 |  j j |  j j	 d d | d d d |
 d |
  |  j j |  j j	 d |	 d d d d |
 d |
  |  j
 j    ryt |  j  d k ryd } } d } } x t d t t |  j  |  j d   D]\ } |  j | \ } } } } t | |  } t | |  } t | |  } t | |  } qWt | d d  } t | d d  } t | d d  } t | d d  } n d } } d } } x t d  D] } | | | | d | | | } |	 |	 | | d | | | } | | k  o| k  n r"|  j j | | | |	 d d n  | | k  o9|	 k  n r|  j j | | | | d d qqW|  j j | | | |	  |  j j | |	 | |	  |  j j | d |	 d d d d d
 d d | |  j j | d | d d d d d
 d d | |  j j | |	 d d d d d  d
 d d | |  j j | |	 d d d d d d
 d d | d  } } xpt |  j  D]_\ } \ } } } } | | | | | | | } |	 |	 | | | | | } | |  j k r<|  j j | d | d | d | d d d! d d" d# | d d$ | d d% | d |  j d
 <nA |  j j |  j j | d | d | d | d d d& d d'  | d  k	 r|  j j    r|  j j |  j j | | | | d d'  n  | | } } qtWd  S((   NR_   R`   t   alli
   i   t   justifyt   leftt   anchort   wt   textt	   Precisioni   t   st   Recallt   centeri   RU   i    i  t   fillt   outlineg{Gz?i   g      $@s   #888i   t   rightt   ses   %d%%id   t   net   nws   #0f0s   #000s   Precision: %.2f%%	s   Recall: %.2f%%	s   F-score: %.2f%%s   #afas   #8c8(   R   R   t   winfo_widtht   winfo_heightt   deletet   create_textt   bboxt   _EVALBOX_PARAMSt   lowert   create_rectanglet
   _autoscalet   lenR   t   ranget   mint   _SCALE_Nt   maxt   create_lineR}   t	   enumerateR   t   create_ovalt   statust   _eval_lines(   Rs   R   R   R_   R`   t   tagR   R   R   t   bott   bgt   max_precisiont
   max_recallt   min_precisiont
   min_recallt   iRt   t	   precisiont   recallt   fmeasuret   xt   yt   prev_xt   prev_yt   _t   fscore(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   /  s    	&%	
	!	,	,$

/



"&			
	

(

	,0		/	%c   	      C   s  |  j  d  k r d  S|  j d  k r/ t |  _ d  St j   } t j   |  j |  j k  r |  j |  j	 k r t
 |  _ |  j  j t |  j d  |  j  S|  j |  j	 k rjx |  j D]| \ } } } } |  j |  j |  k r |  j j | | | | f  t |  j  d |  _ |  j   t |  _ d  |  _	 d  Sq Wd |  _ t d |  j  |  _ |  j |  _ |  j |  _	 n  |  j j   d k rt |  _ d  Sx_ |  j |  j t |  j |  j |  j j     !D]. } |  j! | j"    } |  j j# | |  qW|  j |  j 7_ |  j |  j j    k r|  j j |  j |  j j$   |  j j%   |  j j&   f  t |  j  d |  _ |  j   t |  _ d  |  _	 nn d |  j |  j j    } d | |  j' d <t
 |  _ |  j( t j   |  |  j  j t |  j d  |  j  d  S(	   Ni  i   i    Ry   Rv   id   s$   Evaluating on Development Set (%d%%)R   ()   R   R}   R   t   Falset   _eval_demon_runningt   timeR   t   _EVAL_DELAYR   R   t   Truet   afterR   t
   _EVAL_FREQt   _eval_demonR   Ru   t   appendR   R   R   R   R   R|   R   Rt   R   Rr   R   R   t   _EVAL_CHUNKR   R   t   _chunkparset   leavest   scoreR   R   t	   f_measureR   t   _adaptively_modify_eval_chunk(	   Rs   t   t0t   gt   pt   rt   ft   goldt   guesst   progress(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR    s^    		#
						$	
		c         C   s   | |  j  k r^ |  j d k r^ t |  j d t t |  j |  j  |  |  j d   |  _ nO | |  j k  r t |  j d t t |  j |  j |  |  j d   |  _ n  d S(   s   
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

        :param t: The amount of time that the eval demon took.
        i   i   i
   N(   t   _EVAL_DEMON_MAXR  R   R   R   t   _EVAL_DEMON_MIN(   Rs   t   t(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR    s    

c            s	  t  |   j  } | j d d d | j d d d | j d d d | j d d d t | d   j   j   _ t | d   j d	 d
 d d d   j d   _	   j	 j
 d d d d d d    j j
 d d d d d d  t | d   j j } | j
 d d d d d d    j j d | j    j d } t  | d | } | j
 d d d d d d  t | d	 d d   j   j j d d  t | d	 d d   j   j j d d  t | d   j   j   _   j j
 d d d d d d  i    _   j d } t  | d | } | j
 d d d d d d  x t   j  D] \ } \ } }	 }
 t | d	 | d   j } | j
 d | d d d d d  | j d |   f d   |   j | <t  | d d d   j d | j
 d | d d d d  qWW  j   j d d j d   j    j j d  d  t x.   j D]# \ } }   j j d! | |  qRW  j    j d d  t | d   j j }   j j d | j  | j
 d d d d d d  t  | d   j d } t | d   j   j!   _"   j" j d" t d# d$  t | d   j d	 d% d& d' d   j! d   _#   j# j
 d d d d d d  | j
 d d d d d d  t | d   j$   _%   j% j
 d d d d d d  t | d   j" j& d( d)   _'   j' j   j" d* <  j' j d d+ d# d,    j d } t  | d | } | j
 d d d d- d d  t | d	 d. d   j(   j j d d  t | d	 d/ d   j)   j j d d  t | d	 d0 d   j* d1 d2   j   _+   j+ j d d'  t | d	 d3 d   j,   j   _-   j- j d d'  t. |   j/    _0 t | d   j d	 d4 d& d' d   j/ d } | j
 d d d d d d    j0 j
 d d d d d d d5 d    j d } t  | d | } | j
 d d d d- d d  t1   j2    _3   j3 j t4  t5 | d6   j3 d   j6 d	 d7   j j d d  t1   j2    _7   j7 j t4  t5 | d6   j7 d   j6 d	 d8   j j d d  t | d	 d9   j j d d'  t | d   j   j8   _9   j9 j
 d d d d: d d; d< d d= d d5 d  d2   j d1 <d2   j" d1 <  j d } t  | d d> d d d | j
 d d d d  t  | d d d d> d | j
 d d d d  t  | d d? d d d | j
 d d d d@  | j d# d$ d" t    j" j dA d dB dC dD   j" j dE dC dD dF dG   j" j dH d dI   j" j dJ dF dK dL dM   j" j dN dO dP dL dM   j" j dQ dF dG   j j dQ d dR   j j dS dF dT   j j dU dF dV   j j dW dF dX   j j dY dZ d dO d[ d  S(\   Ni    t   weighti   i   i   i   i   t   fontR   s   Grammar:t   highlightcolort   blackRU   t   columnt   rowt   stickyt   SWt   NEWSR   t   NWSt   yscrollcommandt   EWs   Prev Grammart   sideR   s   Next Grammart   Ss   <ButtonPress>c            s     j  |  S(   N(   t	   show_help(   R   t   tab(   Rs   (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   P  Rv   R`   R_   t   elides   tag-%st   expandR   t   boths   Development Set:R   R   t   orientt   horizt   xscrollcommandt   bottomR   i   s   Prev Example (Ctrl-p)s   Next Example (Ctrl-n)s   Show examplet   statet   disableds
   Show traces   Evaluation:t
   columnspanR   t   Zoomt   Linest   Historyi	   t   NEWRl   Rm   i
   i   i   s   true-poss   #afaRV   R  s	   false-negRR   s   #800s	   false-poss   #faat   traces   #666Rf   t   nonet
   wrapindentRZ   i   t   errors   #fect   comments   #840t   angles   #00ft   braces   #0a0R[   RY   i(   (:   R   t   _FRAME_PARAMSt   grid_columnconfiguret   grid_rowconfigureR	   R   t   _GRAMMARBOX_PARAMSR   R   t   grammarlabelt   gridR   t   yviewR   R   R   t   _history_prevt   _BUTTON_PARAMSt   packt   _history_nextR   t   _HELPBOX_PARAMSt   helpboxt   helptabsR   t   HELPR   t   _HELPTAB_SPACERt	   configuret
   tag_configR  t   HELP_AUTOTAGR,  t   _DEVSETBOX_PARAMSt	   devsetboxt   devsetlabelt   _devset_scrollt   devset_scrollt   xviewt   devset_xscrollR   R   R   t   devset_buttonR   t   trace_buttonR   R   R   R   R   R   R  R   R   R   t   _STATUS_PARAMSR   (   Rs   R   t   frame0t   grammar_scrollbarR   t   frame3t   helptab_frameR   R-  t   tabstopsR   R   R   t   paramst   help_scrollbart   frame4t   frame1t   frame2(    (   Rs   s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s&   		% !%		!	%				"1...c            s=  t  |  _ d |  j d <d |  j d <d |  j d <|  j j d d  d |  j d |  j j   f |  j	 d <|  j
 d  k r |  j j d d	  |  j j d
 d d  d  S|  j |  j } |  j
 j   } d } d g   xG t | j    D]3 \ } \ } } | d | 7}   j t |   q Wt   f d   t t |  d  D  |  _ t d   t t |  d  D  |  _ xxt t |  d  D]`} | d k r|  j j d d  |  j j d d d  n5 |  j j d d | | d  |  j j d d d  |  j j d | d  |  j j d d d  t | |   }	 |  j | j    }
 |  j |  } |  j |
  } x* | j |  D] } |  j | | d  qWx% | | D] } |  j | | d  qWx% | | D] } |  j | | d  qWqW|  j j d d  |  j j d d d  |  j j d |  j j  d d  d  S(   NR6  R5  t   normals   1.0Rz   s   Development Set (%d/%d)i   R   s#   Trace: waiting for a valid grammar.R?  s   	s   %s c         3   s>   |  ]4 } t  t     D] } | | f   | f Vq q d  S(   N(   R   R   (   t   .0R   t   j(   t   charnum(    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pys	   <genexpr>  s   c         s   s#   |  ] } | | d  d  f Vq d S(   i   N(    (   Rk  R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pys	   <genexpr>  s    i    s   Start:
R<  s   end -2c linestarts   end -2cs
   Apply %s:
s   
R>  s   true-poss	   false-negs	   false-poss
   Finished.
id   g333333?(!   R  R   R^  R]  RW  R   R   R   R   RX  R   R}   R{   t   tag_addR   t   rulesR   R  R  R   t   dictR   Rm  t   linenumR   R  t   _chunkst   intersectiont   _color_chunkR   R	  R\  R   (   Rs   R   t	   gold_treeRo  t   tagseqt   wordnumRg   t   posR   R   t	   test_treet   gold_chunkst   test_chunkst   chunk(    (   Rm  s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     sV    	$	% )c   
   	   C   s  d |  j  d <|  j  j d d  x|  j D]\ } } } | | k r| j d d j d   t t |  j j    d d	   D   } |  j	 | j
 |  j   |  j  j
 d
 |  |  j  j d | d  d } x |  j D] \ } } d | | f } x t j | |  D] }	 |  j  j d | |	 j d  | |	 j d   |  j  j d | | |	 j d  | |	 j d   |  j  j d | |	 j d  | |	 j d   qWq Wq* |  j	 | j
 |  j   q* Wd |  j  d <d  S(   NRj  R5  s   1.0Rz   s
   <<TAGSET>>s   
c         s   s   |  ] } d  | Vq d S(   s   	%s	%sN(    (   Rk  t   item(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pys	   <genexpr>"  s   t   keyc         S   s4   t  j d |  d  r& d |  d f p3 d |  d f S(   Ns   \w+i    i   (   Rp   R^   (   t   t_w(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   %  s   Rh   i   s   1.0 + %d charss   (?s)(<%s>)(.*?)(</%s>)R.  i   s   tag-%si   i   R6  s   



















(   RO  R   RQ  t   replacet   joint   sortedt   listR   t   itemsRP  R   t   _HELPTAB_FG_PARAMSR{   RU  Rp   t   finditerRn  t   startRz   t   _HELPTAB_BG_PARAMS(
   Rs   R-  t   nameRd  R   t   CR   Re  t   patternt   m(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR,    s0    0	+;c         G   s   |  j  |  j d  d S(   Ni   R   (   t   _view_historyR   (   Rs   R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyRJ  =  s    c         G   s   |  j  |  j d  d S(   Ni   R   (   R  R   (   Rs   R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyRM  A  s    c         C   s  t  d t t |  j  d |   } |  j s2 d  S| |  j k rE d  Sd |  j d <|  j j d d  |  j j d |  j | d  |  j j d d  | |  _ |  j	 |  j | d  |  j
 |  j | d  |  _ |  j rg  |  j j d  D] } t j |  ^ q } n g  } t |  |  _ |  j   |  j   |  j rM|  j   n  |  j t |  j  d k  rd	 |  j d t |  j  f |  j d
 <n d |  j d
 <d  S(   Ni    i   Rj  R5  s   1.0Rz   R{   s   
s   Grammar %s/%s:R   s   Grammar:(   R   R   R   R   R   R   R   R{   R   t   _syntax_highlight_grammarRu   R   t   splitR   t
   fromstringR   R   R   t   _highlight_devsetR   R   RG  (   Rs   t   indext   lineRo  (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR  E  s4    %			.

	
c         G   s   |  j  d d d  d S(   Nt   scrolli   t   pageR   (   RY  (   Rs   R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   n  s    c         G   s   |  j  d d d  d S(   NR  iR  R   (   RY  (   Rs   R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   r  s    c         G   s-   |  j  d  k r d  S|  j  j   d  |  _  d  S(   N(   R   R}   R   (   Rs   R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   v  s    c         G   s   d } |  j  } | d k rO | d j d  rO |  j |  j t | d   n | d k r | d j d  r |  j |  j | t | d   nU | d k r |  j t t | d  |  j j     n d s t d | | f   | r |  j	   n  d  S(   Ni   R  t   uniti    R  t   movetos   bad scroll command %s %s(
   R   t
   startswithR   R   R   t   floatR   R   t   AssertionErrorR   (   Rs   R   t   argst   Nt   showing_trace(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyRY  |  s    	!%-c         C   s.  | d  k r |  j } n  t t d |  |  j j   d  } | |  j k rZ |  j rZ d  S| |  _ t |  _ d |  j d <d |  j	 d <d |  j
 d <d |  j
 d <|  j
 j d d	  d
 |  j d |  j j   f |  j d <|  j |  j |  j d !} i  |  _ i d d 6|  _ x t |  D] \ } } d } xp t | j    D]\ \ } \ } } t |  |  j | | f <| d | | f 7} t |  |  j | | d f <q?W|  j
 j d	 | d  d  qW|  j d  k	 r|  j   n  d |  j
 d <|  j |  j j   }	 |  j d |  j j   }
 |  j j |	 |
  d  S(   Ni    i   Rj  R5  R6  Rg   Rf   s   1.0Rz   s   Development Set (%d/%d)R   Rv   s   %s/%s is   

i   (   R}   R   R   R   R   R   R   R  R^  R]  RW  R   RX  R   Rm  Rq  R   R  R   R{   R   R  RZ  R   (   Rs   R  t   samplet   sentnumt   sentt   linestrRw  Rg   Rx  t   firstt   last(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s<    %		$	%!c         C   s   t    } d } xo | D]g } t | t  rs | j   |  j k r` | j | | t |  f  n  | t |  7} q | d 7} q W| S(   Ni    i   (   R   t
   isinstanceR   R   R|   t   addR   (   Rs   t   treet   chunksRw  t   child(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyRr    s    	 c         C   s  |  j  d  k r d  S|  j j d d d  |  j j d d d  |  j j d d d  |  j j d d d  xrt | j d   D][\ } } | j   s q n  t j	 d |  } d  } | j
 d	  r(| j d	  } d
 | d | j d	  f } d
 | d | j d	  f } |  j j d | |  n  x t j d |  D] } | d  k	 rc| j   | k rcPn  d
 | d | j   f } d
 | d | j   f } | j
   d k r|  j j d | |  q;|  j j d | |  q;Wq Wd  S(   NR@  s   1.0Rz   RA  RB  R[   s   
s   (\\.|[^#])*(#.*)?i   s   %d.%di   s   [<>{}]s   <>(   R   R}   R   t
   tag_removeRn  R   R  Rr   Rp   R^   t   groupR  Rz   R  (   Rs   Rt   t   linenoR  R  t   comment_startR   R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR    s0    "c         C   s   |  j  d  k r d  S|  j j d d d  g  |  _ x t | j d   D] \ } } t j d d |  } | j	   } | rH y t
 j |  Wq t k
 r } |  j j d d | d d	 | d  q XqH qH Wd
 |  j d <d  S(   NR?  s   1.0Rz   s   
s   ((\\.|[^#])*)(#.*)?s   \1s   %s.0i   s   %s.0 lineendRv   R   (   R   R}   R   R  t   _grammarcheck_errsR   R  Rp   Rq   Rr   R   R  R   Rn  R   (   Rs   Rt   R  R  R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyt   _grammarcheck  s    	"	(c         G   sq  | r t  j    |  _ n  |  j j d d  |  _ } |  j |  } | |  j k rV d  S| |  _ |  j t |  j	  d k  r d |  j
 d <n  |  j |  y> | r g  | j d  D] } t j |  ^ q } n g  } Wn* t k
 r} |  j |  d  |  _ d  SXt |  |  _ |  j j d d d  t  j    |  _ |  j rM|  j   n
 |  j   |  j sm|  j   n  d  S(   Ns   1.0Rz   i   s   Grammar:R   s   
R?  (   R  R   R   R   Rt   Ru   R   R   R   R   RG  R  R  R   R  R   R  R}   R   R   R  R   R   R   R  R  R  (   Rs   t   eventRt   R   R  Ro  R   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s6    	+
		
	c         C   s<  | d  k r) |  j |  j |  j d !} n  |  j j d d d  |  j j d d d  |  j j d d d  x t |  D] \ } } |  j | j    } |  j |  } |  j |  } x* | j	 |  D] } |  j
 | | d  q Wx% | | D] } |  j
 | | d  q Wx% | | D] } |  j
 | | d  qWqx Wd  S(   Ni   s   true-poss   1.0Rz   s	   false-negs	   false-pos(   R}   R   R   RW  R  R   R  R  Rr  Rs  Rt  (   Rs   R  R  Ru  Ry  Rz  R{  R|  (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR  "  s    c         C   sK   y |  j  j |  SWn0 t t f k
 rF } |  j j d d d  | SXd  S(   NR?  s   1.0Rz   (   R   t   parseR   t
   IndexErrorR   Rn  (   Rs   t   wordsR   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR  9  s
    c         C   sf   | \ } } |  j  j | d |  j | |  j | | f f d |  j | |  j | | f d f  d  S(   Ns   %s.%si   (   RW  Rn  Rq  Rm  (   Rs   R  R|  R   R  Rz   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyRt  D  s
    	!c         C   sd   d  |  _ d  |  _ d  |  _ d |  _ g  |  _ d |  _ |  j j d d  |  j	 d  |  j
   d  S(   Ni    s   1.0Rz   (   R}   R   Rt   R   R   R   R   R   R   R   R   (   Rs   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   L  s    						s   # Regexp Chunk Parsing Grammar
# Saved %(date)s
#
# Development set: %(devset)s
#   Precision: %(precision)s
#   Recall:    %(recall)s
#   F-score:   %(fscore)s

%(grammar)s
c         C   s/  | s4 d d g } t  d | d d  } | s4 d  Sn  |  j r |  j |  j |  j d d  k r g  |  j d d	 D] } d
 d | ^ qr \ } } } n. |  j d  k r d } } } n d } } } t | d  T } | j |  j t	 d t
 j   d |  j d | d | d | d |  j j     Wd  QXd  S(   Ns   Chunk Gramamrs   .chunks	   All filest   *t	   filetypest   defaultextensionii    i   s   %.2f%%id   s   Grammar not well formeds   Not finished evaluation yetR   t   dateR   R   R   R  Rt   (   s   Chunk Gramamrs   .chunk(   s	   All filesR  (   R   R   R   Ru   R   R}   t   opent   writet   SAVE_GRAMMAR_TEMPLATERp  R  t   ctimeR   Rt   Rr   (   Rs   t   filenamet   ftypest   vR   R   R  t   outfile(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   e  s*    8	c         C   s   | s4 d d g } t  d | d d  } | s4 d  Sn  |  j j d d  |  j   t | d	   } | j   } Wd  QXt j d
 d |  j   } |  j j	 d |  |  j   d  S(   Ns   Chunk Gramamrs   .chunks	   All filesR  R  R  s   1.0Rz   R  s2   ^\# Regexp Chunk Parsing Grammar[\s\S]*F-score:.*
Rv   (   s   Chunk Gramamrs   .chunk(   s	   All filesR  (
   R   R   R   R   R  t   readRp   Rq   t   lstripR{   (   Rs   R  R  t   infileRt   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s    
c   
      C   s  | s4 d d g } t  d | d d  } | s4 d  Sn  t | d  d} | j d  | j d	 t j    | j d
 |  j  x t |  j  D] \ } \ } } } } d | d t |  j  | d | d | d f }	 | j d |	  | j d j	 d   | j
   j   D   q W|  j oB|  j |  j |  j d d  k s|  j d  k rd| j d  n | j d  | j d j	 d   |  j j
   j   D   n  Wd  QXd  S(   Ns   Chunk Gramamr Historys   .txts	   All filesR  R  R  R   s'   # Regexp Chunk Parsing Grammar History
s   # Saved %s
s   # Development set: %s
s>   Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, fscore=%.2f%%)i   id   s   
%s
Rv   c         s   s   |  ] } d  | Vq d S(   s     %s
N(    (   Rk  R  (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pys	   <genexpr>  s    ii    s#   
Current Grammar (not well-formed)
s!   
Current Grammar (not evaluated)
c         s   s   |  ] } d  | Vq d S(   s     %s
N(    (   Rk  R  (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pys	   <genexpr>  s    (   s   Chunk Gramamr Historys   .txt(   s	   All filesR  (   R   R  R  R  R  R   R   R   R   R  Rr   R  R   Ru   R   R}   Rt   (
   Rs   R  R  R  R   R  R  R  R  t   hdr(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s,    (/0	c         G   s^   d d } d } y- d d l  m } | d | d |  j   Wn t |  j | |  n Xd  S(   Ns%   NLTK RegExp Chunk Parser Application
s   Written by Edward Lopers2   About: Regular Expression Chunk Parser Applicationi(   t   Messaget   messageR   (   t   six.moves.tkinter_messageboxR  t   showR   R   (   Rs   R   t   ABOUTt   TITLER  (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s    
c         C   sh   | d  k	 r |  j j |  n  |  j j t t |  j  |  j j     |  j d  |  j d  d  S(   Ni   i    (   R}   R   R   R   R   R   R   R   (   Rs   Rk   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s
    +c         C   sw   | d  k	 r |  j j |  n  |  j j   } |  j j d t |   |  j j d t d t |  d d   d  S(   NRk   ii   i   (	   R}   R   R   R   R   RS  t   absR   R   (   Rs   Rk   (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR     s
    c         O   s$   t    r d S|  j j | |   d S(   s   
        Enter the Tkinter mainloop.  This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a secript); otherwise, the demo will close as soon as
        the script completes.
        N(   R   R   t   mainloop(   Rs   R  t   kwargs(    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR    s    	(   RK   RL   s-  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their harmonic mean (the 'f-score'), are displayed in the status bar at the bottom of the window.(   RM   RN   s  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Chink rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
(   RO   s   10 60sX  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
	<regexp><\#><CD> # This is a comment...</regexp>
		Matches <match>"#/# 100/CD"</match>
</hangindent>(   i   N(D   t   __name__t
   __module__t   __doc__R~   RQ  Rp  R  RU  R  R  R
  R  R  RF  RN  RV  R_  t   _FONT_PARAMSRC  R   RK  t   _HELPTAB_BG_COLORt   _HELPTAB_FG_COLORR  R  RR  Ru   R}   R   R   R   R   R   R   R  t   _DRAW_LINESR   R  R  R  R   R   R   R,  RJ  RM  R  R   R   R   RY  R   Rr  R  R  R   R  R  Rt  R   R  R   R   R   R   R   R   R  (    (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyR   /   sJ  	
  %    "			!		e				b			K			?	$			)				-				3			
!	
c           C   s   t    j   d  S(   N(   R   R  (    (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyt   app  s    t   __main__R  (*   R  t
   __future__R    R  t   textwrapRp   t   randomt   six.moves.tkinterR   R   R   R   R   R   R   R   R	   R
   t   six.moves.tkinter_tkfiledialogR   R   t   six.moves.tkinter_fontR   t	   nltk.treeR   t	   nltk.utilR   t   nltk.draw.utilR   t   nltk.corpusR   R   t
   nltk.chunkR   R   t   nltk.chunk.regexpR   t   objectR   R  R  t   __all__(    (    (    s7   lib/python2.7/site-packages/nltk/app/chunkparser_app.pyt   <module>   s0   F     	
