# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import unittest

from nltk import RegexpParser


class TestChunkRule(unittest.TestCase):
    def test_tag_pattern2re_pattern_quantifier(self):
        """Test for bug https://github.com/nltk/nltk/issues/1597

        Ensures that curly bracket quantifiers can be used inside a chunk rule.
        This type of quantifier has been used for the supplementary example
        in http://www.nltk.org/book/ch07.html#exploring-text-corpora.
        """
        sent = [
            ('The', 'AT'),
            ('September-October', 'NP'),
            ('term', 'NN'),
            ('jury', 'NN'),
            ('had', 'HVD'),
            ('been', 'BEN'),
            ('charged', 'VBN'),
            ('by', 'IN'),
            ('Fulton', 'NP-TL'),
            ('Superior', 'JJ-TL'),
            ('Court', 'NN-TL'),
            ('Judge', 'NN-TL'),
            ('Durwood', 'NP'),
            ('Pye', 'NP'),
            ('to', 'TO'),
            ('investigate', 'VB'),
            ('reports', 'NNS'),
            ('of', 'IN'),
            ('possible', 'JJ'),
            ('``', '``'),
            ('irregularities', 'NNS'),
            ("''", "''"),
            ('in', 'IN'),
            ('the', 'AT'),
            ('hard-fought', 'JJ'),
            ('primary', 'NN'),
            ('which', 'WDT'),
            ('was', 'BEDZ'),
            ('won', 'VBN'),
            ('by', 'IN'),
            ('Mayor-nominate', 'NN-TL'),
            ('Ivan', 'NP'),
            ('Allen', 'NP'),
            ('Jr.', 'NP'),
            ('.', '.'),
        ]  # source: brown corpus
        cp = RegexpParser('CHUNK: {<N.*>{4,}}')
        tree = cp.parse(sent)
        assert (
            tree.pformat()
            == """(S
  The/AT
  September-October/NP
  term/NN
  jury/NN
  had/HVD
  been/BEN
  charged/VBN
  by/IN
  Fulton/NP-TL
  Superior/JJ-TL
  (CHUNK Court/NN-TL Judge/NN-TL Durwood/NP Pye/NP)
  to/TO
  investigate/VB
  reports/NNS
  of/IN
  possible/JJ
  ``/``
  irregularities/NNS
  ''/''
  in/IN
  the/AT
  hard-fought/JJ
  primary/NN
  which/WDT
  was/BEDZ
  won/VBN
  by/IN
  (CHUNK Mayor-nominate/NN-TL Ivan/NP Allen/NP Jr./NP)
  ./.)"""
        )
