# -*- coding: utf-8 -*-
"""
Unit tests for Senna
"""

from __future__ import unicode_literals
from os import environ, path, sep

import logging
import unittest

from nltk.classify import Senna
from nltk.tag import SennaTagger, SennaChunkTagger, SennaNERTagger

# Locate the Senna installation used by these tests: the SENNA environment
# variable wins when it is set (normalized, with a trailing separator);
# otherwise fall back to a fixed default location.
SENNA_EXECUTABLE_PATH = (
    path.normpath(environ['SENNA']) + sep
    if 'SENNA' in environ
    else '/usr/share/senna-v3.0'
)

# Used by the skipUnless decorators below: tests only run when the path exists.
senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)


@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaPipeline(unittest.TestCase):
    """Checks for the ``Senna`` pipeline class imported from ``nltk.classify``."""

    def test_senna_pipeline(self):
        """Run POS, chunk and NER tagging together on one tokenized sentence."""

        senna = Senna(SENNA_EXECUTABLE_PATH, ['pos', 'chk', 'ner'])
        words = 'Dusseldorf is an international business center'.split()

        # One tag per word for each annotation layer; the expected rows are
        # materialized up front so they do not depend on the tagging call.
        chunk_tags = ['B-NP', 'B-VP', 'B-NP', 'I-NP', 'I-NP', 'I-NP']
        ner_tags = ['B-LOC', 'O', 'O', 'O', 'O', 'O']
        pos_tags = ['NNP', 'VBZ', 'DT', 'JJ', 'NN', 'NN']
        expected = list(zip(words, chunk_tags, ner_tags, pos_tags))

        # Flatten each per-token mapping into a (word, chk, ner, pos) tuple.
        result = []
        for token in senna.tag(words):
            result.append((token['word'], token['chk'], token['ner'], token['pos']))

        self.assertEqual(result, expected)


@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaTagger(unittest.TestCase):
    """Checks for the Senna-backed taggers imported from ``nltk.tag``."""

    def test_senna_tagger(self):
        """POS-tag one tokenized question with ``SennaTagger``."""
        tokens = 'What is the airspeed of an unladen swallow ?'.split()
        pos_tags = ['WP', 'VBZ', 'DT', 'NN', 'IN', 'DT', 'NN', 'NN', '.']
        # Materialized before tagging so the expectation is independent of the call.
        expected = list(zip(tokens, pos_tags))

        pos_tagger = SennaTagger(SENNA_EXECUTABLE_PATH)
        result = pos_tagger.tag(tokens)

        self.assertEqual(result, expected)

    def test_senna_chunk_tagger(self):
        """Chunk-tag a question, then collect its NP chunks via ``bio_to_chunks``."""
        tokens = 'What is the airspeed of an unladen swallow ?'.split()
        bio_tags = ['B-NP', 'B-VP', 'B-NP', 'I-NP', 'B-PP', 'B-NP', 'I-NP', 'I-NP', 'O']
        expected_bio = list(zip(tokens, bio_tags))
        # Each expected NP chunk pairs its text with dash-joined token indices.
        expected_chunks = [
            ('What', '0'),
            ('the airspeed', '2-3'),
            ('an unladen swallow', '5-6-7'),
        ]

        chunker = SennaChunkTagger(SENNA_EXECUTABLE_PATH)
        tagged = chunker.tag(tokens)
        np_chunks = list(chunker.bio_to_chunks(tagged, chunk_type='NP'))

        self.assertEqual(tagged, expected_bio)
        self.assertEqual(np_chunks, expected_chunks)

    def test_senna_ner_tagger(self):
        """NER-tag two tokenized sentences with a single ``SennaNERTagger``."""
        first_tokens = 'Shakespeare theatre was in London .'.split()
        first_expected = list(
            zip(first_tokens, ['B-PER', 'O', 'O', 'O', 'B-LOC', 'O'])
        )
        second_tokens = 'UN headquarters are in NY , USA .'.split()
        second_expected = list(
            zip(second_tokens, ['B-ORG', 'O', 'O', 'O', 'B-LOC', 'O', 'B-LOC', 'O'])
        )

        ner = SennaNERTagger(SENNA_EXECUTABLE_PATH)
        first_result = ner.tag(first_tokens)
        second_result = ner.tag(second_tokens)

        self.assertEqual(first_result, first_expected)
        self.assertEqual(second_result, second_expected)
