
`]c           @  s   d  d l  m Z m Z d  d l Z d  d l Z d  d l m Z d  d l m Z d  d l	 m
 Z
 d  d l m Z d e j f d     YZ d	 e j f d
     YZ d S(   i(   t   print_functiont   unicode_literalsN(   t   closing(   t   data(   t   SnowballStemmer(   t   PorterStemmert   SnowballTestc           B  s5   e  Z d    Z d   Z d   Z d   Z d   Z RS(   c         C  s  t  d t  } | j d  d k s* t  | j d  d k sE t  | j d  d k s` t  | j d  d k s{ t  | j d	  d k s t  | j d
  d k s t  | j d  d k s t  | j d  d k s t  | j d  d k st  t  d t  } | j d  d k s,t  | j d  d k sGt  | j d  d k sbt  t  d  } | j d  d k st  | j d  d k st  | j d  d k st  | j d  d k st  | j d  d k st  d S(   u   
        this unit testing for test the snowball arabic light stemmer
        this stemmer deals with prefixes and suffixes
        u   arabicu&   الْعَرَبِــــــيَّةu   عربu   العربيةu   فقالواu   قالu   الطالباتu   طالبu   فالطالباتu   والطالباتu   الطالبونu   اللذانu   منu   اللذu   الكلماتu   كلمN(   R   t   Truet   stemt   AssertionErrort   False(   t   selft
   ar_stemmer(    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   test_arabic   s(    c         C  s+   t  d  } | j d  d k s' t  d  S(   Nu   russianu   авантненькаяu   авантненьк(   R   R   R	   (   R   t   stemmer_russian(    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   test_russian+   s    c         C  s   t  d  } t  d d t } | j d  d k s9 t  | j d  d k sT t  | j d  d k so t  | j d  d k s t  d  S(   Nu   germant   ignore_stopwordsu	   Schränkeu   schranku   keinenu   kein(   R   R   R   R	   (   R   t   stemmer_germant   stemmer_german2(    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   test_german/   s    c         C  sF   t  d  } | j d  d k s' t  | j d  d k sB t  d  S(   Nu   spanishu	   Visionadou   visionu   algueu   algu(   R   R   R	   (   R   t   stemmer(    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   test_spanish9   s    c         C  s+   t  d  } | j d  d k s' t  d  S(   Nu   englishu   y'su   y(   R   R   R	   (   R   R   (    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   test_short_strings_bugA   s    (   t   __name__t
   __module__R   R   R   R   R   (    (    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyR      s
   			
	t
   PorterTestc           B  s>   e  Z d    Z d   Z d   Z d   Z d   Z d   Z RS(   c         C  s>   t  t j d  j d d    } | j   j   SWd  QXd  S(   Nu*   stemmers/porter_test/porter_vocabulary.txtt   encodingu   utf-8(   R   R   t   findt   opent   readt
   splitlines(   R   t   fp(    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   _vocabularyG   s
    c         C  sp   t  d |  } xZ t |  j   |  D]C \ } } | j |  } | | k s% t d | | | | f   q% Wd  S(   Nt   modeu*   %s should stem to %s in %s mode but got %s(   R   t   zipR    R   R	   (   R   t   stemmer_modet   expected_stemsR   t   wordt	   true_stemt   our_stem(    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   _test_against_expected_outputO   s    "c         C  sM   t  t j d  j d d   & } |  j t j | j   j    Wd QXd S(   u  Tests all words from the test vocabulary provided by M Porter

        The sample vocabulary and output were sourced from:
            http://tartarus.org/martin/PorterStemmer/voc.txt
            http://tartarus.org/martin/PorterStemmer/output.txt
        and are linked to from the Porter Stemmer algorithm's homepage
        at
            http://tartarus.org/martin/PorterStemmer/
        u-   stemmers/porter_test/porter_martin_output.txtR   u   utf-8N(	   R   R   R   R   R(   R   t   MARTIN_EXTENSIONSR   R   (   R   R   (    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   test_vocabulary_martin_modeX   s    
c         C  sM   t  t j d  j d d   & } |  j t j | j   j    Wd  QXd  S(   Nu+   stemmers/porter_test/porter_nltk_output.txtR   u   utf-8(	   R   R   R   R   R(   R   t   NLTK_EXTENSIONSR   R   (   R   R   (    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   test_vocabulary_nltk_modek   s    c         C  s   t  t j d  j d d   & } |  j t j | j   j    Wd  QX|  j t j t j d  j d d  j   j    d  S(   Nu/   stemmers/porter_test/porter_original_output.txtR   u   utf-8(	   R   R   R   R   R(   R   t   ORIGINAL_ALGORITHMR   R   (   R   R   (    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   test_vocabulary_original_modeu   s    c         C  s"   t    j d  d k s t  d S(   u   Test for bug https://github.com/nltk/nltk/issues/1581

        Ensures that 'oed' can be stemmed without throwing an error.
        u   oedu   oN(   R   R   R	   (   R   (    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   test_oed_bug   s    (   R   R   R    R(   R*   R,   R.   R/   (    (    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyR   F   s   					
	(   t
   __future__R    R   t   ost   unittestt
   contextlibR   t   nltkR   t   nltk.stem.snowballR   t   nltk.stem.porterR   t   TestCaseR   R   (    (    (    s7   lib/python2.7/site-packages/nltk/test/unit/test_stem.pyt   <module>   s   9