
`]c           @  s  d  d l  m Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l	 Z	 d  d l
 Z
 d  d l m Z m Z m Z d  d l m Z d  d l m Z m Z d  d l m Z d  d l m Z m Z m Z d  d l m Z m Z m Z m Z m Z m Z m Z d  d	 l  m! Z! m" Z" d  d
 l# Td  d l$ m% Z% d d  Z& d   Z' d d d  Z) d d  Z* d d d  Z+ d   Z, d   Z- d   Z. d e f d     YZ/ d d d  Z0 d   Z1 e2 d  d  Z3 d   Z4 d    Z5 d!   Z6 e7 d"  Z8 d#   Z9 d$   Z: d%   Z; d&   Z< e7 e7 d d d'  Z= e7 e7 d d d(  Z> d)   Z? d*   Z@ d+ d  d,  ZA d-   ZB i  d  d.  ZC d d/ d0  ZD d d1  ZE d2   ZF d S(3   i(   t   print_functionN(   t   islicet   chaint   combinations(   t   pprint(   t   defaultdictt   deque(   t   version_info(   t   class_typest   string_typest	   text_type(   t   build_openert   install_openert
   getproxiest   ProxyHandlert   ProxyBasicAuthHandlert   ProxyDigestAuthHandlert   HTTPPasswordMgrWithDefaultRealm(   t   slice_boundst   raise_unorderable_types(   t   *(   t   python_2_unicode_compatiblet   selfc   
   
   C  s}  t  |   t |  t  s% |  j }  n  t d |  j  x@t t j |   j	    D]#\ } } | j
 d  rs qR n  t | d t  r qR n  t j d d k r t j } n	 t j } | |  d  \ } } } } | r&| d d k r&| d  k s	t |  t |  k r&| d } d	 | | f } n  t j | | | |  }	 t t j d
 | |	 f d d d d t |  d  qR Wd  S(   Ns%   %s supports the following operations:t   _t   __deprecated__i    i   i   R   i   s   %s.%ss   %s%st   initial_indents     - t   subsequent_indentt    i   (   t   strt
   isinstanceR   t	   __class__t   printt   __name__t   sortedt   pydoct
   allmethodst   itemst
   startswitht   getattrt   Falset   sysR   t   inspectt   getfullargspect
   getargspect   Nonet   lent   formatargspect   textwrapt   fill(
   t   objt   selfnamet   namet   methodR+   t   argst   varargst   varkwt   defaultst   argspec(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   usage-   s0    
(	$
c          C  s   d d l  }  |  j j j d k S(   s  
    Return True if this function is run within idle.  Tkinter
    programs that are run in idle should never call ``Tk.mainloop``; so
    this function should be used to gate all calls to ``Tk.mainloop``.

    :warning: This function works by checking ``sys.stdin``.  If the
        user has modified ``sys.stdin``, then it may return incorrect
        results.
    :rtype: bool
    iNt   PyShellt   RPCProxy(   R;   R<   (   R(   t   stdinR   R    (   R(   (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   in_idleU   s    i    c         C  s    t  t t |  | |    d S(   s   
    Pretty print a sequence of data items

    :param data: the data stream to print
    :type data: sequence or iter
    :param start: the start position
    :type start: int
    :param end: the end position
    :type end: int
    N(   R   t   listR   (   t   datat   startt   end(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   prj   s    iF   c         C  s&   t  d j t j |  d |   d S(   s   
    Pretty print a string, breaking lines on whitespace

    :param s: the string to print, consisting of words and spaces
    :type s: str
    :param width: the display width
    :type width: int
    s   
t   widthN(   R   t   joinR/   t   wrap(   t   sRD   (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   print_stringx   s    	R   c         C  s%   d j  t j | j  |   d |  S(   s#  
    Pretty print a list of text tokens, breaking lines on whitespace

    :param tokens: the tokens to print
    :type tokens: list
    :param separator: the string to use to separate tokens
    :type separator: str
    :param width: the display width (default=70)
    :type width: int
    s   
RD   (   RE   R/   RF   (   t   tokenst	   separatorRD   (    (    s(   lib/python2.7/site-packages/nltk/util.pyt	   tokenwrap   s    c           C  s   t  d d k o t  d d k S(   Ni    i   i   i   (   R   (    (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   py25   s    c           C  s   t  d d k o t  d d k S(   Ni    i   i   i   (   R   (    (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   py26   s    c           C  s   t  d d k o t  d d k S(   Ni    i   i   i   (   R   (    (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   py27   s    t   Indexc           B  s   e  Z d    Z RS(   c         C  s<   t  j |  t  x% | D] \ } } |  | j |  q Wd  S(   N(   R   t   __init__R?   t   append(   R   t   pairst   keyt   value(    (    s(   lib/python2.7/site-packages/nltk/util.pyRP      s    (   R    t
   __module__RP   (    (    (    s(   lib/python2.7/site-packages/nltk/util.pyRO      s   t   {t   }c         C  s7   t  t j |  t j  j | d | | j     d S(   s3  
    Return a string with markers surrounding the matched substrings.
    Search str for substrings matching ``regexp`` and wrap the matches
    with braces.  This is convenient for learning about regular expressions.

    :param regexp: The regular expression.
    :type regexp: str
    :param string: The string being matched.
    :type string: str
    :param left: The left delimiter (printed before the matched substring)
    :type left: str
    :param right: The right delimiter (printed after the matched substring)
    :type right: str
    :rtype: str
    s   \g<0>N(   R   t   ret   compilet   Mt   subt   rstrip(   t   regexpt   stringt   leftt   right(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   re_show   s    c         C  s]   t  |  d  r |  j   St |  t  rM t |  d   } | j   SWd  QXn t d   d  S(   Nt   readt   rs2   Must be called with a filename or file-like object(   t   hasattrRb   R   R	   t   opent
   ValueError(   t   ft   infile(    (    s(   lib/python2.7/site-packages/nltk/util.pyt
   filestring   s    
c         #  s   t  |  d f g  } xk | r | j   \ }   | V  | k r y' | j   f d   | |  D  Wq t k
 r{ q Xq q Wd S(   s  Traverse the nodes of a tree in breadth-first order.
    (No need to check for cycles.)
    The first argument should be the tree root;
    children should be a function taking as argument a tree node
    and returning an iterator of the node's children.
    i    c         3  s   |  ] } |   d  f Vq d S(   i   N(    (   t   .0t   c(   t   depth(    s(   lib/python2.7/site-packages/nltk/util.pys	   <genexpr>   s    N(   R   t   popleftt   extendt	   TypeError(   t   treet   childrent   maxdeptht   queuet   node(    (   Rl   s(   lib/python2.7/site-packages/nltk/util.pyt   breadth_first   s    	'c         C  sW  d } d g } y | j t j t j   Wn t k
 r? n Xy | j t j   d  Wn t t f k
 rt n Xy | j t j   d  Wn t t f k
 r n X| j d  xN | D]F } | s q n  y t	 |  |  } | } Wn t
 t f k
 rq XPq W| sIt
 d d j g  | D] } | r!t |  ^ q!   n
 | | f Sd S(   st  
    Given a byte string, attempt to decode it.
    Tries the standard 'UTF8' and 'latin-1' encodings,
    Plus several gathered from locale information.

    The calling program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    If successful it returns ``(decoded_unicode, successful_encoding)``.
    If unsuccessful it raises a ``UnicodeError``.
    s   utf-8i   s   latin-1s?   Unable to decode input data. Tried the following encodings: %s.s   , N(   R,   RQ   t   localet   nl_langinfot   CODESETt   AttributeErrort	   getlocalet
   IndexErrort   getdefaultlocaleR
   t   UnicodeErrort   LookupErrorRE   t   repr(   R@   t   successful_encodingt	   encodingst   enct   decoded(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   guess_encoding   s:    	
5c         C  s<   t    } g  |  D]( } | | k r | j |  r | ^ q S(   N(   t   sett   add(   t   xst   seent   x(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   unique_list4  s    	c         C  sk   t  t  } xX |  D]P } t |  | d  rU x4 |  | D] } | | j |  q7 Wq | | |  | <q W| S(   Nt   __iter__(   R   R?   Rd   RQ   (   t   dt   inverted_dictRS   t   term(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   invert_dict?  s    c           s   | r d     n	 d     t   f d    D  } t    f d    D  } x  D] } | | } | | } xe | r | j   } | j |  | | j |   |   O} | | j |   |   O} | | 8} qw WqZ W| S(   s  
    Calculate the transitive closure of a directed graph,
    optionally the reflexive transitive closure.

    The algorithm is a slight modification of the "Marking Algorithm" of
    Ioannidis & Ramakrishnan (1998) "Efficient Transitive Closure Algorithms".

    :param graph: the initial graph, represented as a dictionary of sets
    :type graph: dict(set)
    :param reflexive: if set, also make the closure reflexive
    :type reflexive: bool
    :rtype: dict(set)
    c         S  s   t  |  g  S(   N(   R   (   t   k(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   <lambda>_  t    c         S  s   t    S(   N(   R   (   R   (    (    s(   lib/python2.7/site-packages/nltk/util.pyR   a  R   c         3  s%   |  ] } |   | j    f Vq d  S(   N(   t   copy(   Rj   R   (   t   graph(    s(   lib/python2.7/site-packages/nltk/util.pys	   <genexpr>c  s    c         3  s!   |  ] } |   |  f Vq d  S(   N(    (   Rj   R   (   t   base_set(    s(   lib/python2.7/site-packages/nltk/util.pys	   <genexpr>e  s    (   t   dictt   popR   t
   setdefaultt   get(   R   t	   reflexivet   agenda_grapht   closure_grapht   it   agendat   closuret   j(    (   R   R   s(   lib/python2.7/site-packages/nltk/util.pyt   transitive_closureP  s    	

	c         C  sL   i  } x? |  D]7 } x. |  | D]" } | j  | t    j |  q Wq W| S(   s   
    Inverts a directed graph.

    :param graph: the graph, represented as a dictionary of sets
    :type graph: dict(set)
    :return: the inverted graph
    :rtype: dict(set)
    (   R   R   R   (   R   t   invertedRS   RT   (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   invert_graphr  s
    	$c         C  s   t  d   d  S(   Ns>   To remove HTML markup, use BeautifulSoup's get_text() function(   t   NotImplementedError(   t   html(    (    s(   lib/python2.7/site-packages/nltk/util.pyt
   clean_html  s    c         C  s   t  d   d  S(   Ns>   To remove HTML markup, use BeautifulSoup's get_text() function(   R   (   t   url(    (    s(   lib/python2.7/site-packages/nltk/util.pyt	   clean_url  s    c          G  s   g  } xx |  D]p } t  | t t f  s4 | g } n  xF | D]> } t  | t t f  rl | j t |   q; | j |  q; Wq W| S(   s   
    Flatten a list.

        >>> from nltk.util import flatten
        >>> flatten(1, 2, ['b', 'a' , ['c', 'd']], 3)
        [1, 2, 'b', 'a', 'c', 'd', 3]

    :param args: items and lists to be combined into a single list
    :rtype: list
    (   R   R?   t   tupleRn   t   flattenRQ   (   R5   R   t   lt   item(    (    s(   lib/python2.7/site-packages/nltk/util.pyR     s    c         C  sV   t  |   }  | r/ t | f | d |   }  n  | rR t |  | f | d  }  n  |  S(   s  
    Returns a padded sequence of items before ngram extraction.

        >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
        ['<s>', 1, 2, 3, 4, 5, '</s>']
        >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
        ['<s>', 1, 2, 3, 4, 5]
        >>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
        [1, 2, 3, 4, 5, '</s>']

    :param sequence: the source data to be padded
    :type sequence: sequence or iter
    :param n: the degree of the ngrams
    :type n: int
    :param pad_left: whether the ngrams should be left-padded
    :type pad_left: bool
    :param pad_right: whether the ngrams should be right-padded
    :type pad_right: bool
    :param left_pad_symbol: the symbol to use for left padding (default is None)
    :type left_pad_symbol: any
    :param right_pad_symbol: the symbol to use for right padding (default is None)
    :type right_pad_symbol: any
    :rtype: sequence or iter
    i   (   t   iterR   (   t   sequencet   nt   pad_leftt	   pad_rightt   left_pad_symbolt   right_pad_symbol(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   pad_sequence  s     c   	      c  s   t  |  | | | | |  }  g  } xL | d k ro y t |   } Wn t k
 rT d SX| j |  | d 8} q$ Wx- |  D]% } | j |  t |  V| d =qw Wd S(   s  
    Return the ngrams generated from a sequence of items, as an iterator.
    For example:

        >>> from nltk.util import ngrams
        >>> list(ngrams([1,2,3,4,5], 3))
        [(1, 2, 3), (2, 3, 4), (3, 4, 5)]

    Wrap with list for a list version of this function.  Set pad_left
    or pad_right to true in order to get additional ngrams:

        >>> list(ngrams([1,2,3,4,5], 2, pad_right=True))
        [(1, 2), (2, 3), (3, 4), (4, 5), (5, None)]
        >>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
        [(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
        >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
        [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5)]
        >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
        [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]


    :param sequence: the source data to be converted into ngrams
    :type sequence: sequence or iter
    :param n: the degree of the ngrams
    :type n: int
    :param pad_left: whether the ngrams should be left-padded
    :type pad_left: bool
    :param pad_right: whether the ngrams should be right-padded
    :type pad_right: bool
    :param left_pad_symbol: the symbol to use for left padding (default is None)
    :type left_pad_symbol: any
    :param right_pad_symbol: the symbol to use for right padding (default is None)
    :type right_pad_symbol: any
    :rtype: sequence or iter
    i   Ni    (   R   t   nextt   StopIterationRQ   R   (	   R   R   R   R   R   R   t   historyt	   next_itemR   (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   ngrams  s    +c         k  s&   x t  |  d |  D] } | Vq Wd S(   s  
    Return the bigrams generated from a sequence of items, as an iterator.
    For example:

        >>> from nltk.util import bigrams
        >>> list(bigrams([1,2,3,4,5]))
        [(1, 2), (2, 3), (3, 4), (4, 5)]

    Use bigrams for a list version of this function.

    :param sequence: the source data to be converted into bigrams
    :type sequence: sequence or iter
    :rtype: iter(tuple)
    i   N(   R   (   R   t   kwargsR   (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   bigrams  s    c         k  s&   x t  |  d |  D] } | Vq Wd S(   s  
    Return the trigrams generated from a sequence of items, as an iterator.
    For example:

        >>> from nltk.util import trigrams
        >>> list(trigrams([1,2,3,4,5]))
        [(1, 2, 3), (2, 3, 4), (3, 4, 5)]

    Use trigrams for a list version of this function.

    :param sequence: the source data to be converted into trigrams
    :type sequence: sequence or iter
    :rtype: iter(tuple)
    i   N(   R   (   R   R   R   (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   trigrams3  s    i   c         k  s_   | d k r t  |   } n  x= t | | d  D]( } x t |  | |  D] } | VqH Wq/ Wd S(   s  
    Returns all possible ngrams generated from a sequence of items, as an iterator.

        >>> sent = 'a b c'.split()
        >>> list(everygrams(sent))
        [('a',), ('b',), ('c',), ('a', 'b'), ('b', 'c'), ('a', 'b', 'c')]
        >>> list(everygrams(sent, max_len=2))
        [('a',), ('b',), ('c',), ('a', 'b'), ('b', 'c')]

    :param sequence: the source data to be converted into trigrams
    :type sequence: sequence or iter
    :param min_len: minimum length of the ngrams, aka. n-gram order/degree of ngram
    :type  min_len: int
    :param max_len: maximum length of the ngrams (set to length of sequence by default)
    :type  max_len: int
    :rtype: iter(tuple)
    ii   N(   R-   t   rangeR   (   R   t   min_lent   max_lenR   R   t   ng(    (    s(   lib/python2.7/site-packages/nltk/util.pyt
   everygramsG  s
    c   	      k  s   d | k s d | k r- t  |  | |  }  n  t   } xx t |  | | d t d | D]W } | d  } | d } x: t | | d  D]% } | d | k r q n  | | Vq WqV Wd S(   s  
    Returns all possible skipgrams generated from a sequence of items, as an iterator.
    Skipgrams are ngrams that allows tokens to be skipped.
    Refer to http://homepages.inf.ed.ac.uk/ballison/pdf/lrec_skipgrams.pdf

        >>> sent = "Insurgents killed in ongoing fighting".split()
        >>> list(skipgrams(sent, 2, 2))
        [('Insurgents', 'killed'), ('Insurgents', 'in'), ('Insurgents', 'ongoing'), ('killed', 'in'), ('killed', 'ongoing'), ('killed', 'fighting'), ('in', 'ongoing'), ('in', 'fighting'), ('ongoing', 'fighting')]
        >>> list(skipgrams(sent, 3, 2))
        [('Insurgents', 'killed', 'in'), ('Insurgents', 'killed', 'ongoing'), ('Insurgents', 'killed', 'fighting'), ('Insurgents', 'in', 'ongoing'), ('Insurgents', 'in', 'fighting'), ('Insurgents', 'ongoing', 'fighting'), ('killed', 'in', 'ongoing'), ('killed', 'in', 'fighting'), ('killed', 'ongoing', 'fighting'), ('in', 'ongoing', 'fighting')]

    :param sequence: the source data to be converted into trigrams
    :type sequence: sequence or iter
    :param n: the degree of the ngrams
    :type n: int
    :param k: the skip distance
    :type  k: int
    :rtype: iter(tuple)
    R   R   R   i   iN(   R   t   objectR   t   TrueR   (	   R   R   R   R   t   SENTINELt   ngramt   headt   tailt	   skip_tail(    (    s(   lib/python2.7/site-packages/nltk/util.pyt	   skipgramsa  s    	&

c         C  sC  | d } t  |  } d } d } t |  d  rM t j |  j  j d } n- |  j d d  |  j   d } |  j d  x| | k  r>| | f } | | d }	 | j |	  r | |	 \ }
 } n d } x t	 rT|  j t
 d |	 d   |	 d k r|  j   n  |  j   }
 |  j   } | d k r/Pn  | |	 d }	 |	 | d k r d Sq W| | k  rt|
 | f | |	 <n  |
 | k r| |	 d k st d   |	 d } nl | |  | k r| S| | k r| |	 d k st d   |	 d } n# | | k  r|
 t  |  d } n  | d 7} | | f } | | k r} d Sq} Wd S(	   s  
    Return the line from the file with first word key.
    Searches through a sorted file using the binary search algorithm.

    :type file: file
    :param file: the file to be searched through.
    :type key: str
    :param key: the identifier we are searching for.
    R   i    R3   i   i   R   s   infinite loopN(   R-   Rd   t   ost   statR3   t   st_sizet   seekt   tellR   R   t   maxt   discard_linet   readlineR,   t   AssertionError(   t   fileRS   t   cachet
   cacheDeptht   keylenRA   t   currentDepthRB   t	   lastStatet   middlet   offsett   linet	   thisState(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   binary_search_file  sV    
	
R   c      	   C  s   d d l  m } |  d
 k rP y t   d }  WqP t k
 rL t d   qP Xn  t i |  d 6|  d 6 } t |  } | d
 k	 r t   } | j	 d d
 d |  d | d	 |  | j
 t |   | j
 t |   n  t |  d
 S(   s  
    Set the HTTP proxy for Python to download through.

    If ``proxy`` is None then tries to set proxy from environment or system
    settings.

    :param proxy: The HTTP proxy server to use. For example:
        'http://proxy.example.com:3128/'
    :param user: The username to authenticate with. Use None to disable
        authentication.
    :param password: The password to authenticate with.
    i(   t   compatt   https'   Could not detect default proxy settingst   httpst   realmt   urit   usert   passwdN(   t   nltkR   R,   R   t   KeyErrorRf   R   R   R   t   add_passwordt   add_handlerR   R   R   (   t   proxyR   t   passwordR   t   proxy_handlert   openert   password_manager(    (    s(   lib/python2.7/site-packages/nltk/util.pyt	   set_proxy  s    	"c         C  s   d | d } t  |   r |  j s4 |  j j   rD | d |  _ n  x |  D] }  t |  | d  qK W|  j s |  j j   r | |  _ q n, | r |  j s |  j j   r | |  _ n  d S(   s  
    Recursive function to indent an ElementTree._ElementInterface
    used for pretty printing. Run indent on elem and then output
    in the normal way.

    :param elem: element to be indented. will be modified.
    :type elem: ElementTree._ElementInterface
    :param level: level of indentation for this element
    :type level: nonnegative integer
    :rtype:   ElementTree._ElementInterface
    :return:  Contents of elem indented to reflect its structure
    s   
s     i   N(   R-   t   textt   stript   elementtree_indentR   (   t   elemt   levelR   (    (    s(   lib/python2.7/site-packages/nltk/util.pyR      s     c         C  s   d | k o |  k n ry d \ } } xF t  d t | |  |  d  D]$ } | |  9} | | 9} |  d 8}  qI W| | Sd Sd S(   s9  
    This function is a fast way to calculate binomial coefficients, commonly
    known as nCk, i.e. the number of combinations of n things taken k at a time.
    (https://en.wikipedia.org/wiki/Binomial_coefficient).

    This is the *scipy.special.comb()* with long integer computation but this
    approximation is faster, see https://github.com/nltk/nltk/issues/1181

        >>> choose(4, 2)
        6
        >>> choose(6, 2)
        15

    :param n: The number of things.
    :type n: int
    :param r: The number of times a thing is taken.
    :type r: int
    i    i   N(   i   i   (   R   t   min(   R   R   t   ntokt   ktokt   t(    (    s(   lib/python2.7/site-packages/nltk/util.pyt   choose   s    '

(G   t
   __future__R    R(   R)   Rv   RX   t   typesR/   R"   t   bisectR   t	   itertoolsR   R   R   R   t   collectionsR   R   R   t   sixR   R	   R
   t   six.moves.urllib.requestR   R   R   R   R   R   R   t   nltk.internalsR   R   t   nltk.collectionst   nltk.compatR   R:   R>   R,   RC   RH   RK   RL   RM   RN   RO   Ra   Ri   R   Ru   R   R   R   R'   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   (    (    (    s(   lib/python2.7/site-packages/nltk/util.pyt   <module>   sl   4

(							=		"				 (9			+J* 