ó
	Q˜[c           @` sz  d  Z  d d l m Z m Z m Z m Z d d l Td d l m Z d d l	 Z	 d d l
 Z
 e	 j d ƒ Z e	 j d ƒ Z e	 j d ƒ Z e	 j d	 ƒ Z e	 j d
 ƒ Z e	 j d ƒ Z e	 j d ƒ Z e	 j d ƒ Z e	 j d ƒ Z e	 j d ƒ Z e	 j d ƒ Z e	 j d e	 j ƒ Z e	 j d e	 j ƒ Z e	 j d ƒ Z e	 j d ƒ Z d e f d „  ƒ  YZ d e j f d „  ƒ  YZ d S(   uL   A parser for HTML and XHTML.

Backported for python-future from Python 3.3.
i    (   t   absolute_importt   divisiont   print_functiont   unicode_literals(   t   *(   t   _markupbaseNu   [&<]u
   &[a-zA-Z#]u%   &([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]u)   &#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]u	   <[a-zA-Z]u   >u   --\s*>u(   ([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*u   [a-zA-Z][^	
 /> ]*uJ   \s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?u]   ((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*uê  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
uF  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
         (?:\s*,)*                   # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
u#   </\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>t   HTMLParseErrorc           B` s#   e  Z d  Z d d „ Z d „  Z RS(   u&   Exception raised for all parse errors.c         C` s3   | s t  ‚ | |  _ | d |  _ | d |  _ d  S(   Ni    i   (   t   AssertionErrort   msgt   linenot   offset(   t   selfR   t   position(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   __init__U   s    	c         C` sW   |  j  } |  j d  k	 r, | d |  j } n  |  j d  k	 rS | d |  j d } n  | S(   Nu   , at line %du   , column %di   (   R   R	   t   NoneR
   (   R   t   result(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   __str__[   s    	N(   NN(   t   __name__t
   __module__t   __doc__R   R   R   (    (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR   R   s   t
   HTMLParserc           B` s
  e  Z d  Z d Z e d „ Z d „  Z d „  Z d „  Z d „  Z	 d Z d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z  RS(    uÇ  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    u   scriptu   stylec         C` s6   | r t  j d t d d ƒn  | |  _ |  j ƒ  d S(   uß   Initialize and reset this instance.

        If strict is set to False (the default) the parser will parse invalid
        markup, otherwise it will raise an error.  Note that the strict mode
        is deprecated.
        u   The strict mode is deprecated.t
   stackleveli   N(   t   warningst   warnt   DeprecationWarningt   strictt   reset(   R   R   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR   z   s
    		c         C` s8   d |  _  d |  _ t |  _ d |  _ t j j |  ƒ d S(   u1   Reset this instance.  Loses all unprocessed data.u    u   ???N(	   t   rawdatat   lasttagt   interesting_normalt   interestingR   t
   cdata_elemR   t
   ParserBaseR   (   R   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR   ‡   s
    				c         C` s!   |  j  | |  _  |  j d ƒ d S(   u‘   Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        i    N(   R   t   goahead(   R   t   data(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   feed   s    c         C` s   |  j  d ƒ d S(   u   Handle any buffered data.i   N(   R!   (   R   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   close˜   s    c         C` s   t  | |  j ƒ  ƒ ‚ d  S(   N(   R   t   getpos(   R   t   message(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   errorœ   s    c         C` s   |  j  S(   u)   Return full source of start tag: '<...>'.(   t   _HTMLParser__starttag_text(   R   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   get_starttag_text¡   s    c         C` s2   | j  ƒ  |  _ t j d |  j t j ƒ |  _ d  S(   Nu   </\s*%s\s*>(   t   lowerR   t   ret   compilet   IR   (   R   t   elem(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   set_cdata_mode¥   s    c         C` s   t  |  _ d  |  _ d  S(   N(   R   R   R   R   (   R   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   clear_cdata_mode©   s    	c   
      C` sß  |  j  } d } t | ƒ } xk| | k  rˆ|  j j | | ƒ } | rT | j ƒ  } n |  j ra Pn  | } | | k  rŠ |  j | | | !ƒ n  |  j | | ƒ } | | k r¬ Pn  | j } | d | ƒ rkt	 j
 | | ƒ rè |  j | ƒ } nÊ | d | ƒ r	|  j | ƒ } n© | d | ƒ r*|  j | ƒ } nˆ | d | ƒ rK|  j | ƒ } ng | d | ƒ r‡|  j ru|  j | ƒ } q²|  j | ƒ } n+ | d | k  r±|  j d ƒ | d } n P| d k  rV| sÈPn  |  j rá|  j d ƒ n  | j d	 | d ƒ } | d k  r5| j d | d ƒ } | d k  r?| d } q?n
 | d 7} |  j | | | !ƒ n  |  j | | ƒ } q | d
 | ƒ r3t j
 | | ƒ } | rö| j ƒ  d d !}	 |  j |	 ƒ | j ƒ  } | d | d ƒ sÞ| d } n  |  j | | ƒ } q q…d | | k r/|  j | d d !ƒ |  j | d ƒ } n  Pq | d | ƒ rst j
 | | ƒ } | rº| j d ƒ }	 |  j |	 ƒ | j ƒ  } | d | d ƒ s¢| d } n  |  j | | ƒ } q n  t j
 | | ƒ } | r9| r5| j ƒ  | | k r5|  j r|  j d ƒ q5| | k r| } n  |  j | | d ƒ } n  Pq…| d | k  ro|  j d ƒ |  j | | d ƒ } q…Pq d s t d ƒ ‚ q W| rÎ| | k  rÎ|  j rÎ|  j | | | !ƒ |  j | | ƒ } n  | | |  _  d  S(   Ni    u   <u   </u   <!--u   <?u   <!i   u   EOF in middle of constructu   >u   &#i   iÿÿÿÿu   ;u   &u#   EOF in middle of entity or char refu   interesting.search() lied(   R   t   lenR   t   searcht   startR   t   handle_datat	   updatepost
   startswitht   starttagopent   matcht   parse_starttagt   parse_endtagt   parse_commentt   parse_piR   t   parse_declarationt   parse_html_declarationR'   t   findt   charreft   groupt   handle_charreft   endt	   entityreft   handle_entityreft
   incompleteR   (
   R   RC   R   t   it   nR8   t   jR6   t   kt   name(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR!   °   s°    		  			
		c         C` sè   |  j  } | | | d !d k s, t d ƒ ‚ | | | d !d k rP |  j | ƒ S| | | d !d k rt |  j | ƒ S| | | d !j ƒ  d	 k r× | j d
 | d ƒ } | d k r· d S|  j | | d | !ƒ | d S|  j | ƒ Sd  S(   Ni   u   <!u+   unexpected call to parse_html_declaration()i   u   <!--i   u   <![i	   u	   <!doctypeu   >iÿÿÿÿi   (   R   R   R;   t   parse_marked_sectionR*   R?   t   handle_declt   parse_bogus_comment(   R   RG   R   t   gtpos(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR>     s    	#i   c         C` s{   |  j  } | | | d !d k s, t d ƒ ‚ | j d | d ƒ } | d k rR d S| rs |  j | | d | !ƒ n  | d S(	   Ni   u   <!u   </u"   unexpected call to parse_comment()u   >iÿÿÿÿi   (   u   <!u   </(   R   R   R?   t   handle_comment(   R   RG   t   reportR   t   pos(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyRN   -  s    	#c         C` s€   |  j  } | | | d !d k s, t d ƒ ‚ t j | | d ƒ } | sL d S| j ƒ  } |  j | | d | !ƒ | j ƒ  } | S(   Ni   u   <?u   unexpected call to parse_pi()iÿÿÿÿ(   R   R   t   picloseR2   R3   t	   handle_piRC   (   R   RG   R   R8   RI   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR<   9  s    	#c         C` sÉ  d  |  _ |  j | ƒ } | d k  r( | S|  j } | | | !|  _ g  } t j | | d ƒ } | so t d ƒ ‚ | j ƒ  } | j d ƒ j	 ƒ  |  _
 } x| | k  r¥|  j rÄ t j | | ƒ } n t j | | ƒ } | sà Pn  | j d d d ƒ \ }	 }
 } |
 sd  } nX | d  d k o,| d k n sU| d  d k oP| d k n re| d d !} n  | r}|  j | ƒ } n  | j |	 j	 ƒ  | f ƒ | j ƒ  } qš W| | | !j ƒ  } | d k rt|  j ƒ  \ } } d |  j k r| |  j j d ƒ } t |  j ƒ |  j j d ƒ } n | t |  j ƒ } |  j r\|  j d | | | !d  f ƒ n  |  j | | | !ƒ | S| j d
 ƒ r–|  j | | ƒ n/ |  j | | ƒ | |  j k rÅ|  j | ƒ n  | S(   Ni    i   u#   unexpected call to parse_starttag()i   i   u   'iÿÿÿÿu   "u   >u   />u   
u    junk characters in start tag: %ri   (   u   >u   />(   R   R(   t   check_for_whole_start_tagR   t   tagfindR8   R   RC   RA   R*   R   R   t   attrfindt   attrfind_tolerantt   unescapet   appendt   stripR%   t   countR1   t   rfindR'   R4   t   endswitht   handle_startendtagt   handle_starttagt   CDATA_CONTENT_ELEMENTSR/   (   R   RG   t   endposR   t   attrsR8   RJ   t   tagt   mt   attrnamet   restt	   attrvalueRC   R	   R
   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR9   E  s\    				$$		c         C` sh  |  j  } |  j r' t j | | ƒ } n t j | | ƒ } | rX| j ƒ  } | | | d !} | d k rp | d S| d k rô | j d | ƒ r– | d S| j d | ƒ r¬ d S|  j rÙ |  j | | d ƒ |  j d ƒ n  | | k ré | S| d Sn  | d k rd S| d	 k rd S|  j r=|  j | | ƒ |  j d
 ƒ n  | | k rM| S| d Sn  t	 d ƒ ‚ d  S(   Ni   u   >u   /u   />i   iÿÿÿÿu   malformed empty start tagu    u6   abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZu   malformed start tagu   we should not get here!(
   R   R   t   locatestarttagendR8   t   locatestarttagend_tolerantRC   R6   R5   R'   R   (   R   RG   R   Re   RI   t   next(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyRU   ~  s>    				c         C` s·  |  j  } | | | d !d k s, t d ƒ ‚ t j | | d ƒ } | sL d S| j ƒ  } t j | | ƒ } | sH|  j d  k	 r— |  j	 | | | !ƒ | S|  j
 r¾ |  j d | | | !f ƒ n  t j | | d ƒ } | s	| | | d !d k rù | d S|  j | ƒ Sn  | j ƒ  j ƒ  } | j d	 | j ƒ  ƒ } |  j | ƒ | d S| j d ƒ j ƒ  } |  j d  k	 r–| |  j k r–|  j	 | | | !ƒ | Sn  |  j | j ƒ  ƒ |  j ƒ  | S(
   Ni   u   </u   unexpected call to parse_endtagi   iÿÿÿÿu   bad end tag: %ri   u   </>u   >(   R   R   t	   endendtagR2   RC   t
   endtagfindR8   R   R   R4   R   R'   t   tagfind_tolerantRN   RA   R*   R?   t   handle_endtagR0   (   R   RG   R   R8   RO   t	   namematcht   tagnameR.   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR:   ©  s<    	#	
c         C` s!   |  j  | | ƒ |  j | ƒ d  S(   N(   R`   Ro   (   R   Rd   Rc   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR_   Ó  s    c         C` s   d  S(   N(    (   R   Rd   Rc   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR`   Ø  s    c         C` s   d  S(   N(    (   R   Rd   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyRo   Ü  s    c         C` s   d  S(   N(    (   R   RK   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyRB   à  s    c         C` s   d  S(   N(    (   R   RK   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyRE   ä  s    c         C` s   d  S(   N(    (   R   R"   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR4   è  s    c         C` s   d  S(   N(    (   R   R"   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyRP   ì  s    c         C` s   d  S(   N(    (   R   t   decl(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyRM   ð  s    c         C` s   d  S(   N(    (   R   R"   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyRT   ô  s    c         C` s$   |  j  r  |  j d | f ƒ n  d  S(   Nu   unknown declaration: %r(   R   R'   (   R   R"   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   unknown_decl÷  s    	c         C` s,   d | k r | Sd „  } t  j d | | ƒ S(   Nu   &c         S` s   |  j  ƒ  d }  yl |  d d k r{ |  d }  |  d d k r\ t |  d j d ƒ d ƒ } n t |  j d ƒ ƒ } t | ƒ SWn t k
 r” d |  SXd d	 l m } |  | k r¹ | |  S|  j d ƒ rÐ d
 |  SxI t d t	 |  ƒ ƒ D]* } |  |  | k ræ | |  |  |  | Sqæ Wd
 |  Sd  S(   Ni    u   #i   u   xu   Xu   ;i   u   &#(   t   html5u   &i   (   u   xu   X(
   t   groupst   intt   rstript   chrt
   ValueErrort   future.backports.html.entitiesRt   R^   t   rangeR1   (   t   st   cRt   t   x(    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   replaceEntitiesÿ  s&    
	u&   &(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))(   R+   t   sub(   R   R|   R   (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyRY   ü  s
    		(   u   scriptu   styleN(!   R   R   R   Ra   t   FalseR   R   R#   R$   R'   R   R(   R)   R/   R0   R!   R>   RN   R<   R9   RU   R:   R_   R`   Ro   RB   RE   R4   RP   RM   RT   Rs   RY   (    (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyR   d   s:   									h			9	+	*										(    R   t
   __future__R    R   R   R   t   future.builtinst   future.backportsR   R+   R   R,   R   RF   RD   R@   R7   RS   t   commentcloseRV   Rn   RW   RX   t   VERBOSERi   Rj   Rl   Rm   t	   ExceptionR   R    R   (    (    (    s;   lib/python2.7/site-packages/future/backports/html/parser.pyt   <module>   s4   	"
		