
Y&=[c        !   @` s  d  d l  m Z m Z m Z d  d l m Z m Z d  d l m Z m	 Z	 d  d l
 Z
 d  d l Z d  d l Z d d l m Z m Z m Z m Z d d l m Z d d l m Z d  d	 l m Z y d  d
 l m Z Wn e k
 r e Z n Xe g  e D] Z e j d  ^ q  Z e g  e D] Z e j d  ^ q Z e g  e D] Z e j d  ^ qF Z e e d d g  BZ d Z  e j! re  d d k re  j" d  d k st#  e j$ e  d  e% d  d  Z& n e j$ e   Z& e' d d d d d d d d d d d d d d d  d! d" d# d$ d% d& d' d( d) d* d+ d, d- d. d/ d0 d1 g   Z( e j$ d2  Z) i  Z* d3 e+ f d4     YZ, d5   Z- d6 e+ f d7     YZ. d8 e. f d9     YZ/ d: e0 f d;     YZ1 d< e+ f d=     YZ2 d> e+ f d?     YZ3 d@   Z4 d S(A   i    (   t   absolute_importt   divisiont   unicode_literals(   t	   text_typet   binary_type(   t   http_clientt   urllibNi   (   t   EOFt   spaceCharacterst   asciiLetterst   asciiUppercase(   t   _ReparseException(   t   _utils(   t   StringIO(   t   BytesIOu   asciit   >t   <u   [---﷐-﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿]iu   ]u   "\uD800-\uDFFF"i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i i u   [	- -/:-@\[-`{-~]t   BufferedStreamc           B` sM   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 RS(   u   Buffering for streams that do not have buffering of their own

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    c         C` s%   | |  _  g  |  _ d d g |  _ d  S(   Nii    (   t   streamt   buffert   position(   t   selfR   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   __init__@   s    		c         C` sJ   d } x, |  j  |  j d  D] } | t |  7} q W| |  j d 7} | S(   Ni    i   (   R   R   t   len(   R   t   post   chunk(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   tellE   s
    c         C` sx   | |  j    k s t  | } d } x> t |  j |  | k  rd | t |  j |  8} | d 7} q' W| | g |  _ d  S(   Ni    i   (   t   _bufferedBytest   AssertionErrorR   R   R   (   R   R   t   offsett   i(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   seekL   s    c         C` sp   |  j  s |  j |  S|  j d t |  j   k r_ |  j d t |  j  d  k r_ |  j |  S|  j |  Sd  S(   Ni    i   i(   R   t   _readStreamR   R   t   _readFromBuffer(   R   t   bytes(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   readU   s    	 c         C` s&   t  g  |  j D] } t |  ^ q  S(   N(   t   sumR   R   (   R   t   item(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR   ^   s    c         C` sL   |  j  j |  } |  j j |  |  j d c d 7<t |  |  j d <| S(   Ni    i   (   R   R#   R   t   appendR   R   (   R   R"   t   data(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR    a   s
    c         C` s"  | } g  } |  j  d } |  j  d } x | t |  j  k  r | d k r | d k s\ t  |  j | } | t |  | k r | } | | | g |  _  n/ t |  | } | t |  g |  _  | d 7} | j | | | | ! | | 8} d } q) W| r| j |  j |   n  d j |  S(   Ni    i   t    (   R   R   R   R   R&   R    t   join(   R   R"   t   remainingBytest   rvt   bufferIndext   bufferOffsett   bufferedDatat   bytesToRead(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR!   h   s&    $


(
   t   __name__t
   __module__t   __doc__R   R   R   R#   R   R    R!   (    (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR   9   s   								c         K` s   t  |  t j  s< t  |  t j j  rE t  |  j t j  rE t } n9 t |  d  ro t  |  j	 d  t
  } n t  |  t
  } | r g  | D] } | j d  r | ^ q } | r t d |   n  t |  |  St |  |  Sd  S(   Nu   readi    u	   _encodingu3   Cannot set an encoding with a unicode input, set %r(   t
   isinstanceR   t   HTTPResponseR   t   responset   addbaset   fpt   Falset   hasattrR#   R   t   endswitht	   TypeErrort   HTMLUnicodeInputStreamt   HTMLBinaryInputStream(   t   sourcet   kwargst	   isUnicodet   xt	   encodings(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   HTMLInputStream   s    	(R<   c           B` s}   e  Z d  Z d Z d   Z d   Z d   Z d   Z d   Z d   Z	 d d  Z d	   Z d
   Z e d  Z d   Z RS(   u   Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    i (  c         C` s   t  j s d |  _ n- t d  d k r6 |  j |  _ n |  j |  _ d g |  _ t d  d f |  _	 |  j
 |  |  _ |  j   d S(   u  Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        u   􏿿i   i    u   utf-8u   certainN(   R   t   supports_lone_surrogatest   Nonet   reportCharacterErrorsR   t   characterErrorsUCS4t   characterErrorsUCS2t   newLinest   lookupEncodingt   charEncodingt
   openStreamt
   dataStreamt   reset(   R   R>   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR      s    	c         C` sC   d |  _  d |  _ d |  _ g  |  _ d |  _ d |  _ d  |  _ d  S(   Nu    i    (   R   t	   chunkSizet   chunkOffsett   errorst   prevNumLinest   prevNumColsRE   t   _bufferedCharacter(   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyRN      s    						c         C` s(   t  | d  r | } n t |  } | S(   uv   Produces a file object from source.

        source can be either a file object, local filename or a string.

        u   read(   R9   R   (   R   R>   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyRL      s    	c         C` st   |  j  } | j d d |  } |  j | } | j d d |  } | d k r\ |  j | } n | | d } | | f S(   Nu   
i    ii   (   R   t   countRR   t   rfindRS   (   R   R   R   t   nLinest   positionLinet   lastLinePost   positionColumn(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt	   _position   s    	c         C` s&   |  j  |  j  \ } } | d | f S(   u:   Returns (line, col) of the current position in the stream.i   (   R[   RP   (   R   t   linet   col(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR      s    c         C` sL   |  j  |  j k r% |  j   s% t Sn  |  j  } |  j | } | d |  _  | S(   uo    Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        i   (   RP   RO   t	   readChunkR   R   (   R   RP   t   char(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR_      s    	c         C` sO  | d  k r |  j } n  |  j |  j  \ |  _ |  _ d |  _ d |  _ d |  _ |  j j	 |  } |  j
 r |  j
 | } d  |  _
 n
 | s t St |  d k r t | d  } | d k s d | k o d k n r | d |  _
 | d  } q n  |  j r|  j |  n  | j d d	  } | j d
 d	  } | |  _ t |  |  _ t S(   Nu    i    i   ii   i   i  u   
u   
u   (   RE   t   _defaultChunkSizeR[   RO   RR   RS   R   RP   RM   R#   RT   R8   R   t   ordRF   t   replacet   True(   R   RO   R'   t   lastv(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR^      s0    				(		c         C` s:   x3 t  t t j |    D] } |  j j d  q Wd  S(   Nu   invalid-codepoint(   t   rangeR   t   invalid_unicode_ret   findallRQ   R&   (   R   R'   t   _(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyRG   %  s    "c         C` s  t  } x t j |  D] } | r( q n  t | j    } | j   } t j | | | d ! r t j | | | d ! } | t	 k r |  j
 j d  n  t } q | d k r | d k r | t |  d k r |  j
 j d  q t  } |  j
 j d  q Wd  S(   Ni   u   invalid-codepointi   i  i   (   R8   Rf   t   finditerRa   t   groupt   startR   t   isSurrogatePairt   surrogatePairToCodepointt   non_bmp_invalid_codepointsRQ   R&   Rc   R   (   R   R'   t   skipt   matcht	   codepointR   t   char_val(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyRH   )  s     	c   
      C` s}  y t  | | f } Wn t k
 r x& | D] } t |  d k  s+ t  q+ Wd j g  | D] } d t |  ^ qZ  } | s d | } n  t j d |  } t  | | f <n Xg  } x t ri| j |  j	 |  j
  } | d k r |  j
 |  j k r?Pq?nB | j   } | |  j k r?| j |  j	 |  j
 | ! | |  _
 Pn  | j |  j	 |  j
  |  j   s Pq q Wd j |  }	 |	 S(   u    Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.
        i   u    u   \x%02xu   ^%su   [%s]+N(   t   charsUntilRegExt   KeyErrorRa   R   R)   t   ret   compileRc   Rp   R   RP   RE   RO   t   endR&   R^   (
   R   t
   characterst   oppositet   charst   ct   regexR+   t   mRw   t   r(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt
   charsUntil@  s2    ,%		c         C` so   | d  k	 rk |  j d k r= | |  j |  _ |  j d 7_ qk |  j d 8_ |  j |  j | k sk t  n  d  S(   Ni    i   (   RE   RP   R   RO   R   (   R   R_   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   ungeto  s    N(   R0   R1   R2   R`   R   RN   RL   R[   R   R_   RE   R^   RG   RH   R8   R   R   (    (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR<      s   	 					&		/R=   c           B` sb   e  Z d  Z d	 d	 d	 d	 d e d  Z d   Z d   Z e d  Z d   Z	 d   Z
 d   Z RS(
   u   Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    u   windows-1252c         C` s   |  j  |  |  _ t j |  |  j  d |  _ d |  _ | |  _ | |  _ | |  _ | |  _	 | |  _
 |  j |  |  _ |  j d d k	 s t  |  j   d S(   u  Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        i   id   i    N(   RL   t	   rawStreamR<   R   t   numBytesMetat   numBytesChardett   override_encodingt   transport_encodingt   same_origin_parent_encodingt   likely_encodingt   default_encodingt   determineEncodingRK   RE   R   RN   (   R   R>   R   R   R   R   R   t
   useChardet(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s    							c         C` s3   |  j  d j j |  j d  |  _ t j |   d  S(   Ni    u   replace(   RK   t
   codec_infot   streamreaderR   RM   R<   RN   (   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyRN     s    "c         C` sU   t  | d  r | } n t |  } y | j | j    Wn t |  } n X| S(   uv   Produces a file object from source.

        source can be either a file object, local filename or a string.

        u   read(   R9   R   R   R   R   (   R   R>   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyRL     s    	c         C` s!  |  j    d f } | d d  k	 r& | St |  j  d f } | d d  k	 rO | St |  j  d f } | d d  k	 rx | S|  j   d f } | d d  k	 r | St |  j  d f } | d d  k	 r | d j j d  r | St |  j	  d f } | d d  k	 r| S| ry d d l
 m } Wn t k
 r4qXg  } |   } x[ | j s|  j j |  j  } t | t  szt  | sPn  | j |  | j |  qGW| j   t | j d  } |  j j d  | d  k	 r| d f Sn  t |  j  d f } | d d  k	 r| St d  d f S(   Nu   certaini    u	   tentativeu   utf-16(   t   UniversalDetectoru   encodingu   windows-1252(   t	   detectBOMRE   RJ   R   R   t   detectEncodingMetaR   t   namet
   startswithR   t   chardet.universaldetectorR   t   ImportErrort   doneR   R#   R   R3   R"   R   R&   t   feedt   closet   resultR   R   (   R   t   chardetRK   R   t   bufferst   detectorR   t   encoding(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     sR    '	
c         C` s   |  j  d d k s t  t |  } | d  k r5 d  S| j d k re t d  } | d  k	 s t  nr | |  j  d k r |  j  d d f |  _  nF |  j j d  | d f |  _  |  j   t d |  j  d | f   d  S(	   Ni   u   certainu   utf-16beu   utf-16leu   utf-8i    u   Encoding changed from %s to %s(   u   utf-16beu   utf-16le(	   RK   R   RJ   RE   R   R   R   RN   R   (   R   t   newEncoding(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   changeEncoding  s    
c         C` s   i d t  j 6d t  j 6d t  j 6d t  j 6d t  j 6} |  j j d  } t | t	  s_ t
  | j | d   } d } | s | j |  } d } | s | j | d   } d } q n  | r |  j j |  t |  S|  j j d	  d
 Sd
 S(   u   Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return Noneu   utf-8u   utf-16leu   utf-16beu   utf-32leu   utf-32bei   i   i   i    N(   t   codecst   BOM_UTF8t   BOM_UTF16_LEt   BOM_UTF16_BEt   BOM_UTF32_LEt   BOM_UTF32_BER   R#   R3   R"   R   t   getR   RJ   RE   (   R   t   bomDictt   stringR   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s&    

c         C` s   |  j  j |  j  } t | t  s* t  t |  } |  j  j d  | j   } | d k	 r| | j
 d k r| t d  } n  | S(   u9   Report the encoding declared by the meta element
        i    u   utf-16beu   utf-16leu   utf-8N(   u   utf-16beu   utf-16le(   R   R#   R   R3   R"   R   t   EncodingParserR   t   getEncodingRE   R   RJ   (   R   R   t   parserR   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR   9  s    N(   R0   R1   R2   RE   Rc   R   RN   RL   R   R   R   R   (    (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR=     s   (		>		"t   EncodingBytesc           B` s   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 d   Z
 e e
 e	  Z d	   Z e e  Z e d
  Z d   Z d   Z d   Z RS(   u   String-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raisedc         C` s+   t  | t  s t  t j |  | j    S(   N(   R3   R"   R   t   __new__t   lower(   R   t   value(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR   L  s    c         C` s   d |  _  d  S(   Ni(   R[   (   R   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR   P  s    c         C` s   |  S(   N(    (   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   __iter__T  s    c         C` sS   |  j  d } |  _  | t |   k r/ t  n | d k  rD t  n  |  | | d !S(   Ni   i    (   R[   R   t   StopIterationR;   (   R   t   p(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   __next__W  s    		c         C` s
   |  j    S(   N(   R   (   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   next_  s    c         C` sY   |  j  } | t |   k r$ t  n | d k  r9 t  n  | d |  _  } |  | | d !S(   Ni    i   (   R[   R   R   R;   (   R   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   previousc  s    			c         C` s+   |  j  t |   k r t  n  | |  _  d  S(   N(   R[   R   R   (   R   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   setPositionl  s    	c         C` s<   |  j  t |   k r t  n  |  j  d k r4 |  j  Sd  Sd  S(   Ni    (   R[   R   R   RE   (   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   getPositionq  s
    	c         C` s   |  |  j  |  j  d !S(   Ni   (   R   (   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   getCurrentByte{  s    c         C` sc   |  j  } xJ | t |   k  rU |  | | d !} | | k rH | |  _ | S| d 7} q W| |  _ d S(   u   Skip past a list of charactersi   N(   R   R   R[   RE   (   R   Rz   R   R{   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyRo     s    			c         C` sc   |  j  } xJ | t |   k  rU |  | | d !} | | k rH | |  _ | S| d 7} q W| |  _ d  S(   Ni   (   R   R   R[   RE   (   R   Rz   R   R{   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt	   skipUntil  s    			c         C` sQ   |  j  } |  | | t |  !} | j |  } | rM |  j  t |  7_  n  | S(   u   Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone(   R   R   R   (   R   R"   R   R'   R+   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt
   matchBytes  s    	c         C` sh   |  |  j  j |  } | d k r^ |  j d k r= d |  _ n  |  j | t |  d 7_ t St  d S(   u   Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the matchii    i   N(   R   t   findR[   R   Rc   R   (   R   R"   t   newPosition(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   jumpTo  s    (   R0   R1   R2   R   R   R   R   R   R   R   R   t   propertyR   R   t   currentBytet   spaceCharactersBytesRo   R   R   R   (    (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR   H  s    												R   c           B` s_   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 d   Z
 d	   Z RS(
   u?   Mini parser for detecting character encoding from meta elementsc         C` s   t  |  |  _ d |  _ d S(   u3   string - the data to work on for encoding detectionN(   R   R'   RE   R   (   R   R'   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s    c         C` s   d |  j  f d |  j f d |  j f d |  j f d |  j f d |  j f f } xv |  j D]k } t } xR | D]J \ } } |  j j |  rk y |   } PWq t k
 r t	 } Pq Xqk qk W| sX PqX qX W|  j
 S(   Ns   <!--s   <metas   </s   <!s   <?R   (   t   handleCommentt
   handleMetat   handlePossibleEndTagt   handleOthert   handlePossibleStartTagR'   Rc   R   R   R8   R   (   R   t   methodDispatchRh   t   keepParsingt   keyt   method(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s&    	c         C` s   |  j  j d  S(   u   Skip over commentss   -->(   R'   R   (   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s    c         C` sK  |  j  j t k r t St } d  } x"t rF|  j   } | d  k rG t S| d d k r | d d k } | rC| d  k	 rC| |  _ t Sq% | d d k r | d } t |  } | d  k	 rC| |  _ t Sq% | d d k r% t	 t
 | d   } | j   } | d  k	 rCt |  } | d  k	 r@| r4| |  _ t S| } q@qCq% q% Wd  S(   Ni    s
   http-equivi   s   content-typet   charsett   content(   R'   R   R   Rc   R8   RE   t   getAttributeR   RJ   t   ContentAttrParserR   t   parse(   R   t	   hasPragmat   pendingEncodingt   attrt   tentativeEncodingt   codect   contentParser(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s:    		
		c         C` s   |  j  t  S(   N(   t   handlePossibleTagR8   (   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s    c         C` s   t  |  j  |  j t  S(   N(   R   R'   R   Rc   (   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s    c         C` s   |  j  } | j t k r9 | r5 | j   |  j   n  t S| j t  } | d k ra | j   n+ |  j   } x | d  k	 r |  j   } qp Wt S(   NR   (
   R'   R   t   asciiLettersBytesR   R   Rc   R   t   spacesAngleBracketsR   RE   (   R   t   endTagR'   R{   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s    	
c         C` s   |  j  j d  S(   NR   (   R'   R   (   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s    c         C` s  |  j  } | j t t d g  B } | d k sI t |  d k sI t  | d	 k rY d Sg  } g  } x t r| d k r | r Pnz | t k r | j   } Pn^ | d
 k r d j |  d f S| t	 k r | j
 | j    n | d k r d S| j
 |  t |  } qh W| d k r7| j   d j |  d f St |  | j   } | d k r| } x t rt |  } | | k rt |  d j |  d j |  f S| t	 k r| j
 | j    qb| j
 |  qbWn^ | d k rd j |  d f S| t	 k r| j
 | j    n | d k r-d S| j
 |  x} t rt |  } | t k rwd j |  d j |  f S| t	 k r| j
 | j    q=| d k rd S| j
 |  q=Wd S(   u_   Return a name,value pair for the next attribute in the stream,
        if one is found, or Nonet   /i   R   t   =R(   t   't   "N(   R   N(   R   R   (   R   R   (   R'   Ro   R   t	   frozensetRE   R   R   Rc   R)   t   asciiUppercaseBytesR&   R   R   R   R   (   R   R'   R{   t   attrNamet	   attrValuet	   quoteChar(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     sh    	$	

	
	(   R0   R1   R2   R   R   R   R   R   R   R   R   R   (    (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR     s   				$				R   c           B` s   e  Z d    Z d   Z RS(   c         C` s"   t  | t  s t  | |  _ d  S(   N(   R3   R"   R   R'   (   R   R'   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR   f  s    c         C` s:  y!|  j  j d  |  j  j d 7_ |  j  j   |  j  j d k sH d  S|  j  j d 7_ |  j  j   |  j  j d k r |  j  j } |  j  j d 7_ |  j  j } |  j  j |  r |  j  | |  j  j !Sd  SnP |  j  j } y( |  j  j t  |  j  | |  j  j !SWn t k
 r|  j  | SXWn t k
 r5d  SXd  S(   NR   i   R   R   R   (   R   R   (	   R'   R   R   Ro   R   RE   R   R   R   (   R   t	   quoteMarkt   oldPosition(    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR   j  s.    (   R0   R1   R   R   (    (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyR   e  s   	c         C` sw   t  |  t  r: y |  j d  }  Wq: t k
 r6 d SXn  |  d k	 ro y t j |   SWqs t k
 rk d SXn d Sd S(   u{   Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding.u   asciiN(   R3   R   t   decodet   UnicodeDecodeErrorRE   t   webencodingst   lookupt   AttributeError(   R   (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyRJ     s    (5   t
   __future__R    R   R   t   sixR   R   t	   six.movesR   R   R   Ru   R   t	   constantsR   R   R	   R
   R   R(   R   t   ioR   R   R   R   R%   t   encodeR   R   R   R   t   invalid_unicode_no_surrogateRD   RU   R   Rv   t   evalRf   t   setRn   t   ascii_punctuation_reRs   t   objectR   RC   R<   R=   R"   R   R   R   RJ   (    (    (    s4   lib/python2.7/site-packages/html5lib/_inputstream.pyt   <module>   sR   "
(((	+J	h'