ó
;×4\c        p   @  s4  d  Z  d d l m Z d d l Z d d l Z d d l Z d d l m Z m Z d d l m	 Z	 d d l
 m Z m Z d d l
 m Z d d l m Z d d	 l m Z d d l m Z d d
 l m Z d d l m Z d d l m Z d d l m Z e	 j Z e e ƒ Z e e	 j  d e	 j  d e	 j  d g ƒ Z! e	 j  d Z" e	 j  d Z# d d d d d d d d d d d d d d  d! d" d# d$ d% d& d' d( d) d* d+ d, d- d. d/ d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 d: d; d< d= d> d? d@ dA dB dC dD dE dF dG dH dI dJ dK dL dM dN dO dP dQ dR dS dT dU dV dW dX dY dZ d[ d\ d] d^ d_ d` da db dc dd de df dg dh di dj dk dl dm dn do dp dq dr ds dt du dv dw dx dy dz d{ d| d} d~ d d€ d d‚ gp Z$ dƒ e% f d„ „  ƒ  YZ& d… e f d† „  ƒ  YZ' d‡ e f dˆ „  ƒ  YZ( d‰ „  Z) dŠ „  Z* d‹ „  Z+ e j, dŒ ƒ Z- d „  Z. dŽ e f d „  ƒ  YZ/ d S(   u‡   
Shim module between Bleach and html5lib. This makes it easier to upgrade the
html5lib library without having to change a lot of code.
iÿÿÿÿ(   t   unicode_literalsN(   t
   HTMLParsert   getTreeWalker(   t	   constants(   t
   namespacest   prefixes(   t   _ReparseException(   t   Filter(   t   allowed_protocols(   t   HTMLInputStream(   t   HTMLSerializer(   t   HTMLTokenizer(   t   Trieu   StartTagu   EndTagu   EmptyTagu
   Charactersu
   ParseErroru   au   abbru   addressu   areau   articleu   asideu   audiou   bu   baseu   bdiu   bdou
   blockquoteu   bodyu   bru   buttonu   canvasu   captionu   citeu   codeu   colu   colgroupu   datau   datalistu   ddu   delu   detailsu   dfnu   dialogu   divu   dlu   dtu   emu   embedu   fieldsetu
   figcaptionu   figureu   footeru   formu   h1u   h2u   h3u   h4u   h5u   h6u   headu   headeru   hgroupu   hru   htmlu   iu   iframeu   imgu   inputu   insu   kbdu   keygenu   labelu   legendu   liu   linku   mapu   marku   menuu   metau   meteru   navu   noscriptu   objectu   olu   optgroupu   optionu   outputu   pu   paramu   pictureu   preu   progressu   qu   rpu   rtu   rubyu   su   sampu   scriptu   sectionu   selectu   slotu   smallu   sourceu   spanu   strongu   styleu   subu   summaryu   supu   tableu   tbodyu   tdu   templateu   textareau   tfootu   thu   theadu   timeu   titleu   tru   tracku   uu   ulu   varu   videou   wbrt   InputStreamWithMemoryc           B  st   e  Z d  Z d „  Z e d „  ƒ Z e d „  ƒ Z e d „  ƒ Z d „  Z e	 d „ Z
 d „  Z d „  Z d	 „  Z RS(
   uÄ   Wraps an HTMLInputStream to remember characters since last <

    This wraps existing HTMLInputStream classes to keep track of the stream
    since the last < which marked an open tag state.

    c         C  s4   | |  _  |  j  j |  _ |  j  j |  _ g  |  _ d  S(   N(   t   _inner_streamt   resett   positiont   _buffer(   t   selft   inner_stream(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyt   __init__®   s    	c         C  s
   |  j  j S(   N(   R   t   errors(   R   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR   ´   s    c         C  s
   |  j  j S(   N(   R   t   charEncoding(   R   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR   ¸   s    c         C  s
   |  j  j S(   N(   R   t   changeEncoding(   R   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR   ¼   s    c         C  s,   |  j  j ƒ  } | r( |  j j | ƒ n  | S(   N(   R   t   charR   t   append(   R   t   c(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR   À   s    c         C  s2   |  j  j | d | ƒ} |  j j t | ƒ ƒ | S(   Nt   opposite(   R   t
   charsUntilR   t   extendt   list(   R   t
   charactersR   t   chars(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR   Ç   s    c         C  s,   |  j  r |  j  j d ƒ n  |  j j | ƒ S(   Niÿÿÿÿ(   R   t   popR   t   unget(   R   R   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR"   Ì   s    	c         C  s   t  j d ƒ j |  j ƒ S(   uþ   Returns the stream history since last '<'

        Since the buffer starts at the last '<' as as seen by tagOpenState(),
        we know that everything from that point to when this method is called
        is the "tag" that is being tokenized.

        u    (   t   sixt	   text_typet   joinR   (   R   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyt   get_tagÑ   s    c         C  s   d g |  _  d S(   u¶   Resets stream history to just '<'

        This gets called by tagOpenState() which marks a '<' that denotes an
        open tag. Any time we see that, we reset the buffer.

        u   <N(   R   (   R   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyt	   start_tagÛ   s    (   t   __name__t
   __module__t   __doc__R   t   propertyR   R   R   R   t   FalseR   R"   R&   R'   (    (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR   §   s   				
t   BleachHTMLTokenizerc           B  sD   e  Z d  Z e d „ Z d „  Z d e d „ Z d „  Z d „  Z	 RS(   u1   Tokenizer that doesn't consume character entitiesc         K  s5   t  t |  ƒ j |   | |  _ t |  j ƒ |  _ d  S(   N(   t   superR-   R   t   consume_entitiesR   t   stream(   R   R/   t   kwargs(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR   ç   s    	c         c  s„  d  } xit t |  ƒ j ƒ  D]R} | d  k	 rM| d d k r¼ | d t k r¼ | j d ƒ r¼ g  | d D]< } d | d k rh d | d k rh d | d k rh | ^ qh | d <d  } | Vq | d d k r| d j ƒ  j ƒ  |  j j	 k r|  j
 j ƒ  | d <t | d <d  } | Vq | d t k r7| V| } q | V| Vd  } q n  | d t k ri| } q n  | Vq W| r€| Vn  d  S(	   Nu   datau#   invalid-character-in-attribute-nameu   typeu   "i    u   'u   <u!   expected-closing-tag-but-got-char(   t   NoneR.   R-   t   __iter__t   TAG_TOKEN_TYPESt   gett   lowert   stript   parsert   tagsR0   R&   t   CHARACTERS_TYPEt   PARSEERROR_TYPE(   R   t   last_error_tokent   tokent   item(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR3   ï   s>     "
		c         C  sh   |  j  r" t t |  ƒ j | | ƒ S| rF |  j d d d c d 7<n |  j j i t d 6d d 6ƒ d  S(   Nu   dataiÿÿÿÿi   u   &u   type(   R/   R.   R-   t   consumeEntityt   currentTokent
   tokenQueueR   R:   (   R   t   allowedChart   fromAttribute(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR?   -  s
    	c         C  s    |  j  j ƒ  t t |  ƒ j ƒ  S(   N(   R0   R'   R.   R-   t   tagOpenState(   R   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyRD   ?  s    c         C  s¿   |  j  } |  j j d  k	 r¨ | d t k r¨ | d j ƒ  |  j j k r¨ |  j j r\ d } n |  j j ƒ  } i t	 d 6| d 6} | |  _  |  j
 j | ƒ |  j |  _ d  St t |  ƒ j ƒ  d  S(   Nu   typeu   nameu    u   data(   R@   R8   R9   R2   R4   R6   R7   R0   R&   R:   RA   R   t	   dataStatet   stateR.   R-   t   emitCurrentToken(   R   R=   t   new_datat	   new_token(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyRG   G  s    		
	N(
   R(   R)   R*   R,   R   R3   R2   R?   RD   RG   (    (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR-   å   s   	>	t   BleachHTMLParserc           B  s)   e  Z d  Z d „  Z e d e d „ Z RS(   u$   Parser that uses BleachHTMLTokenizerc         K  s`   | d k	 r+ g  | D] } | j ƒ  ^ q n d |  _ | |  _ | |  _ t t |  ƒ j |   d S(   uÐ  
        :arg tags: list of allowed tags--everything else is either stripped or
            escaped; if None, then this doesn't look at tags at all
        :arg strip: whether to strip disallowed tags (True) or escape them (False);
            if tags=None, then this doesn't have any effect
        :arg consume_entities: whether to consume entities (default behavior) or
            leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)

        N(   R2   R6   R9   R7   R/   R.   RJ   R   (   R   R9   R7   R/   R1   t   tag(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR   l  s    
4		u   divc         K  sƒ   | |  _  | |  _ | |  _ t d | d |  j d |  |  |  _ |  j ƒ  y |  j ƒ  Wn% t k
 r~ |  j ƒ  |  j ƒ  n Xd  S(   NR0   R/   R8   (	   t   innerHTMLModet	   containert	   scriptingR-   R/   t	   tokenizerR   t   mainLoopt   ReparseException(   R   R0   t	   innerHTMLRM   RN   R1   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyt   _parse{  s    				

(   R(   R)   R*   R   R,   RS   (    (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyRJ   j  s   	c         C  sd   |  d d k rT |  d d	 k r: t  j t |  d d ƒ ƒ St  j t |  d d ƒ ƒ St j |  d
 ƒ S(   u9  Convert an entity (minus the & and ; part) into what it represents

    This handles numeric, hex, and text entities.

    :arg value: the string (minus the ``&`` and ``;`` part) to convert

    :returns: unicode character or None if it's an ambiguous ampersand that
        doesn't match a character entity

    i    u   #i   u   xu   Xi   i   i
   (   u   xu   XN(   R#   t   unichrt   intt   ENTITIESR5   R2   (   t   value(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyt   convert_entity  s
    c         C  sÕ   d |  k r |  Sg  } x¯ t  |  ƒ D]¡ } | s5 q# n  | j d ƒ r· t | ƒ } | d k	 r· t | ƒ } | d k	 r´ | j | ƒ | t | ƒ d } | r# | j | ƒ q# q# q´ q· n  | j | ƒ q# Wd j | ƒ S(   u‘   Converts all found entities in the text

    :arg text: the text to convert entities in

    :returns: unicode text with converted entities

    u   &i   u    N(   t   next_possible_entityt
   startswitht   match_entityR2   RX   R   t   lenR%   (   t   textt   new_textt   partt   entityt	   convertedt	   remainder(    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyt   convert_entities¢  s$    c         C  s€  |  d d k r t  d ƒ ‚ n  |  d }  t |  ƒ }  d } d t j } |  r|  d d k rd } |  j d ƒ |  r£ |  d d k r£ d
 } | |  j d ƒ 7} n d } xC |  rî |  d | k rî |  j d ƒ } | | k rá Pn  | | 7} q¬ W| r|  r|  d d k r| Sd SxF |  r[|  d | k r[|  j d ƒ } t j | ƒ sNPn  | | 7} qW| r||  r||  d d k r|| Sd S(   uH  Returns first entity in stream or None if no entity exists

    Note: For Bleach purposes, entities must start with a "&" and end with
    a ";". This ignoresambiguous character entities that have no ";" at the
    end.

    :arg stream: the character stream

    :returns: ``None`` or the entity string without "&" or ";"

    i    u   &u   Stream should begin with "&"i   u    u   <&=;u   #u   xu   Xu   0123456789abcdefABCDEFu
   0123456789u   ;(   u   xu   XN(   t
   ValueErrorR   t   stringt
   whitespaceR!   R2   t   ENTITIES_TRIEt   has_keys_with_prefix(   R0   t   possible_entityt   end_characterst   allowedR   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyR[   Å  s:    
u   (&)c         c  sZ   xS t  t j |  ƒ ƒ D]< \ } } | d k r6 | Vq | d d k r d | Vq q Wd S(   u·   Takes a text and generates a list of possible entities

    :arg text: the text to look at

    :returns: generator where each part (except the first) starts with an
        "&"

    i    i   u   &N(   t	   enumeratet   AMP_SPLIT_REt   split(   R]   t   iR_   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyRY     s
    	"t   BleachHTMLSerializerc           B  s#   e  Z d  Z d „  Z d d „ Z RS(   u3   HTMLSerializer that undoes & -> &amp; in attributesc         c  s»   | j  d d ƒ } x¢ t | ƒ D]” } | s1 q n  | j d ƒ r¢ t | ƒ } | d k	 r¢ t | ƒ d k	 r¢ d | d V| t | ƒ d } | r | Vq q q¢ n  | j  d d ƒ Vq Wd S(   u,   Escapes just bare & in HTML attribute valuesu   &amp;u   &u   ;i   N(   t   replaceRY   RZ   R[   R2   RX   R\   (   R   t   stokenR_   R`   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyt   escape_base_amp  s    	c         c  sË   t  } t  } x¸ t t |  ƒ j | | ƒ D]› } | r¦ | d k rI t  } nU | r‰ | d k rž x |  j | ƒ D] } | Vqk Wt  } q( qž n | d k rž t } n  | Vq( | j d ƒ r¾ t } n  | Vq( Wd S(   uÜ   Wrap HTMLSerializer.serialize and conver & to &amp; in attribute values

        Note that this converts & to &amp; in attribute values where the & isn't
        already part of an unambiguous character entity.

        u   >u   "u   =u   <N(   R,   R.   Rp   t	   serializeRs   t   TrueRZ   (   R   t
   treewalkert   encodingt   in_tagt   after_equalsRr   R_   (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyRt   3  s$    "					N(   R(   R)   R*   Rs   R2   Rt   (    (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyRp     s   	(0   R*   t
   __future__R    t   reRe   R#   t   bleach._vendor.html5libR   R   R   t!   bleach._vendor.html5lib.constantsR   R   R   RQ   t$   bleach._vendor.html5lib.filters.baseR   t)   bleach._vendor.html5lib.filters.sanitizerR   t   SanitizerFiltert$   bleach._vendor.html5lib._inputstreamR	   t"   bleach._vendor.html5lib.serializerR
   t"   bleach._vendor.html5lib._tokenizerR   t   bleach._vendor.html5lib._trieR   t   entitiesRV   Rg   t   sett
   tokenTypesR4   R:   R;   t	   HTML_TAGSt   objectR   R-   RJ   RX   Rc   R[   t   compileRm   RY   Rp   (    (    (    s3   lib/python2.7/site-packages/bleach/html5lib_shim.pyt   <module>   s   	

	>…%		#	:	