ó
0½3\c           @  s’  d  d l  m Z d  d l m Z d  d l Z d  d l Z d  d l m Z d  d l m	 Z	 d  d l
 m Z d  d l m Z m Z d d	 d
 d d d d d d d d d g Z i d d g d 6d g d	 6d g d
 6Z g  Z d d d g Z d j g  e e d d ƒ e d d ƒ e d d ƒ ƒ D] Z e e ƒ ^ qƒ Z e j d  e d! e j ƒ Z d" Z d# e f d$ „  ƒ  YZ d% „  Z d& e j f d' „  ƒ  YZ  d S((   iÿÿÿÿ(   t   unicode_literals(   t   chainN(   t   urlparse(   t   unescape(   t   html5lib_shim(   t   alphabetize_attributest   force_unicodeu   au   abbru   acronymu   bu
   blockquoteu   codeu   emu   iu   liu   olu   strongu   ulu   hrefu   titleu   httpu   httpsu   mailtou    i    i	   i   i   i   i    u   [u   ]u   ?t   Cleanerc           B  s5   e  Z d  Z e e e e e e d d „ Z
 d „  Z RS(   u¨  Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    To use::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       This cleaner is not designed to use to transform content to be used in
       non-web-page contexts.

    .. Warning::

       This cleaner is not thread-safe--the html parser has internal state.
       Create a separate cleaner per thread!


    c         C  s»   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | p? g  |  _ t j d |  j  d |  j d t	 d t	 ƒ |  _
 t j d ƒ |  _ t j d d d t	 d	 t d
 t	 d t	 d t	 ƒ |  _ d S(   u  Initializes a Cleaner

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: allowed list of css styles; defaults to
            ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed content through

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        t   tagst   stript   consume_entitiest   namespaceHTMLElementsu   etreet   quote_attr_valuesu   alwayst   omit_optional_tagst   escape_lt_in_attrst   resolve_entitiest   sanitizet   alphabetical_attributesN(   R   t
   attributest   stylest	   protocolsR	   t   strip_commentst   filtersR   t   BleachHTMLParsert   Falset   parsert   getTreeWalkert   walkert   BleachHTMLSerializert   Truet
   serializer(   t   selfR   R   R   R   R	   R   R   (    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyt   __init__W   s(    										c         C  sè   t  | t j ƒ s9 d j d | j j ƒ } t | ƒ ‚ n  | sC d St | ƒ } |  j j	 | ƒ } t
 d |  j | ƒ d |  j d |  j d |  j d |  j d	 |  j d
 |  j d g  ƒ } x  |  j D] } | d | ƒ } q¿ W|  j j | ƒ S(   uÐ   Cleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode

        :raises TypeError: if ``text`` is not a text type

        u9   argument cannot be of '{name}' type, must be of text typet   nameu    t   sourceR   t   strip_disallowed_elementst   strip_html_commentst   allowed_elementst   allowed_css_propertiest   allowed_protocolst   allowed_svg_properties(   t
   isinstancet   sixt   string_typest   formatt	   __class__t   __name__t	   TypeErrorR   R   t   parseFragmentt   BleachSanitizerFilterR   R   R	   R   R   R   R   R   R   t   render(   R   t   textt   messaget   domt   filteredt   filter_class(    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyt   clean•   s(    
								N(   R.   t
   __module__t   __doc__t   ALLOWED_TAGSt   ALLOWED_ATTRIBUTESt   ALLOWED_STYLESt   ALLOWED_PROTOCOLSR   R   t   NoneR    R8   (    (    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyR   :   s
   	<c           sd   t  ˆ  ƒ r ˆ  St ˆ  t ƒ r2 ‡  f d †  } | St ˆ  t ƒ rT ‡  f d †  } | St d ƒ ‚ d S(   u0  Generates attribute filter function for the given attributes value

    The attributes value can take one of several shapes. This returns a filter
    function appropriate to the attributes value. One nice thing about this is
    that there's less if/then shenanigans in the ``allow_token`` method.

    c           s…   |  ˆ  k rE ˆ  |  } t  | ƒ r2 | |  | | ƒ S| | k rE t Sn  d ˆ  k r ˆ  d } t  | ƒ rw | |  | | ƒ S| | k St S(   Nu   *(   t   callableR   R   (   t   tagt   attrt   valuet   attr_val(   R   (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyt   _attr_filterÌ   s    


c           s
   | ˆ  k S(   N(    (   RA   RB   RC   (   R   (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyRE   á   s    u3   attributes needs to be a callable, a list or a dictN(   R@   R)   t   dictt   listt
   ValueError(   R   RE   (    (   R   s/   lib/python2.7/site-packages/bleach/sanitizer.pyt   attribute_filter_factoryÀ   s    R1   c           B  sq   e  Z d  Z e e e d „ Z d „  Z d „  Z d „  Z	 d „  Z
 d „  Z d „  Z d „  Z d	 „  Z d
 „  Z RS(   um   html5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    c         K  s:   t  | ƒ |  _ | |  _ | |  _ t t |  ƒ j | |  S(   u   Creates a BleachSanitizerFilter instance

        :arg Treewalker source: stream

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: allowed list of css styles; defaults to
            ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip_disallowed_elements: whether or not to strip disallowed
            elements

        :arg bool strip_html_comments: whether or not to strip HTML comments

        (   RI   t   attr_filterR#   R$   t   superR1   R    (   R   R"   R   R#   R$   t   kwargs(    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyR    ï   s    		c         c  s]   xV | D]N } |  j  | ƒ } | s( q n  t | t ƒ rP x | D] } | Vq> Wq | Vq Wd  S(   N(   t   sanitize_tokenR)   RG   (   R   t   token_iteratort   tokent   rett   subtoken(    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyt   sanitize_stream  s    c         c  sç   g  } x¡ | D]™ } | r~ | d d k r< | j  | ƒ q q¡ i d j g  | D] } | d ^ qL ƒ d 6d d 6} g  } | Vn# | d d k r¡ | j  | ƒ q n  | Vq Wi d j g  | D] } | d ^ qº ƒ d 6d d 6} | Vd S(   u/   Merge consecutive Characters tokens in a streamu   typeu
   Charactersu    u   dataN(   t   appendt   join(   R   RN   t   characters_bufferRO   t
   char_tokent	   new_token(    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyt   merge_characters  s&    '
	'
c         C  s"   |  j  |  j t j j |  ƒ ƒ ƒ S(   N(   RX   RR   R   t   Filtert   __iter__(   R   (    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyRZ   :  s    c         C  s·   | d } | d
 k rv | d |  j  k r6 |  j | ƒ S|  j rC d	 Sd | k rf t | d ƒ | d <n  |  j | ƒ Sn= | d k r– |  j s | Sd	 Sn | d k r¯ |  j | ƒ S| Sd	 S(   uÕ  Sanitize a token either by HTML-encoding or dropping.

        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
        ['attribute', 'pairs'], 'tag': callable}.

        Here callable is a function with two arguments of attribute name and
        value. It should return true of false.

        Also gives the option to strip tags instead of encoding.

        :arg dict token: token to sanitize

        :returns: token or list of tokens

        u   typeu   StartTagu   EndTagu   EmptyTagu   nameu   datau   Commentu
   CharactersN(   u   StartTagu   EndTagu   EmptyTag(   R%   t   allow_tokenR#   R?   R   t   disallowed_tokenR$   t   sanitize_characters(   R   RO   t
   token_type(    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyRM   =  s     
		c         C  sC  | j  d d ƒ } | s | St j t | ƒ } | | d <d | k rH | Sg  } xî t j | ƒ D]Ý } | sp q^ n  | j d ƒ r t j | ƒ } | d
 k	 r | d k rÄ | j	 i d d 6d d 6ƒ n | j	 i d d 6| d 6ƒ | t
 | ƒ d	 } | r^ | j	 i d d 6| d 6ƒ q^ q^ q n  | j	 i d d 6| d 6ƒ q^ W| S(   u½  Handles Characters tokens

        Our overridden tokenizer doesn't do anything with entities. However,
        that means that the serializer will convert all ``&`` in Characters
        tokens to ``&amp;``.

        Since we don't want that, we extract entities here and convert them to
        Entity tokens so the serializer will let them be.

        :arg token: the Characters token to work on

        :returns: a list of tokens

        u   datau    u   &u   ampu
   Charactersu   typeu   Entityu   namei   N(   t   gett   INVISIBLE_CHARACTERS_REt   subt   INVISIBLE_REPLACEMENT_CHARR   t   next_possible_entityt
   startswitht   match_entityR?   RS   t   len(   R   RO   t   datat
   new_tokenst   partt   entityt	   remainder(    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyR]   h  s.    
	c         C  sÖ   t  j | ƒ } t j d d | ƒ } | j d d ƒ } | j ƒ  } y t | ƒ } Wn t k
 rf d SX| j	 r† | j	 | k rÒ | SnL | j
 d ƒ r™ | Sd | k rÂ | j d ƒ d | k rÂ | Sd | k rÒ | Sd S(	   uÅ   Checks a uri value to see if it's allowed

        :arg value: the uri value to sanitize
        :arg allowed_protocols: list of allowed protocols

        :returns: allowed value or None

        u   [`\000-\040\177-\240\s]+u    u   ï¿½u   #u   :i    u   httpN(   R   t   convert_entitiest   reRa   t   replacet   lowerR   RH   R?   t   schemeRd   t   split(   R   RC   R'   t	   new_valuet   parsed(    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyt   sanitize_uri_value¥  s*    		%c   	      C  s}  d | k ryi  } xQ| d j  ƒ  D]?\ } } | \ } } |  j | d | | ƒ sZ q# n  | |  j k r™ |  j | |  j ƒ } | d	 k r q# n  | } n  | |  j k rä t j d d t	 | ƒ ƒ } | j
 ƒ  } | sÛ q# qä | } n  d	 | d f |  j k r:| d
 t j d d f g k r:t j d | ƒ r7q# q7q:n  | d k rX|  j | ƒ } n  | | | <q# Wt | ƒ | d <n  | S(   u-   Handles the case where we're allowing the tagu   datau   nameu   url\s*\(\s*[^#\s][^)]+?\)u    u   hrefu   xlinku
   ^\s*[^#\s]u   styleN(   Nu   href(   Nu   style(   t   itemsRJ   t   attr_val_is_uriRt   R'   R?   t   svg_attr_val_allows_refRm   Ra   R   R	   t   svg_allow_local_hrefR   t
   namespacest   searcht   sanitize_cssR   (	   R   RO   t   attrst   namespaced_namet   valt	   namespaceR!   Rr   t   new_val(    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyR[   à  s:    			c         C  sV  | d } | d k r+ d | d | d <nî | d r| d k sG t  ‚ g  } x“ | d j ƒ  D] \ \ } } } | r | r | | } } n  | d  k s¨ | t j k r± | } n d t j | | f } | j d	 | | f ƒ q^ Wd
 | d d j | ƒ f | d <n d | d | d <| j d ƒ rA| d d  d | d <n  d | d <| d =| S(   Nu   typeu   EndTagu   </%s>u   nameu   datau   StartTagu   EmptyTagu   %s:%su    %s="%s"u   <%s%s>u    u   <%s>u   selfClosingiÿÿÿÿu   />u
   Characters(   u   StartTagu   EmptyTag(   t   AssertionErrorRu   R?   R   t   prefixesRS   RT   R_   (   R   RO   R^   R|   t   nsR!   t   vR}   (    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyR\     s,    

#		$
c         C  s&  t  j | ƒ } t j d ƒ j d | ƒ } | j d ƒ } t j d ƒ } x! | D] } | j | ƒ sO d SqO Wt j d | ƒ s‚ d Sg  } xŽ t j d | ƒ D]z \ } } | s³ q› n  | j ƒ  |  j	 k rä | j
 | d | d ƒ q› | j ƒ  |  j k r› | j
 | d | d ƒ q› q› Wd j | ƒ S(	   u   Sanitizes css in style tagsu   url\s*\(\s*[^\s)]+?\s*\)\s*u    u   ;uI   ^([-/:,#%.'"\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$u    u    ^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$u   ([-\w]+)\s*:\s*([^:;]*)u   : (   R   Rl   Rm   t   compileRa   Rq   t   matcht   findallRo   R&   RS   R(   RT   (   R   t   stylet   partst   gauntletRi   R8   t   propRC   (    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyR{   F  s&    	 (   R.   R9   R:   R<   R   R   R    RR   RX   RZ   RM   R]   Rt   R[   R\   R{   (    (    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyR1   é   s   				+	=	;	=	)(!   t
   __future__R    t	   itertoolsR   Rm   R*   t   six.moves.urllib.parseR   t   xml.sax.saxutilsR   t   bleachR   t   bleach.utilsR   R   R;   R<   R=   R>   RT   t   ranget   ct   chrt   INVISIBLE_CHARACTERSR…   t   UNICODER`   Rb   t   objectR   RI   t   SanitizerFilterR1   (    (    (    s/   lib/python2.7/site-packages/bleach/sanitizer.pyt   <module>   sB   	
O†	)