B
    n&=[‚=  ã               @   s~  d dl mZmZmZ d dlmZ d dlZd dlmZm	Z	 ddl
mZmZmZ ddl
mZmZmZ ddlmZmZ d d	lmZ d
 e¡d Ze de d ¡Ze de d ¡Zi ZedƒdkZx~ee ¡ ƒD ]n\Z Z!eräee!ƒdksÌesöee!ƒdkröqÌe!dkrÌee!ƒdkre "e!¡Z!ne#e!ƒZ!e!eks2e  $¡ rÌe ee!< qÌW dd„ Z%ede%ƒ ddd„Z&G dd„ de'ƒZ(G dd„ de)ƒZ*dS )é    )Úabsolute_importÚdivisionÚunicode_literals)Ú	text_typeN)Úregister_errorÚxmlcharrefreplace_errorsé   )ÚvoidElementsÚbooleanAttributesÚspaceCharacters)ÚrcdataElementsÚentitiesÚxmlEntities)ÚtreewalkersÚ_utils)ÚescapeÚ z"'=<>`ú[ú]u_    	
 /`Â áš€á Žá â€€â€â€‚â€ƒâ€„â€…â€†â€‡â€ˆâ€‰â€Šâ€¨â€©â€¯âŸã€€]u   ô¿¿é   ú&c       
   	   C   s"  t | ttfƒrg }g }d}xˆt| j| j| j… ƒD ]n\}}|rFd}q4|| j }t | j|t	| j|d gƒ… ¡rt 
| j||d … ¡}d}nt|ƒ}| |¡ q4W x^|D ]V}t |¡}	|	rè| d¡ | |	¡ |	 d¡sæ| d¡ q¬| dt|ƒdd …  ¡ q¬W d |¡| jfS t| ƒS d S )NFr   Tr   ú;z&#x%s;r   )Ú
isinstanceÚUnicodeEncodeErrorÚUnicodeTranslateErrorÚ	enumerateÚobjectÚstartÚendr   ZisSurrogatePairÚminÚsurrogatePairToCodepointÚordÚappendÚ_encode_entity_mapÚgetÚendswithÚhexÚjoinr   )
ÚexcZresZ
codepointsÚskipÚiÚcÚindexZ	codepointZcpÚe© r.   ú2lib/python3.7/site-packages/html5lib/serializer.pyÚhtmlentityreplace_errors*   s0     
"




r0   ÚhtmlentityreplaceÚetreec             K   s$   t  |¡}tf |Ž}| || ƒ|¡S )a˜  Serializes the input token stream using the specified treewalker

    :arg input: the token stream to serialize

    :arg tree: the treewalker to use

    :arg encoding: the encoding to use

    :arg serializer_opts: any options to pass to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: the tree serialized as a string

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

    )r   ZgetTreeWalkerÚHTMLSerializerÚrender)ÚinputZtreeÚencodingZserializer_optsZwalkerÚsr.   r.   r/   Ú	serializeK   s    

r8   c               @   s~   e Zd ZdZdZdZdZdZdZdZ	dZ
dZdZdZdZdZdZdZdd„ Zdd	„ Zd
d„ Zddd„Zddd„Zddd„ZdS )r3   Úlegacyú"TF)Úquote_attr_valuesÚ
quote_charÚuse_best_quote_charÚomit_optional_tagsÚminimize_boolean_attributesÚuse_trailing_solidusÚspace_before_trailing_solidusÚescape_lt_in_attrsÚescape_rcdataÚresolve_entitiesÚalphabetical_attributesÚinject_meta_charsetÚstrip_whitespaceÚsanitizec          
   K   sz   t |ƒt | jƒ }t|ƒdkr2tdtt|ƒƒ ƒ‚d|kr@d| _x(| jD ]}t| || |t	| |ƒ¡ƒ qHW g | _
d| _dS )aB
  Initialize HTMLSerializer

        :arg inject_meta_charset: Whether or not to inject the meta charset.

            Defaults to ``True``.

        :arg quote_attr_values: Whether to quote attribute values that don't
            require quoting per legacy browser behavior (``"legacy"``), when
            required by the standard (``"spec"``), or always (``"always"``).

            Defaults to ``"legacy"``.

        :arg quote_char: Use given quote character for attribute quoting.

            Defaults to ``"`` which will use double quotes unless attribute
            value contains a double quote, in which case single quotes are
            used.

        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
            values.

            Defaults to ``False``.

        :arg escape_rcdata: Whether to escape characters that need to be
            escaped within normal elements within rcdata elements such as
            style.

            Defaults to ``False``.

        :arg resolve_entities: Whether to resolve named character entities that
            appear in the source tree. The XML predefined entities &lt; &gt;
            &amp; &quot; &apos; are unaffected by this setting.

            Defaults to ``True``.

        :arg strip_whitespace: Whether to remove semantically meaningless
            whitespace. (This compresses all whitespace to a single space
            except within ``pre``.)

            Defaults to ``False``.

        :arg minimize_boolean_attributes: Shortens boolean attributes to give
            just the attribute value, for example::

              <input disabled="disabled">

            becomes::

              <input disabled>

            Defaults to ``True``.

        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
            start tag of void elements (empty elements whose end tag is
            forbidden). E.g. ``<hr/>``.

            Defaults to ``False``.

        :arg space_before_trailing_solidus: Places a space immediately before
            the closing slash in a tag using a trailing solidus. E.g.
            ``<hr />``. Requires ``use_trailing_solidus=True``.

            Defaults to ``True``.

        :arg sanitize: Strip all unsafe or unknown constructs from output.
            See :py:class:`html5lib.filters.sanitizer.Filter`.

            Defaults to ``False``.

        :arg omit_optional_tags: Omit start/end tags that are optional.

            Defaults to ``True``.

        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.

            Defaults to ``False``.

        r   z2__init__() got an unexpected keyword argument '%s'r<   FN)Ú	frozensetÚoptionsÚlenÚ	TypeErrorÚnextÚiterr=   Úsetattrr$   ÚgetattrÚerrorsÚstrict)ÚselfÚkwargsZunexpected_argsÚattrr.   r.   r/   Ú__init__‡   s    OzHTMLSerializer.__init__c             C   s*   t |tƒst‚| jr"| | jd¡S |S d S )Nr1   )r   r   ÚAssertionErrorr6   Úencode)rS   Ústringr.   r.   r/   rX   à   s    zHTMLSerializer.encodec             C   s*   t |tƒst‚| jr"| | jd¡S |S d S )NrR   )r   r   rW   r6   rX   )rS   rY   r.   r.   r/   ÚencodeStrictç   s    zHTMLSerializer.encodeStrictNc             c   sî  || _ d}g | _|r0| jr0ddlm} |||ƒ}| jrJddlm} ||ƒ}| jrdddlm} ||ƒ}| j	r~ddl
m} ||ƒ}| jr˜ddlm} ||ƒ}xN|D ]D}|d }|dkr`d|d  }|d rÞ|d	|d  7 }n|d
 rî|d7 }|d
 rJ|d
  d¡dkr0|d
  d¡dkr*|  d¡ d}nd}|d||d
 |f 7 }|d7 }|  |¡V  q |dkrÆ|dksz|r°|rž|d  d¡dkrž|  d¡ |  |d ¡V  n|  t|d ƒ¡V  q |dkrî|d }	|  d|	 ¡V  |	tkr | js d}n|r|  d¡ x˜|d  ¡ D ]†\\}
}}|}|}|  d¡V  |  |¡V  | jr~|t |	tƒ ¡kr |t dtƒ ¡kr |  d¡V  | jdks¤t|ƒdkrªd}n@| jdkrÆt |¡d k	}n$| jd krât |¡d k	}ntd!ƒ‚| d"d#¡}| j r
| d$d%¡}|rš| j!}| j"rPd|kr8d|kr8d}nd|krPd|krPd}|dkrh| dd&¡}n| dd'¡}|  |¡V  |  |¡V  |  |¡V  n|  |¡V  q W |	t#krà| j$rà| j%rÔ|  d(¡V  n|  d)¡V  |  d¡V  q |d*kr2|d }	|	tkrd}n|r |  d¡ |  d+|	 ¡V  q |d,krt|d }| d-¡dkr^|  d.¡ |  d/|d  ¡V  q |d0krØ|d }	|	d1 }|t&kr¦|  d2|	 ¡ | j'rÂ|t(krÂt&| }nd3|	 }|  |¡V  q |  |d ¡ q W d S )4NFr   )ÚFilterÚtypeZDoctypez<!DOCTYPE %sÚnameZpublicIdz PUBLIC "%s"ZsystemIdz SYSTEMr:   r   ú'zASystem identifer contains both single and double quote charactersz %s%s%sú>)Z
CharactersÚSpaceCharactersr`   Údataz</zUnexpected </ in CDATA)ZStartTagZEmptyTagz<%sTz+Unexpected child element of a CDATA elementú r   ú=ÚalwaysÚspecr9   z?quote_attr_values must be one of: 'always', 'spec', or 'legacy'r   z&amp;ú<z&lt;z&#39;z&quot;z /ú/ZEndTagz</%s>ÚCommentz--zComment contains --z	<!--%s-->ZEntityr   zEntity %s not recognizedz&%s;))r6   rQ   rF   Zfilters.inject_meta_charsetr[   rE   Zfilters.alphabeticalattributesrG   Zfilters.whitespacerH   Zfilters.sanitizerr>   Zfilters.optionaltagsÚfindÚserializeErrorrZ   rX   r   r   rC   Úitemsr?   r
   r$   Útupler;   rK   Ú_quoteAttributeSpecÚsearchÚ_quoteAttributeLegacyÚ
ValueErrorÚreplacerB   r<   r=   r	   r@   rA   r   rD   r   )rS   Ú
treewalkerr6   Zin_cdatar[   Útokenr\   Zdoctyper<   r]   Ú_Z	attr_nameZ
attr_valueÚkÚvZ
quote_attrra   Úkeyr.   r.   r/   r8   î   sØ    

















zHTMLSerializer.serializec             C   s2   |rd  t|  ||¡ƒ¡S d  t|  |¡ƒ¡S dS )an  Serializes the stream from the treewalker into a string

        :arg treewalker: the treewalker to serialize

        :arg encoding: the string encoding to use

        :returns: the serialized tree

        Example:

        >>> from html5lib import parse, getTreeWalker
        >>> from html5lib.serializer import HTMLSerializer
        >>> token_stream = parse('<html><body>Hi!</body></html>')
        >>> walker = getTreeWalker('etree')
        >>> serializer = HTMLSerializer(omit_optional_tags=False)
        >>> serializer.render(walker(token_stream))
        '<html><head></head><body>Hi!</body></html>'

        ó    r   N)r'   Úlistr8   )rS   rr   r6   r.   r.   r/   r4   w  s    zHTMLSerializer.renderúXXX ERROR MESSAGE NEEDEDc             C   s   | j  |¡ | jrt‚d S )N)rQ   r"   rR   ÚSerializeError)rS   ra   r.   r.   r/   rj     s    zHTMLSerializer.serializeError)N)N)rz   )Ú__name__Ú
__module__Ú__qualname__r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rJ   rV   rX   rZ   r8   r4   rj   r.   r.   r.   r/   r3   h   s,   Y
 

r3   c               @   s   e Zd ZdZdS )r{   zError in serialized treeN)r|   r}   r~   Ú__doc__r.   r.   r.   r/   r{   —  s   r{   )r2   N)+Z
__future__r   r   r   Zsixr   ÚreÚcodecsr   r   Z	constantsr	   r
   r   r   r   r   r   r   r   Zxml.sax.saxutilsr   r'   Z_quoteAttributeSpecCharsÚcompilerm   ro   r#   rK   Z_is_ucs4ry   rk   ru   rv   r    r!   Úislowerr0   r8   r   r3   Ú	Exceptionr{   r.   r.   r.   r/   Ú<module>   s<   
	

  1