B
    ³è0\ÿ‹  ã            *   @   s¬  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZddlm	Z	 yd dl
m
Z
 W n ek
rv   ej
Z
Y nX e	r†dd„ Zndd„ Zdd	„ Zd
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3œ)Zd4d5„ ZG d6d7„ d7eƒZd8d9„ ZG d:d;„ d;e d<d=d>g¡ƒZG d?d@„ d@eƒZG dAdB„ dBƒZG dCdD„ dDƒZG dEdF„ dFeƒZG dGdH„ dHe
ƒZG dIdJ„ dJƒZG dKdL„ dLƒZdMdN„ ZG dOdP„ dPƒZdS )Qé    Né   )Úpy3)ÚUserDictc             C   s
   |   d¡S )Nzus-ascii)Úencode)Ús© r   ú,lib/python3.7/site-packages/PIL/PdfParser.pyÚ
make_bytes   s    r	   c             C   s   | S )Nr   )r   r   r   r   r	      s    c             C   s   t j|  d¡ S )NÚ	utf_16_be)ÚcodecsÚBOM_UTF16_BEr   )r   r   r   r   Úencode_text   s    r   úu   Ë˜u   Ë‡u   Ë†u   Ë™u   Ëu   Ë›u   Ëšu   Ëœu   â€¢u   â€ u   â€¡u   â€¦u   â€”u   â€“u   Æ’u   â„u   â€¹u   â€ºu   âˆ’u   â€°u   â€žu   â€œu   â€u   â€˜u   â€™u   â€šu   â„¢u   ï¬u   ï¬‚u   Åu   Å’u   Å u   Å¸u   Å½u   Ä±u   Å‚u   Å“u   Å¡u   Å¾u   â‚¬))é   é   é   é   é   é   é   é   é   é€   é   é‚   éƒ   é„   é…   é†   é‡   éˆ   é‰   éŠ   é‹   éŒ   é   éŽ   é   é   é‘   é’   é“   é”   é•   é–   é—   é˜   é™   éš   é›   éœ   é   éž   é    c             C   s`   | d t tjƒ… tjkr0| t tjƒd …  d¡S trHd dd„ | D ƒ¡S d dd„ | D ƒ¡S d S )Nr
   Ú c             s   s   | ]}t  |t|ƒ¡V  qd S )N)ÚPDFDocEncodingÚgetÚchr)Ú.0Úbyter   r   r   ú	<genexpr>P   s    zdecode_text.<locals>.<genexpr>c             s   s   | ]}t  t|ƒ|¡V  qd S )N)r9   r:   Úord)r<   r=   r   r   r   r>   R   s    )Úlenr   r   Údecoder   Újoin)Úbr   r   r   Údecode_textL   s
    rD   c               @   s   e Zd ZdZdS )ÚPdfFormatErrorz\An error that probably indicates a syntactic or semantic error in the
    PDF file structureN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   rE   U   s   rE   c             C   s   | st |ƒ‚d S )N)rE   )Z	conditionZerror_messager   r   r   Úcheck_format_condition[   s    rJ   c               @   s4   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ ZdS )ÚIndirectReferencec             C   s   d|  S )Nz%s %s Rr   )Úselfr   r   r   Ú__str__b   s    zIndirectReference.__str__c             C   s   |   ¡  d¡S )Nzus-ascii)rM   r   )rL   r   r   r   Ú	__bytes__e   s    zIndirectReference.__bytes__c             C   s$   |j | j ko"|j| jko"|j| jkS )N)Ú	__class__Ú	object_idÚ
generation)rL   Úotherr   r   r   Ú__eq__h   s    zIndirectReference.__eq__c             C   s
   | |k S )Nr   )rL   rR   r   r   r   Ú__ne__m   s    zIndirectReference.__ne__c             C   s   t | j| jfƒS )N)ÚhashrP   rQ   )rL   r   r   r   Ú__hash__p   s    zIndirectReference.__hash__N)rF   rG   rH   rM   rN   rS   rT   rV   r   r   r   r   rK   `   s
   rK   ZIndirectReferenceTuplerP   rQ   c               @   s   e Zd Zdd„ ZdS )ÚIndirectObjectDefc             C   s   d|  S )Nz	%s %s objr   )rL   r   r   r   rM   u   s    zIndirectObjectDef.__str__N)rF   rG   rH   rM   r   r   r   r   rW   t   s   rW   c               @   sL   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Z	dd„ Z
dS )Ú	XrefTablec             C   s    i | _ i | _ddi| _d| _d S )Nr   i   F)Úexisting_entriesÚnew_entriesÚdeleted_entriesÚreading_finished)rL   r   r   r   Ú__init__z   s    
zXrefTable.__init__c             C   s2   | j r|| j|< n
|| j|< || jkr.| j|= d S )N)r\   rZ   rY   r[   )rL   ÚkeyÚvaluer   r   r   Ú__setitem__€   s
    

zXrefTable.__setitem__c             C   s*   y
| j | S  tk
r$   | j| S X d S )N)rZ   ÚKeyErrorrY   )rL   r^   r   r   r   Ú__getitem__ˆ   s    
zXrefTable.__getitem__c             C   s†   || j kr0| j | d d }| j |= || j|< nR|| jkrX| j| d d }|| j|< n*|| jkrn| j| }ntdt|ƒ d ƒ‚d S )Nr   z
object ID z+ cannot be deleted because it doesn't exist)rZ   r[   rY   Ú
IndexErrorÚstr)rL   r^   rQ   r   r   r   Ú__delitem__Ž   s    


zXrefTable.__delitem__c             C   s   || j kp|| jkS )N)rY   rZ   )rL   r^   r   r   r   Ú__contains__œ   s    zXrefTable.__contains__c             C   s.   t t| j ¡ ƒt| j ¡ ƒB t| j ¡ ƒB ƒS )N)r@   ÚsetrY   ÚkeysrZ   r[   )rL   r   r   r   Ú__len__Ÿ   s    zXrefTable.__len__c             C   s*   t | j ¡ ƒt | j ¡ ƒ t | j ¡ ƒB S )N)rg   rY   rh   r[   rZ   )rL   r   r   r   rh   ¤   s    zXrefTable.keysc          	   C   s`  t t| j ¡ ƒt| j ¡ ƒB ƒ}t t| j ¡ ƒƒ}| ¡ }| d¡ x|rZd }xPt|ƒD ]<\}}|d ksx|d |kr~|}q\|d |… }||d … }P q\W |}d }| td|d t	|ƒf ƒ¡ x”|D ]Œ}	|	| jkrð| td| j|	  ƒ¡ qÈ| 
d¡}
t|	|
kd|	|
f ƒ y|d }W n tk
r6   d}Y nX | td|| j|	 f ƒ¡ qÈW qHW |S )Ns   xref
r   z%d %d
r   z%010d %05d n 
z>expected the next deleted object ID to be %s, instead found %sz%010d %05d f 
)Úsortedrg   rZ   rh   r[   ÚtellÚwriteÚ	enumerater	   r@   ÚpoprJ   rc   )rL   Úfrh   Zdeleted_keysZ	startxrefÚprevÚindexr^   Zcontiguous_keysrP   Zthis_deleted_object_idZnext_in_linked_listr   r   r   rl   ª   sB    





zXrefTable.writeN)rF   rG   rH   r]   r`   rb   re   rf   ri   rh   rl   r   r   r   r   rX   y   s   rX   c               @   sl   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zedd„ ƒZ	e
eddƒƒe
dd„ dD ƒƒ Zdd„ ZeZdS )ÚPdfNamec             C   s6   t |tƒr|j| _nt |tƒr&|| _n| d¡| _d S )Nzus-ascii)Ú
isinstancerr   ÚnameÚbytesr   )rL   rt   r   r   r   r]   Ô   s
    


zPdfName.__init__c             C   s   | j  d¡S )Nzus-ascii)rt   rA   )rL   r   r   r   Úname_as_strÜ   s    zPdfName.name_as_strc             C   s    t |tƒr|j| jkp|| jkS )N)rs   rr   rt   )rL   rR   r   r   r   rS   ß   s    zPdfName.__eq__c             C   s
   t | jƒS )N)rU   rt   )rL   r   r   r   rV   ã   s    zPdfName.__hash__c             C   s   dt | jƒ S )NzPdfName(%s))Úreprrt   )rL   r   r   r   Ú__repr__æ   s    zPdfName.__repr__c             C   s   | t  |¡ƒS )N)Ú	PdfParserÚinterpret_name)ÚclsÚdatar   r   r   Úfrom_pdf_streamé   s    zPdfName.from_pdf_streamé!   é   c             c   s   | ]}t |ƒV  qd S )N)r?   )r<   Úcr   r   r   r>   í   s    zPdfName.<genexpr>z#%/()<>[]{}c             C   sz   t dƒ}xh| jD ]^}trB|| jkr.| |¡ qn| td| ƒ¡ qt|ƒ| jkr\| |¡ q| dt|ƒ ¡ qW t|ƒS )Nó   /z#%02Xs   #%02X)	Ú	bytearrayrt   r   Úallowed_charsÚappendÚextendr	   r?   ru   )rL   ÚresultrC   r   r   r   rN   ï   s    
zPdfName.__bytes__N)rF   rG   rH   r]   rv   rS   rV   rx   Úclassmethodr}   rg   Úrangerƒ   rN   rM   r   r   r   r   rr   Ó   s    rr   c               @   s   e Zd Zdd„ ZeZdS )ÚPdfArrayc             C   s   dd  dd„ | D ƒ¡ d S )Ns   [ ó    c             s   s   | ]}t |ƒV  qd S )N)Úpdf_repr)r<   Úxr   r   r   r>     s    z%PdfArray.__bytes__.<locals>.<genexpr>s    ])rB   )rL   r   r   r   rN     s    zPdfArray.__bytes__N)rF   rG   rH   rN   rM   r   r   r   r   r‰     s   r‰   c               @   s,   e Zd Zdd„ Zdd„ Zdd„ Zes(eZdS )ÚPdfDictc             C   sN   |dkr.t tdƒr"t | ||¡ qJ|| j|< nt|tƒrB| d¡}|| |< d S )Nr|   Ú__setattr__zus-ascii)Úhasattrr   rŽ   Ú__dict__rs   rd   r   )rL   r^   r_   r   r   r   rŽ   	  s    


zPdfDict.__setattr__c             C   s>  y| | }W nD t k
rP   y| | d¡ }W n t k
rJ   t|ƒ‚Y nX Y nX t|tƒrdt|ƒ}| d¡r:| d¡r†|dd … }d}t|ƒdkrÒ|d }t	|dd… ƒd	 }t|ƒd
krÒ|t	|dd
… ƒ7 }dd t|ƒd … }t
 |d t|ƒd … |¡}|dkr:|d	9 }|dkr&|d9 }t
 t |¡| ¡}|S )Nzus-asciiZDatezD:é   ÚZé   é   é   é<   é   é   z%Y%m%d%H%M%S)ú+ú-r™   éÿÿÿÿ)ra   r   ÚAttributeErrorrs   ru   rD   ÚendswithÚ
startswithr@   ÚintÚtimeZstrptimeZgmtimeÚcalendarZtimegm)rL   r^   r_   ZrelationshipÚoffsetÚformatr   r   r   Ú__getattr__  s4    



zPdfDict.__getattr__c             C   sr   t dƒ}xV|  ¡ D ]J\}}|d kr$qt|ƒ}| d¡ | tt|ƒƒ¡ | d¡ | |¡ qW | d¡ t|ƒS )Ns   <<ó   
rŠ   s   
>>)r‚   Úitemsr‹   r…   ru   rr   )rL   Úoutr^   r_   r   r   r   rN   2  s    


zPdfDict.__bytes__N)rF   rG   rH   rŽ   r¤   rN   r   rM   r   r   r   r   r     s
   r   c               @   s*   e Zd Zdd„ Zerdd„ Zndd„ ZdS )Ú	PdfBinaryc             C   s
   || _ d S )N)r|   )rL   r|   r   r   r   r]   D  s    zPdfBinary.__init__c             C   s   t dd dd„ | jD ƒ¡ ƒS )Nz<%s>r8   c             s   s   | ]}d | V  qdS )z%02XNr   )r<   rC   r   r   r   r>   I  s    z&PdfBinary.__bytes__.<locals>.<genexpr>)r	   rB   r|   )rL   r   r   r   rN   H  s    zPdfBinary.__bytes__c             C   s   dd  dd„ | jD ƒ¡ S )Nz<%s>r8   c             s   s   | ]}d t |ƒ V  qdS )z%02XN)r?   )r<   rC   r   r   r   r>   L  s    z$PdfBinary.__str__.<locals>.<genexpr>)rB   r|   )rL   r   r   r   rM   K  s    zPdfBinary.__str__N)rF   rG   rH   r]   r   rN   rM   r   r   r   r   r¨   C  s   
r¨   c               @   s   e Zd Zdd„ Zdd„ ZdS )Ú	PdfStreamc             C   s   || _ || _d S )N)Ú
dictionaryÚbuf)rL   rª   r«   r   r   r   r]   P  s    zPdfStream.__init__c             C   s‚   y| j j}W n tk
r"   | jS X |dkrjy| j j}W n tk
rT   | j j}Y nX tj| jt|ƒdS t	dt
| j jƒ ƒ‚d S )Ns   FlateDecode)Úbufsizez$stream filter %s unknown/unsupported)rª   ZFilterrœ   r«   ZDLÚLengthÚzlibZ
decompressrŸ   ÚNotImplementedErrorrw   )rL   ÚfilterZexpected_lengthr   r   r   rA   T  s    zPdfStream.decodeN)rF   rG   rH   r]   rA   r   r   r   r   r©   O  s   r©   c             C   s.  | dkrdS | dkrdS | d kr$dS t | tƒsLt | tƒsLt | tƒsLt | tƒrTt| ƒS t | tƒrlt| ƒ d¡S t | t	j
ƒr’dt	 d| ¡ d¡ d	 S t | tƒr¨tt| ƒƒS t | tƒr¾tt| ƒƒS trÌt | tƒsÚtsæt | tƒrætt| ƒƒS t | tƒr"|  d
d¡} |  dd¡} |  d	d¡} d|  d	 S t| ƒS d S )NTs   trueFs   falses   nullzus-asciis   (D:z%Y%m%d%H%M%SZó   )ó   \s   \\ó   (s   \(s   \))rs   rr   r   r‰   r¨   ru   rŸ   rd   r   r    Zstruct_timeZstrftimeÚdictÚlistr   Zunicoder‹   r   Úreplace)rŒ   r   r   r   r‹   e  s4    


r‹   c                @   sê  e Zd ZdZdqdd„Zdd„ Zd	d
„ Zdd„ Zdd„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zdrdd„Zdd„ Zdd „ Zd!d"„ Zed#d$„ ƒZd%d&„ Zdsd'd(„Zd)Zd*Zd+Zd,Zed- Zed. Zd/Zee e Ze  ed0 e d1 e d2 e d3 e d4 e d5 ej!¡Z"e  ed0 e d6 e d2 e d3 e d4 e ej!¡Z#d7d8„ Z$d9d:„ Z%e  e¡Z&e  ed; e d< ¡Z'e  ed= ¡Z(e  ed> e ¡Z)e*d?d@„ ƒZ+e  dA¡Z,e*dtdCdD„ƒZ-e  edE e d< ¡Z.e  edF e d< ¡Z/e  edG e d< ¡Z0e  edH e d< ¡Z1e  edI e d< ¡Z2e  edJ ¡Z3e  edK ¡Z4e  edL e dM ¡Z5e  edN ¡Z6e  edO e dO e dP e d< ¡Z7e  edO e dO e dQ e d< ¡Z8e  edR e d< ¡Z9e  dSe dT e dU ¡Z:e  edV ¡Z;e  edW e d< ¡Z<e*dudYdZ„ƒZ=e  d[¡Z>d\d]d^d_d`dadbdcdddedSdSd<d<dfdfe?d\ƒd]e?d^ƒd_e?d`ƒdae?dbƒdce?ddƒdee?dSƒdSe?d<ƒd<e?dfƒdfiZ@e*dgdh„ ƒZAe  edi e ¡ZBe  ed3 e d3 e e ¡ZCe  dj¡ZDdkdl„ ZEdvdmdn„ZFdwdodp„ZGdS )xry   z|Based on
    https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Supports PDF up to 1.4
    Nr   Úrbc             C   s  |r|rt dƒ‚|| _|| _|| _|| _d| _d| _|d k	rZ|d krZt||ƒ | _}d| _|d k	rŽ|  |¡ | _}d| _|sŽt	|dƒrŽ|j
| _i | _|r¢|  ¡  nTd | _| _tƒ | _d | _tƒ | _d | _i | _g | _g | _d | _d | _i | _tƒ | _d| j_|r|  ¡  d S )Nz4specify buf or f or filename, but not both buf and fFTrt   r   )ÚRuntimeErrorÚfilenamer«   ro   Ústart_offsetÚshould_close_bufÚshould_close_fileÚopenÚget_buf_from_filer   rt   Úcached_objectsÚread_pdf_infoÚfile_size_totalÚfile_size_thisr   ÚrootÚroot_refÚinfoÚinfo_refÚpage_tree_rootÚpagesÚ
orig_pagesÚ	pages_refÚlast_xref_section_offsetÚtrailer_dictrX   Ú
xref_tabler\   Úseek_end)rL   r¹   ro   r«   rº   Úmoder   r   r   r]   Š  sF    
zPdfParser.__init__c             C   s   | S )Nr   )rL   r   r   r   Ú	__enter__³  s    zPdfParser.__enter__c             C   s   |   ¡  dS )NF)Úclose)rL   Úexc_typeÚ	exc_valueÚ	tracebackr   r   r   Ú__exit__¶  s    zPdfParser.__exit__c             C   s   |   ¡  |  ¡  d S )N)Ú	close_bufrÎ   )rL   r   r   r   Ústart_writingº  s    zPdfParser.start_writingc             C   s.   y| j  ¡  W n tk
r"   Y nX d | _ d S )N)r«   rÑ   rœ   )rL   r   r   r   rÖ   ¾  s
    zPdfParser.close_bufc             C   s2   | j r|  ¡  | jd k	r.| jr.| j ¡  d | _d S )N)r»   rÖ   ro   r¼   rÑ   )rL   r   r   r   rÑ   Å  s
    
zPdfParser.closec             C   s   | j  dtj¡ d S )Nr   )ro   ÚseekÚosÚSEEK_END)rL   r   r   r   rÎ   Ì  s    zPdfParser.seek_endc             C   s   | j  d¡ d S )Ns	   %PDF-1.4
)ro   rl   )rL   r   r   r   Úwrite_headerÏ  s    zPdfParser.write_headerc             C   s   | j  d|f  d¡¡ d S )Nz%% %s
zutf-8)ro   rl   r   )rL   r   r   r   r   Úwrite_commentÒ  s    zPdfParser.write_commentc             C   sl   |   ¡  |  | j ¡ ¡| _|  d¡| _|  ¡  | j| jtdƒ| jd | j| jtdƒt	| j
ƒ| j
d | jS )Nr   s   Catalog)ÚTypeZPagess   Pages)rÝ   ZCountZKids)Údel_rootÚnext_object_idro   rk   rÄ   rÊ   Úrewrite_pagesÚ	write_objrr   r@   rÈ   )rL   r   r   r   Úwrite_catalogÕ  s    

zPdfParser.write_catalogc             C   s  g }x®t | jƒD ] \}}| j| }| j|j= | |tdƒ ¡ || jkrJqi }x | ¡ D ]\}}||| 	¡ < qXW | j
|d< | jd|Ž}x(t | jƒD ]\}	}
|
|kr’|| j|	< q’W qW xB|D ]:}x4|rò| j| }|j| jkrä| j|j= | dd ¡}qÀW qºW g | _d S )Ns   ParentÚParent)N)rm   rÉ   r¿   rÍ   rP   r„   rr   rÈ   r¦   rv   rÊ   Ú
write_pager:   )rL   Zpages_tree_nodes_to_deleteÚiZpage_refZ	page_infoZstringified_page_infor^   r_   Znew_page_refÚjZcur_page_refZpages_tree_node_refZpages_tree_noder   r   r   rà   ã  s,    






zPdfParser.rewrite_pagesc             C   sž   |r|   ¡  || _| jr(|  d | j¡| _| j | j¡}t| jƒ}| j|dœ}| j	d k	r`| j	|d< | jrp| j|d< || _	| j dt
t|ƒƒ td| ƒ ¡ d S )N)s   Roots   Sizes   Prevs   Infos   trailer
z
startxref
%d
%%%%EOF)rÞ   rÄ   rÅ   rá   rÆ   rÍ   rl   ro   r@   rË   ru   r   r	   )rL   Znew_root_refZ
start_xrefZnum_entriesrÌ   r   r   r   Úwrite_xref_and_trailer   s    



z PdfParser.write_xref_and_trailerc             O   sL   t |tƒr| j| }d|kr(tdƒ|d< d|kr:| j|d< | j|f|ž|ŽS )NrÝ   s   Pagerã   )rs   rŸ   rÈ   rr   rÊ   rá   )rL   ÚrefÚobjsÚdict_objr   r   r   rä     s    


zPdfParser.write_pagec             O   sÈ   | j }|d kr|  | ¡ ¡}n| ¡ |jf| j|j< | tt|Ž ƒ¡ | 	dd ¡}|d k	rft
|ƒ|d< |rx| t|ƒ¡ x|D ]}| t|ƒ¡ q~W |d k	rº| d¡ | |¡ | d¡ | d¡ |S )NÚstreamr­   s   stream
s   
endstream
s   endobj
)ro   rß   rk   rQ   rÍ   rP   rl   ru   rW   rn   r@   r‹   )rL   rè   ré   rê   ro   rë   Úobjr   r   r   rá     s$    




zPdfParser.write_objc             C   s.   | j d krd S | j| j j= | j| jd j= d S )Ns   Pages)rÄ   rÍ   rP   rÃ   )rL   r   r   r   rÞ   /  s    
zPdfParser.del_rootc             C   sT   t | dƒr|  ¡ S t | dƒr$|  ¡ S ytj|  ¡ dtjdS  tk
rN   dS X d S )NÚ	getbufferÚgetvaluer   )Úaccessó    )r   rí   rî   ÚmmapÚfilenoZACCESS_READÚ
ValueError)ro   r   r   r   r¾   5  s    

zPdfParser.get_buf_from_filec             C   sü   t | jƒ| _| j| j | _|  ¡  | jd | _| j dd ¡| _	t
|  | j¡ƒ| _| j	d krdt
ƒ | _nt
|  | j	¡ƒ| _td| jkdƒ t| jd dkdƒ td| jkdƒ tt| jd tƒd	ƒ | jd | _|  | j¡| _|  | j¡| _| jd d … | _d S )
Ns   Roots   Infos   Typez/Type missing in Roots   Catalogz/Type in Root is not /Catalogs   Pagesz/Pages missing in Rootz+/Pages in Root is not an indirect reference)r@   r«   rÁ   rº   rÂ   Úread_trailerrÌ   rÄ   r:   rÆ   r   Úread_indirectrÃ   rÅ   rJ   rs   rK   rÊ   rÇ   Úlinearize_page_treerÈ   rÉ   )rL   r   r   r   rÀ   A  s(    

zPdfParser.read_pdf_infoc             C   sX   yt t| j ¡ ƒd dƒ}W n tk
r:   t ddƒ}Y nX |d k	rT|df| j|j< |S )Nr   r   )rK   ÚmaxrÍ   rh   ró   rP   )rL   r¢   Z	referencer   r   r   rß   [  s    zPdfParser.next_object_ids   [][()<>{}/%]s$   [][()<>{}/%\000\011\012\014\015\040]s   [\000\011\012\014\015\040]s#   [\000\011\012\014\015\0400-9a-fA-F]ó   *ó   +s   [\r\n]+s   trailers   \<\<(.*\>\>)s	   startxrefs   ([0-9]+)s   %%EOFó   $s   \<\<(.*?\>\>)c             C   sÄ   t | jƒd }|| jk r| j}| j | j|¡}t|dƒ |}x$|r`|}| j | j| ¡ d ¡}q>W |sj|}| d¡}t| d¡ƒ| _	|  
|¡| _tƒ | _| j| j	d d| jkrÀ|  | jd ¡ d S )Ni @  ztrailer end not foundé   r   r‘   )Úxref_section_offsets   Prev)r@   r«   rº   Úre_trailer_endÚsearchrJ   ÚstartÚgrouprŸ   rË   Úinterpret_trailerrÌ   rX   rÍ   Úread_xref_tableÚread_prev_trailer)rL   Zsearch_start_offsetÚmZ
last_matchÚtrailer_datar   r   r   rô   v  s$    



zPdfParser.read_trailerc             C   sv   | j |d}| j | j||d … ¡}t|dƒ | d¡}tt| d¡ƒ|kdƒ |  |¡}d|krr|  |d ¡ d S )N)rü   i @  zprevious trailer not foundr   r‘   zGxref section offset in previous trailer doesn't match what was expecteds   Prev)	r  Úre_trailer_prevrþ   r«   rJ   r   rŸ   r  r  )rL   rü   Ztrailer_offsetr  r  rÌ   r   r   r   r  ‹  s    


zPdfParser.read_prev_trailers   /([!-$&'*-.0-;=?-Z\\^-z|~]+)(?=r±   s   \<\<s   \>\>c             C   sÂ   i }d}x|| j  ||¡}|sV| j ||¡}t|o>| ¡ t|ƒkdt||d … ƒ ƒ P |  | d¡¡}|  	|| ¡ ¡\}}|||< q
W td|koœt
|d tƒdƒ td|ko¸t
|d tƒdƒ |S )Nr   z+name not found in trailer, remaining data: r   s   Sizez&/Size not in trailer or not an integers   Rootz1/Root not in trailer or not an indirect reference)Úre_nameÚmatchÚre_dict_endrJ   Úendr@   rw   rz   r   Ú	get_valuers   rŸ   rK   )r{   r  Ztrailerr¢   r  r^   r_   r   r   r   r  ¡  s,    zPdfParser.interpret_trailers   ([^#]*)(#([0-9a-fA-F]{2}))?Fc             C   sr   d}xR| j  |¡D ]B}| d¡rF|| d¡t | d¡ d¡¡ 7 }q|| d¡7 }qW |rf| d¡S t|ƒS d S )Nrð   é   r   zus-asciizutf-8)Úre_hashes_in_nameÚfinditerr   r‚   ÚfromhexrA   ru   )r{   ÚrawZas_textrt   r  r   r   r   rz   ¼  s    


zPdfParser.interpret_names   null(?=s   true(?=s   false(?=s   ([-+]?[0-9]+)(?=s)   ([-+]?([0-9]+\.[0-9]*|[0-9]*\.[0-9]+))(?=s   \[ó   ]s   \<(s   *)\>s   \(s   ([-+]?[0-9]+)s   R(?=s   obj(?=s	   endobj(?=r³   s	   %[^\r\n]*s   )*s   stream\r?\ns   endstream(?=r›   c          
   C   sØ  |dkrdS | j  ||¡}|r&| ¡ }| j ||¡}|rètt| d¡ƒdkdƒ tt| d¡ƒdkdƒ t|d kp’|tt| d¡ƒt| d¡ƒƒkdƒ | j|| ¡ |d d\}}|d krÄ|d fS | j	 ||¡}t|d	ƒ || ¡ fS t| d
ƒ | j
 ||¡}|r^tt| d¡ƒdkdƒ tt| d¡ƒdkdƒ tt| d¡ƒt| d¡ƒƒ| ¡ fS | j ||¡}|rÂ| ¡ }i }| j ||¡}xv|s| j|||d d\}}|d kr¾|d fS | j|||d d\}	}|	||< |d krð|d fS | j ||¡}qŽW | ¡ }| j ||¡}|r²yt|d ƒ}
W n0 tttfk
r`   td| dd ¡ ƒ‚Y nX || ¡ | ¡ |
 … }| j || ¡ |
 ¡}t|dƒ | ¡ }tt|ƒ|ƒ}nt|ƒ}||fS | j ||¡}|rL| ¡ }g }| j ||¡}xN|s>| j|||d d\}	}| |	¡ |d kr,|d fS | j ||¡}qòW || ¡ fS | j ||¡}|rld | ¡ fS | j ||¡}|rŒd| ¡ fS | j ||¡}|r¬d| ¡ fS | j ||¡}|rÜt|  | d¡¡ƒ| ¡ fS | j ||¡}|rt| d¡ƒ| ¡ fS | j  ||¡}|r0t!| d¡ƒ| ¡ fS | j" ||¡}|r”t#dd„ | d¡D ƒƒ}t$|ƒd dkr|| t%dƒ¡ t# &| 'd¡¡| ¡ fS | j( ||¡}|r¸|  )|| ¡ ¡S tdt*|||d … ƒ ƒ‚d S )Nr   )NNr   z<indirect object definition: object ID must be greater than 0r‘   z;indirect object definition: generation must be non-negativez2indirect object definition different than expected)Úmax_nestingz(indirect object definition end not foundz$indirect object definition not foundz;indirect object reference: object ID must be greater than 0z:indirect object reference: generation must be non-negatives   Lengthz)bad or missing Length in stream dict (%r)zstream end not foundTFc             S   s   g | ]}|d kr|‘qS )s   0123456789abcdefABCDEFr   )r<   rC   r   r   r   ú
<listcomp>U  s    z'PdfParser.get_value.<locals>.<listcomp>ó   0zus-asciizunrecognized object: é    )+Ú
re_commentr  r
  Úre_indirect_def_startrJ   rŸ   r   rK   r  Úre_indirect_def_endÚre_indirect_referenceÚre_dict_startr	  Úre_stream_startÚ	TypeErrorra   ró   rE   r:   Úre_stream_endr©   r   Úre_array_startÚre_array_endr„   Úre_nullÚre_trueÚre_falser  rr   rz   Úre_intÚre_realÚfloatÚre_string_hexr‚   r@   r?   r  rA   Úre_string_litÚget_literal_stringrw   )r{   r|   r¢   Úexpect_indirectr  r  Úobjectr†   r^   r_   Z
stream_lenZstream_dataZ
hex_stringr   r   r   r  ê  sÞ    

&




zPdfParser.get_valuesF   (\\[nrtbf()\\])|(\\[0-9]{1,3})|(\\(\r\n|\r|\n))|(\r\n|\r|\n)|(\()|(\))ó   nr¥   ó   ró   ó   tó   	ó   bó   ó   fó   r²   c             C   s  d}t ƒ }xø| j ||¡D ]æ}| ||| ¡ … ¡ | d¡rZ| | j| d¡d  ¡ nž| d¡r„| t| d¡dd … dƒ¡ nt| d¡rnh| d¡r¦| d¡ nR| d¡rÄ| d	¡ |d7 }n4| d
¡rø|dkræt	|ƒ| 
¡ fS | d¡ |d8 }| 
¡ }qW tdƒ‚d S )Nr   r   r‘   é   r  é   r¥   é   r³   é   r±   zunfinished literal string)r‚   Úre_lit_str_tokenr  r…   rÿ   r   Úescaped_charsr„   rŸ   ru   r
  rE   )r{   r|   r¢   Znesting_depthr†   r  r   r   r   r(  x  s,    

 






zPdfParser.get_literal_strings   xrefs+   ([0-9]{10}) ([0-9]{5}) ([fn])( \r| \n|\r\n)c             C   s  d}| j  | j|| j ¡}t|dƒ | ¡ }xà| j | j|¡}|sNt|dƒ P d}| ¡ }t| d¡ƒ}t| d¡ƒ}x’t	||| ƒD ]€}| j
 | j|¡}t|dƒ | ¡ }| d¡d	k}t| d¡ƒ}	|s†t| d¡ƒ|	f}
t|| jkpö| j| |
kd
ƒ |
| j|< q†W q.W |S )NFzxref section start not foundzxref subsection start not foundTr   r‘   zxref entry not foundr  r2  z)xref entry duplicated (and not identical))Úre_xref_section_startr  r«   rº   rJ   r
  Úre_xref_subsection_startrŸ   r   rˆ   Úre_xref_entryrÍ   )rL   rü   Zsubsection_foundr  r¢   Zfirst_objectZnum_objectsrå   Zis_freerQ   Z	new_entryr   r   r   r  ˜  s:    


zPdfParser.read_xref_tablec             C   sh   | j |d  \}}t||d kd|d |d ||f ƒ | j| j|| j t|Ž |dd }|| j|< |S )Nr   r   zgexpected to find generation %s for object ID %s in xref table, instead found generation %s at offset %s)r)  r  )rÍ   rJ   r  r«   rº   rK   r¿   )rL   rè   r  r¢   rQ   r_   r   r   r   rõ   ·  s    

zPdfParser.read_indirectc             C   sn   |d kr| j }t|d dkdƒ g }xD|d D ]8}|  |¡}|d dkrT| |¡ q.| | j|d¡ q.W |S )Ns   Types   Pagesz%/Type of page tree node is not /Pagess   Kidss   Page)Únode)rÇ   rJ   rõ   r„   r…   rö   )rL   r=  rÈ   ZkidZ
kid_objectr   r   r   rö   Ä  s    
zPdfParser.linearize_page_tree)NNNr   r·   )N)N)F)Nr›   )r›   )N)HrF   rG   rH   rI   r]   rÐ   rÕ   r×   rÖ   rÑ   rÎ   rÛ   rÜ   râ   rà   rç   rä   rá   rÞ   Ústaticmethodr¾   rÀ   rß   Z	delimiterZdelimiter_or_wsZ
whitespaceZwhitespace_or_hexZwhitespace_optionalZwhitespace_mandatoryZnewline_onlyÚnewlineÚreÚcompileÚDOTALLrý   r  rô   r  Zre_whitespace_optionalr  r  r	  r‡   r  r  rz   r   r!  r"  r#  r$  r  r  r&  r'  r  r  r  r  r  r  r  r8  r?   r9  r(  r:  r;  r<  r  rõ   rö   r   r   r   r   ry   „  sÈ    
(
	

62

"x

ry   )r¡   r   Úcollectionsrñ   rÙ   r@  r    r®   Z_utilr   r   ÚImportErrorr	   r   r9   rD   r¸   rE   rJ   Ú
namedtuplerK   rW   rX   rr   rµ   r‰   r   r¨   r©   r‹   ry   r   r   r   r   Ú<module>   sŽ   
	Z.;