ó
	Q˜[c           @` s³   d  d l  m Z m Z m Z d  d l m Z d  d l m Z d  d l m	 Z
 m Z e
 e _	 e e _ d g Z d e f d „  ƒ  YZ d e f d	 „  ƒ  YZ d
 e f d „  ƒ  YZ d S(   i    (   t   absolute_importt   divisiont   unicode_literals(   t   str(   t   urllib(   t   parset   requestu   RobotFileParsert   RobotFileParserc           B` sb   e  Z d  Z d d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d	 „  Z
 d
 „  Z RS(   us    This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    u    c         C` s>   g  |  _  d  |  _ t |  _ t |  _ |  j | ƒ d |  _ d  S(   Ni    (   t   entriest   Nonet   default_entryt   Falset   disallow_allt	   allow_allt   set_urlt   last_checked(   t   selft   url(    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyt   __init__   s    				c         C` s   |  j  S(   u·   Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        (   R   (   R   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyt   mtime&   s    c         C` s   d d l  } | j  ƒ  |  _ d S(   uY   Sets the time the robots.txt file was last fetched to the
        current time.

        i    N(   t   timeR   (   R   R   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyt   modified/   s    c         C` s2   | |  _  t j j | ƒ d d !\ |  _ |  _ d S(   u,   Sets the URL referring to a robots.txt file.i   i   N(   R   R   R   t   urlparset   hostt   path(   R   R   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR   7   s    	c         C` s—   y t  j j |  j ƒ } WnO t  j j k
 rj } | j d k rL t |  _ q“ | j d k r“ t |  _	 q“ n) X| j
 ƒ  } |  j | j d ƒ j ƒ  ƒ d S(   u4   Reads the robots.txt URL and feeds it to the parser.i‘  i“  i  u   utf-8N(   i‘  i“  (   R   R   t   urlopenR   t   errort	   HTTPErrort   codet   TrueR   R   t   readR   t   decodet
   splitlines(   R   t   ft   errt   raw(    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR   <   s    c         C` sA   d | j  k r- |  j d  k r= | |  _ q= n |  j j | ƒ d  S(   Nu   *(   t
   useragentsR
   R	   R   t   append(   R   t   entry(    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyt
   _add_entryI   s    c         C` s  d } t  ƒ  } xÝ| D]Õ} | sn | d k r@ t  ƒ  } d } qn | d k rn |  j | ƒ t  ƒ  } d } qn n  | j d ƒ } | d k r– | |  } n  | j ƒ  } | s® q n  | j d d ƒ } t | ƒ d k r | d j ƒ  j ƒ  | d <t j j	 | d j ƒ  ƒ | d <| d d k r^| d k rA|  j | ƒ t  ƒ  } n  | j
 j | d ƒ d } që| d d k r£| d k rè| j j t | d t ƒ ƒ d } qèqë| d d k rë| d k rè| j j t | d t ƒ ƒ d } qèqëq q W| d k r|  j | ƒ n  d	 S(
   u”   Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        i    i   i   u   #u   :u
   user-agentu   disallowu   allowN(   t   EntryR'   t   findt   stript   splitt   lent   lowerR   R   t   unquoteR$   R%   t	   rulelinest   RuleLineR   R   (   R   t   linest   stateR&   t   linet   i(    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR   R   sJ    
				 	c         C` sÖ   |  j  r t S|  j r t St j j t j j | ƒ ƒ } t j j d d | j	 | j
 | j | j f ƒ } t j j | ƒ } | s‰ d } n  x- |  j D]" } | j | ƒ r“ | j | ƒ Sq“ W|  j rÒ |  j j | ƒ St S(   u=   using the parsed robots.txt decide if useragent can fetch urlu    u   /(   R   R   R   R   R   R   R   R.   t
   urlunparseR   t   paramst   queryt   fragmentt   quoteR   t
   applies_tot	   allowanceR
   (   R   t	   useragentR   t
   parsed_urlR&   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyt	   can_fetch…   s     				c         C` s-   d j  g  |  j D] } t | ƒ d ^ q ƒ S(   Nu    u   
(   t   joinR   R   (   R   R&   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyt   __str__œ   s    (   t   __name__t
   __module__t   __doc__R   R   R   R   R   R'   R   R>   R@   (    (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR      s   								3	R0   c           B` s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   uo   A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.c         C` s>   | d k r | r t  } n  t j j | ƒ |  _ | |  _ d  S(   Nu    (   R   R   R   R9   R   R;   (   R   R   R;   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR   £   s    	c         C` s   |  j  d k p | j |  j  ƒ S(   Nu   *(   R   t
   startswith(   R   t   filename(    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR:   ª   s    c         C` s   |  j  r d p d d |  j S(   Nu   Allowu   Disallowu   : (   R;   R   (   R   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR@   ­   s    (   RA   RB   RC   R   R:   R@   (    (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR0       s   		R(   c           B` s2   e  Z d  Z d „  Z d „  Z d „  Z d „  Z RS(   u?   An entry has one or more user-agents and zero or more rulelinesc         C` s   g  |  _  g  |  _ d  S(   N(   R$   R/   (   R   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR   ³   s    	c         C` sj   g  } x' |  j  D] } | j d | d g ƒ q Wx* |  j D] } | j t | ƒ d g ƒ q: Wd j | ƒ S(   Nu   User-agent: u   
u    (   R$   t   extendR/   R   R?   (   R   t   rett   agentR3   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR@   ·   s    c         C` s]   | j  d ƒ d j ƒ  } x= |  j D]2 } | d k r9 t S| j ƒ  } | | k r# t Sq# Wt S(   u2   check if this entry applies to the specified agentu   /i    u   *(   R+   R-   R$   R   R   (   R   R<   RH   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR:   ¿   s    c         C` s.   x' |  j  D] } | j | ƒ r
 | j Sq
 Wt S(   uZ   Preconditions:
        - our agent applies to this entry
        - filename is URL decoded(   R/   R:   R;   R   (   R   RE   R3   (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR;   Ì   s    (   RA   RB   RC   R   R@   R:   R;   (    (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyR(   ±   s
   			N(   t
   __future__R    R   R   t   future.builtinsR   t   future.backportsR   t   future.backports.urllibR   t   _parseR   t   _requestt   __all__t   objectR   R0   R(   (    (    (    sB   lib/python2.7/site-packages/future/backports/urllib/robotparser.pyt   <module>   s   			ˆ