ó
¦–Õ\c           @` së   d  d l  m Z m Z m Z d  d l Z d  d l Z d  d l Z d d l m Z d d l	 m
 Z
 d d l m Z d d Z e j d
 ƒ Z e j d ƒ Z d e f d „  ƒ  YZ e e j d <e e j d <d e f d „  ƒ  YZ d „  Z d S(   i    (   t   print_functiont   divisiont   absolute_importNi   (   t   core(   t   generic_globi   (   t   urlparsei   i   s%   <a\s+(?:[^>]*?\s+)?href=(["'])(.*?)\1s&   (http[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)t   HTTPFileSystemc           B` sn   e  Z d  Z d Z e d e d „ Z e d „ Z d „  Z	 d „  Z
 d „  Z d d d „ Z d	 „  Z d
 „  Z RS(   s¤   
    Simple File-System for fetching data via HTTP(S)

    Unlike other file-systems, HTTP is limited in that it does not provide glob
    or write capability.
    t   /c         K` sI   | d k	 r | n t |  _ | |  _ | |  _ | |  _ t j ƒ  |  _ d S(   s¢  
        Parameters
        ----------
        block_size: int
            Blocks to read bytes; if 0, will default to raw requests file-like
            objects instead of HTTPFile instances
        simple_links: bool
            If True, will consider both HTML <a> tags and anything that looks
            like a URL; if False, will consider only the former.
        same_schema: bool
            For ls, glob: only return paths having the same schema
            (http/https) as the original URL.
        storage_options: key-value
            May be credentials, e.g., `{'auth': ('username', 'pword')}` or any
            other parameters passed on to requests
        N(	   t   Nonet   DEFAULT_BLOCK_SIZEt
   block_sizet   simple_linkst   same_schemat   kwargst   requestst   Sessiont   session(   t   selfR   R
   R   t   storage_options(    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   __init__   s    				c   	      C` s  t  j | |  j  } |  j rC t j | j ƒ t j | j ƒ } n t j | j ƒ } t ƒ  } t	 | ƒ } x;| D]3} t
 | t ƒ r“ | d } n  | j d ƒ r$|  j rê | j d d ƒ d | j d d ƒ d k r!| j | ƒ q!q¤| j d d ƒ j | j d d ƒ ƒ r¤| j | ƒ q¤qq | j d ƒ rgt | ƒ d k rg| j | j d | j | ƒ qq | d k rq | j d j | j d ƒ | j d ƒ g ƒ ƒ qq qq W| | | d h } | rüg  | D]/ } i | d 6|  j | ƒ rîd n d d 6^ qÉSt t | ƒ ƒ Sd  S(   Ni   t   httpt   :i    t   httpsR   s   ://s   ..s   ../t    t   namet	   directoryt   filet   type(   s   ..s   ../R   (   R   t   getR   R   t   ex2t   findallt   textt   ext   setR   t
   isinstancet   tuplet
   startswithR   t   splitt   addt   replacet   lent   schemet   netloct   joint   rstript   lstript   isdirt   listt   sorted(	   R   t   urlt   detailt   rt   linkst   outt   partst   lt   u(    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   ls3   s2    	%		,!"57c         C` s
   t  ‚ d S(   s7   Make any intermediate directories to make path writableN(   t   NotImplementedError(   R   R1   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   mkdirsT   s    c         C` s   t  S(   N(   t   True(   R   t   path(    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyR.   X   s    c         C` s   t  t |  t | ƒ ƒ S(   N(   R0   R   t	   posixpath(   R   R=   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   glob[   s    t   rbc         K` sž   | d k r t  ‚ n  | d k	 r' | n |  j } | rO t | |  j | |  j  S|  j j ƒ  } t | d <|  j j | |  } | j	 ƒ  t | j
 _ | j
 Sd S(   sh  Make a file-like object

        Parameters
        ----------
        url: str
            Full URL with protocol
        mode: string
            must be "rb"
        block_size: int or None
            Bytes to download in one request; use instance value if None.
        kwargs: key-value
            Any other parameters, passed to requests calls
        R@   t   streamN(   R:   R   R
   t   HTTPFileR   R   t   copyR<   R   t   raise_for_statust   rawt   decode_content(   R   R1   t   modeR
   R   t   kwR3   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   open^   s    	

c         C` s    d d l  m } | | |  j ƒ S(   s;   Unique identifier; assume HTTP files are static, unchangingi    (   t   tokenize(   t	   dask.baseRJ   R   (   R   R1   RJ   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   ukeyy   s    c         C` s   t  | d |  j |  j S(   s!   Size in bytes of the file at pathR   (   t	   file_sizeR   R   (   R   R1   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   size~   s    N(   t   __name__t
   __module__t   __doc__t   sepR<   R   R   t   FalseR9   R;   R.   R?   RI   RL   RN   (    (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyR      s   !				R   R   RB   c           B` sª   e  Z d  Z d d d „ Z d d „ Z d „  Z d d „ Z d „  Z d „  Z	 d	 „  Z
 d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   sø  
    A file-like object pointing to a remove HTTP(S) resource

    Supports only reading, with read-ahead of a predermined block-size.

    In the case that the server does not supply the filesize, only reading of
    the complete file in one go is supported.

    Parameters
    ----------
    url: str
        Full URL of the remote resource, including the protocol
    session: requests.Session or None
        All calls will be made within this session, to avoid restarting
        connections where the server allows this
    block_size: int or None
        The amount of read-ahead to do, in bytes. Default is 5MB, or the value
        configured for the FileSystem creating this file.
    kwargs: all other key-values are passed to reqeuests calls.
    c         K` s  | |  _  | |  _ d |  _ | d  k	 r- | n	 t j ƒ  |  _ | d  k	 rN | n t |  _ y% t	 | |  j d t
 |  j |  _ Wnn t t j f k
 r¡ d  |  _ nL t j k
 rì } | j j } | d k s× | d k rà | ‚ n  d  |  _ n Xd |  _ t |  _ d  |  _ d  |  _ d  S(	   Ni    t   allow_redirectsiô  i‘  i“  i”  R   (   i‘  i“  i”  (   R1   R   t   locR   R   R   R   R	   t	   blocksizeRM   R<   RN   t
   ValueErrort	   HTTPErrort   responset   status_codet   cacheRS   t   closedt   startt   end(   R   R1   R   R
   R   t   errt   code(    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyR      s(    			!					i    c         C` s»   |  j  d k r6 | | f d d	 g k r6 t d ƒ ‚ n  | d k rK | } nH | d k rg |  j | } n, | d k rƒ |  j  | } n t d | ƒ ‚ | d k  r® t d ƒ ‚ n  | |  _ | S(
   st  Set file position

        Parameters
        ----------
        where: int
            Location to set
        whence: int (default 0)
            If zero, set from start of file (value should be positive); if 1,
            set relative to current position; if 2, set relative to end of file
            (value shoulf be negative)

        Returns the position.
        i    i   s+   Cannot seek since size of file is not knowni   s'   Whence must be in [1, 2, 3], but got %ss   Seek before start of fileN(   i    i    (   i    i   (   RN   R   RW   RU   (   R   t   wheret   whencet   nloc(    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   seek¹   s    '		c         C` s   |  j  S(   s   Get current file byte position(   RU   (   R   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   tellÖ   s    iÿÿÿÿc         C` sþ   | d k r d S| d k  r5 |  j  d k r5 |  j ƒ  S|  j d k rl | d k r_ t d ƒ ‚ ql |  j ƒ  Sn  | d k  sŽ |  j  | |  j k rš |  j } n |  j  | } |  j  |  j k r½ d S|  j |  j  | ƒ |  j |  j  |  j | |  j !} | |  _  | S(   s5  Read bytes from file

        Parameters
        ----------
        length: int
            Read up to this many bytes. If negative, read all content to end of
            file. If the server has not supplied the filesize, attempting to
            read only part of the data will raise a ValueError.
        i    R   s(   File size is unknown, must read all dataN(   RU   t
   _fetch_allRN   R   RW   t   _fetchR[   R]   (   R   t   lengthR^   t   data(    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   readÚ   s"    

"!	c         C` s”  |  j  d k rR |  j d k rR | |  _  | |  j |  _ |  j | |  j ƒ |  _ n>| |  j  k  rß |  j | |  j k r® | |  _  | |  j |  _ |  j |  j  |  j ƒ |  _ q|  j | |  j  ƒ } | |  _  | |  j |  _ n± | |  j k r|  j |  j k rd S| |  j |  j k rQ| |  _  | |  j |  _ |  j |  j  |  j ƒ |  _ q|  j |  j | |  j ƒ } | |  j |  _ |  j | |  _ n  d S(   s9   Set new bounds for data cache and fetch data, if requiredN(   R]   R   R^   RV   t   _fetch_rangeR[   RN   (   R   R]   R^   t   new(    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyRg   þ   s,    				c         C` sƒ   |  j  j |  j |  j  } | j ƒ  | j } t | ƒ } | |  j k  rp d |  _ | |  _	 | |  _
 | |  _ n  t | ƒ |  _ | S(   sµ   Read whole file in one shot, without caching

        This is only called when size is None or position is still at zero,
        and read() is called without a byte-count.
        i    (   R   R   R1   R   RD   t   contentR(   RV   R]   R^   R[   RN   RU   (   R   R3   R5   R7   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyRf     s    
				c   	      C` s]  |  j  j ƒ  } | j d i  ƒ } d | | d f | d <|  j j |  j d | d t | } | j ƒ  | j d k r} | j	 Sd | j
 k rÓ t | j
 d ƒ } | | | k r¶ | j	 St d | | | f ƒ ‚ n  d	 } g  } xn | j d
 d ƒ D]Z } | rK| j | ƒ | t | ƒ 7} | | | k rLt d | | | f ƒ ‚ qLqò Pqò Wd j | ƒ S(   s3  Download a block of data

        The expectation is that the server returns only the requested bytes,
        with HTTP code 206. If this is not the case, we first check the headers,
        and then stream the output - if the data size is bigger than we
        requested, an exception is raised.
        t   headerss   bytes=%i-%ii   t   RangeRA   iÎ   s   Content-Lengths'   Got more bytes (%i) than requested (%i)i    t
   chunk_sizei   i   s/   Got more bytes so far (>%i) than requested (%i)R   i   (   R   RC   t   popR   R   R1   R<   RD   RZ   Rm   Rn   t   intRW   t   iter_contentt   appendR(   R+   (	   R   R]   R^   R   Rn   R3   t   clR5   t   chunk(    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyRk   .  s2    $
c         C` s   d |  _  |  S(   Ni    (   RU   (   R   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt	   __enter__U  s    	c         G` s   |  j  ƒ  d  S(   N(   t   close(   R   t   args(    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   __exit__Y  s    c         C` s
   t  ‚ d  S(   N(   R:   (   R   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   __iter__\  s    c         C` s
   t  ‚ d  S(   N(   R:   (   R   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   write`  s    c         C` s   d  S(   N(    (   R   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   flushc  s    c         C` s   t  |  _ d  S(   N(   R<   R\   (   R   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyRx   f  s    c         C` s   t  S(   N(   R<   (   R   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   seekablei  s    c         C` s   t  S(   N(   RS   (   R   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   writablel  s    c         C` s   t  S(   N(   R<   (   R   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   readableo  s    N(   RO   RP   RQ   R   R   Rd   Re   Rj   Rg   Rf   Rk   Rw   Rz   R{   R|   R}   Rx   R~   R   R€   (    (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyRB   ‡   s"   	$			'								c         K` s   | j  ƒ  } | j d t ƒ } | j d i  ƒ } d | d <| j |  d | | } | j ƒ  d | j k r| t | j d ƒ St d |  ƒ ‚ d S(   sº   Call HEAD on the server to get file size

    Default operation is to explicitly allow redirects and use encoding
    'identity' (no compression) to get the true size of the target.
    RT   Rn   t   identitys   Accept-Encodings   Content-Lengths    Server did not supply size of %sN(	   RC   Rq   R<   R   t   headRD   Rn   Rr   RW   (   R1   R   R   t   arR‚   R3   (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyRM   s  s    

i   (   t
   __future__R    R   R   R>   t   reR   R   R   R?   R   t   compatibilityR   R	   t   compileR    R   t   objectR   t   _filesystemsRB   RM   (    (    (    s.   lib/python2.7/site-packages/dask/bytes/http.pyt   <module>   s   
rì