ó
¦–Õ\c        
   @   sƒ   d  d l  Z d  d l Z d d l m Z d d l m Z d d l m	 Z	 m
 Z
 d d d d d d d
 d d d d „	 Z d „  Z d S(   iÿÿÿÿNi   (   t   delayed(   t   string_typesi   (   t   from_delayedt   from_pandasi   i   i   i   c   $   	   K   s5  d d l  } d d l  m } d d l m } | d k rG t d ƒ ‚ n  | d k rY i  n | } | j | |  } | j ƒ  } t |  t	 ƒ r³ | j
 |  | d t d | d |	 ƒ}  n  t | t	 ƒ rÏ |  j | n | } t | t	 | j f ƒ st d	 | ƒ ‚ n  | r| rt d
 ƒ ‚ n  | rWg  | D]7 } t | t	 ƒ rK|  j | n | ^ q)n t |  j ƒ } | | k rž| j t | t	 ƒ r”|  j | n | ƒ n  t | t	 ƒ rº| | d <n | j | d <|
 d k r| j | ƒ j | ƒ j |  ƒ } t j | | |  } | j rR|  j } |  j }	 t j | | d |	 d | ƒ} t | d d ƒS| j d t d t ƒ j ƒ  d } | d  }
 n' | d k r¨| d k r¨t d ƒ ‚ n  | d k rX| d k r+| j | j j | ƒ | j j | ƒ g ƒ j |  ƒ } t j | | ƒ } | j  d \ } } | j! d } n | \ } } t j" | ƒ j# } | d k r³| j | j j$ | ƒ g ƒ j |  ƒ } t j | | ƒ d d } t% | | | ƒ p­d } n  | j& d k rt j' d | d | d d | | j( ƒ  | ƒ j) ƒ  } | | d <| | d <qX| j& d k r@t* j+ | | | d ƒ j) ƒ  } qXt d j, | ƒ ƒ ‚ n  g  } | d  | d } } x¬ t- t. | | ƒ ƒ D]• \ }  \ }! }" |  t/ | ƒ d k r½| |" k n	 | |" k  }# | j | ƒ j0 | j1 | |! k |# ƒ ƒ j |  ƒ } | j t2 t3 ƒ | | |
 |  ƒ q‰Wt4 | |
 d | ƒS(    s/  
    Create dataframe from an SQL table.

    If neither divisions or npartitions is given, the memory footprint of the
    first few rows will be determined, and partitions of size ~256MB will
    be used.

    Parameters
    ----------
    table : string or sqlalchemy expression
        Select columns from here.
    uri : string
        Full sqlalchemy URI for the database connection
    index_col : string
        Column which becomes the index, and defines the partitioning. Should
        be a indexed column in the SQL server, and any orderable type. If the
        type is number or time, then partition boundaries can be inferred from
        npartitions or bytes_per_chunk; otherwide must supply explicit
        ``divisions=``.
        ``index_col`` could be a function to return a value, e.g.,
        ``sql.func.abs(sql.column('value')).label('abs(value)')``.
        Labeling columns created by functions or arithmetic operations is
        required.
    divisions: sequence
        Values of the index column to split the table by. If given, this will
        override npartitions and bytes_per_chunk. The divisions are the value
        boundaries of the index column used to define the partitions. For
        example, ``divisions=list('acegikmoqsuwz')`` could be used to partition
        a string column lexographically into 12 partitions, with the implicit
        assumption that each partition contains similar numbers of records.
    npartitions : int
        Number of partitions, if divisions is not given. Will split the values
        of the index column linearly between limits, if given, or the column
        max/min. The index column must be numeric or time for this to work
    limits: 2-tuple or None
        Manually give upper and lower range of values for use with npartitions;
        if None, first fetches max/min from the DB. Upper limit, if
        given, is inclusive.
    columns : list of strings or None
        Which columns to select; if None, gets all; can include sqlalchemy
        functions, e.g.,
        ``sql.func.abs(sql.column('value')).label('abs(value)')``.
        Labeling columns created by functions or arithmetic operations is
        recommended.
    bytes_per_chunk : int
        If both divisions and npartitions is None, this is the target size of
        each partition, in bytes
    head_rows : int
        How many rows to load for inferring the data-types, unless passing meta
    meta : empty DataFrame or None
        If provided, do not attempt to infer dtypes, but use these, coercing
        all chunks on load
    schema : str or None
        If using a table name, pass this to sqlalchemy to select which DB
        schema to use within the URI connection
    engine_kwargs : dict or None
        Specific db engine parameters for sqlalchemy
    kwargs : dict
        Additional parameters to pass to `pd.read_sql()`

    Returns
    -------
    dask.dataframe

    Examples
    --------
    >>> df = dd.read_sql_table('accounts', 'sqlite:///path/to/bank.db',
    ...                  npartitions=10, index_col='id')  # doctest: +SKIP
    iÿÿÿÿN(   t   sql(   t   elementss)   Must specify index column to partition ont   autoloadt   autoload_witht   schemas?   Use label when passing an SQLAlchemy instance as the index (%s)s5   Must supply either divisions or npartitions, not botht	   index_colt   npartitionsi   t   deept   indexi   i    s>   Must provide divisions or npartitions whenusing explicit meta.t   max_1t   count_1t   Mt   startt   endt   freqs   %iSt   it   ut   fsw   Provided index column is of type "{}".  If divisions is not provided the index column type must be numeric or datetime.t	   divisions(   R   R   R   (5   t
   sqlalchemyR   t   sqlalchemy.sqlR   t   Nonet
   ValueErrort   create_enginet   MetaDatat
   isinstanceR   t   Tablet   Truet   columnst   Labelt	   TypeErrort   listt   appendt   namet   selectt   limitt   select_fromt   pdt   read_sqlt   emptyR   t   read_sql_tableR   t   memory_usaget   sumt   funct   maxt   mint   iloct   dtypest   Seriest   dtypet   countt   roundt   kindt
   date_ranget   total_secondst   tolistt   npt   linspacet   formatt	   enumeratet   zipt   lent   wheret   and_R    t   _read_sql_chunkR   ($   t   tablet   uriR	   R   R
   t   limitsR    t   bytes_per_chunkt	   head_rowsR   t   metat   engine_kwargst   kwargst   saR   R   t   enginet   mR   t   ct   qt   headR%   t   bytes_per_rowt   minmaxt   maxit   miniR5   R6   t   partst   lowerst   uppersR   t   lowert   uppert   cond(    (    s4   lib/python2.7/site-packages/dask/dataframe/io/sql.pyR,   	   sŠ    HJ
!			"-	'	
"	(.*	&c         K   sB   t  j |  | |  } | j r" | S| j | j j ƒ  d t ƒSd  S(   Nt   copy(   R)   R*   R+   t   astypeR3   t   to_dictt   False(   RQ   RF   RJ   RL   t   df(    (    s4   lib/python2.7/site-packages/dask/dataframe/io/sql.pyRD   ­   s    	i   (   t   numpyR<   t   pandasR)   t    R    t   compatibilityR   t   ioR   R   R   R,   RD   (    (    (    s4   lib/python2.7/site-packages/dask/dataframe/io/sql.pyt   <module>   s   ¢