ó
šxŠ\c           @   sd  d  Z  d d l m Z d d l Z d d l Z d d l Z d d l m Z d d l Z	 d d l m
 Z
 d d l m Z m Z d Z d Z d	 Z d
 Z d d d d d d d d d d d d d d d d g Z d Z d Z d Z d Z d i e d  6e d! 6e d" 6e d# 6Z d$ i e d  6e d" 6Z d% Z d& „  Z d' „  Z d( „  Z d) „  Z d* e f d+ „  ƒ  YZ d S(,   sË   
Read a SAS XPort format file into a Pandas DataFrame.

Based on code from Jack Cushman (github.com/jcushman/xport).

The file format is defined here:

https://support.sas.com/techsup/technote/ts140.pdf
iÿÿÿÿ(   t   datetimeN(   t   Appender(   t   compat(   t   BaseIteratort   get_filepath_or_buffersP   HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  sK   HEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000sP   HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  sP   HEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  t   ntypet   nhfunt   field_lengtht   nvar0t   namet   labelt   nformt   nflt   num_decimalst   nfjt   nfillt   niformt   niflt   nifdt   npost   _s…   Parameters
----------
filepath_or_buffer : string or file-like object
    Path to SAS file or object implementing binary read method.så   index : identifier of index column
    Identifier of column that should be used as index of the DataFrame.
encoding : string
    Encoding for text data.
chunksize : int
    Read file `chunksize` lines at a time, returns iterator.sE   format : string
    File format, only `xport` is currently supported.s_   iterator : boolean, default False
    Return XportReader object for reading file incrementally.s  Read a SAS file into a DataFrame.

%(_base_params_doc)s
%(_format_params_doc)s
%(_params2_doc)s
%(_iterator_doc)s

Returns
-------
DataFrame or XportReader

Examples
--------
Read a SAS Xport file:

>>> df = pd.read_sas('filename.XPT')

Read a Xport file in 10,000 line chunks:

>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
>>>     do_something(chunk)

t   _base_params_doct   _format_params_doct   _params2_doct   _iterator_docsã   Class for reading SAS Xport files.

%(_base_params_doc)s
%(_params2_doc)s

Attributes
----------
member_info : list
    Contains information about the file
fields : list
    Contains information about the variables in the file
sÊ   Read observations from SAS Xport file, returning as data frame.

Parameters
----------
nrows : int
    Number of rows to read from data file; if None, read whole
    file.

Returns
-------
A DataFrame.
c         C   s0   y t  j |  d ƒ SWn t k
 r+ t j SXd S(   s3    Given a date in xport format, return Python date. s   %d%b%y:%H:%M:%SN(   R    t   strptimet
   ValueErrort   pdt   NaT(   t   datestr(    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt   _parse_datex   s    c         C   sS   i  } d } x9 | D]1 \ } } |  | | | !j  ƒ  | | <| | 7} q W| d =| S(   s  
    Parameters
    ----------
    s: string
        Fixed-length string to split
    parts: list of (name, length) pairs
        Used to break up string, name '_' will be filtered from output.

    Returns
    -------
    Dict of name:contents of string at given location.
    i    R   (   t   strip(   t   st   partst   outt   startR	   t   length(    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt   _split_line   s    c         C   sn   | d k rj t  j t |  ƒ t  j d ƒ ƒ } t  j d | d | f ƒ } | j d | ƒ } |  | d <| S|  S(   Ni   t   S8s   S%d,S%dt   dtypet   f0(   t   npt   zerost   lenR'   t   view(   t   vect   nbytest   vec1R'   t   vec2(    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt   _handle_truncated_float_vec—   s    	!
c   	      C   sY  t  j d ƒ } |  j d | ƒ } | d } | d } | d @} t  j t |  ƒ d t  j ƒ} d | t  j | d @ƒ <d | t  j | d	 @ƒ <d
 | t  j | d @ƒ <| | L} | | ?| d @d d
 | >B} | d M} | | d ?d @d d >| d d >| d @BO} t  j t | ƒ f d d ƒ} | | d <| | d <| j d d ƒ } | j d ƒ } | S(   sf   
    Parse a vector of float values representing IBM 8 byte floats into
    native 8 byte floats.
    s   >u4,>u4R'   R(   t   f1iÿÿÿ i   i    i   i  @ i   i  € i   i   Iÿÿïÿ    i   i   iA   iÿ  i   I   €    s   >f8t   f8(	   R)   R'   R,   R*   R+   t   uint8t   wheret   emptyt   astype(	   R-   R'   R/   t   xport1t   xport2t   ieee1t   shiftt   ieee2t   ieee(    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt   _parse_float_vecª   s(    


	

	+

t   XportReaderc           B   sz   e  Z e Z d
 d  d
 d „ Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d
 d „ Z d „  Z e e ƒ d
 d	 „ ƒ Z RS(   s
   ISO-8859-1c         C   sÚ   | |  _  d |  _ | |  _ | |  _ t | t ƒ rT t | d | ƒ\ } } } } n  t | t t j t	 f ƒ r„ t
 | d ƒ |  _ nH | j ƒ  } y | j |  j  ƒ } Wn t k
 r¹ n Xt j | ƒ |  _ |  j ƒ  d  S(   Ni    t   encodingt   rb(   t	   _encodingt   _lines_readt   _indext
   _chunksizet
   isinstancet   strR   R   t	   text_typet   bytest   opent   filepath_or_buffert   readt   encodet   UnicodeEncodeErrort   BytesIOt   _read_header(   t   selfRK   t   indexR@   t	   chunksizet   compressiont   should_closet   contents(    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt   __init__æ   s     				c         C   s   |  j  j ƒ  d  S(   N(   RK   t   close(   RQ   (    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyRX      s    c         C   s   |  j  j d ƒ j ƒ  S(   NiP   (   RK   RL   t   decode(   RQ   (    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt   _get_row  s    c         C   sË  |  j  j d ƒ |  j ƒ  } | t k rA |  j ƒ  t d ƒ ‚ n  |  j ƒ  } d d g d d g d d g d d g d	 d
 g g } t | | ƒ } | d d k r¸ |  j ƒ  t d ƒ ‚ n  t | d	 ƒ | d	 <| |  _ |  j ƒ  } t | d
  ƒ | d <|  j ƒ  } |  j ƒ  } | j	 t
 ƒ } | t k }	 | o1|	 sM|  j ƒ  t d ƒ ‚ n  t | d d !ƒ }
 d d g d d g d d g d d g d d g d d g d	 d
 g g } t |  j ƒ  | ƒ } d d
 g d d
 g d d g d d g g } | j t |  j ƒ  | ƒ ƒ t | d ƒ | d <t | d	 ƒ | d	 <| |  _ i d d 6d d 6} t |  j ƒ  d d !ƒ } |
 | } | d r‡| d | d 7} n  |  j  j | ƒ } g  } d } x4t | ƒ |
 k rÛ| |
  | |
 } } | j d ƒ } t j d | ƒ } t t t | ƒ ƒ } | d =| | d | d <| d  } | d d k rx| d k  sP| d k rx|  j ƒ  d! } t | j | ƒ ƒ ‚ n  xB | j ƒ  D]4 \ } } y | j ƒ  | | <Wq…t k
 r¸q…Xq…W| | d  7} | | g 7} q¨W|  j ƒ  } | t k s|  j ƒ  t d" ƒ ‚ n  | |  _ | |  _ |  j  j ƒ  |  _  |  j! ƒ  |  _" g  |  j D] } | d# j# ƒ  ^ qJ|  _$ g  t% |  j ƒ D]0 \ } } d$ t& | ƒ d% t& | d  ƒ f ^ qy} t' j( | ƒ } | |  _) d  S(&   Ni    s#   Header record is not an XPORT file.t   prefixi   t   versioni   t   OSR   t   createdi   s   SAS     SAS     SASLIBs!   Header record has invalid prefix.t   modifieds   Member header not foundiûÿÿÿiþÿÿÿt   set_namet   sasdataR
   i(   t   typet   numerici   t   chari   i6   i:   iP   iŒ   s   >hhhh8s40s8shhh2s8shhl52sR   R   s0   Floating field width {0} is not between 2 and 8.s   Observation header not found.R	   R    t   S(*   RK   t   seekRZ   t   _correct_line1RX   R   R%   R   t	   file_infot
   startswitht   _correct_header1t   _correct_header2t   intt   updatet   member_infoRL   R+   t   ljustt   structt   unpackt   dictt   zipt
   _fieldkeyst	   TypeErrort   formatt   itemsR   t   AttributeErrort   _correct_obs_headert   fieldst   record_lengtht   tellt   record_startt   _record_countt   nobsRY   t   columnst	   enumerateRG   R)   R'   t   _dtype(   RQ   t   line1t   line2t   fifRh   t   line3t   header1t   header2t	   headflag1t	   headflag2t   fieldnamelengtht   memRn   t   typest
   fieldcountt
   datalengtht	   fielddataRz   t
   obs_lengtht   fieldt   fieldstructt   flt   msgt   kt   vt   headert   xt   it   dtypelR'   (    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyRP     s’    

	
**	


(

		)Cc         C   s   |  j  d |  j p d ƒ S(   Nt   nrowsi   (   RL   RE   (   RQ   (    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt   __next__i  s    c         C   s  |  j  j d d ƒ |  j  j ƒ  |  j } | d d k rI t j d ƒ n  |  j d k rv |  j  j |  j ƒ | |  j S|  j  j d d ƒ |  j  j d ƒ } t j	 | d t j
 ƒ} t j | d k ƒ } t | ƒ d k rã d } n d t | ƒ } |  j  j |  j ƒ | | |  j S(	   sÇ   
        Get number of records in file.

        This is maybe suboptimal because we have to seek to the end of
        the file.

        Side effect: returns file position to record_start.
        i    i   iP   s   xport file may be corruptedi°ÿÿÿR'   I        i   (   RK   Rf   R|   R}   t   warningst   warnR{   RL   R)   t
   frombuffert   uint64t   flatnonzeroR+   (   RQ   t   total_records_lengtht	   last_cardt   ixt   tail_pad(    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyR~   l  s"    

	c         C   s(   | d k r |  j } n  |  j d | ƒ S(   s  
        Reads lines from Xport file and returns as dataframe

        Parameters
        ----------
        size : int, defaults to None
            Number of lines to read.  If None, reads whole file.

        Returns
        -------
        DataFrame
        Rœ   N(   t   NoneRE   RL   (   RQ   t   size(    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt	   get_chunk‘  s    c         C   s†   | j  d d ƒ } | d d k | d d k @| d d k @} | d d k | d d	 k @| d d
 k B| d d k B} | | M} | S(   NR'   s   u1,u1,u2,u4R2   i    t   f2t   f3R(   iA   iZ   i_   i.   (   R,   (   RQ   R-   R—   t   misst   miss1(    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt   _missing_double¢  s
    ,:
c         C   s  | d  k r |  j } n  t | |  j |  j ƒ } | |  j } | d k r] |  j ƒ  t ‚ n  |  j j | ƒ } t	 j
 | d |  j d | ƒ} t j d t | ƒ ƒ } xt |  j ƒ D]\ } } | d | }	 |  j | d }
 |
 d k r1t |	 |  j | d ƒ }	 |  j |	 ƒ } t |	 ƒ } t	 j | | <n| |  j | d d	 k r­g  |	 D] } | j ƒ  ^ qO} t j r­|  j d  k	 rªg  | D] } | j |  j ƒ ^ q†} qªq­n  | | | <qµ W|  j d  k rét |  j |  j | ƒ | _ n | j |  j ƒ } |  j | 7_ | S(
   Ni    R'   t   countRR   s   s%dR   Rc   R   Rd   (   R§   R   t   minRC   R{   RX   t   StopIterationRK   RL   R)   R    R‚   R   t	   DataFramet   rangeR   R€   Rz   R1   R®   R>   t   nant   rstripR   t   PY3RB   RY   RD   RR   t	   set_index(   RQ   Rœ   t
   read_linest   read_lent   rawt   datat   dft   jR™   R-   R   R¬   R—   t   y(    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyRL   ª  s<    
		.N(   t   __name__t
   __module__t   _xport_reader_doct   __doc__R§   RW   RX   RZ   RP   R   R~   R©   R®   R   t   _read_method_docRL   (    (    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyR?   ã   s   			c		%		(   RÂ   R    Rp   Rž   t   numpyR)   t   pandas.util._decoratorsR   t   pandasR   R   t   pandas.io.commonR   R   Rg   Rj   Rk   Ry   Rt   R   R   R   R   t   _read_sas_docRÁ   RÃ   R   R%   R1   R>   R?   (    (    (    s6   lib/python2.7/site-packages/pandas/io/sas/sas_xport.pyt   <module>	   s>   					9