
x\c           @   s   d  Z  d d l m Z d d l Z d d l Z d d l m Z d d l Z d d l m	 Z	 d d l
 m Z m Z d d l m Z d d l j j j Z d e f d	     YZ d
 e f d     YZ d e f d     YZ d S(   s  
Read SAS7BDAT files

Based on code written by Jared Hobbs:
  https://bitbucket.org/jaredhobbs/sas7bdat

See also:
  https://github.com/BioStatMatt/sas7bdat

Partial documentation of the file format:
  https://cran.r-project.org/web/packages/sas7bdat/vignettes/sas7bdat.pdf

Reference for binary data compression:
  http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
i(   t   datetimeN(   t   EmptyDataError(   t   compat(   t   BaseIteratort   get_filepath_or_buffer(   t   Parsert   _subheader_pointerc           B   s   e  Z RS(    (   t   __name__t
   __module__(    (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR      s   t   _columnc           B   s   e  Z RS(    (   R   R   (    (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR	   #   s   t   SAS7BDATReaderc           B   s+  e  Z d  Z d e e d d e e d  Z d   Z d   Z d   Z d   Z	 d   Z
 d   Z d   Z d	   Z d
   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d d  Z d   Z  d   Z! RS(   s!  
    Read SAS files in SAS7BDAT format.

    Parameters
    ----------
    path_or_buf : path name or buffer
        Name of SAS file or file-like object pointing to SAS file
        contents.
    index : column identifier, defaults to None
        Column to use as index.
    convert_dates : boolean, defaults to True
        Attempt to convert dates to Pandas datetime values.  Note that
        some rarely used SAS date formats may be unsupported.
    blank_missing : boolean, defaults to True
        Convert empty strings to missing values (SAS uses blanks to
        indicate missing character variables).
    chunksize : int, defaults to None
        Return SAS7BDATReader object for iterations, returns chunks
        with given number of lines.
    encoding : string, defaults to None
        String encoding.
    convert_text : bool, defaults to True
        If False, text variables are left as raw bytes.
    convert_header_text : bool, defaults to True
        If False, header text, including column names, are left as raw
        bytes.
    c	   
      C   s)  | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ d |  _ d |  _ g  |  _	 g  |  _
 g  |  _ g  |  _ g  |  _ d  |  _ g  |  _ g  |  _ g  |  _ d |  _ d |  _ d |  _ t |  \ |  _ }	 }	 }	 t |  j t j  rt |  j d  |  _ |  j |  _ n  |  j   |  j   d  S(   Ns   latin-1t    i    t   rb(   t   indext   convert_datest   blank_missingt	   chunksizet   encodingt   convert_textt   convert_header_textt   default_encodingt   compressiont   column_names_stringst   column_namest   column_formatst   columnst%   _current_page_data_subheader_pointerst   Nonet   _cached_paget   _column_data_lengthst   _column_data_offsetst   _column_typest   _current_row_in_file_indext   _current_row_on_page_indexR   t   _path_or_buft
   isinstanceR   t   string_typest   opent   handlet   _get_propertiest   _parse_metadata(
   t   selft   path_or_bufR   R   R   R   R   R   R   t   _(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyt   __init__E   s6    																					
c         C   s   t  j |  j d t  j S(   s5   Return a numpy int64 array of the column data lengthst   dtype(   t   npt   asarrayR   t   int64(   R)   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyt   column_data_lengthsj   s    c         C   s   t  j |  j d t  j S(   s0   Return a numpy int64 array of the column offsetsR-   (   R.   R/   R   R0   (   R)   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyt   column_data_offsetsn   s    c         C   s   t  j |  j d t  j d  S(   sX   Returns a numpy character array of the column types:
           s (string) or d (double)R-   t   S1(   R.   R/   R   R-   (   R)   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyt   column_typesr   s    c         C   s)   y |  j  j   Wn t k
 r$ n Xd  S(   N(   R&   t   closet   AttributeError(   R)   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR5   w   s    c         C   s  |  j  j d  |  j  j d  |  _ |  j d t t j  !t j k r` |  j   t d   n  d \ } } |  j	 t j
 t j  } | t j k r t j } t |  _ d |  _ t j |  _ t j |  _ n* t |  _ t j |  _ t j |  _ d |  _ |  j	 t j t j  } | t j k r&t j } n  | | } |  j	 t j t j  } | d k r`d |  _ n	 d |  _ |  j	 t j t j  d } | t j  k rt j  | |  _! n d	 j" d
 |  |  _! |  j	 t j# t j$  } | d k rd |  _% n! | d k rd |  _% n	 d |  _% |  j	 t j& t j'  } | j( d  |  _) |  j* rd|  j) j+ |  j, pX|  j-  |  _) n  |  j	 t j. t j/  } | j( d  |  _0 |  j* r|  j0 j+ |  j, p|  j-  |  _0 n  t1 d d d  } |  j2 t j3 | t j4  } | t5 j6 | d d |  _7 |  j2 t j8 | t j9  } | t5 j6 | d d |  _: |  j; t j< | t j=  |  _> |  j  j |  j> d  } |  j | 7_ t |  j  |  j> k r|  j   t d   n  |  j; t j? | t j@  |  _A |  j; t jB | t jC  |  _D |  j	 t jE | t jF  } | j( d  |  _G |  j* rN|  jG j+ |  j, pB|  j-  |  _G n  |  j	 t jH | t jI  } | j( d  |  _J |  j* r|  jJ j+ |  j, p|  j-  |  _J n  |  j	 t jK | t jL  } | j( d  |  _M |  j* r|  jM j+ |  j, p|  j-  |  _M n  |  j	 t jN | t jO  } | j( d  } t |  d k rb| j+ |  j, pV|  j-  |  _P n[ |  j	 t jQ | t jR  } | j( d  |  _P |  j* r|  jP j+ |  j, p|  j-  |  _P n  d  S(   Ni    i   s'   magic number mismatch (not a SAS file?)i   i   s   t   <t   >s   unknown (code={name!s})t   namet   1t   unixt   2t   windowst   unknowns     i  i   t   unitt   ss*   The SAS7BDAT file appears to be truncated.(   i    i    (S   R"   t   seekt   readR   t   lent   constt   magicR5   t
   ValueErrort   _read_bytest   align_1_offsett   align_1_lengtht   u64_byte_checker_valuet   align_2_valuet   Truet   U64t   _int_lengtht   page_bit_offset_x64t   _page_bit_offsett   subheader_pointer_length_x64t   _subheader_pointer_lengtht   Falset   page_bit_offset_x86t   subheader_pointer_length_x86t   align_2_offsett   align_2_lengtht   align_1_checker_valuet   endianness_offsett   endianness_lengtht
   byte_ordert   encoding_offsett   encoding_lengtht   encoding_namest   file_encodingt   formatt   platform_offsett   platform_lengtht   platformt   dataset_offsett   dataset_lengtht   rstripR9   R   t   decodeR   R   t   file_type_offsett   file_type_lengtht	   file_typeR    t   _read_floatt   date_created_offsett   date_created_lengtht   pdt   to_timedeltat   date_createdt   date_modified_offsett   date_modified_lengtht   date_modifiedt	   _read_intt   header_size_offsett   header_size_lengtht   header_lengtht   page_size_offsett   page_size_lengtht   _page_lengtht   page_count_offsett   page_count_lengtht   _page_countt   sas_release_offsett   sas_release_lengtht   sas_releaset   sas_server_type_offsett   sas_server_type_lengtht   server_typet   os_version_number_offsett   os_version_number_lengtht
   os_versiont   os_name_offsett   os_name_lengtht   os_namet   os_maker_offsett   os_maker_length(   R)   t   align1t   align2t   buft   total_alignt   epocht   x(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR'   }   s    "
					
						
						!		c         C   s4   |  j  d |  j p d  } | d  k r0 t  n  | S(   Nt   nrowsi   (   RB   R   R   t   StopIteration(   R)   t   da(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyt   __next__   s    	c         C   sj   | d k r% |  j    t d   n  |  j | |  } | d k rI d n d } t j |  j | |  d S(   Ni   i   s   invalid float widtht   ft   di    (   i   i   (   R5   RF   RG   t   structt   unpackR[   (   R)   t   offsett   widthR   t   fd(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyRk      s    
c         C   s~   | d k r% |  j    t d   n  |  j | |  } i d d 6d d 6d d 6d	 d 6| } t j |  j | |  d
 } | S(   Ni   i   i   i   s   invalid int widtht   bt   ht   lt   qi    (   i   i   i   i   (   R5   RF   RG   R   R   R[   (   R)   R   R   R   t   itt   iv(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyRt     s    
&c         C   s   |  j  d  k rr |  j j |  |  j j |  } t |  | k  rn |  j   d } t | j | |    n  | S| | t |  j   k r |  j   t d   n  |  j  | | | !Sd  S(   Ns2   Unable to read {:d} bytes from file position {:d}.s   The cached page is too small.(	   R   R   R"   RA   RB   RC   R5   RF   R`   (   R)   R   t   lengthR   t   msg(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyRG     s    

c         C   s   t  } xx | s |  j j |  j  |  _ t |  j  d k r@ Pn  t |  j  |  j k rq |  j   t d   n  |  j   } q	 Wd  S(   Ni    s2   Failed to read a meta data page from the SAS file.(	   RS   R"   RB   Rz   R   RC   R5   RF   t   _process_page_meta(   R)   t   done(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR(     s    	
c         C   sz   |  j    t j t j g t j } |  j | k r? |  j   n  |  j t j @} |  j t j k } | py | py |  j g  k S(   N(	   t   _read_page_headerRD   t   page_meta_typet   page_amd_typet   page_mix_typest   _current_page_typet   _process_page_metadatat   page_data_typeR   (   R)   t   ptt   is_data_paget   is_mix_page(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR   +  s    
c         C   s|   |  j  } t j | } |  j | t j  |  _ t j | } |  j | t j  |  _ t j	 | } |  j | t j
  |  _ d  S(   N(   RP   RD   t   page_type_offsetRt   t   page_type_lengthR   t   block_count_offsett   block_count_lengtht   _current_page_block_countt   subheader_count_offsett   subheader_count_lengtht   _current_page_subheaders_count(   R)   t
   bit_offsett   tx(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR   5  s    	c         C   s   |  j  } x t |  j  D] } |  j t j | |  } | j d k rM q n  | j t j k re q n  |  j	 | j
  } |  j | | j | j  } |  j | |  q Wd  S(   Ni    (   RP   t   rangeR   t   _process_subheader_pointersRD   t   subheader_pointers_offsetR   R   t   truncated_subheader_idt   _read_subheader_signatureR   t   _get_subheader_indext   ptypet   _process_subheader(   R)   R   t   it   pointert   subheader_signaturet   subheader_index(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR   @  s    		c         C   s   t  j j |  } | d  k r | t  j k p6 | d k } | t  j k } |  j d k rr | rr | rr t  j j } q |  j	   t
 d   n  | S(   Ni    R   s   Unknown subheader signature(   RD   t   subheader_signature_to_indext   getR   t   compressed_subheader_idt   compressed_subheader_typeR   t   SASIndext   data_subheader_indexR5   RF   (   R)   t	   signatureR   R   R   t   f1t   f2(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR   Q  s    
c   
      C   s   |  j  } | | | } |  j | |  j  } | |  j 7} |  j | |  j  } | |  j 7} |  j | d  } | d 7} |  j | d  } t   }	 | |	 _ | |	 _ | |	 _ | |	 _ |	 S(   Ni   (   RR   Rt   RN   R   R   R   R   R   (
   R)   R   t   subheader_pointer_indext   subheader_pointer_lengtht   total_offsett   subheader_offsett   subheader_lengtht   subheader_compressiont   subheader_typeR   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR   ^  s     	
					c         C   s   |  j  | |  j  } | S(   N(   RG   RN   (   R)   R   R   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR   w  s    c         C   sE  | j  } | j } | t j j k r0 |  j } n| t j j k rN |  j } n | t j j k rl |  j	 } n | t j j
 k r |  j } n | t j j k r |  j } n | t j j k r |  j } nn | t j j k r |  j } nP | t j j k r|  j } n2 | t j j k r(|  j j |  d  St d   | | |  d  S(   Ns   unknown subheader index(   R   R   RD   R   t   row_size_indext   _process_rowsize_subheadert   column_size_indext   _process_columnsize_subheadert   column_text_indext   _process_columntext_subheadert   column_name_indext   _process_columnname_subheadert   column_attributes_indext#   _process_columnattributes_subheadert   format_and_label_indext   _process_format_subheadert   column_list_indext   _process_columnlist_subheadert   subheader_counts_indext   _process_subheader_countsR   R   t   appendRF   (   R)   R   R   R   R   t	   processor(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR   {  s.    		c         C   s  |  j  } | } | } |  j r5 | d 7} | d 7} n | d 7} | d 7} |  j | t j | |  |  _ |  j | t j | |  |  _ |  j | t j | |  |  _	 |  j | t j
 | |  |  _ t j | } |  j | | |  |  _ |  j | d  |  _ |  j | d  |  _ d  S(   Ni  i  ib  iz  i   (   RN   RM   Rt   RD   t   row_length_offset_multipliert
   row_lengtht   row_count_offset_multipliert	   row_countt   col_count_p1_multipliert   col_count_p1t   col_count_p2_multipliert   col_count_p2t'   row_count_on_mix_page_offset_multipliert   _mix_page_row_countt   _lcst   _lcp(   R)   R   R   t   int_lent
   lcs_offsett
   lcp_offsett   mx(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR     s(    		


c         C   sn   |  j  } | | 7} |  j | |  |  _ |  j |  j |  j k rj d j d |  j d |  j d |  j  GHn  d  S(   Ns?   Warning: column count mismatch ({p1} + {p2} != {column_count})
t   p1t   p2t   column_count(   RN   Rt   R   R   R   R`   (   R)   R   R   R   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR     s    	
	c         C   s   d  S(   N(    (   R)   R   R   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR     s    c   
      C   s  | |  j  7} |  j | t j  } |  j | |  } | d | !j d  } | } |  j rw | j |  j pn |  j	  } n  |  j
 j |  t |  j
  d k rd } x& t j D] } | | k r | } q q W| |  _ | |  j  8} | d }	 |  j r|	 d 7}	 n  |  j |	 |  j  } | j d  } | d k rd |  _ | d }	 |  j rZ|	 d 7}	 n  |  j |	 |  j  } | d |  j !|  _ n | t j k r| d	 }	 |  j r|	 d 7}	 n  |  j |	 |  j  } | d |  j !|  _ nc |  j d k rBd |  _ | d }	 |  j r|	 d 7}	 n  |  j |	 |  j  } | d |  j !|  _ n  |  j rt |  d
  r|  j j |  j pr|  j	  |  _ qqn  d  S(   Ni    s     i   R   i   i   s    i    i(   t   creator_proc(   RN   Rt   RD   t   text_block_size_lengthRG   Rf   R   Rg   R   R   R   R   RC   t   compression_literalsR   RM   R   R   R   t   rle_compressiont   hasattr(
   R)   R   R   t   text_block_sizeR   t	   cname_rawt   cnamet   compression_literalt   clt   offset1(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR     sX    		
		
	
		
			c         C   s   |  j  } | | 7} | d | d d } x t |  D] } | t j | d t j } | t j | d t j } | t j | d t j } |  j | t j  }	 |  j | t j	  }
 |  j | t j
  } |  j |	 } |  j j | |
 |
 | ! q6 Wd  S(   Ni   i   i   i   (   RN   R   RD   t   column_name_pointer_lengtht!   column_name_text_subheader_offsett   column_name_offset_offsett   column_name_length_offsetRt   t!   column_name_text_subheader_lengtht   column_name_offset_lengtht   column_name_length_lengthR   R   R   (   R)   R   R   R   t   column_name_pointers_countR   t   text_subheadert   col_name_offsett   col_name_lengtht   idxt
   col_offsett   col_lent   name_str(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR     s    	
c   
      C   s  |  j  } | d | d | d } x t |  D] } | | t j | | d } | d | t j | | d } | d | t j | | d } |  j | |  }	 |  j j |	  |  j | t j	  }	 |  j
 j |	  |  j | t j  }	 |  j j |	 d k rd n d  q0 Wd  S(   Ni   i   i   i   R   R@   (   RN   R   RD   t   column_data_offset_offsett   column_data_length_offsett   column_type_offsetRt   R   R   t   column_data_length_lengthR   t   column_type_lengthR   (
   R)   R   R   R   t   column_attributes_vectors_countR   t   col_data_offsett   col_data_lent	   col_typesR   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR   	  s    	!c         C   s   d  S(   N(    (   R)   R   R   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR      s    c         C   s   |  j  } | t j d | } | t j d | } | t j d | } | t j d | } | t j d | } | t j d | }	 |  j | t j	  }
 t
 |
 t |  j  d  } |  j | t j  } |  j | t j  } |  j | t j  } t
 | t |  j  d  } |  j | t j  } |  j |	 t j  } |  j | } | | | | !} |  j | } | | | | !} t |  j  } t   } | | _ |  j | | _ | | _ | | _ |  j | | _ |  j | | _ |  j j |  |  j j |  d  S(   Ni   i   (   RN   RD   t)   column_format_text_subheader_index_offsett   column_format_offset_offsett   column_format_length_offsett(   column_label_text_subheader_index_offsett   column_label_offset_offsett   column_label_length_offsetRt   t)   column_format_text_subheader_index_lengtht   minRC   R   t   column_format_offset_lengtht   column_format_length_lengtht(   column_label_text_subheader_index_lengtht   column_label_offset_lengtht   column_label_length_lengthR   R	   t   col_idR   R9   t   labelR`   R   t   ctypeR   R   R   R   (   R)   R   R   R   t   text_subheader_formatt   col_format_offsett   col_format_lent   text_subheader_labelt   col_label_offsett   col_label_lenR   t
   format_idxt   format_startt
   format_lent	   label_idxt   label_startt	   label_lent   label_namest   column_labelt   format_namest   column_formatt   current_column_numbert   col(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR   $  sR    	




						c         C   sh  | d  k r' |  j d  k	 r' |  j } n | d  k r? |  j } n  t |  j  d k rm |  j   t d   n  |  j |  j k r d  S|  j |  j } | | k r | } n  |  j j d  } |  j j d  } t	 j
 | | f d t	 j |  _ t	 j | d | f d t	 j |  _ d |  _ t |   } | j |  |  j   } |  j d  k	 rd| j |  j  } n  | S(   Ni    s   No columns to parse from fileR   R@   R-   i   (   R   R   R   RC   R   R5   R   R    t   countR.   t   emptyt   objectt   _string_chunkt   zerost   uint8t   _byte_chunkt   _current_row_in_chunk_indexR   RB   t   _chunk_to_dataframeR   t	   set_index(   R)   R   t   mt   ndt   nst   pt   rslt(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyRB   ]  s.    
	!%	c         C   s   g  |  _  |  j j |  j  |  _ t |  j  d k r: t St |  j  |  j k r |  j   d } t | j	 t |  j  |  j    n  |  j
   |  j } | t j k r |  j   n  | t j @} t j g t j } | r |  j | k r |  j   St S(   Ni    s@   failed to read complete page from file (read {:d} of {:d} bytes)(   R   R"   RB   Rz   R   RC   RL   R5   RF   R`   R   R   RD   R   R   R   R   t   _read_next_pageRS   (   R)   R   t	   page_typeR   R   (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyRP    s$    	

	
c         C   s<  |  j  } |  j } t | | |  } t j d |  } d \ } } xt |  j  D]} |  j | } |  j | d k rR|  j | d  d   f j	 d |  j
 d  | | <t j | | d t j | | <|  j rEd  }	 |  j | t j k r d }	 n |  j | t j k rd }	 n  |	 rEt j | | d |	 d d | | <qEn  | d	 7} qS |  j | d k r|  j | d  d   f | | <|  j r|  j d  k	 r| | j j |  j p|  j  | | <n  |  j r| | j j   d k }
 t j | j |
 | f <n  | d	 7} qS |  j   t  d
 j! d |  j |    qS W| S(   NR   i    R   R-   R@   R?   t   origins
   1960-01-01i   s   unknown column type {type}t   type(   i    i    ("   RH  R    R   Rn   t	   DataFrameR   R   R   RG  t   viewR[   R.   R/   t   float64R   R   R   RD   t   sas_date_formatst   sas_datetime_formatst   to_datetimeRD  R   R   t   strRg   R   R   RC   t   nant   locR5   RF   R`   (   R)   t   nRK  t   ixRO  t   jst   jbt   jR9   R?   t   ii(    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyRI    sD    		 				
N("   R   R   t   __doc__R   RL   R,   R1   R2   R4   R5   R'   R   Rk   Rt   RG   R(   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   RB   RP  RI  (    (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyR
   (   s@   	#					z								
										3				9"	(   Rc  R    R   t   numpyR.   t   pandas.errorsR   t   pandasRn   R   t   pandas.io.commonR   R   t   pandas.io.sas._sasR   t   pandas.io.sas.sas_constantst   iot   sast   sas_constantsRD   RC  R   R	   R
   (    (    (    s5   lib/python2.7/site-packages/pandas/io/sas/sas7bdat.pyt   <module>   s   