
    .i:              	         d Z ddlmZ ddlmZ ddlZddlmZ ddlZddlZ	ddl
mZ ddlZddlmZ ddlmZ erdd	lmZmZmZmZ d
ZdZdZdZg dZdZdZdZdZde de de de d	Z de de dZ!ddZ"ddZ#d Z$d Z% G d de      Z&y) a-  
Read a SAS XPort format file into a Pandas DataFrame.

Based on code from Jack Cushman (github.com/jcushman/xport).

The file format is defined here:

https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
    )annotations)datetimeN)TYPE_CHECKING)find_stack_level)
get_handle)	SASReader)CompressionOptionsDatetimeNaTTypeFilePath
ReadBufferzPHEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  zKHEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000zPHEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  zPHEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  )ntypenhfunfield_lengthnvar0namelabelnformnflnum_decimalsnfjnfillniformniflnifdnpos_zParameters
----------
filepath_or_buffer : str or file-like object
    Path to SAS file or object implementing binary read method.zindex : identifier of index column
    Identifier of column that should be used as index of the DataFrame.
encoding : str
    Encoding for text data.
chunksize : int
    Read file `chunksize` lines at a time, returns iterator.zBformat : str
    File format, only `xport` is currently supported.z\iterator : bool, default False
    Return XportReader object for reading file incrementally.z#Read a SAS file into a DataFrame.


a  

Returns
-------
DataFrame or XportReader

Examples
--------
Read a SAS Xport file:

>>> df = pd.read_sas('filename.XPT')

Read a Xport file in 10,000 line chunks:

>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
>>>     do_something(chunk)

z$Class for reading SAS Xport files.

z

Attributes
----------
member_info : list
    Contains information about the file
fields : list
    Contains information about the variables in the file
c                n    	 t        j                  | d      S # t        $ r t        j                  cY S w xY w)z1Given a date in xport format, return Python date.z%d%b%y:%H:%M:%S)r   strptime
ValueErrorpdNaT)datestrs    U/var/www/app/trading-bot/venv/lib/python3.12/site-packages/pandas/io/sas/sas_xport.py_parse_dater%      s3      *;<< vvs    44c                d    i }d}|D ]#  \  }}| |||z    j                         ||<   ||z  }% |d= |S )a  
    Parameters
    ----------
    s: str
        Fixed-length string to split
    parts: list of (name, length) pairs
        Used to break up string, name '_' will be filtered from output.

    Returns
    -------
    Dict of name:contents of string at given location.
    r   r   )strip)spartsoutstartr   lengths         r$   _split_liner-      sU     CE feefn-335D	 	CJ    c                    |dk7  rit        j                  t        |       t        j                  d            }t        j                  d| dd|z
         }|j	                  |      }| |d<   |S | S )N   S8Sz,Sdtypef0)npzeroslenr4   view)vecnbytesvec1r4   vec2s        r$   _handle_truncated_float_vecr>      si     {xxC"((4.11VHBq6zl34yyuy%T
Jr.   c                   t        j                  d      }| j                  |      }|d   }|d   }|dz  }t        j                  t	        |       t         j
                        }d|t        j                  |dz        <   d|t        j                  |d	z        <   d
|t        j                  |dz        <   ||z  }||z	  |dz  dd
|z
  z   z  z  }|dz  }||dz	  dz  dz
  dz  |z   dz   dz  |dz  z  z  }t        j                  t	        |      fd      }||d<   ||d<   |j                  d      }|j                  d      }|S )zf
    Parse a vector of float values representing IBM 8 byte floats into
    native 8 byte floats.
    z>u4,>u4r3   r5   f1i    i       i  @    i         l          A   i     l        z>f8f8)	r6   r4   r9   r7   r8   uint8whereemptyastype)	r:   r4   r<   xport1xport2ieee1shiftieee2ieees	            r$   _parse_float_vecrU      sd   
 HHYE88%8 D$ZF$ZF ZE HHSXRXX.E+,E"((6J&
'(+,E"((6J&
'(+,E"((6J&
'( 
eOEu_&:"52U;K!LME 
ZE 
6R<4'2-!3u<tCJ E 88SZM3DDJDJ9959!D;;tDKr.   c                  t    e Zd ZeZ	 	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZd ZddZddZ	ddZ
dddZd	 Zddd
Zy)XportReaderNc                    || _         d| _        || _        || _        t	        |d|d|      | _        | j
                  j                  | _        	 | j                          y # t        $ r | j                           w xY w)Nr   rbF)encodingis_textcompression)	_encoding_lines_read_index
_chunksizer   handleshandlefilepath_or_buffer_read_header	Exceptionclose)selfrc   indexrZ   	chunksizer\   s         r$   __init__zXportReader.__init__   s|     "#!#
 #',,"5"5	 	JJL	s   A A:c                8    | j                   j                          y N)ra   rf   rg   s    r$   rf   zXportReader.close  s    r.   c                T    | j                   j                  d      j                         S )NP   )rc   readdecoderm   s    r$   _get_rowzXportReader._get_row  s"    &&++B/6688r.   c           
        | j                   j                  d       | j                         }|t        k7  rd|v rt	        d      t	        d      | j                         }ddgddgd	dgd
dgddgg}t        ||      }|d   dk7  rt	        d      t        |d         |d<   || _        | j                         }t        |d d       |d<   | j                         }| j                         }|j                  t              }|t        k(  }	|r|	st	        d      t        |dd       }
ddgddgddgddgd	dgd
dgddgg}t        | j                         |      }ddgd
dgddgddgg}|j                  t        | j                         |             t        |d         |d<   t        |d         |d<   || _        ddd}t        | j                         dd       }|
|z  }|dz  r|d|dz  z
  z  }| j                   j                  |      }g }d}t        |      |
k\  r|d |
 ||
d  }}|j!                  d      }t#        j$                  d|      }t'        t)        t*        |d !            }|d
= ||d"      |d"<   |d#   }|d"   dk(  r|d$k  s|dkD  rd%| d&}t-        |      |j/                         D ]  \  }}	 |j1                         ||<    ||d#   z  }||gz  }t        |      |
k\  r| j                         }|t4        k(  st	        d'      || _        || _        | j                   j;                         | _        | j?                         | _         | j6                  D cg c]  }|d(   jC                          c}| _"        tG        | j6                        D cg c]$  \  }}d)tI        |      z   d*tI        |d#         z   f& }}}tK        jL                  |      }|| _'        y # t2        $ r Y <w xY wc c}w c c}}w )+Nr   z**COMPRESSED**z<Header record indicates a CPORT file, which is not readable.z#Header record is not an XPORT file.prefixrF   versionr0   OSr   created   zSAS     SAS     SASLIBz!Header record has invalid prefix.modifiedzMember header not foundset_namesasdatar   (   typenumericchar)rA   rB   6   :   ro      z>hhhh8s40s8shhh2s8shhl52sT)strictr   r   rB   zFloating field width z is not between 2 and 8.zObservation header not found.r   r(   r2   )(rc   seekrr   _correct_line1r    r-   r%   	file_info
startswith_correct_header1_correct_header2intupdatemember_inforp   r8   ljuststructunpackdictzip
_fieldkeys	TypeErroritemsr'   AttributeError_correct_obs_headerfieldsrecord_lengthtellrecord_start_record_countnobsrq   columns	enumeratestrr6   r4   _dtype)rg   line1line2fifr   line3header1header2	headflag1	headflag2fieldnamelengthmemr   types
fieldcount
datalength	fielddatar   
obs_length
fieldbytesfieldstructfieldflmsgkvheaderxidtypelr4   s                                  r$   rd   zXportReader._read_header  s]   $$Q' N"5( !R  BCC"~	1~ay3)iQS_Us+	X"::@AA*9Y+?@	)" +E#2J 7	* --/--/&&'78	//	i677gbn- qMONN1I"IO
 "$--/37B#rWbMFA;G;t}}<="-k*.E"FJ!,[-C!DI& &)B/0
$z1
?"zB..J++00<	
)n/ *?+/*+ "J $))#.J --(CZPKZTBCEc
"5>2E'N~&BW~*aR!V-bT1IJn$ 1 wwyE!H %//JugF7 )n/: ,,<==' 3388:&&(	48KK@q&	((*@
 &dkk2
5 3q6\3U>%:!;;<
 
  / &  A
s   0OO))O.	O&%O&c                B    | j                  | j                  xs d      S )NrA   nrows)rp   r`   rm   s    r$   __next__zXportReader.__next__  s    yyt3!y44r.   c                   | j                   j                  dd       | j                   j                         | j                  z
  }|dz  dk7  rt	        j
                  dt                      | j                  dkD  r4| j                   j                  | j                         || j                  z  S | j                   j                  dd       | j                   j                  d      }t        j                  |t        j                        }t        j                  |dk(        }t        |      dk(  rd}nd	t        |      z  }| j                   j                  | j                         ||z
  | j                  z  S )
z
        Get number of records in file.

        This is maybe suboptimal because we have to seek to the end of
        the file.

        Side effect: returns file position to record_start.
        r   rB   ro   zxport file may be corrupted.)
stacklevelir3   l     @@  r0   )rc   r   r   r   warningswarnr   r   rp   r6   
frombufferuint64flatnonzeror8   )rg   total_records_lengthlast_card_bytes	last_cardixtail_pads         r$   r   zXportReader._record_count  s:    	$$Q*#66;;=@Q@QQ"$)MM.+-
 "##(():):;'4+=+===$$S!,1166r:MM/C	 ^^I)<<=r7a<H3r7{H$$T%6%67$x/D4F4FFFr.   c                B    || j                   }| j                  |      S )a  
        Reads lines from Xport file and returns as dataframe

        Parameters
        ----------
        size : int, defaults to None
            Number of lines to read.  If None, reads whole file.

        Returns
        -------
        DataFrame
        r   )r`   rp   )rg   sizes     r$   	get_chunkzXportReader.get_chunk  s#     <??Dyyty$$r.   c                    |j                  d      }|d   dk(  |d   dk(  z  |d   dk(  z  }|d   dk\  |d   d	k  z  |d   d
k(  z  |d   dk(  z  }||z  }|S )Nzu1,u1,u2,u4r3   r@   r   f2f3r5   rH   Z   _   .   )r9   )rg   r:   r   missmiss1s        r$   _missing_doublezXportReader._missing_double  s    HH=H)$14A.!D'Q,?go!D'T/2w$ w$  	
 	r.   c                |   || j                   }t        || j                   | j                  z
        }|| j                  z  }|dk  r| j	                          t
        | j                  j                  |      }t        j                  || j                  |      }i }t        | j                        D ]  \  }}|dt        |      z      }	| j                  |   d   }
|
dk(  rLt        |	| j                  |   d         }	| j!                  |	      }t#        |	      }t        j$                  ||<   nf| j                  |   d   dk(  rQ|	D cg c]  }|j'                          }}| j(                  (|D cg c]  }|j+                  | j(                         }}|j-                  |i        t/        j0                  |      }| j2                  <t/        j4                  t7        | j                  | j                  |z               |_        n|j;                  | j2                        }| xj                  |z  c_        |S c c}w c c}w )a  Read observations from SAS Xport file, returning as data frame.

        Parameters
        ----------
        nrows : int
            Number of rows to read from data file; if None, read whole
            file.

        Returns
        -------
        A DataFrame.
        r   )r4   countr(   r   r   r   r   )r   minr^   r   rf   StopIterationrc   rp   r6   r   r   r   r   r   r   r>   r   rU   nanrstripr]   rq   r   r!   	DataFramer_   Indexrangerh   	set_index)rg   r   
read_linesread_lenrawdatadf_datajr   r:   r   r   r   ydfs                  r$   rp   zXportReader.read  s    =IIE		D,<,< <=
 2 22q=JJL%%**84}}S:Fdll+ 	#DAqsSV|$CKKN7+E	!1#t{{1~n7UV++C0$S)&&$Q(F2),-AQXXZ-->>-;<=a$..1=A=NNAq6"	# \\'";;xxd&6&68H8H:8U VWBHdkk*BJ&	 . >s   H4,"H9)Nz
ISO-8859-1Ninfer)
rc   zFilePath | ReadBuffer[bytes]rZ   z
str | Noneri   
int | Noner\   r	   returnNone)r   r   )r   pd.DataFrame)r   r   rl   )r   r   r   r   )r   r   r   r   )__name__
__module____qualname___xport_reader_doc__doc__rj   rf   rr   rd   r   r   r   r   rp    r.   r$   rW   rW      sv    G
 + $*18 	
  ( 
89l\5$GL%"	1r.   rW   )r#   r   r   r
   )r(   r   )'r   
__future__r   r   r   typingr   r   numpyr6   pandas.util._exceptionsr   pandasr!   pandas.io.commonr   pandas.io.sas.sasreaderr   pandas._typingr	   r
   r   r   r   r   r   r   r   _base_params_doc_params2_doc_format_params_doc_iterator_doc_read_sas_docr   r%   r-   r>   rU   rW   r   r.   r$   <module>r     s   #       4  ' -  W  R  W  W 
(C @9 A
      2    	 ,&6rI) Ir.   