
    .i(                        d Z ddlmZmZ ddlZddlZddlZddlZddl	m
Z
mZ ddlmZ ej                  j!                  d      Zej                  j%                  d      Zd Zed        Zeej                  j-                  d	d
dg      ej                  j-                  dg d      d                      Zd Zd Zej                  j-                  ddi dgfdddidgfdddgiddgfddgdddgfddgddej4                  dgfg      d         Zd! Zej                  j-                  d"g d#      d$        Zej                  j-                  d%ddg      d&        Zd' Zej                  j-                  dg d(      d)        Z ej                  j-                  dg d*      d+        Z!d, Z"d- Z#ej                  j%                  d.      ej                  j-                  d/d0d1g      d2               Z$y)3zZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesIOTextIOWrapperN)	DataFrameread_csvz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningpyarrow_skipc                     d}| }t        dj                  |            }|j                  |d|      }t        ddggddg	      }t	        j
                  ||       y )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלום1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr   parserdataresultexpecteds         b/var/www/app/trading-bot/venv/lib/python3.12/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_inputr      s]    HF+228<=D__TsX_>F3*
F/CDH&(+    c                     | }t        dj                               }|j                  |ddd       }t        ddgg      }t	        j
                  ||       y )Nu   Łaski, Jan;1;utf-8)r   r   headeru   Łaski, Jan   r   )r   r   r   r   r   s        r   test_read_csv_unicoder"   (   sT    F&--/0D__TsWT_JF,a012H&(+r   r   ,	r   )utf-16zutf-16lezutf-16bec                    | }dj                  d|      }|dd}d}|j                  |      }t        |d      5 }	|	j                  |       d d d        t	        t        |j                  |            |      5 }
 |j                  |fd|i|} |j                  |
fd|i|}d d d        t        j                         y # 1 sw Y   xxY w# 1 sw Y   ,xY w)	Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r#      )r   skiprowsr   wbr   r   )	replacer   openwriter   r   r   r   r   )r   r   r   	temp_filer   r   kwargsutf8
bytes_datafbytes_bufferr   r   s                r   test_utf16_bom_skiprowsr4   2   s    
 F	 
c		 	
 a(FDX&J	i	 !	
 
wt{{401D	A J\ HXHH"6??<I$I&IJ &(+ J Js   C8+CCCc                     t         j                  j                  |d      }| }|j                  |dd      }t	        |      dk(  sJ y )Nzutf16_ex.txtr%   r$   )r   r   2   )ospathjoinr   len)r   csv_dir_pathr8   r   r   s        r   test_utf16_exampler<   K   sA    77<<n5DF__TH$_?Fv;"r   c                     t         j                  j                  |d      }| }|j                  |d d      }|j	                  d      }|d   d   }d}||k(  sJ y )Nunicode_series.csvlatin-1)r    r   r   r!   i`  u$   Á köldum klaka (Cold Fever) (1994))r7   r8   r9   r   	set_index)r   r;   r8   r   r   gotr   s          r   test_unicode_encodingrB   R   s^    77<<&:;DF__T$_CFa F
)D/C9H(??r   zdata,kwargs,expectedza
1r!   z"a"
1	quotechar"zb
1namesab1
1T)rE   skip_blank_linesFc                    | }ddfd}|j                   dk(  r-|dk(  r(|j                  dd      rt        j                  d	        |j                   ||      fd
i|}t        d|i      }t        j                  ||       y )Nu   ﻿r   c                 B    | z   j                        }t        |      S )N)r   r   )_databom_databomr0   s     r   _encode_data_with_bomz,test_utf8_bom.<locals>._encode_data_with_bomw   s"    %K''-x  r   pyarrowrI   rJ   Tz,https://github.com/apache/arrow/issues/38676)reasonr   rF   )enginegetpytestskipr   r   r   r   )	r   r   r/   r   r   rP   r   rO   r0   s	          @@r   test_utf8_bomrW   ^   s    * F
CD!
 	"EMJJ)40 	IJV__248R4R6RF#x)H&(+r   c                     t        dgdgd      }| }|j                  |      }dj                  |      }|j                  t	        |      |      }t        j                  ||       y )Ng333333@test)mb_num	multibytezmb_num,multibyte
4.8,testr*   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r   r   r   s           r   test_read_csv_utf_aliasesr_      s`    SEABHF""9-H'..x8D__WT]X_>F&(+r   zfile_path,encoding)))ior   csvz	test1.csvr   ))r`   r   r   r>   r?   ))r`   r   r   zsauron.SHIFT_JIS.csvshiftjisc                 R   | } || }|j                  ||      }t        ||      5 }|j                  |      }|j                  rJ 	 d d d        t        j                  |       t        |d      5 }	|j                  |	|      }|	j                  rJ 	 d d d        t        j                  ||       t        |dd      5 }	|j                  |	|      }|	j                  rJ 	 d d d        t        j                  ||       y # 1 sw Y   xY w# 1 sw Y   {xY w# 1 sw Y   8xY w)Nr*   rb)moder   )re   	buffering)r   r,   closedr   r   )
r   	file_pathr   datapathr   fpathr   far   fbs
             r   test_binary_mode_file_buffersrm      s    Fi Eux8H	eh	' 2$99}9 (F+	e$	 2h799}9 (F+	e$!	, h799}9 (F+ 
 
 s#    D4"D"DDDD&pass_encodingc                    | }|j                  |      }|j                  dk(  r|du r|dv rt        j                  d       t	        ddgi      }|j                  d|      5 }|j                  d	       |j                  d
       |j                  ||r|nd       }	t        j                  |	|       d d d        y # 1 sw Y   y xY w)NrQ   T)       zThese cases freezefoobarzw+re   r   zfoo
barr   r*   )r\   rS   rU   rV   r   r,   r-   seekr   r   r   )
r   r]   r^   rn   r.   r   r   r   r2   r   s
             r   test_encoding_temp_filerv      s    
 F""9-H}}	!mt&;	X@U()%%)*H	TH	5 0	
	q	XDQ
fh/0 0 0s   !AB::Cc                 ^   | }d}d}d}t        ||gi      }t        j                         5 }|j                  | d| j	                  |             |j                  d       |j                  ||      }t        j                  ||       |j                  rJ 	 d d d        y # 1 sw Y   y xY w)Nz	shift-jisu	   てすとu   こむ
r   r*   )
r   tempfileNamedTemporaryFiler-   r   ru   r   r   r   rg   )r   r   r   titler   r   r2   r   s           r   test_encoding_named_temp_filer|      s    FHED%$)H		$	$	& !	5'D6"))(34	q	X6
fh/88|8  s   A.B##B,)r   r%   z	utf-16-bez	utf-16-lezutf-32c                     d}t        |j                  |             }t        |d|       }t        ddgddgdd	ggd
dg      }t	        j
                  ||       y )Nu   a	b
：foo	0
bar	1
baz	2r$   )	delimiterr   u   ：foor   rs   r!   bazr'   rF   rG   )r   r   r   )r   r   encoded_datar   r   s        r   %test_parse_encoded_special_charactersr      sc     -D4;;x01LldXFFmeQZ%4c
H &(+r   )r   Nr%   r	   r?   c                 Z   | }t        g dg dg dd      }|j                  |d|       |j                  dk(  r;d}t        j                  t
        |	      5  |j                  ||d
       d d d        y |j                  ||d
      }t        j                  ||       y # 1 sw Y   y xY w)N)Raphael	DonatellozMiguel AngelLeonardo)redpurpleorangeblue)saizbo staffnunchunkkatana)namemaskweaponF)indexr   rQ   BThe 'memory_map' option is not supported with the 'pyarrow' enginematchT)r   
memory_map)	r   to_csvrS   rU   raises
ValueErrorr   r   r   )r   r   r.   r   r   msgdfs          r   test_encoding_memory_mapr      s     FH7?	
H OOIUXO>}}	!R]]:S1 	KOOITOJ	K	X$	GB"h'	Ks   B!!B*c                 p   | }t        dgdz        }d|j                  d<   |j                  |ddd       |j                  d	k(  r;d
}t	        j
                  t        |      5  |j                  |dd       ddd       y|j                  |dd      }t        j                  ||       y# 1 sw Y   yxY w)zO
    Chunk splits a multibyte character with memory_map=True

    GH 43540
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  Fr   r   r    r   rQ   r   r   NT)r    r   )
r   ilocr   rS   rU   r   r   r   r   r   )r   r.   r   r   r   dfrs         r    test_chunk_splits_multibyte_charr     s     F	d*	+B %BGGDMIIiuUWIE}}	!R]]:S1 	EOOIdtOD	E
//)DT/
BC#r"	Es   #B,,B5c           	         g }d}d}d}t        t        |      t        |      |      D ]]  }dj                  t        ||dz         D cg c]  }t        |       c}      dz   }	 |j	                  d       |j                  |       _ | }	t        |      }
|
j                  |ddd       |	j                  d	k(  r<d
}t        j                  t        |      5  |	j                  |ddd       ddd       y|	j                  |ddd      }t        j                  |
|       yc c}w # t
        $ r Y 
w xY w# 1 sw Y   yxY w)zg
    GH 43787

    Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
        u   𐂀 rx   r   Fr   rQ   r   r   NT)r    r   r   )rangeordr9   chrr   UnicodeEncodeErrorappendr   r   rS   rU   r   r   r   r   r   )r   r.   linesline_length
start_charend_charlnumcliner   r   r   r   s                r   test_readcsv_memmap_utf8r     s7    EKJH c*os8}kB wwdD4K(@A1AABTI	KK  	T F	5	BIIiuUWIE}}	!R]]:S1 	WOOIdtgOV	W
//)DTG/
TC"c"# B " 			Ws$   D!
$D&D6&	D32D36D?pyarrow_xfailre   zw+bzw+tc                    | }d}d|v rd}t        j                  |d      5 }|j                  |       |j                  d       |j	                  |      }d d d        t        g dg      }t        j                  |       y # 1 sw Y   .xY w)Ns   abcdtabcdr   rt   r   r   )ry   SpooledTemporaryFiler-   ru   r   r   r   r   )r   re   r   contenthandler   r   s          r   test_not_readabler   @  s     FG
d{		&	&D7	C %vWA__V$% fX.H"h'% %s   4BB)%__doc__r`   r   r   r7   ry   numpynprU   pandasr   r   pandas._testing_testingr   markfilterwarnings
pytestmarkusefixturesskip_pyarrowr   r"   parametrizer4   r<   rB   nanrW   r_   rm   rv   r|   r   r   r   r   r    r   r   <module>r      sM  
 
    [[''C
 {{&&~6, , , d,%GH, I - ,,	  
aS	K%s+	'C5!C:.	3%T:QC@ e7VVQK	
&,'&,.	, ,,0 4-80 90*( G,, %ST( U(,#2#B )%0( 1 *(r   