o
    vhH+                     @  s   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ erPd dlmZ d dlmZ G dd deZdS )    )annotations)TYPE_CHECKINGN)lib)import_optional_dependency)ParserErrorParserWarning)find_stack_level)pandas_dtype)
is_integer)arrow_table_to_pandas)
ParserBase)
ReadBuffer)	DataFramec                      sT   e Zd ZdZd fddZddd	Zdd
dZdddZdddZdddZ	  Z
S )ArrowParserWrapperz7
    Wrapper for the pyarrow engine for read_csv()
    srcReadBuffer[bytes]returnNonec                   s$   t  | || _|| _|   d S N)super__init__kwdsr   _parse_kwds)selfr   r   	__class__ d/var/www/html/hyperkenya/venv/lib/python3.10/site-packages/pandas/io/parsers/arrow_parser_wrapper.pyr      s   zArrowParserWrapper.__init__c                 C  sN   | j d}|du rdn|| _| j d }t|trtdt| j d | _dS )z?
        Validates keywords before passing to pyarrow.
        encodingNzutf-8	na_valuesz?The pyarrow engine doesn't support passing a dict for na_values)r   getr   
isinstancedict
ValueErrorlistr   )r   r   r   r   r   r   r   &   s   

zArrowParserWrapper._parse_kwdsc                 C  s  ddddddd}|  D ]\}}|| jv r'| j|dur'| j|| j|< q| j}t|tr4|g}nd}|| jd	< d
d | j  D | _| jd}|durt|rZ|| jd< n*|t	j
jkrfd| jd< n|t	j
jkrwddd}|| jd< n|t	j
jkrdd | jd< dd | j  D | _d| jd v | jd< | jdu rd| jv rdd | jd D | jd< | jdu | jdur| jn| jd | jd| _dS )z:
        Rename some arguments to pass to pyarrow
        include_columnsnull_valuesescape_charignore_empty_linesdecimal_point
quote_char)usecolsr   
escapecharskip_blank_linesdecimal	quotecharNtimestamp_parsersc                 S  &   i | ]\}}|d ur|dv r||qS )N)	delimiterr*   r'   r(   r   .0option_nameoption_valuer   r   r   
<dictcomp>S       z;ArrowParserWrapper._get_pyarrow_options.<locals>.<dictcomp>on_bad_linesinvalid_row_handlerr   strc                 S  s.   t jd| j d| j d| j tt d dS )Nz	Expected z columns, but found z: )
stacklevelskip)warningswarnexpected_columnsactual_columnstextr   r   )invalid_rowr   r   r   handle_warninge   s   
z?ArrowParserWrapper._get_pyarrow_options.<locals>.handle_warningc                 S  s   dS )Nr=   r   )_r   r   r   <lambda>p   s    z9ArrowParserWrapper._get_pyarrow_options.<locals>.<lambda>c                 S  r1   )N)r%   r&   true_valuesfalse_valuesr)   r0   r   r3   r   r   r   r7   r   r8    strings_can_be_nullc                 S  s   g | ]}d | qS )fr   )r4   nr   r   r   
<listcomp>   s    
z;ArrowParserWrapper._get_pyarrow_options.<locals>.<listcomp>skiprows)autogenerate_column_names	skip_rowsr   )r   r;   )itemsr   r    popdate_formatr!   r;   parse_optionscallabler   BadLineHandleMethodERRORWARNSKIPconvert_optionsheaderr   read_options)r   mappingpandas_namepyarrow_namerS   r9   rD   r   r   r   _get_pyarrow_options4   s^   


	
z'ArrowParserWrapper._get_pyarrow_optionsframer   c              
     s  t  j}d}| jdu r<| jdu r| jdu rt|| _t | j|kr8dd t|t | j D }|| j | _d}| j _|  j \} | jdur| j }t| jD ]S\}}t	|rd j| ||< n| jvrqt
d| d| jdur| j|dur|| j|fn j| | j j| f\}	}
|
dur |	 |
 |	< | j|	= qT j|ddd | jdu r|sdgt  jj  j_| jdurt| jtrۇ fd	d
| j D | _nt| j| _z	 | j W  S  ty } zt
|d}~ww  S )z
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        TNc                 S  s   g | ]}t |qS r   )r;   r4   xr   r   r   rM      s    z>ArrowParserWrapper._finalize_pandas_output.<locals>.<listcomp>FzIndex z invalid)dropinplacec                   s$   i | ]\}}| j v r|t|qS r   )columnsr	   )r4   kvra   r   r   r7      s
    
z>ArrowParserWrapper._finalize_pandas_output.<locals>.<dictcomp>)lenrf   r[   namesrange_do_date_conversions	index_colcopy	enumerater
   r#   dtyper    astype	set_indexindexr!   r"   rQ   r	   	TypeError)r   ra   num_colsmulti_index_namedcolumns_prefixrE   index_to_setiitemkey	new_dtypeer   ri   r   _finalize_pandas_output   sZ   











z*ArrowParserWrapper._finalize_pandas_outputc                 C  s8   t |rtdd |D stdt|rtdd S )Nc                 s      | ]}t |tV  qd S r   r!   r;   rb   r   r   r   	<genexpr>   s    z7ArrowParserWrapper._validate_usecols.<locals>.<genexpr>zwThe pyarrow engine does not allow 'usecols' to be integer column positions. Pass a list of string column names instead.z=The pyarrow engine does not allow 'usecols' to be a callable.)r   is_list_likeallr#   rU   )r   r+   r   r   r   _validate_usecols   s   z$ArrowParserWrapper._validate_usecolsc              
   C  s  t d}t d}|   z|jdi | j}W n2 tyI   | jdd}|dur.| | | jdt }t	|rDt
dd |D sHtd w z|j| j|jdi | j|jdi | j|d	}W n |jyv } zt||d}~ww | jd
 }|tju r|j}	| }
t|jjD ]\}}|j|r|	||	||
}	q||	}t  tddt  t!||dd}W d   n1 sw   Y  | "|S )z
        Reads the contents of a CSV file into a DataFrame and
        processes it according to the kwargs passed in the
        constructor.

        Returns
        -------
        DataFrame
            The DataFrame created from the CSV file.
        pyarrowzpyarrow.csvr%   Nr&   c                 s  r   r   r   rb   r   r   r   r      s    

z*ArrowParserWrapper.read.<locals>.<genexpr>z9The 'pyarrow' engine requires all na_values to be strings)r\   rT   rZ   dtype_backendignorezmake_block is deprecatedT)r   null_to_int64r   )#r   r`   ConvertOptionsrZ   ru   r    r   setr   r   r   read_csvr   ReadOptionsr\   ParseOptionsrT   ArrowInvalidr   r   
no_defaultschemafloat64rp   typesis_nullfield	with_typecastr>   catch_warningsfilterwarningsDeprecationWarningr   r   )r   papyarrow_csvrZ   includenullstabler~   r   
new_schemanew_typerz   
arrow_typera   r   r   r   read   sf   








zArrowParserWrapper.read)r   r   r   r   )r   r   )ra   r   r   r   )r   r   )__name__
__module____qualname____doc__r   r   r`   r   r   r   __classcell__r   r   r   r   r      s    


[
Jr   )
__future__r   typingr   r>   pandas._libsr   pandas.compat._optionalr   pandas.errorsr   r   pandas.util._exceptionsr   pandas.core.dtypes.commonr	   pandas.core.dtypes.inferencer
   pandas.io._utilr   pandas.io.parsers.base_parserr   pandas._typingr   pandasr   r   r   r   r   r   <module>   s    