a
    Xh/                     @  sL  d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d	d
lmZmZmZmZmZmZmZ e
eddddddZe
eddddddZe
eddddddZe
eddddddZe
eddddddZe
eddddddZe
eddddddZe
eddddddZ e
eddddd d!Z!e
eddddd"d#Z"e
eddddd$d%Z#e
eddddd&d'Z$e
eddddd(d)Z%e
eddddd*d+Z&e
eddddd,d-Z'e
eddddd.d/Z(e
eddddd0d1Z)e
e*edddd2d3d4Z+e
eddddd5d6Z,d]d8d9dd:d;d<Z-e
d=dddd>d?d@Z.d8dAdBdCdDZ/dddEdFdGZ0d^ddddIdJdKZ1dddLdMdNdOZ2ddddMdPdQZ3dRej4dSfdd9ddTdUdVdWZ5d_d8ddXd9ddd8dddYdZ
d[d\Z6dS )`    )annotationsN)IncrementalDecoder)aliases)	lru_cache)findall)	Generator)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATIONCOMMON_CJK_CHARACTERS)maxsizestrbool)	characterreturnc                 C  sd   zt | }W n ty"   Y dS 0 d|v pbd|v pbd|v pbd|v pbd|v pbd|v pbd|v pbd	|v S )
NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEzWITH MACRONzWITH RING ABOVEunicodedataname
ValueErrorr   description r   [/var/www/viveiro_nova_floresta/venv/lib/python3.9/site-packages/charset_normalizer/utils.pyis_accentuated   s&    r   c                 C  s.   t | }|s| S |d}tt|d dS )N r      )r   decompositionsplitchrint)r   Z
decomposedcodesr   r   r   remove_accent-   s
    

r&   z
str | Nonec                 C  s.   t | }t D ]\}}||v r|  S qdS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   Zcharacter_ord
range_nameZ	ord_ranger   r   r   unicode_range8   s
    
r*   c                 C  s,   zt | }W n ty"   Y dS 0 d|v S )NFZLATINr   r   r   r   r   is_latinF   s
    r+   c                 C  s2   t | }d|v rdS t| }|d u r*dS d|v S )NPTFPunctuationr   categoryr*   r   character_categorycharacter_ranger   r   r   is_punctuationO   s    
r3   c                 C  sB   t | }d|v sd|v rdS t| }|d u r2dS d|v o@|dkS )NSNTFZFormsZLor.   r0   r   r   r   	is_symbol^   s    
r6   c                 C  s$   t | }|d u rdS d|v p"d|v S )NFZ	EmoticonsZPictographs)r*   )r   r2   r   r   r   is_emoticonm   s    r7   c                 C  s.   |   s| dv rdS t| }d|v p,|dv S )N>   u   ｜+<>TZ>   ZPdZPoZPc)isspacer   r/   )r   r1   r   r   r   is_separatorw   s    
r=   c                 C  s   |   |  kS N)islowerisupperr   r   r   r   is_case_variable   s    rB   c                 C  s,   zt | }W n ty"   Y dS 0 d|v S )NFZCJKr   r   Zcharacter_namer   r   r   is_cjk   s
    rD   c                 C  s,   zt | }W n ty"   Y dS 0 d|v S )NFZHIRAGANAr   rC   r   r   r   is_hiragana   s
    rE   c                 C  s,   zt | }W n ty"   Y dS 0 d|v S )NFZKATAKANAr   rC   r   r   r   is_katakana   s
    rF   c                 C  s,   zt | }W n ty"   Y dS 0 d|v S )NFZHANGULr   rC   r   r   r   	is_hangul   s
    rG   c                 C  s,   zt | }W n ty"   Y dS 0 d|v S )NFZTHAIr   rC   r   r   r   is_thai   s
    rH   c                 C  s,   zt | }W n ty"   Y dS 0 d|v S )NFARABICr   rC   r   r   r   	is_arabic   s
    rJ   c                 C  s4   zt | }W n ty"   Y dS 0 d|v o2d|v S )NFrI   zISOLATED FORMr   rC   r   r   r   is_arabic_isolated_form   s
    rK   c                 C  s   | t vS r>   )r   rA   r   r   r   is_cjk_uncommon   s    rL   )r)   r   c                   s   t  fddtD S )Nc                 3  s   | ]}| v V  qd S r>   r   ).0keywordr)   r   r   	<genexpr>       z-is_unicode_range_secondary.<locals>.<genexpr>)anyr   rO   r   rO   r   is_unicode_range_secondary   s    rS   c                 C  s(   |   du o&|  du o&| dko&| dkS )NFu   ﻿)r<   isprintablerA   r   r   r   is_unprintable   s    
rV       bytesr$   )sequencesearch_zoner   c                 C  s   t | tstt| }tt| dt|| jddd}t|dkrHdS |D ]N}| 	dd}t
 D ]0\}}||kr|    S ||krh|    S qhqLdS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nasciiignoreerrorsr   -_)
isinstancerX   	TypeErrorlenr   r   mindecodelowerreplacer   r(   )rY   rZ   Zseq_lenresultsZspecified_encodingencoding_aliasencoding_ianar   r   r   any_specified_encoding   s"    
rk      )r   r   c                 C  s    | dv pt td|  jtS )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_8Z	utf_8_sigutf_32utf_7	utf_32_le	utf_16_le	utf_16_be	utf_32_beutf_16
encodings.)
issubclass	importlibimport_moduler   r   )r   r   r   r   is_multi_byte_encoding  s    
ry   ztuple[str | None, bytes])rY   r   c                 C  sJ   t D ]@}t | }t|tr |g}|D ]}| |r$||f    S q$qdS )z9
    Identify and extract SIG/BOM in given sequence.
    )NrQ   )r
   ra   rX   
startswith)rY   iana_encodingZmarksmarkr   r   r   identify_sig_or_bom  s    

r}   )r{   r   c                 C  s   | dvS )N>   rn   rt   r   )r{   r   r   r   should_strip_sig_or_bom(  s    r~   T)cp_namestrictr   c                 C  sN   |   dd} t D ]\}}| ||fv r|  S q|rJtd|  d| S )zIReturns the Python normalized encoding name (Not the IANA official name).r_   r`   zUnable to retrieve IANA for '')rf   rg   r   r(   r   )r   r   ri   rj   r   r   r   	iana_name,  s    
r   float)iana_name_aiana_name_br   c           	      C  s   t | st |rdS td|  j}td| j}|dd}|dd}d}tdD ]*}t|g}||||krX|d7 }qX|d S )	Ng        ru   r\   r]   r      r	      )ry   rw   rx   r   rangerX   re   )	r   r   Z	decoder_aZ	decoder_bZid_aZid_bZcharacter_match_countiZto_be_decodedr   r   r   cp_similarity=  s    



r   c                 C  s   | t v o|t |  v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   r   r   r   is_cp_similarQ  s    
r   Zcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)sNone)r   levelformat_stringr   c                 C  s:   t | }|| t  }|t | || d S r>   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   r   r   loggerhandlerr   r   r   set_logging_handler\  s
    

r   r   zGenerator[str, None, None])
	sequencesrj   offsets
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadr   c	                 c  s*  |r6|du r6|D ]"}	||	|	|  }
|
s, q4|
V  qn|D ]}	|	| }|t | d krXq:| |	|	|  }|r||du r||| }|j||rdndd}
|r|	dkrt|d}|r|
d | |vrt|	|	d d	D ]H}| || }|r|du r|| }|j|dd}
|
d | |v r qq|
V  q:d S )
NF   r\   r   r]   r   r       )rc   re   rd   r   )r   rj   r   r   r   r   r   r   r   r   chunkZ	chunk_endZcut_sequenceZchunk_partial_size_chkjr   r   r   cut_sequence_chunksi  s>    


r   )rW   )T)N)7
__future__r   rw   r   r   codecsr   Zencodings.aliasesr   	functoolsr   rer   typingr   Z_multibytecodecr   Zconstantr
   r   r   r   r   r   r   r   r&   r*   r+   r3   r6   r7   r=   rB   rD   rE   rF   rG   rH   rJ   rK   rL   rc   rS   rV   rk   ry   r}   r~   r   r   r   INFOr   r   r   r   r   r   <module>   s|   $
									
  