a
    XhN                     @  sf  d dl mZ d dlmZ d dlmZ ddlmZmZm	Z	 ddl
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ G dd dZG d	d
 d
eZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$G dd deZ%G dd deZ&eddddddd d!Z'ed"dd+d%d&dd&d'd(d)Z(d*S ),    )annotations)	lru_cache)	getLogger   )COMMON_SAFE_ASCII_CHARACTERSTRACEUNICODE_SECONDARY_RANGE_KEYWORD)is_accentuated	is_arabicis_arabic_isolated_formis_case_variableis_cjkis_emoticon	is_hangulis_hiraganais_katakanais_latinis_punctuationis_separator	is_symbolis_thaiis_unprintableremove_accentunicode_rangeis_cjk_uncommonc                   @  sP   e Zd ZdZdddddZddddd	Zdd
ddZedd
ddZdS )MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    strbool	characterreturnc                 C  s   t dS )z@
        Determine if given character should be fed in.
        NNotImplementedErrorselfr    r%   X/var/www/viveiro_nova_floresta/venv/lib/python3.9/site-packages/charset_normalizer/md.pyeligible'   s    zMessDetectorPlugin.eligibleNonec                 C  s   t dS )z
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        Nr!   r#   r%   r%   r&   feed-   s    zMessDetectorPlugin.feedr    c                 C  s   t dS )zB
        Permit to reset the plugin to the initial state.
        Nr!   r$   r%   r%   r&   reset4   s    zMessDetectorPlugin.resetfloatc                 C  s   t dS )z
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        Nr!   r+   r%   r%   r&   ratio:   s    zMessDetectorPlugin.ratioN)	__name__
__module____qualname____doc__r'   r)   r,   propertyr.   r%   r%   r%   r&   r   !   s   r   c                   @  sZ   e Zd ZddddZddddd	Zdddd
dZddddZeddddZdS ) TooManySymbolOrPunctuationPluginr(   r*   c                 C  s"   d| _ d| _d| _d | _d| _d S )Nr   F)_punctuation_count_symbol_count_character_count_last_printable_charZ_frenzy_symbol_in_wordr+   r%   r%   r&   __init__D   s
    z)TooManySymbolOrPunctuationPlugin.__init__r   r   r   c                 C  s   |  S Nisprintabler#   r%   r%   r&   r'   L   s    z)TooManySymbolOrPunctuationPlugin.eligiblec                 C  sp   |  j d7  _ || jkrf|tvrft|r8|  jd7  _n.| du rft|rft|du rf|  jd7  _|| _d S )Nr   F   )	r7   r8   r   r   r5   isdigitr   r   r6   r#   r%   r%   r&   r)   O   s    

z%TooManySymbolOrPunctuationPlugin.feedc                 C  s   d| _ d| _d| _d S Nr   )r5   r7   r6   r+   r%   r%   r&   r,   a   s    z&TooManySymbolOrPunctuationPlugin.resetr-   c                 C  s0   | j dkrdS | j| j | j  }|dkr,|S dS )Nr           333333?)r7   r5   r6   )r$   Zratio_of_punctuationr%   r%   r&   r.   f   s    

z&TooManySymbolOrPunctuationPlugin.ratioN	r/   r0   r1   r9   r'   r)   r,   r3   r.   r%   r%   r%   r&   r4   C   s   r4   c                   @  sZ   e Zd ZddddZddddd	Zdddd
dZddddZeddddZdS )TooManyAccentuatedPluginr(   r*   c                 C  s   d| _ d| _d S r?   r7   _accentuated_countr+   r%   r%   r&   r9   s   s    z!TooManyAccentuatedPlugin.__init__r   r   r   c                 C  s   |  S r:   )isalphar#   r%   r%   r&   r'   w   s    z!TooManyAccentuatedPlugin.eligiblec                 C  s(   |  j d7  _ t|r$|  jd7  _d S Nr   )r7   r	   rE   r#   r%   r%   r&   r)   z   s    zTooManyAccentuatedPlugin.feedc                 C  s   d| _ d| _d S r?   rD   r+   r%   r%   r&   r,      s    zTooManyAccentuatedPlugin.resetr-   c                 C  s*   | j dk rdS | j| j  }|dkr&|S dS )N   r@   gffffff?rD   )r$   Zratio_of_accentuationr%   r%   r&   r.      s    
zTooManyAccentuatedPlugin.ratioNrB   r%   r%   r%   r&   rC   r   s   rC   c                   @  sZ   e Zd ZddddZddddd	Zdddd
dZddddZeddddZdS )UnprintablePluginr(   r*   c                 C  s   d| _ d| _d S r?   )_unprintable_countr7   r+   r%   r%   r&   r9      s    zUnprintablePlugin.__init__r   r   r   c                 C  s   dS NTr%   r#   r%   r%   r&   r'      s    zUnprintablePlugin.eligiblec                 C  s(   t |r|  jd7  _|  jd7  _d S rG   )r   rJ   r7   r#   r%   r%   r&   r)      s    zUnprintablePlugin.feedc                 C  s
   d| _ d S r?   )rJ   r+   r%   r%   r&   r,      s    zUnprintablePlugin.resetr-   c                 C  s   | j dkrdS | jd | j  S )Nr   r@   rH   )r7   rJ   r+   r%   r%   r&   r.      s    
zUnprintablePlugin.ratioNrB   r%   r%   r%   r&   rI      s   rI   c                   @  sZ   e Zd ZddddZddddd	Zdddd
dZddddZeddddZdS )SuspiciousDuplicateAccentPluginr(   r*   c                 C  s   d| _ d| _d | _d S r?   _successive_countr7   _last_latin_characterr+   r%   r%   r&   r9      s    z(SuspiciousDuplicateAccentPlugin.__init__r   r   r   c                 C  s   |  ot|S r:   )rF   r   r#   r%   r%   r&   r'      s    z(SuspiciousDuplicateAccentPlugin.eligiblec                 C  st   |  j d7  _ | jd urjt|rjt| jrj| rJ| j rJ|  jd7  _t|t| jkrj|  jd7  _|| _d S rG   )r7   rO   r	   isupperrN   r   r#   r%   r%   r&   r)      s    z$SuspiciousDuplicateAccentPlugin.feedc                 C  s   d| _ d| _d | _d S r?   rM   r+   r%   r%   r&   r,      s    z%SuspiciousDuplicateAccentPlugin.resetr-   c                 C  s   | j dkrdS | jd | j  S )Nr   r@   r=   )r7   rN   r+   r%   r%   r&   r.      s    
z%SuspiciousDuplicateAccentPlugin.ratioNrB   r%   r%   r%   r&   rL      s   rL   c                   @  sZ   e Zd ZddddZddddd	Zdddd
dZddddZeddddZdS )SuspiciousRanger(   r*   c                 C  s   d| _ d| _d | _d S r?   )"_suspicious_successive_range_countr7   _last_printable_seenr+   r%   r%   r&   r9      s    zSuspiciousRange.__init__r   r   r   c                 C  s   |  S r:   r;   r#   r%   r%   r&   r'      s    zSuspiciousRange.eligiblec                 C  sx   |  j d7  _ | s&t|s&|tv r0d | _d S | jd u rD|| _d S t| j}t|}t||rn|  jd7  _|| _d S rG   )r7   isspacer   r   rS   r    is_suspiciously_successive_rangerR   )r$   r   unicode_range_aunicode_range_br%   r%   r&   r)      s"    


zSuspiciousRange.feedc                 C  s   d| _ d| _d | _d S r?   )r7   rR   rS   r+   r%   r%   r&   r,      s    zSuspiciousRange.resetr-   c                 C  s"   | j dkrdS | jd | j  }|S )N   r@   r=   )r7   rR   )r$   Zratio_of_suspicious_range_usager%   r%   r&   r.      s    
zSuspiciousRange.ratioNrB   r%   r%   r%   r&   rQ      s   rQ   c                   @  sZ   e Zd ZddddZddddd	Zdddd
dZddddZeddddZdS )SuperWeirdWordPluginr(   r*   c                 C  s@   d| _ d| _d| _d| _d| _d| _d| _d| _d| _d| _	d S )Nr   F )
_word_count_bad_word_count_foreign_long_count_is_current_word_bad_foreign_long_watchr7   _bad_character_count_buffer_buffer_accent_count_buffer_glyph_countr+   r%   r%   r&   r9      s    zSuperWeirdWordPlugin.__init__r   r   r   c                 C  s   dS rK   r%   r#   r%   r%   r&   r'     s    zSuperWeirdWordPlugin.eligiblec                 C  s  |  r|  j|7  _t|r,|  jd7  _| jdu rt|du sJt|rt|du rt|du rt|du rt	|du rt
|du rd| _t|st|st|st	|st
|r|  jd7  _d S | jsd S | st|st|r\| jr\|  jd7  _t| j}|  j|7  _|dkr| j| dkr:d| _npt| jd r| jd  rtdd | jD du r|  jd7  _d| _n | jdkrd| _|  jd7  _|d	kr| jrd
d t| jtd|D }d}|rt|| dkrd}|s|  jd7  _d| _| jrB|  jd7  _|  jt| j7  _d| _d| _d| _d| _d| _n6|dvr| du rt|rd| _|  j|7  _d S )Nr   FT         ?c                 s  s   | ]}|  V  qd S r:   rP   ).0_r%   r%   r&   	<genexpr>8      z,SuperWeirdWordPlugin.feed.<locals>.<genexpr>   c                 S  s   g | ]\}}|  r|qS r%   rg   )rh   cir%   r%   r&   
<listcomp>@  s   z-SuperWeirdWordPlugin.feed.<locals>.<listcomp>r   rA   rZ   >   >=-~ri   |<)rF   ra   r	   rb   r_   r   r   r   r   r   r   rc   rT   r   r   r[   lenr7   r^   rP   allr]   zipranger\   r`   r>   r   )r$   r   Zbuffer_lengthZcamel_case_dstZprobable_camel_casedr%   r%   r&   r)     s    





	


zSuperWeirdWordPlugin.feedc                 C  s4   d| _ d| _d| _d| _d| _d| _d| _d| _d S )NrZ   Fr   )ra   r^   r_   r\   r[   r7   r`   r]   r+   r%   r%   r&   r,   _  s    zSuperWeirdWordPlugin.resetr-   c                 C  s$   | j dkr| jdkrdS | j| j S )N
   r   r@   )r[   r]   r`   r7   r+   r%   r%   r&   r.   i  s    zSuperWeirdWordPlugin.ratioNrB   r%   r%   r%   r&   rY      s   Q
rY   c                   @  s^   e Zd ZdZddddZdddd	d
ZdddddZddddZeddddZ	dS )CjkUncommonPluginz<
    Detect messy CJK text that probably means nothing.
    r(   r*   c                 C  s   d| _ d| _d S r?   r7   _uncommon_countr+   r%   r%   r&   r9   v  s    zCjkUncommonPlugin.__init__r   r   r   c                 C  s   t |S r:   )r   r#   r%   r%   r&   r'   z  s    zCjkUncommonPlugin.eligiblec                 C  s,   |  j d7  _ t|r(|  jd7  _d S d S rG   )r7   r   r}   r#   r%   r%   r&   r)   }  s    zCjkUncommonPlugin.feedc                 C  s   d| _ d| _d S r?   r|   r+   r%   r%   r&   r,     s    zCjkUncommonPlugin.resetr-   c                 C  s.   | j dk rdS | j| j  }|dkr*|d S dS )NrH   r@   re   rz   r|   )r$   Zuncommon_form_usager%   r%   r&   r.     s    
zCjkUncommonPlugin.ratioN)
r/   r0   r1   r2   r9   r'   r)   r,   r3   r.   r%   r%   r%   r&   r{   q  s   r{   c                   @  sZ   e Zd ZddddZddddd	Zdddd
dZddddZeddddZdS )ArchaicUpperLowerPluginr(   r*   c                 C  s.   d| _ d| _d| _d| _d| _d | _d| _d S )NFr   T)_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalr7   _last_alpha_seen_current_ascii_onlyr+   r%   r%   r&   r9     s    z ArchaicUpperLowerPlugin.__init__r   r   r   c                 C  s   dS rK   r%   r#   r%   r%   r&   r'     s    z ArchaicUpperLowerPlugin.eligiblec                 C  s$  |  ot|}|du }|r| jdkr| jdkrV| du rV| jdu rV|  j| j7  _d| _d| _d | _d| _|  j	d7  _	d| _d S | jdu r|
 du rd| _| jd ur| r| j s| r| j r| jdu r|  jd7  _d| _qd| _nd| _|  j	d7  _	|  jd7  _|| _d S )NFr   @   r   Tr=   )rF   r   r   r>   r   r   r   r   r   r7   isasciirP   islower)r$   r   Zis_concernedZ	chunk_sepr%   r%   r&   r)     sF    


zArchaicUpperLowerPlugin.feedc                 C  s.   d| _ d| _d| _d| _d | _d| _d| _d S )Nr   FT)r7   r   r   r   r   r   r   r+   r%   r%   r&   r,     s    zArchaicUpperLowerPlugin.resetr-   c                 C  s   | j dkrdS | j| j  S )Nr   r@   )r7   r   r+   r%   r%   r&   r.     s    
zArchaicUpperLowerPlugin.ratioNrB   r%   r%   r%   r&   r~     s   *	r~   c                   @  sZ   e Zd ZddddZddddZddd	d
dZddd	ddZeddddZdS )ArabicIsolatedFormPluginr(   r*   c                 C  s   d| _ d| _d S r?   r7   _isolated_form_countr+   r%   r%   r&   r9     s    z!ArabicIsolatedFormPlugin.__init__c                 C  s   d| _ d| _d S r?   r   r+   r%   r%   r&   r,     s    zArabicIsolatedFormPlugin.resetr   r   r   c                 C  s   t |S r:   )r
   r#   r%   r%   r&   r'     s    z!ArabicIsolatedFormPlugin.eligiblec                 C  s(   |  j d7  _ t|r$|  jd7  _d S rG   )r7   r   r   r#   r%   r%   r&   r)     s    zArabicIsolatedFormPlugin.feedr-   c                 C  s   | j dk rdS | j| j  }|S )NrH   r@   r   )r$   Zisolated_form_usager%   r%   r&   r.     s    
zArabicIsolatedFormPlugin.ratioN)	r/   r0   r1   r9   r,   r'   r)   r3   r.   r%   r%   r%   r&   r     s   r      )maxsizez
str | Noner   )rV   rW   r    c                 C  s  | du s|du rdS | |kr dS d| v r4d|v r4dS d| v sDd|v rHdS d| v sXd|v rld| v shd|v rldS |  d| d }}|D ]}|tv rq||v r dS q| dv |dv  }}|s|rd	| v sd	|v rdS |r|rdS d
| v sd
|v r d	| v sd	|v rdS | dks|dkr dS d	| v sHd	|v sH| dv r|dv rd| v s\d|v r`dS d| v std|v rxdS | dks|dkrdS dS )za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    NTFZLatinZ	EmoticonsZ	Combining )HiraganaKatakanaZCJKZHangulzBasic Latin)r   r   PunctuationZForms)splitr   )rV   rW   Zkeywords_range_aZkeywords_range_belZrange_a_jp_charsZrange_b_jp_charsr%   r%   r&   rU     sh    rU   i   皙?Fr   r-   )decoded_sequencemaximum_thresholddebugr    c              	   C  sX  dd t  D }t| d }d}|dk r0d}n|dkr>d}nd	}t| d
 t|D ]d\}}|D ]}	|	|r`|	| q`|dkr|| dks||d krTtdd |D }||krT qqT|rNtd}
|
	t
d| d| d|  t| dkr(|
	t
d| dd   |
	t
d| dd   |D ] }|
	t
|j d|j  q,t|dS )zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    c                 S  s   g | ]
}| qS r%   r%   )rh   Zmd_classr%   r%   r&   ro   N  s   zmess_ratio.<locals>.<listcomp>r   r@   i       r   r      
r   c                 s  s   | ]}|j V  qd S r:   )r.   )rh   dtr%   r%   r&   rj   e  rk   zmess_ratio.<locals>.<genexpr>Zcharset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=   zStarting with: NzEnding with: iz:    )r   __subclasses__rv   rx   ry   r'   r)   sumr   logr   	__class__r.   round)r   r   r   Z	detectorslengthZmean_mess_ratioZ!intermediary_mean_mess_ratio_calcr   indexZdetectorloggerr   r%   r%   r&   
mess_ratioF  sR    


r   N)r   F))
__future__r   	functoolsr   loggingr   Zconstantr   r   r   utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r4   rC   rI   rL   rQ   rY   r{   r~   r   rU   r   r%   r%   r%   r&   <module>   s&   P"/%1v#LI 