a
    i,Me :                     @   s   d Z ddlZddlZddlZddlmZmZmZ ddlm	Z	 ddl
mZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ G dd dZdS )a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
    N)ListOptionalUnion   )CharSetGroupProber)CharSetProber)
InputStateLanguageFilterProbingState)EscCharSetProber)Latin1Prober)MacRomanProber)MBCSGroupProber)
ResultDict)SBCSGroupProber)UTF1632Proberc                	   @   s   e Zd ZdZdZedZedZedZ	dddd	d
ddddZ
ddddddddZejdfeeddddZeedddZeedddZeee dddZddd d!Zeeef dd"d#d$Zedd%d&ZdS )'UniversalDetectoraq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    g?s   [-]s   (|~{)s   [-]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8
iso-8859-9ziso-8859-13zISO-8859-11ZGB18030ZCP949UTF-16)asciir   ztis-620r   gb2312zeuc-krzutf-16leFN)lang_filtershould_rename_legacyreturnc                 C   sd   d | _ d | _g | _d dd d| _d| _d| _tj| _d| _	|| _
tt| _d| _|| _|   d S )N        encoding
confidencelanguageF    )_esc_charset_prober_utf1632_prober_charset_probersresultdone	_got_datar   
PURE_ASCII_input_state
_last_charr   logging	getLogger__name__logger_has_win_bytesr   reset)selfr   r    r1   T/var/www/webapp/lib/python3.9/site-packages/pip/_vendor/chardet/universaldetector.py__init__d   s     zUniversalDetector.__init__)r   c                 C   s   | j S N)r(   r0   r1   r1   r2   input_state{   s    zUniversalDetector.input_statec                 C   s   | j S r4   )r.   r5   r1   r1   r2   has_win_bytes   s    zUniversalDetector.has_win_bytesc                 C   s   | j S r4   )r#   r5   r1   r1   r2   charset_probers   s    z!UniversalDetector.charset_probersc                 C   sf   dddd| _ d| _d| _d| _tj| _d| _| jr>| j	  | j
rN| j
	  | jD ]}|	  qTdS )z
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        Nr   r   Fr    )r$   r%   r&   r.   r   r'   r(   r)   r!   r/   r"   r#   )r0   proberr1   r1   r2   r/      s    


zUniversalDetector.reset)byte_strr   c                 C   s  | j r
dS |sdS t|ts$t|}| js|tjrFdddd| _nv|tjtj	frhdddd| _nT|drdddd| _n:|d	rd
ddd| _n |tj
tjfrdddd| _d| _| jd durd| _ dS | jtjkr(| j|rtj| _n*| jtjkr(| j| j| r(tj| _|dd | _| jsFt | _| jjtjkr| j|tjkr| jj| j dd| _d| _ dS | jtjkr| jst| j | _| j|tjkr| jj| j | jj!d| _d| _ n| jtjkr| j"sBt#| j g| _"| j t$j%@ r&| j"&t'  | j"&t(  | j"&t)  | j"D ]:}||tjkrH|j| |j!d| _d| _  qqH| j*|rd| _+dS )a  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Nz	UTF-8-SIG      ? r   zUTF-32s     zX-ISO-10646-UCS-4-3412s     zX-ISO-10646-UCS-4-2143r   Tr   ),r%   
isinstance	bytearrayr&   
startswithcodecsBOM_UTF8r$   BOM_UTF32_LEBOM_UTF32_BEBOM_LEBOM_BEr(   r   r'   HIGH_BYTE_DETECTORsearch	HIGH_BYTEESC_DETECTORr)   	ESC_ASCIIr"   r   stater
   	DETECTINGfeedFOUND_ITcharset_nameget_confidencer!   r   r   r   r#   r   r	   NON_CJKappendr   r   r   WIN_BYTE_DETECTORr.   )r0   r:   r9   r1   r1   r2   rN      s    









zUniversalDetector.feedc           	   	   C   s  | j r| jS d| _ | js&| jd n| jtjkrBdddd| _n| jtjkrd}d}d}| j	D ]"}|sjq`|
 }||kr`|}|}q`|r|| jkr|j}|dusJ | }|
 }|d	r| jr| j||}| jr| j|pd |}|||jd| _| j tjkr| jd
 du r| jd | j	D ]`}|s@q2t|trv|jD ] }| jd|j|j|
  qRn| jd|j|j|
  q2| jS )z
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        Tzno data received!r   r;   r<   r   Nr   ziso-8859r   z no probers hit minimum thresholdz%s %s confidence = %s)r%   r$   r&   r-   debugr(   r   r'   rI   r#   rQ   MINIMUM_THRESHOLDrP   lowerr@   r.   ISO_WIN_MAPgetr   
LEGACY_MAPr   getEffectiveLevelr*   DEBUGr>   r   probers)	r0   Zprober_confidenceZmax_prober_confidenceZ
max_proberr9   rP   lower_charset_namer   Zgroup_proberr1   r1   r2   close  sr    	




zUniversalDetector.close)r,   
__module____qualname____doc__rV   recompilerG   rJ   rT   rX   rZ   r	   ALLboolr3   propertyintr6   r7   r   r   r8   r/   r   bytesr?   rN   r   r_   r1   r1   r1   r2   r   8   sN   


 r   )rb   rA   r*   rc   typingr   r   r   charsetgroupproberr   charsetproberr   enumsr   r	   r
   Z	escproberr   Zlatin1proberr   Zmacromanproberr   Zmbcsgroupproberr   
resultdictr   Zsbcsgroupproberr   Zutf1632proberr   r   r1   r1   r1   r2   <module>   s   