Your IP : 216.73.217.13


Current Path : /snap/core/current/usr/lib/python3.5/html/__pycache__/
Upload File :
Current File : //snap/core/current/usr/lib/python3.5/html/__pycache__/parser.cpython-35.pyc



H�iI�@s)dZddlZddlZddlZddlmZdgZejd�Zejd�Z	ejd�Z
ejd�Zejd	�Zejd
�Z
ejd�Zejd�Zejd
�Zejd�Zejdej�Zejd�Zejd�ZGdd�dej�ZdS)zA parser for HTML and XHTML.�N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]�>z--\s*>z$([a-zA-Z][^	

 />]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
         (?:\s*,)*                   # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@sWeZdZdZd:Zdddd�Zdd	�Zd
d�Zdd
�ZdZ	dd�Z
dd�Zdd�Zdd�Z
dd�Zddd�Zdd�Zdd�Zd d!�Zd"d#�Zd$d%�Zd&d'�Zd(d)�Zd*d+�Zd,d-�Zd.d/�Zd0d1�Zd2d3�Zd4d5�Zd6d7�Zd8d9�ZdS);raEFind tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    �script�style�convert_charrefsTcCs||_|j�dS)z�Initialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r�reset)�selfr�r
�!/usr/lib/python3.5/html/parser.py�__init__Xs	zHTMLParser.__init__cCs8d|_d|_t|_d|_tjj|�dS)z1Reset this instance.  Loses all unprocessed data.�z???N)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser)r	r
r
rras
				zHTMLParser.resetcCs!|j||_|jd�dS)z�Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        rN)r�goahead)r	�datar
r
r�feediszHTMLParser.feedcCs|jd�dS)zHandle any buffered data.�N)r)r	r
r
r�closerszHTMLParser.closeNcCs|jS)z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_text)r	r
r
r�get_starttag_textxszHTMLParser.get_starttag_textcCs2|j�|_tjd|jtj�|_dS)Nz</\s*%s\s*>)�lowerr�re�compile�Ir)r	�elemr
r
r�set_cdata_mode|szHTMLParser.set_cdata_modecCst|_d|_dS)N)rrr)r	r
r
r�clear_cdata_mode�s	zHTMLParser.clear_cdata_modecCs�|j}d}t|�}x(||krE|jr�|jr�|jd|�}|dkr�|jdt||d��}|dkr�tjd�j	||�r�P|}n:|j
j	||�}|r�|j�}n|jr�P|}||kr<|jr%|jr%|jt
|||���n|j|||��|j||�}||kr[P|j}|d|�r-tj||�r�|j|�}	n�|d|�r�|j|�}	n�|d|�r�|j|�}	ns|d|�r�|j|�}	nR|d	|�r|j|�}	n1|d
|ks1|rK|jd�|d
}	nP|	dkr|s_Ptj||�rtn�|d|�r�|d|kr�|jd�qtj||�r�q|j||dd��n<|d|�rA|}x5dD]-}
|j|
|d�r�|t|
�8}Pq�W|j||d|��n�|d|�rn|j||dd��n�|||d�j�dkr�|j||dd��nf|d	|�r�|j||dd��n9|d|�r|j||dd��ntd��|}	|j||	�}q|d|�rtj||�}|r�|j �dd�}|j!|�|j"�}	|d|	d
�s�|	d
}	|j||	�}qqBd||d�kr�|j|||d��|j||d�}Pq|d|�r0t#j||�}|r�|j d
�}|j$|�|j"�}	|d|	d
�so|	d
}	|j||	�}qt%j||�}|r�|r�|j �||d�kr�|j"�}	|	|kr�|}	|j||d
�}PqB|d
|kr,|jd�|j||d
�}qBPqdstd��qW|r�||kr�|jr�|jr�|jr�|jt
|||���n|j|||��|j||�}||d�|_dS)Nr�<�&�"z[\s;]z</z<!--z<?z<!r��--!�--�-�z	<![CDATA[��	z	<!doctypezwe should not get here!z&#�;zinteresting.search() lied)r'r(r)���)&r�lenrr�find�rfind�maxrr�searchr�start�handle_datarZ	updatepos�
startswith�starttagopen�match�parse_starttag�parse_endtag�
parse_comment�parse_pi�parse_html_declaration�
endtagopen�handle_comment�endswith�unknown_declr�handle_decl�	handle_pi�AssertionError�charref�group�handle_charref�end�	entityref�handle_entityref�
incomplete)r	rHr�i�n�jZampposr8r6�k�suffix�namer
r
rr�s�			 	


 



"
 zHTMLParser.goaheadcCs�|j}|||d�dks/td��|||d�dkrV|j|�S|||d�dkr}|j|�S|||d�j�d	kr�|jd
|d�}|dkr�d
S|j||d|��|dS|j|�SdS)Nr&z<!z+unexpected call to parse_html_declaration()r*z<!--r+z<![r,z	<!doctyperrr.r.)rrDr;Zparse_marked_sectionrr0rB�parse_bogus_comment)r	rLr�gtposr
r
rr=s	&

 z!HTMLParser.parse_html_declarationrcCs~|j}|||d�dks/td��|jd|d�}|dkrUd	S|rv|j||d|��|dS)
Nr&�<!�</z"unexpected call to parse_comment()rr)rTrUr.r.)rrDr0r?)r	rLZreportr�posr
r
rrR(s	&zHTMLParser.parse_bogus_commentcCs�|j}|||d�dks/td��tj||d�}|sOdS|j�}|j||d|��|j�}|S)Nr&z<?zunexpected call to parse_pi()rr.)rrD�picloser3r4rCrH)r	rLrr8rNr
r
rr<4s	&zHTMLParser.parse_picCs�d|_|j|�}|dkr(|S|j}|||�|_g}tj||d�}|srtd��|j�}|jd�j�|_	}x�||kr�t
j||�}|s�P|jddd�\}	}
}|
s�d}np|dd�dko|dd�knsO|dd�dkoJ|dd�knr_|dd
�}|rqt|�}|j|	j�|f�|j�}q�W|||�j
�}|dkrC|j�\}
}d
|jkr|
|jjd
�}
t|j�|jjd
�}n|t|j�}|j|||��|S|jd	�re|j||�n,|j||�||jkr�|j|�|S)Nrrz#unexpected call to parse_starttag()r&r+�'�"r�/>�
r.r.r.)rrZ)r�check_for_whole_start_tagr�tagfind_tolerantr8rDrHrFrr�attrfind_tolerantr�append�stripZgetpos�countr/r1r5r@�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSr!)r	rL�endposr�attrsr8rO�tag�mZattrname�restZ	attrvaluerH�lineno�offsetr
r
rr9@sR			00
zHTMLParser.parse_starttagcCs�|j}tj||�}|r�|j�}|||d�}|dkrU|dS|dkr�|jd|�r{|dS|jd|�r�d	S||kr�|S|dS|dkr�d
S|dkr�dS||kr�|S|dStd��dS)Nrr�/z/>r&r
z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!r.r.r.)r�locatestarttagend_tolerantr8rHr6rD)r	rLrrhrN�nextr
r
rr\ss.	z$HTMLParser.check_for_whole_start_tagcCs�|j}|||d�dks/td��tj||d�}|sOdS|j�}tj||�}|s*|jdk	r�|j|||��|St	j||d�}|s�|||d�dkr�|dS|j
|�S|jd�j�}|j
d|j��}|j|�|dS|jd�j�}|jdk	rx||jkrx|j|||��|S|j|j��|j�|S)	Nr&z</zunexpected call to parse_endtagrr+z</>rr.)rrD�	endendtagr3rH�
endtagfindr8rr5r]rRrFrr0�
handle_endtagr")r	rLrr8rSZ	namematchZtagnamer r
r
rr:�s8	&


zHTMLParser.parse_endtagcCs!|j||�|j|�dS)N)rcrq)r	rgrfr
r
rrb�szHTMLParser.handle_startendtagcCsdS)Nr
)r	rgrfr
r
rrc�szHTMLParser.handle_starttagcCsdS)Nr
)r	rgr
r
rrq�szHTMLParser.handle_endtagcCsdS)Nr
)r	rQr
r
rrG�szHTMLParser.handle_charrefcCsdS)Nr
)r	rQr
r
rrJ�szHTMLParser.handle_entityrefcCsdS)Nr
)r	rr
r
rr5�szHTMLParser.handle_datacCsdS)Nr
)r	rr
r
rr?�szHTMLParser.handle_commentcCsdS)Nr
)r	Zdeclr
r
rrB�szHTMLParser.handle_declcCsdS)Nr
)r	rr
r
rrC�szHTMLParser.handle_picCsdS)Nr
)r	rr
r
rrA�szHTMLParser.unknown_declcCs tjdtdd�t|�S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.�
stacklevelr&)�warnings�warn�DeprecationWarningr)r	�sr
r
rr�s	
zHTMLParser.unescape)rr)�__name__�
__module__�__qualname__�__doc__rdrrrrrrr!r"rr=rRr<r9r\r:rbrcrqrGrJr5r?rBrCrArr
r
r
rr@s8		�3"()rzrrsrZhtmlr�__all__rrrKrIrEr7r>rWZcommentcloser]r^�VERBOSErmrorprrr
r
r
r�<module>s*