3
uQh9                 @   s"  d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZmZmZmZ ddlmZ dd	lmZmZmZmZmZmZmZmZmZmZmZmZ G d
d deZe e	e e
e f dddZ!ed%ed&ed'ed(ed)ed*edd edd edd edd ed d iZ"e ee e d!d"d#Z#d$S )+z(
Simple HTML -> Telegram entity parser.
    )deque)escape)
HTMLParser)IterableTupleList   )add_surrogatedel_surrogatewithin_surrogate
strip_text)TLObject)MessageEntityBoldMessageEntityItalicMessageEntityCodeMessageEntityPreMessageEntityEmailMessageEntityUrlMessageEntityTextUrlMessageEntityMentionNameMessageEntityUnderlineMessageEntityStrikeMessageEntityBlockquoteTypeMessageEntityc                   s4   e Zd Z fddZdd Zdd Zdd Z  ZS )	HTMLToTelegramParserc                s0   t  j  d| _g | _i | _t | _t | _d S )N )super__init__textentities_building_entitiesr   
_open_tags_open_tags_meta)self)	__class__ </tmp/pip-build-2nz6shyl/telethon/telethon/extensions/html.pyr      s    
zHTMLToTelegramParser.__init__c             C   s  | j j| | jjd  t|}d }i }|dks8|dkr@t}nX|dksP|dkrXt}n@|dkrht}n0|dksx|dkrt}n|dkrt}n|d	kry>| j	d
 }y|d t
dd  |_W n tk
r   Y nX W n tk
r   t}Y nX n|d
kr
t}d|d< n|dkry|d }W n tk
r6   d S X |jdrZ|t
dd  }t}n(| j |krnt}nt}t||d< d }| jj  | jj| |r|| j	kr|f t
| jdd|| j	|< d S )Nstrongbemiudels
blockquotecodepreclassz	language-r   languageahrefzmailto:urlr   )offsetlength)r!   
appendleftr"   dictr   r   r   r   r   r    lenr2   KeyErrorr   r   
startswithr   get_starttag_textr   r   r
   popleftr   )r#   tagattrsZ
EntityTypeargsr0   r5   r%   r%   r&   handle_starttag   s^    





z$HTMLToTelegramParser.handle_starttagc             C   sr   t | jdkr| jd nd}|dkr6| jd }|r6|}x(| jj D ]\}}| jt |7  _qBW |  j|7  _d S )Nr   r   r3   )r:   r!   r"   r    itemsr7   r   )r#   r   Zprevious_tagr5   r?   entityr%   r%   r&   handle_dataY   s    
z HTMLToTelegramParser.handle_datac             C   sP   y| j j  | jj  W n tk
r,   Y nX | jj|d }|rL| jj| d S )N)r!   r>   r"   
IndexErrorr    popr   append)r#   r?   rD   r%   r%   r&   handle_endtage   s    
z"HTMLToTelegramParser.handle_endtag)__name__
__module____qualname__r   rB   rE   rI   __classcell__r%   r%   )r$   r&   r      s   <r   )htmlreturnc             C   sX   | s| g fS t  }|jt|  t|j|j}|jj  |jjdd d t||jfS )a  
    Parses the given HTML message and returns its stripped representation
    plus a list of the MessageEntity's that were found.

    :param html: the message with HTML to be parsed.
    :return: a tuple consisting of (clean message, [message entities]).
    c             S   s   | j S )N)r6   )rD   r%   r%   r&   <lambda>   s    zparse.<locals>.<lambda>)key)	r   feedr	   r   r   r   reversesortr
   )rN   parserr   r%   r%   r&   parsep   s    
rV   <strong>	</strong><em></em><code></code><u></u><del></del><blockquote></blockquote>c             C   s   dj | jdfS )Nz-<pre>
    <code class='language-{}'>
        z{}
    </code>
</pre>)formatr2   )e_r%   r%   r&   rP      s    rP   c             C   s   dj |dfS )Nz<a href="mailto:{}">z</a>)rc   )re   tr%   r%   r&   rP      s    c             C   s   dj |dfS )Nz<a href="{}">z</a>)rc   )re   rf   r%   r%   r&   rP      s    c             C   s   dj t| jdfS )Nz<a href="{}">z</a>)rc   r   r5   )rd   re   r%   r%   r&   rP      s    c             C   s   dj | jdfS )Nz<a href="tg://user?id={}">z</a>)rc   Zuser_id)rd   re   r%   r%   r&   rP      s    )r   r   rO   c             C   sN  | s| S |st | S t|tr$|f}t| } g }x~t|D ]r\}}|j}|j|j }tjt	|d}|r:t
|r||| || }|j|||d f |j|| |d f q:W |jdd d t| }x`|r(|j \}	}
}xt| |	r|	d7 }	qW | d|	 | t | |	|  | |d  } |	}qW t | d| | |d  } t| S )a=  
    Performs the reverse operation to .parse(), effectively returning HTML
    given a normal text and its MessageEntity's.

    :param text: the text to be reconverted into HTML.
    :param entities: the MessageEntity's applied to the text.
    :return: a HTML representation of the combination of both inputs.
    Nr      c             S   s   | d | d fS )Nr   rg   r%   )rf   r%   r%   r&   rP      s    zunparse.<locals>.<lambda>)rQ   )r   
isinstancer   r	   	enumerater6   r7   ENTITY_TO_FORMATTERgettypecallablerH   rT   r:   rG   r   r
   )r   r   Z	insert_atr*   rD   r-   rd   	delimiterZnext_escape_boundZatre   whatr%   r%   r&   unparse   s6    	
,rp   N)rW   rX   )rY   rZ   )r[   r\   )r]   r^   )r_   r`   )ra   rb   )$__doc__collectionsr   rN   r   html.parserr   typingr   r   r   helpersr	   r
   r   r   tlr   Ztl.typesr   r   r   r   r   r   r   r   r   r   r   r   r   strrV   rj   rp   r%   r%   r%   r&   <module>   s*   8	\