def cut_yahoo_quote(html_message):
    """Cut the first ``div.yahoo_quoted`` element out of the message tree.

    The first element returned by ``cssselect('div.yahoo_quoted', ...)`` is
    detached from its parent, unless its leading text matches ``RE_FWD``
    (presumably the forwarded-message marker -- confirm against the
    pattern's definition), in which case the block is left untouched.

    :param html_message: parsed HTML tree to modify in place.
    :return: ``True`` if a block was removed, ``False`` otherwise.
    """
    quotes = cssselect('div.yahoo_quoted', html_message)
    if not quotes:
        return False

    quote = quotes[0]

    # Leave the block alone when its leading text matches RE_FWD.
    if quote.text is not None and RE_FWD.match(quote.text):
        return False

    parent = quote.getparent()
    if parent is None:
        # The matched div is the tree root, so there is no parent to detach
        # it from; report "nothing cut" instead of raising AttributeError.
        return False

    parent.remove(quote)
    return True