From 87be2486def7d989bfe5a8de5a592daaae02f9d2 Mon Sep 17 00:00:00 2001 From: David Blado Date: Tue, 4 Jun 2019 19:18:54 -0700 Subject: [PATCH] add support for yahoo mail --- talon/html_quotations.py | 8 ++++++++ talon/quotations.py | 1 + tests/fixtures/html_replies/yahoo.html | 1 + tests/html_quotations_test.py | 25 +++++++++++++++++++++++++ 4 files changed, 35 insertions(+) create mode 100644 tests/fixtures/html_replies/yahoo.html diff --git a/talon/html_quotations.py b/talon/html_quotations.py index a2db32d5..d440f8f4 100644 --- a/talon/html_quotations.py +++ b/talon/html_quotations.py @@ -85,6 +85,14 @@ def cut_gmail_quote(html_message): return True +def cut_yahoo_quote(html_message): + ''' Cuts the outermost block element with class yahoo_quoted. ''' + yahoo_quote = cssselect('div.yahoo_quoted', html_message) + if yahoo_quote and (yahoo_quote[0].text is None or not RE_FWD.match(yahoo_quote[0].text)): + yahoo_quote[0].getparent().remove(yahoo_quote[0]) + return True + + def cut_microsoft_quote(html_message): ''' Cuts splitter block and all following blocks. ''' #use EXSLT extensions to have a regex match() function with lxml diff --git a/talon/quotations.py b/talon/quotations.py index 99eec956..c807a916 100644 --- a/talon/quotations.py +++ b/talon/quotations.py @@ -465,6 +465,7 @@ def _extract_from_html(msg_body): return msg_body cut_quotations = (html_quotations.cut_gmail_quote(html_tree) or + html_quotations.cut_yahoo_quote(html_tree) or html_quotations.cut_zimbra_quote(html_tree) or html_quotations.cut_blockquote(html_tree) or html_quotations.cut_microsoft_quote(html_tree) or diff --git a/tests/fixtures/html_replies/yahoo.html b/tests/fixtures/html_replies/yahoo.html new file mode 100644 index 00000000..c7b72d87 --- /dev/null +++ b/tests/fixtures/html_replies/yahoo.html @@ -0,0 +1 @@ +
Hi. I am fine.

Thanks,
Alex



On Tuesday, June 4, 2019, 5:41:43 PM PDT, John Smith <jsmith@example.com> wrote:


Hello! How are you?
diff --git a/tests/html_quotations_test.py b/tests/html_quotations_test.py index 541d3583..4fc852c2 100644 --- a/tests/html_quotations_test.py +++ b/tests/html_quotations_test.py @@ -157,6 +157,27 @@ def test_gmail_quote_blockquote(): RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) +def test_yahoo_quote(): + msg_body = """Reply +
+
+
+ On Tuesday, June 4, 2019, 5:41:43 PM PDT, John Smith <jsmith@example.com> wrote: +
+

+

+
+
+
Test
+
+
+
+
+""" + eq_("Reply", + RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) + + def test_unicode_in_reply(): msg_body = u"""Reply \xa0 \xa0 Text
@@ -324,6 +345,10 @@ def test_gmail_reply(): extract_reply_and_check("tests/fixtures/html_replies/gmail.html") +def test_yahoo_reply(): + extract_reply_and_check("tests/fixtures/html_replies/yahoo.html") + + def test_mail_ru_reply(): extract_reply_and_check("tests/fixtures/html_replies/mail_ru.html")