From 79ce81aa9de527223775da66ae35fa6e57aae7ea Mon Sep 17 00:00:00 2001 From: Stephane Blondon Date: Mon, 19 Mar 2018 20:56:22 +0100 Subject: [PATCH 1/3] fix spelling error --- imbox/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imbox/parser.py b/imbox/parser.py index 157f0f9..6e25a34 100644 --- a/imbox/parser.py +++ b/imbox/parser.py @@ -54,7 +54,7 @@ def get_mail_addresses(message, header_name): for index, (address_name, address_email) in enumerate(addresses): addresses[index] = {'name': decode_mail_header(address_name), 'email': address_email} - logger.debug("{} Mail addressees in message: <{}> {}".format(header_name.upper(), address_name, address_email)) + logger.debug("{} Mail address in message: <{}> {}".format(header_name.upper(), address_name, address_email)) return addresses From cbb46ef07892da44361953d1665f6ae8de97df45 Mon Sep 17 00:00:00 2001 From: Stephane Blondon Date: Wed, 21 Mar 2018 18:24:59 +0100 Subject: [PATCH 2/3] fix decoding of sender e-mail if badly encoded --- imbox/parser.py | 2 +- tests/parser_tests.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/imbox/parser.py b/imbox/parser.py index 6e25a34..9ad1314 100644 --- a/imbox/parser.py +++ b/imbox/parser.py @@ -33,7 +33,7 @@ def decode_mail_header(value, default_charset='us-ascii'): return str_decode(str_encode(value, default_charset, 'replace'), default_charset) else: for index, (text, charset) in enumerate(headers): - logger.debug("Mail header no. {}: {} encoding {}".format(index, str_decode(text, charset or 'utf-8'), charset)) + logger.debug("Mail header no. {}: {} encoding {}".format(index, str_decode(text, charset or 'utf-8', 'replace'), charset)) try: headers[index] = str_decode(text, charset or default_charset, 'replace') diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 46baa70..73f8de0 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -336,6 +336,9 @@ class TestParser(unittest.TestCase): from_message_object = email.message_from_string("From: John Smith ") self.assertEqual([{'email': 'johnsmith@gmail.com', 'name': 'John Smith'}], get_mail_addresses(from_message_object, 'from')) + invalid_encoding_in_from_message_object = email.message_from_string("From: =?UTF-8?Q?C=E4cilia?= ") + self.assertEqual([{'email': 'caciliahxg827m@example.org', 'name': 'C�cilia'}], get_mail_addresses(invalid_encoding_in_from_message_object, 'from')) + def test_parse_email_with_policy(self): if not SMTP: return From ecb62b585cf280e8b79591df8549704c28e1b754 Mon Sep 17 00:00:00 2001 From: Stephane Blondon Date: Wed, 21 Mar 2018 18:31:08 +0100 Subject: [PATCH 3/3] refactoring: string formatting more readable --- imbox/parser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/imbox/parser.py b/imbox/parser.py index 9ad1314..58e10bf 100644 --- a/imbox/parser.py +++ b/imbox/parser.py @@ -33,7 +33,10 @@ def decode_mail_header(value, default_charset='us-ascii'): return str_decode(str_encode(value, default_charset, 'replace'), default_charset) else: for index, (text, charset) in enumerate(headers): - logger.debug("Mail header no. {}: {} encoding {}".format(index, str_decode(text, charset or 'utf-8', 'replace'), charset)) + logger.debug("Mail header no. {index}: {data} encoding {charset}".format( + index=index, + data=str_decode(text, charset or 'utf-8', 'replace'), + charset=charset)) try: headers[index] = str_decode(text, charset or default_charset, 'replace')