From 7e836fc81bd9527c6d421774111a71db2096451a Mon Sep 17 00:00:00 2001 From: Benson Tucker Date: Mon, 5 Dec 2016 18:44:32 -0500 Subject: [PATCH] add parsing policy as option to imbox constructor / parse_email; add test showing a case where you might set a policy --- imbox/__init__.py | 5 +++-- imbox/parser.py | 11 ++++++++--- tests/parser_tests.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/imbox/__init__.py b/imbox/__init__.py index 964a830..a8ddd04 100644 --- a/imbox/__init__.py +++ b/imbox/__init__.py @@ -9,13 +9,14 @@ logger = logging.getLogger(__name__) class Imbox(object): def __init__(self, hostname, username=None, password=None, ssl=True, - port=None, ssl_context=None): + port=None, ssl_context=None, policy=None): self.server = ImapTransport(hostname, ssl=ssl, port=port, ssl_context=ssl_context) self.hostname = hostname self.username = username self.password = password + self.parser_policy = policy self.connection = self.server.connect(username, password) logger.info("Connected to IMAP Server with user {username} on {hostname}{ssl}".format( hostname=hostname, username=username, ssl=(" over SSL" if ssl else ""))) @@ -38,7 +39,7 @@ class Imbox(object): logger.debug("Fetched message for UID {}".format(int(uid))) raw_email = data[0][1] - email_object = parse_email(raw_email) + email_object = parse_email(raw_email, policy=self.parser_policy) return email_object diff --git a/imbox/parser.py b/imbox/parser.py index 174ac69..1def6e5 100644 --- a/imbox/parser.py +++ b/imbox/parser.py @@ -121,13 +121,18 @@ def decode_content(message): return content -def parse_email(raw_email): +def parse_email(raw_email, policy=None): if isinstance(raw_email, binary_type): raw_email = str_encode(raw_email, 'utf-8') + if policy is not None: + email_parse_kwargs = dict(policy=policy) + else: + email_parse_kwargs = {} + try: - email_message = email.message_from_string(raw_email) + email_message = 
email.message_from_string(raw_email, **email_parse_kwargs) except UnicodeEncodeError: - email_message = email.message_from_string(raw_email.encode('utf-8')) + email_message = email.message_from_string(raw_email.encode('utf-8'), **email_parse_kwargs) maintype = email_message.get_content_maintype() parsed_email = {} diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 04f6db6..742c77e 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -3,6 +3,9 @@ from __future__ import unicode_literals import unittest from imbox.parser import * +from email.policy import SMTP + + raw_email = """Delivered-To: johndoe@gmail.com X-Originating-Email: [martin@amon.cx] Message-ID: @@ -55,6 +58,25 @@ Content-Transfer-Encoding: quoted-printable ------=_Part_1295_1644105626.1458989730614-- """ +raw_email_encoded_needs_refolding = b"""Delivered-To: receiver@example.com +Return-Path: +Date: Sat, 26 Mar 2016 13:55:30 +0300 (FET) +From: sender@example.com +To: "Receiver" <receiver@example.com>, "Second\r\n Receiver" <recipient@example.com> +Message-ID: <811170233.1296.1345983710614.JavaMail.bris@BRIS-AS-NEW.site> +Subject: =?ISO-8859-5?B?suvf2OHa0CDf3iDa0ODi1Q==?= +MIME-Version: 1.0 +Content-Type: multipart/mixed; + boundary="----=_Part_1295_1644105626.1458989730614" + +------=_Part_1295_1644105626.1458989730614 +Content-Type: text/html; charset=ISO-8859-5 +Content-Transfer-Encoding: quoted-printable + +=B2=EB=DF=D8=E1=DA=D0 =DF=DE =DA=D0=E0=E2=D5 1234 +------=_Part_1295_1644105626.1458989730614-- +""" + class TestParser(unittest.TestCase): @@ -92,3 +114,14 @@ class TestParser(unittest.TestCase): from_message_object = email.message_from_string("From: John Smith <johnsmith@gmail.com>") self.assertEqual([{'email': 'johnsmith@gmail.com', 'name': 'John Smith'}], get_mail_addresses(from_message_object, 'from')) + + def test_parse_email_with_policy(self): + message_object = email.message_from_bytes( + raw_email_encoded_needs_refolding, + policy=SMTP.clone(refold_source='all') + ) + + self.assertEqual([ + {'email': 'receiver@example.com', 
'name': 'Receiver'}, + {'email': 'recipient@example.com', 'name': 'Second Receiver'} + ], get_mail_addresses(message_object, 'to'))