Add a parsing policy as an option to the Imbox constructor and to parse_email; add a test showing a case where you might want to set a policy

This commit is contained in:
Benson Tucker
2016-12-05 18:44:32 -05:00
parent 6a8f9cda54
commit 7e836fc81b
3 changed files with 44 additions and 5 deletions

View File

@@ -9,13 +9,14 @@ logger = logging.getLogger(__name__)
class Imbox(object):
def __init__(self, hostname, username=None, password=None, ssl=True,
port=None, ssl_context=None):
port=None, ssl_context=None, policy=None):
self.server = ImapTransport(hostname, ssl=ssl, port=port,
ssl_context=ssl_context)
self.hostname = hostname
self.username = username
self.password = password
self.parser_policy = policy
self.connection = self.server.connect(username, password)
logger.info("Connected to IMAP Server with user {username} on {hostname}{ssl}".format(
hostname=hostname, username=username, ssl=(" over SSL" if ssl else "")))
@@ -38,7 +39,7 @@ class Imbox(object):
logger.debug("Fetched message for UID {}".format(int(uid)))
raw_email = data[0][1]
email_object = parse_email(raw_email)
email_object = parse_email(raw_email, policy=self.parser_policy)
return email_object

View File

@@ -121,13 +121,18 @@ def decode_content(message):
return content
def parse_email(raw_email):
def parse_email(raw_email, policy=None):
if isinstance(raw_email, binary_type):
raw_email = str_encode(raw_email, 'utf-8')
if policy is not None:
email_parse_kwargs = dict(policy=policy)
else:
email_parse_kwargs = {}
try:
email_message = email.message_from_string(raw_email)
email_message = email.message_from_string(raw_email, **email_parse_kwargs)
except UnicodeEncodeError:
email_message = email.message_from_string(raw_email.encode('utf-8'))
email_message = email.message_from_string(raw_email.encode('utf-8'), **email_parse_kwargs)
maintype = email_message.get_content_maintype()
parsed_email = {}

View File

@@ -3,6 +3,9 @@ from __future__ import unicode_literals
import unittest
from imbox.parser import *
from email.policy import SMTP
raw_email = """Delivered-To: johndoe@gmail.com
X-Originating-Email: [martin@amon.cx]
Message-ID: <test0@example.com>
@@ -55,6 +58,25 @@ Content-Transfer-Encoding: quoted-printable
------=_Part_1295_1644105626.1458989730614--
"""
# Raw message fixture (bytes) consumed by test_parse_email_with_policy.
# The second display name in the To header contains an embedded CRLF followed
# by a space (the "\r\n " escape below), i.e. the quoted name is split across
# a folded header line.
raw_email_encoded_needs_refolding = b"""Delivered-To: receiver@example.com
Return-Path: <sender@example.com>
Date: Sat, 26 Mar 2016 13:55:30 +0300 (FET)
From: sender@example.com
To: "Receiver" <receiver@example.com>, "Second\r\n Receiver" <recipient@example.com>
Message-ID: <811170233.1296.1345983710614.JavaMail.bris@BRIS-AS-NEW.site>
Subject: =?ISO-8859-5?B?suvf2OHa0CDf3iDa0ODi1Q==?=
MIME-Version: 1.0
Content-Type: multipart/mixed;
boundary="----=_Part_1295_1644105626.1458989730614"
------=_Part_1295_1644105626.1458989730614
Content-Type: text/html; charset=ISO-8859-5
Content-Transfer-Encoding: quoted-printable
=B2=EB=DF=D8=E1=DA=D0 =DF=DE =DA=D0=E0=E2=D5 1234
------=_Part_1295_1644105626.1458989730614--
"""
class TestParser(unittest.TestCase):
@@ -92,3 +114,14 @@ class TestParser(unittest.TestCase):
from_message_object = email.message_from_string("From: John Smith <johnsmith@gmail.com>")
self.assertEqual([{'email': 'johnsmith@gmail.com', 'name': 'John Smith'}], get_mail_addresses(from_message_object, 'from'))
def test_parse_email_with_policy(self):
    """Check the 'to' addresses of a message parsed with a custom policy.

    The message is built from the raw bytes fixture using an SMTP policy
    cloned with ``refold_source='all'``; the extracted recipients must
    match the two expected name/email pairs.
    """
    # NOTE(review): the message is built with email.message_from_bytes
    # directly, so parse_email(..., policy=...) is not exercised by this
    # test -- confirm whether that is intended.
    refolding_policy = SMTP.clone(refold_source='all')
    parsed_message = email.message_from_bytes(
        raw_email_encoded_needs_refolding,
        policy=refolding_policy,
    )

    expected_recipients = [
        {'email': 'receiver@example.com', 'name': 'Receiver'},
        {'email': 'recipient@example.com', 'name': 'Second Receiver'},
    ]
    actual_recipients = get_mail_addresses(parsed_message, 'to')

    self.assertEqual(expected_recipients, actual_recipients)