Add a parsing policy option to the Imbox constructor and to parse_email; add a test showing a case where you might want to set a policy.
This commit is contained in:
@@ -9,13 +9,14 @@ logger = logging.getLogger(__name__)
|
||||
class Imbox(object):
|
||||
|
||||
def __init__(self, hostname, username=None, password=None, ssl=True,
|
||||
port=None, ssl_context=None):
|
||||
port=None, ssl_context=None, policy=None):
|
||||
|
||||
self.server = ImapTransport(hostname, ssl=ssl, port=port,
|
||||
ssl_context=ssl_context)
|
||||
self.hostname = hostname
|
||||
self.username = username
|
||||
self.password = password
|
||||
self.parser_policy = policy
|
||||
self.connection = self.server.connect(username, password)
|
||||
logger.info("Connected to IMAP Server with user {username} on {hostname}{ssl}".format(
|
||||
hostname=hostname, username=username, ssl=(" over SSL" if ssl else "")))
|
||||
@@ -38,7 +39,7 @@ class Imbox(object):
|
||||
logger.debug("Fetched message for UID {}".format(int(uid)))
|
||||
raw_email = data[0][1]
|
||||
|
||||
email_object = parse_email(raw_email)
|
||||
email_object = parse_email(raw_email, policy=self.parser_policy)
|
||||
|
||||
return email_object
|
||||
|
||||
|
||||
@@ -121,13 +121,18 @@ def decode_content(message):
|
||||
return content
|
||||
|
||||
|
||||
def parse_email(raw_email):
|
||||
def parse_email(raw_email, policy=None):
|
||||
if isinstance(raw_email, binary_type):
|
||||
raw_email = str_encode(raw_email, 'utf-8')
|
||||
if policy is not None:
|
||||
email_parse_kwargs = dict(policy=policy)
|
||||
else:
|
||||
email_parse_kwargs = {}
|
||||
|
||||
try:
|
||||
email_message = email.message_from_string(raw_email)
|
||||
email_message = email.message_from_string(raw_email, **email_parse_kwargs)
|
||||
except UnicodeEncodeError:
|
||||
email_message = email.message_from_string(raw_email.encode('utf-8'))
|
||||
email_message = email.message_from_string(raw_email.encode('utf-8'), **email_parse_kwargs)
|
||||
maintype = email_message.get_content_maintype()
|
||||
parsed_email = {}
|
||||
|
||||
|
||||
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
|
||||
import unittest
|
||||
from imbox.parser import *
|
||||
|
||||
from email.policy import SMTP
|
||||
|
||||
|
||||
raw_email = """Delivered-To: johndoe@gmail.com
|
||||
X-Originating-Email: [martin@amon.cx]
|
||||
Message-ID: <test0@example.com>
|
||||
@@ -55,6 +58,25 @@ Content-Transfer-Encoding: quoted-printable
|
||||
------=_Part_1295_1644105626.1458989730614--
|
||||
"""
|
||||
|
||||
raw_email_encoded_needs_refolding = b"""Delivered-To: receiver@example.com
|
||||
Return-Path: <sender@example.com>
|
||||
Date: Sat, 26 Mar 2016 13:55:30 +0300 (FET)
|
||||
From: sender@example.com
|
||||
To: "Receiver" <receiver@example.com>, "Second\r\n Receiver" <recipient@example.com>
|
||||
Message-ID: <811170233.1296.1345983710614.JavaMail.bris@BRIS-AS-NEW.site>
|
||||
Subject: =?ISO-8859-5?B?suvf2OHa0CDf3iDa0ODi1Q==?=
|
||||
MIME-Version: 1.0
|
||||
Content-Type: multipart/mixed;
|
||||
boundary="----=_Part_1295_1644105626.1458989730614"
|
||||
|
||||
------=_Part_1295_1644105626.1458989730614
|
||||
Content-Type: text/html; charset=ISO-8859-5
|
||||
Content-Transfer-Encoding: quoted-printable
|
||||
|
||||
=B2=EB=DF=D8=E1=DA=D0 =DF=DE =DA=D0=E0=E2=D5 1234
|
||||
------=_Part_1295_1644105626.1458989730614--
|
||||
"""
|
||||
|
||||
|
||||
class TestParser(unittest.TestCase):
|
||||
|
||||
@@ -92,3 +114,14 @@ class TestParser(unittest.TestCase):
|
||||
|
||||
from_message_object = email.message_from_string("From: John Smith <johnsmith@gmail.com>")
|
||||
self.assertEqual([{'email': 'johnsmith@gmail.com', 'name': 'John Smith'}], get_mail_addresses(from_message_object, 'from'))
|
||||
|
||||
def test_parse_email_with_policy(self):
    """Check 'To' address extraction from a message parsed with a policy.

    The fixture's 'To' header contains a display name that is folded
    across two lines ("Second\\r\\n Receiver"). The message is parsed with
    an SMTP policy cloned with ``refold_source='all'``, and the extracted
    addresses are expected to carry the unfolded name 'Second Receiver'.
    """
    # NOTE(review): this goes through email.message_from_bytes directly,
    # not through parse_email(..., policy=...) -- confirm that is intended.
    refolding_policy = SMTP.clone(refold_source='all')
    parsed_message = email.message_from_bytes(
        raw_email_encoded_needs_refolding,
        policy=refolding_policy,
    )

    expected_recipients = [
        {'email': 'receiver@example.com', 'name': 'Receiver'},
        {'email': 'recipient@example.com', 'name': 'Second Receiver'},
    ]
    actual_recipients = get_mail_addresses(parsed_message, 'to')

    self.assertEqual(expected_recipients, actual_recipients)
|
||||
|
||||
Reference in New Issue
Block a user