Fix UnicodeDecodeError when parsing email
This commit is contained in:
@@ -123,7 +123,7 @@ def decode_content(message):
|
|||||||
|
|
||||||
def parse_email(raw_email, policy=None):
|
def parse_email(raw_email, policy=None):
|
||||||
if isinstance(raw_email, binary_type):
|
if isinstance(raw_email, binary_type):
|
||||||
raw_email = str_encode(raw_email, 'utf-8')
|
raw_email = str_encode(raw_email, 'utf-8', errors='ignore')
|
||||||
if policy is not None:
|
if policy is not None:
|
||||||
email_parse_kwargs = dict(policy=policy)
|
email_parse_kwargs = dict(policy=policy)
|
||||||
else:
|
else:
|
||||||
|
|||||||
22
tests/8422.msg
Normal file
22
tests/8422.msg
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
Delivered-To: receiver@example.com
|
||||||
|
Return-Path: <sender@example.com>
|
||||||
|
Date: Thu, 20 Jul 2017 07:34:22 -0500
|
||||||
|
Message-ID: <59705CFE.A95F.0016.0@journeys.com>
|
||||||
|
Subject: Following up Re: Looking to connect, let's schedule a call!
|
||||||
|
From: sender@example.com
|
||||||
|
To: "Receiver" <receiver@example.com>
|
||||||
|
Mime-Version: 1.0
|
||||||
|
Content-Type: multipart/mixed; boundary="=__PartBD85995F.0__="
|
||||||
|
|
||||||
|
This is a MIME message. If you are reading this text, you may want to
|
||||||
|
consider changing to a mail reader or gateway that understands how to
|
||||||
|
properly handle MIME multipart messages.
|
||||||
|
|
||||||
|
--=__PartBD85995F.0__=
|
||||||
|
Content-Type: multipart/alternative; boundary="=__PartBD85995F.1__="
|
||||||
|
|
||||||
|
--=__PartBD85995F.1__=
|
||||||
|
Content-Type: text/plain; charset=Windows-1252
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Following up on my previous message. I’d love to connect you with
|
||||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import unittest
|
import unittest
|
||||||
from imbox.parser import *
|
from imbox.parser import *
|
||||||
|
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
if sys.version_info.major < 3 or sys.version_info.minor < 3:
|
if sys.version_info.major < 3 or sys.version_info.minor < 3:
|
||||||
SMTP = False
|
SMTP = False
|
||||||
@@ -10,6 +11,9 @@ else:
|
|||||||
from email.policy import SMTP
|
from email.policy import SMTP
|
||||||
|
|
||||||
|
|
||||||
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
raw_email = """Delivered-To: johndoe@gmail.com
|
raw_email = """Delivered-To: johndoe@gmail.com
|
||||||
X-Originating-Email: [martin@amon.cx]
|
X-Originating-Email: [martin@amon.cx]
|
||||||
Message-ID: <test0@example.com>
|
Message-ID: <test0@example.com>
|
||||||
@@ -98,6 +102,10 @@ class TestParser(unittest.TestCase):
|
|||||||
self.assertEqual('Выписка по карте', parsed_email.subject)
|
self.assertEqual('Выписка по карте', parsed_email.subject)
|
||||||
self.assertEqual('Выписка по карте 1234', parsed_email.body['html'][0])
|
self.assertEqual('Выписка по карте 1234', parsed_email.body['html'][0])
|
||||||
|
|
||||||
|
def test_parse_email_invalid_unicode(self):
|
||||||
|
parsed_email = parse_email(open(os.path.join(TEST_DIR, '8422.msg'), 'rb').read())
|
||||||
|
self.assertEqual("Following up Re: Looking to connect, let's schedule a call!", parsed_email.subject)
|
||||||
|
|
||||||
def test_parse_email_ignores_header_casing(self):
|
def test_parse_email_ignores_header_casing(self):
|
||||||
self.assertEqual('one', parse_email('Message-ID: one').message_id)
|
self.assertEqual('one', parse_email('Message-ID: one').message_id)
|
||||||
self.assertEqual('one', parse_email('Message-Id: one').message_id)
|
self.assertEqual('one', parse_email('Message-Id: one').message_id)
|
||||||
|
|||||||
Reference in New Issue
Block a user