Fix UnicodeDecodeError parsing email
This commit is contained in:
@@ -123,7 +123,7 @@ def decode_content(message):
|
||||
|
||||
def parse_email(raw_email, policy=None):
|
||||
if isinstance(raw_email, binary_type):
|
||||
raw_email = str_encode(raw_email, 'utf-8')
|
||||
raw_email = str_encode(raw_email, 'utf-8', errors='ignore')
|
||||
if policy is not None:
|
||||
email_parse_kwargs = dict(policy=policy)
|
||||
else:
|
||||
|
||||
22
tests/8422.msg
Normal file
22
tests/8422.msg
Normal file
@@ -0,0 +1,22 @@
|
||||
Delivered-To: receiver@example.com
|
||||
Return-Path: <sender@example.com>
|
||||
Date: Thu, 20 Jul 2017 07:34:22 -0500
|
||||
Message-ID: <59705CFE.A95F.0016.0@journeys.com>
|
||||
Subject: Following up Re: Looking to connect, let's schedule a call!
|
||||
From: sender@example.com
|
||||
To: "Receiver" <receiver@example.com>
|
||||
Mime-Version: 1.0
|
||||
Content-Type: multipart/mixed; boundary="=__PartBD85995F.0__="
|
||||
|
||||
This is a MIME message. If you are reading this text, you may want to
|
||||
consider changing to a mail reader or gateway that understands how to
|
||||
properly handle MIME multipart messages.
|
||||
|
||||
--=__PartBD85995F.0__=
|
||||
Content-Type: multipart/alternative; boundary="=__PartBD85995F.1__="
|
||||
|
||||
--=__PartBD85995F.1__=
|
||||
Content-Type: text/plain; charset=Windows-1252
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Following up on my previous message. I’d love to connect you with
|
||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import unittest
|
||||
from imbox.parser import *
|
||||
|
||||
import os
|
||||
import sys
|
||||
if sys.version_info.major < 3 or sys.version_info.minor < 3:
|
||||
SMTP = False
|
||||
@@ -10,6 +11,9 @@ else:
|
||||
from email.policy import SMTP
|
||||
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
raw_email = """Delivered-To: johndoe@gmail.com
|
||||
X-Originating-Email: [martin@amon.cx]
|
||||
Message-ID: <test0@example.com>
|
||||
@@ -98,6 +102,10 @@ class TestParser(unittest.TestCase):
|
||||
self.assertEqual('Выписка по карте', parsed_email.subject)
|
||||
self.assertEqual('Выписка по карте 1234', parsed_email.body['html'][0])
|
||||
|
||||
def test_parse_email_invalid_unicode(self):
|
||||
parsed_email = parse_email(open(os.path.join(TEST_DIR, '8422.msg'), 'rb').read())
|
||||
self.assertEqual("Following up Re: Looking to connect, let's schedule a call!", parsed_email.subject)
|
||||
|
||||
def test_parse_email_ignores_header_casing(self):
|
||||
self.assertEqual('one', parse_email('Message-ID: one').message_id)
|
||||
self.assertEqual('one', parse_email('Message-Id: one').message_id)
|
||||
|
||||
Reference in New Issue
Block a user