Merge pull request #96 from GetHappie/unicode-decode-fix

Fix UnicodeDecodeError when parsing email
This commit is contained in:
Martin Rusev
2017-09-28 17:11:13 +02:00
committed by GitHub
3 changed files with 31 additions and 1 deletions

View File

@@ -121,7 +121,7 @@ def decode_content(message):
def parse_email(raw_email, policy=None):
if isinstance(raw_email, bytes):
raw_email = str_encode(raw_email, 'utf-8')
raw_email = str_encode(raw_email, 'utf-8', errors='ignore')
if policy is not None:
email_parse_kwargs = dict(policy=policy)
else:

22
tests/8422.msg Normal file
View File

@@ -0,0 +1,22 @@
Delivered-To: receiver@example.com
Return-Path: <sender@example.com>
Date: Thu, 20 Jul 2017 07:34:22 -0500
Message-ID: <59705CFE.A95F.0016.0@journeys.com>
Subject: Following up Re: Looking to connect, let's schedule a call!
From: sender@example.com
To: "Receiver" <receiver@example.com>
Mime-Version: 1.0
Content-Type: multipart/mixed; boundary="=__PartBD85995F.0__="
This is a MIME message. If you are reading this text, you may want to
consider changing to a mail reader or gateway that understands how to
properly handle MIME multipart messages.
--=__PartBD85995F.0__=
Content-Type: multipart/alternative; boundary="=__PartBD85995F.1__="
--=__PartBD85995F.1__=
Content-Type: text/plain; charset=Windows-1252
Content-Transfer-Encoding: 8bit
Following up on my previous message. Id love to connect you with

View File

@@ -1,6 +1,7 @@
import unittest
from imbox.parser import *
import os
import sys
if sys.version_info.minor < 3:
SMTP = False
@@ -8,6 +9,9 @@ else:
from email.policy import SMTP
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
raw_email = """Delivered-To: johndoe@gmail.com
X-Originating-Email: [martin@amon.cx]
Message-ID: <test0@example.com>
@@ -192,6 +196,10 @@ class TestParser(unittest.TestCase):
self.assertEqual('Выписка по карте', parsed_email.subject)
self.assertEqual('Выписка по карте 1234', parsed_email.body['html'][0])
def test_parse_email_invalid_unicode(self):
parsed_email = parse_email(open(os.path.join(TEST_DIR, '8422.msg'), 'rb').read())
self.assertEqual("Following up Re: Looking to connect, let's schedule a call!", parsed_email.subject)
def test_parse_email_inline_body(self):
parsed_email = parse_email(raw_email_encoded_another_bad_multipart)
self.assertEqual("Re: Reaching Out About Peoples Home Equity", parsed_email.subject)