From 3ea4bd4bf6769215beced350c0865818b05cb15d Mon Sep 17 00:00:00 2001 From: Martin Rusev Date: Tue, 30 Jul 2013 13:48:37 +0300 Subject: [PATCH] Attachments support, Code cleanup --- README.md | 22 +++++++++------- mailbox/__init__.py | 61 ++++++++++++++++++++++++++++++++++----------- mailbox/parser.py | 3 ++- setup.py | 2 +- 4 files changed, 62 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 9641bb9..1c14b5a 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,9 @@ Usage message.headers message.message-id message.date - message.text_body + message.body.plain + message.body.html + message.attachments # To check all available keys print message.keys() @@ -63,7 +65,16 @@ Usage 'Name': 'MIME-Version', 'Value': '1.0' }], - 'text_body': ['ASCII'], + 'body': { + 'plain': ['ASCII'], + 'html': ['HTML BODY'] + }, + 'attachments': [{ + 'content': <StringIO.StringIO instance>, + 'filename': "avatar.png", + 'content-type': 'image/png', + 'size': 80264 + }], 'date': u 'Fri, 26 Jul 2013 10:56:26 +0300', 'message-id': u '51F22BAA.1040606', 'sent_from': [{ @@ -77,12 +88,5 @@ Usage 'subject': u 'Hello John, How are you today' } -TODO -====== - -- Replace the dictionaries with objects and generators -- Add logging and exceptions -- Implement smarter header parsing - diff --git a/mailbox/__init__.py b/mailbox/__init__.py index 645c11a..74a03d0 100644 --- a/mailbox/__init__.py +++ b/mailbox/__init__.py @@ -1,4 +1,6 @@ import email +import re +import StringIO from mailbox.imap import ImapTransport from mailbox.parser import get_mail_addresses, decode_mail_header @@ -21,35 +23,64 @@ class MailBox(object): server = ImapTransport(hostname, ssl=ssl) self.connection = server.connect(username, password) + def parse_attachment(self, message_part): + content_disposition = message_part.get("Content-Disposition", None) # Check again if this is a valid attachment + if content_disposition != None: + dispositions = content_disposition.strip().split(";") + + if dispositions[0].lower() == "attachment": + 
file_data = message_part.get_payload(decode=True) + + attachment = { + 'content-type': message_part.get_content_type(), + 'size': len(file_data), + 'content': StringIO.StringIO(file_data) + } + + + for param in dispositions[1:]: + name,value = param.split("=") + name = name.lower() + + if 'file' in name: + attachment['filename'] = value + + if 'create-date' in name: + attachment['create-date'] = value + + return attachment + + return None + def parse_email(self, raw_email): email_message = email.message_from_string(raw_email) maintype = email_message.get_content_maintype() parsed_email = {} - body = {} - plain = [] - html = [] + + body = { + "plain": [], + "html": [] + } attachments = [] if maintype == 'multipart': for part in email_message.walk(): content = part.get_payload(decode=True) content_type = part.get_content_type() - content_disposition = part.get('Content-Disposition') - if content_type == "text/plain" and not content_disposition: - plain.append(content) - elif content_type == "text/html" and not content_disposition: - html.append(content) + content_disposition = part.get('Content-Disposition', None) + + if content_type == "text/plain" and content_disposition == None: + body['plain'].append(content) + elif content_type == "text/html" and content_disposition == None: + body['html'].append(content) elif content_disposition: - attachments.append(part.get_filename()) + attachments.append(self.parse_attachment(part)) + elif maintype == 'text': - plain.append(email_message.get_payload(decode=True)) + body['plain'].append(email_message.get_payload(decode=True)) - if plain: - body['plain'] = plain - elif html: - body['html'] = html - if attachments: + if len(attachments) > 0: parsed_email['attachments'] = attachments parsed_email['body'] = body diff --git a/mailbox/parser.py b/mailbox/parser.py index 2c331dc..159f3b7 100644 --- a/mailbox/parser.py +++ b/mailbox/parser.py @@ -1,11 +1,12 @@ import email +from email.header import decode_header def 
decode_mail_header(value, default_charset='us-ascii'): """ Decode a header value into a unicode string. """ try: - headers=email.header.decode_header(value) + headers=decode_header(value) except email.errors.HeaderParseError: return value.encode(default_charset, 'replace').decode(default_charset) else: diff --git a/setup.py b/setup.py index f08e1c8..0d384d4 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup import os -version = '0.4' +version = '0.5' def read(filename):