Attachments support, Code cleanup

This commit is contained in:
Martin Rusev
2013-07-30 13:48:37 +03:00
parent 161d26c72a
commit 3ea4bd4bf6
4 changed files with 62 additions and 26 deletions

View File

@@ -43,7 +43,9 @@ Usage
message.headers
message.message-id
message.date
message.text_body
message.body.plain
message.body.html
message.attachments
# To check all available keys
print message.keys()
@@ -63,7 +65,16 @@ Usage
'Name': 'MIME-Version',
'Value': '1.0'
}],
'text_body': ['ASCII'],
'body': {
'plain': ['ASCII'],
'html': ['HTML BODY']
},
'attachments': [{
'content': <StringIO.StringIO instance at 0x7f8e8445fa70>,
'filename': "avatar.png",
'content-type': 'image/png',
'size': 80264
}],
'date': u'Fri, 26 Jul 2013 10:56:26 +0300',
'message-id': u'51F22BAA.1040606',
'sent_from': [{
@@ -77,12 +88,5 @@ Usage
'subject': u'Hello John, How are you today'
}
TODO
======
- Replace the dictionaries with objects and generators
- Add logging and exceptions
- Implement smarter header parsing

View File

@@ -1,4 +1,6 @@
import email
import re
import StringIO
from mailbox.imap import ImapTransport
from mailbox.parser import get_mail_addresses, decode_mail_header
@@ -21,35 +23,64 @@ class MailBox(object):
server = ImapTransport(hostname, ssl=ssl)
self.connection = server.connect(username, password)
def parse_attachment(self, message_part):
    """Return a dict describing an attachment part, or None.

    ``message_part`` is an ``email.message.Message``-like object. The part
    is treated as an attachment only when its ``Content-Disposition``
    header starts with ``attachment``.

    The returned dict always has the keys ``content-type``, ``size`` and
    ``content`` (a ``StringIO.StringIO`` wrapping the decoded payload).
    ``filename`` and ``create-date`` are added when the matching
    disposition parameters are present.
    """
    # Check again if this is a valid attachment
    content_disposition = message_part.get("Content-Disposition", None)
    if content_disposition is None:
        return None

    dispositions = content_disposition.strip().split(";")
    if dispositions[0].strip().lower() != "attachment":
        return None

    # get_payload(decode=True) yields None for multipart containers;
    # fall back to an empty payload so len() does not raise.
    file_data = message_part.get_payload(decode=True) or b""
    attachment = {
        'content-type': message_part.get_content_type(),
        'size': len(file_data),
        'content': StringIO.StringIO(file_data),
    }

    for param in dispositions[1:]:
        # partition() splits on the first "=" only, so values that contain
        # "=" survive, and empty fragments (e.g. after a trailing ";")
        # are skipped instead of raising ValueError.
        name, separator, value = param.partition("=")
        if not separator:
            continue
        name = name.strip().lower()
        # Header parameter values are commonly quoted: filename="a.png"
        value = value.strip().strip('"')
        if 'file' in name:
            attachment['filename'] = value
        if 'create-date' in name:
            attachment['create-date'] = value
    return attachment
def parse_email(self, raw_email):
email_message = email.message_from_string(raw_email)
maintype = email_message.get_content_maintype()
parsed_email = {}
body = {}
plain = []
html = []
body = {
"plain": [],
"html": []
}
attachments = []
if maintype == 'multipart':
for part in email_message.walk():
content = part.get_payload(decode=True)
content_type = part.get_content_type()
content_disposition = part.get('Content-Disposition')
if content_type == "text/plain" and not content_disposition:
plain.append(content)
elif content_type == "text/html" and not content_disposition:
html.append(content)
elif content_disposition:
attachments.append(part.get_filename())
elif maintype == 'text':
plain.append(email_message.get_payload(decode=True))
content_disposition = part.get('Content-Disposition', None)
if plain:
body['plain'] = plain
elif html:
body['html'] = html
if attachments:
if content_type == "text/plain" and content_disposition == None:
body['plain'].append(content)
elif content_type == "text/html" and content_disposition == None:
body['html'].append(content)
elif content_disposition:
attachments.append(self.parse_attachment(part))
elif maintype == 'text':
body['plain'].append(email_message.get_payload(decode=True))
if len(attachments) > 0:
parsed_email['attachments'] = attachments
parsed_email['body'] = body

View File

@@ -1,11 +1,12 @@
import email
from email.header import decode_header
def decode_mail_header(value, default_charset='us-ascii'):
"""
Decode a header value into a unicode string.
"""
try:
headers=email.header.decode_header(value)
headers=decode_header(value)
except email.errors.HeaderParseError:
return value.encode(default_charset, 'replace').decode(default_charset)
else:

View File

@@ -1,7 +1,7 @@
from setuptools import setup
import os
version = '0.4'
version = '0.5'
def read(filename):