0.4, parser updates, docs updated

This commit is contained in:
Martin Rusev
2013-07-26 15:17:03 +03:00
parent 73e6f2fd94
commit fa13f3a2d2
6 changed files with 134 additions and 83 deletions

View File

@@ -26,42 +26,56 @@ Usage
password='password', password='password',
ssl=True) ssl=True)
# Gets all unread messages
unread_messages = mailbox.get_unread()
# Gets all messages # Gets all messages
all_messages = mailbox.get_all() all_messages = mailbox.messages()
# Unread messages
unread_messages = mailbox.messages(unread=True)
for message in all_messages: for message in all_messages:
........ ........
# Every message is an object with the following keys
# Every message is converted to a dictionary with the following keys: message.sent_from
message.sent_to
message.subject
message.headers
getattr(message, 'message-id')  # 'message-id' is not a valid identifier, so dot access cannot be used
message.date
message.text_body
{ # To check all available keys
'MesssageID': '22c74902-a0c1-4511-804f2-341342852c90', print message.keys()
'From': {
'Name': 'John Doe',
'Email': 'jonhdoe@email.com'
},
'To': {
'Name': 'Martin Rusev',
'Email': 'martin@amon.cx'
},
'Date': 'Mon, 22 Jul 2013 23:21:39 +0000 (UTC)',
'TextBody': ['ASCII'],
'Subject': 'This is a message'
'Headers': [{
'Name': 'Received-SPF', # To check the whole object, just write
'Value': 'pass (google.com: domain of bounces+......;'
}, { print message
'Name': 'MIME-Version',
'Value': '1.0' {
'headers':
[{
'Name': 'Received-SPF',
'Value': 'pass (google.com: domain of martinrusev@zoho.com designates 72.5.230.95 as permitted sender) client-ip=72.5.230.95;'
},
{
'Name': 'MIME-Version',
'Value': '1.0'
}],
'text_body': ['ASCII'],
'date': u 'Fri, 26 Jul 2013 10:56:26 +0300',
'message-id': u '51F22BAA.1040606',
'sent_from': [{
'name': u 'Martin Rusev',
'email': 'martin@amon.cx'
}], }],
'sent_to': [{
} 'name': u 'John Doe',
'email': 'john@gmail.com'
}],
'subject': u 'Hello John, How are you today'
}

View File

@@ -1,5 +1,18 @@
import email import email
from mailbox.imap import ImapTransport from mailbox.imap import ImapTransport
from mailbox.parser import get_mail_addresses, decode_mail_header
class Struct(object):
    """
    Lightweight record whose attributes come from keyword arguments.

    Every keyword passed to the constructor is stored in the instance
    ``__dict__``, so it can be read as an attribute (``obj.subject``).
    Keys that are not valid Python identifiers (for example
    ``'message-id'``) cannot be read with dot syntax, so item access
    (``obj['message-id']``) is supported as well.
    """

    def __init__(self, **entries):
        # Store the entries directly so arbitrary key names are accepted.
        self.__dict__.update(entries)

    def __getitem__(self, key):
        """Return the value stored under ``key``; raise KeyError if absent."""
        return self.__dict__[key]

    def keys(self):
        """Return the names of all stored entries."""
        return self.__dict__.keys()

    def __repr__(self):
        # Show the stored entries exactly as the underlying dict prints them.
        return str(self.__dict__)
class MailBox(object): class MailBox(object):
@@ -10,10 +23,9 @@ class MailBox(object):
def parse_email(self, raw_email): def parse_email(self, raw_email):
parsed_email = {}
email_message = email.message_from_string(raw_email) email_message = email.message_from_string(raw_email)
maintype = email_message.get_content_maintype() maintype = email_message.get_content_maintype()
text_body = [] text_body = []
@@ -25,70 +37,61 @@ class MailBox(object):
elif maintype == 'text': elif maintype == 'text':
text_body.append(email_message.get_payload(decode=True)) text_body.append(email_message.get_payload(decode=True))
parsed_email['text_body'] = text_body
email_dict = dict(email_message.items()) email_dict = dict(email_message.items())
from_dict = {} parsed_email['sent_from'] = get_mail_addresses(email_message, 'from')
from_ = email.utils.parseaddr(email_dict['From']) parsed_email['sent_to'] = get_mail_addresses(email_message, 'to')
if len(from_) == 2:
from_dict = {'Name': from_[0], 'Email': from_[1]}
to_dict = {} value_headers_keys = ['Subject', 'Date','Message-ID']
to_ = email.utils.parseaddr(email_dict['To']) key_value_header_keys = ['Received-SPF',
if len(to_) == 2: 'MIME-Version',
to_dict = {'Name': to_[0], 'Email': to_[1]} 'X-Spam-Status',
'X-Spam-Score',
'Content-Type']
subject = email_dict.get('Subject', None) parsed_email['headers'] = []
date = email_dict.get('Date', None) for key, value in email_dict.iteritems():
message_id = email_dict.get('Message-ID', None)
# Get the headers if key in value_headers_keys:
headers = [] valid_key_name = key.lower()
headers_keys = ['Received-SPF', parsed_email[valid_key_name] = decode_mail_header(value)
'MIME-Version',
'X-Spam-Status',
'X-Spam-Score']
for key in headers_keys: if key in key_value_header_keys:
header_value = email_dict.get(key) parsed_email['headers'].append({'Name': key,
'Value': value})
if header_value: return Struct(**parsed_email)
headers.append({'Name': key,
'Value': header_value})
return {
'MesssageID': message_id,
'From': from_dict,
'To': to_dict,
'Subject': subject,
'Date': date,
'TextBody': text_body,
'Headers': headers
}
def fetch_by_uid(self, uid): def fetch_by_uid(self, uid):
message, data = self.connection.uid('fetch', uid, '(RFC822)') message, data = self.connection.uid('fetch', uid, '(BODY.PEEK[])') # Don't mark the messages as read
raw_email = data[0][1] raw_email = data[0][1]
email_metadata = self.parse_email(raw_email) email_object = self.parse_email(raw_email)
return email_metadata return email_object
def fetch_list(self, data): def fetch_list(self, data):
uid_list = data[0].split() uid_list = data[0].split()
messages_list = []
for uid in uid_list: for uid in uid_list:
messages_list.append(self.fetch_by_uid(uid)) yield self.fetch_by_uid(uid)
def messages(self, *args, **kwargs):
def get_all(self): query = "ALL"
message, data = self.connection.uid('search', None, "ALL")
# Parse keyword arguments
unread = kwargs.get('unread', False)
folder = kwargs.get('folder', False)
sent_from = kwargs.get('sent_from', False)
sent_to = kwargs.get('sent_to', False)
if unread != False:
query = "UNSEEN"
message, data = self.connection.uid('search', None, query)
return self.fetch_list(data) return self.fetch_list(data)
def get_unread(self):
message, data = self.connection.uid('search', None, "UNSEEN")
return self.fetch_list(data)

34
mailbox/parser.py Normal file
View File

@@ -0,0 +1,34 @@
import email
from email.header import decode_header
def decode_mail_header(value, default_charset='us-ascii'):
    """
    Decode a header value into a unicode string.

    ``value`` is a raw header string that may contain RFC 2047 encoded
    words. ``default_charset`` is applied to byte parts that declare no
    charset and to parts whose declared charset is unknown to Python.
    Bytes that cannot be decoded are substituted, never raised.
    """
    try:
        headers = email.header.decode_header(value)
    except email.errors.HeaderParseError:
        # Malformed encoded word: fall back to a sanitized ASCII rendering.
        if isinstance(value, bytes):
            return value.decode('us-ascii', 'replace')
        return value.encode('us-ascii', 'replace').decode('us-ascii')

    parts = []
    for text, charset in headers:
        if not isinstance(text, bytes):
            # Already text (Python 3 hands back str when the header has
            # no encoded words), so there is nothing to decode.
            parts.append(text)
            continue
        try:
            parts.append(text.decode(charset or default_charset, 'replace'))
        except LookupError:
            # if the charset is unknown, force default
            parts.append(text.decode(default_charset, 'replace'))
    return u"".join(parts)
def get_mail_addresses(message, header_name):
    """
    Retrieve all email addresses from one message header.

    ``message`` must provide ``get_all(name, default)``. Every occurrence
    of ``header_name`` is parsed, and the result is a list of
    ``{'name': ..., 'email': ...}`` dicts, one per address, with the
    display name passed through ``decode_mail_header``. An empty list is
    expected when the header is absent, since ``get_all`` is given ``[]``
    as its default.
    """
    # Imported here because a bare ``import email`` does not guarantee that
    # the ``email.utils`` submodule has been loaded.
    from email.utils import getaddresses

    pairs = getaddresses(message.get_all(header_name, []))
    return [
        {'name': decode_mail_header(address_name), 'email': address}
        for address_name, address in pairs
    ]

View File

@@ -1,7 +1,7 @@
from setuptools import setup from setuptools import setup
import os import os
version = '0.3.3' version = '0.4'
def read(filename): def read(filename):

0
tests/__init__.py Normal file
View File

0
tests/parser_tests.py Normal file
View File