0.4, parser updates, docs updated
This commit is contained in:
68
README.md
68
README.md
@@ -26,42 +26,56 @@ Usage
|
||||
password='password',
|
||||
ssl=True)
|
||||
|
||||
|
||||
# Gets all unread messages
|
||||
unread_messages = mailbox.get_unread()
|
||||
|
||||
# Gets all messages
|
||||
all_messages = mailbox.get_all()
|
||||
all_messages = mailbox.messages()
|
||||
|
||||
# Unread messages
|
||||
unread_messages = mailbox.messages(unread=True)
|
||||
|
||||
|
||||
for message in all_messages:
|
||||
........
|
||||
# Every message is an object with the following keys
|
||||
|
||||
# Every message is converted to a dictionary with the following keys:
|
||||
message.sent_from
|
||||
message.sent_to
|
||||
message.subject
|
||||
message.headers
|
||||
getattr(message, 'message-id')  # the key contains a hyphen, so dotted access is not valid Python
|
||||
message.date
|
||||
message.text_body
|
||||
|
||||
{
|
||||
'MesssageID': '22c74902-a0c1-4511-804f2-341342852c90',
|
||||
'From': {
|
||||
'Name': 'John Doe',
|
||||
'Email': 'jonhdoe@email.com'
|
||||
},
|
||||
'To': {
|
||||
'Name': 'Martin Rusev',
|
||||
'Email': 'martin@amon.cx'
|
||||
},
|
||||
'Date': 'Mon, 22 Jul 2013 23:21:39 +0000 (UTC)',
|
||||
'TextBody': ['ASCII'],
|
||||
'Subject': 'This is a message'
|
||||
# To check all available keys
|
||||
print message.keys()
|
||||
|
||||
'Headers': [{
|
||||
'Name': 'Received-SPF',
|
||||
'Value': 'pass (google.com: domain of bounces+......;'
|
||||
}, {
|
||||
'Name': 'MIME-Version',
|
||||
'Value': '1.0'
|
||||
|
||||
# To check the whole object, just write
|
||||
|
||||
print message
|
||||
|
||||
{
|
||||
'headers':
|
||||
[{
|
||||
'Name': 'Received-SPF',
|
||||
'Value': 'pass (google.com: domain of martinrusev@zoho.com designates 72.5.230.95 as permitted sender) client-ip=72.5.230.95;'
|
||||
},
|
||||
{
|
||||
'Name': 'MIME-Version',
|
||||
'Value': '1.0'
|
||||
}],
|
||||
'text_body': ['ASCII'],
|
||||
'date': u'Fri, 26 Jul 2013 10:56:26 +0300',
|
||||
'message-id': u'51F22BAA.1040606',
|
||||
'sent_from': [{
|
||||
'name': u'Martin Rusev',
|
||||
'email': 'martin@amon.cx'
|
||||
}],
|
||||
|
||||
}
|
||||
'sent_to': [{
|
||||
'name': u'John Doe',
|
||||
'email': 'john@gmail.com'
|
||||
}],
|
||||
'subject': u'Hello John, How are you today'
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,18 @@
|
||||
import email
|
||||
from mailbox.imap import ImapTransport
|
||||
from mailbox.parser import get_mail_addresses, decode_mail_header
|
||||
|
||||
|
||||
class Struct(object):
    """Lightweight record that exposes keyword arguments as attributes.

    Every entry passed to the constructor is stored in the instance
    ``__dict__``, so ``Struct(subject='Hi').subject`` returns ``'Hi'``.

    Entries whose names are not valid Python identifiers (for example
    ``'message-id'``) cannot be read with dotted access; use item access
    (``obj['message-id']``) for those.
    """

    def __init__(self, **entries):
        # Copy the entries straight into the instance namespace.
        self.__dict__.update(entries)

    def __getitem__(self, key):
        """Return the value stored under *key*.

        Raises ``KeyError`` when no entry with that name exists.
        """
        return self.__dict__[key]

    def keys(self):
        """Return the names of all stored entries."""
        return self.__dict__.keys()

    def __repr__(self):
        # Rendered exactly like the underlying dictionary.
        return str(self.__dict__)
|
||||
|
||||
|
||||
class MailBox(object):
|
||||
|
||||
@@ -10,10 +23,9 @@ class MailBox(object):
|
||||
|
||||
|
||||
def parse_email(self, raw_email):
|
||||
parsed_email = {}
|
||||
|
||||
email_message = email.message_from_string(raw_email)
|
||||
|
||||
|
||||
maintype = email_message.get_content_maintype()
|
||||
|
||||
text_body = []
|
||||
@@ -25,70 +37,61 @@ class MailBox(object):
|
||||
elif maintype == 'text':
|
||||
text_body.append(email_message.get_payload(decode=True))
|
||||
|
||||
parsed_email['text_body'] = text_body
|
||||
|
||||
email_dict = dict(email_message.items())
|
||||
|
||||
from_dict = {}
|
||||
from_ = email.utils.parseaddr(email_dict['From'])
|
||||
if len(from_) == 2:
|
||||
from_dict = {'Name': from_[0], 'Email': from_[1]}
|
||||
parsed_email['sent_from'] = get_mail_addresses(email_message, 'from')
|
||||
parsed_email['sent_to'] = get_mail_addresses(email_message, 'to')
|
||||
|
||||
to_dict = {}
|
||||
to_ = email.utils.parseaddr(email_dict['To'])
|
||||
if len(to_) == 2:
|
||||
to_dict = {'Name': to_[0], 'Email': to_[1]}
|
||||
value_headers_keys = ['Subject', 'Date','Message-ID']
|
||||
key_value_header_keys = ['Received-SPF',
|
||||
'MIME-Version',
|
||||
'X-Spam-Status',
|
||||
'X-Spam-Score',
|
||||
'Content-Type']
|
||||
|
||||
subject = email_dict.get('Subject', None)
|
||||
date = email_dict.get('Date', None)
|
||||
message_id = email_dict.get('Message-ID', None)
|
||||
parsed_email['headers'] = []
|
||||
for key, value in email_dict.iteritems():
|
||||
|
||||
# Get the headers
|
||||
headers = []
|
||||
headers_keys = ['Received-SPF',
|
||||
'MIME-Version',
|
||||
'X-Spam-Status',
|
||||
'X-Spam-Score']
|
||||
if key in value_headers_keys:
|
||||
valid_key_name = key.lower()
|
||||
parsed_email[valid_key_name] = decode_mail_header(value)
|
||||
|
||||
for key in headers_keys:
|
||||
header_value = email_dict.get(key)
|
||||
if key in key_value_header_keys:
|
||||
parsed_email['headers'].append({'Name': key,
|
||||
'Value': value})
|
||||
|
||||
if header_value:
|
||||
headers.append({'Name': key,
|
||||
'Value': header_value})
|
||||
|
||||
return {
|
||||
'MesssageID': message_id,
|
||||
'From': from_dict,
|
||||
'To': to_dict,
|
||||
'Subject': subject,
|
||||
'Date': date,
|
||||
'TextBody': text_body,
|
||||
'Headers': headers
|
||||
}
|
||||
return Struct(**parsed_email)
|
||||
|
||||
def fetch_by_uid(self, uid):
|
||||
message, data = self.connection.uid('fetch', uid, '(RFC822)')
|
||||
|
||||
message, data = self.connection.uid('fetch', uid, '(BODY.PEEK[])') # Don't mark the messages as read
|
||||
raw_email = data[0][1]
|
||||
|
||||
email_metadata = self.parse_email(raw_email)
|
||||
email_object = self.parse_email(raw_email)
|
||||
|
||||
return email_metadata
|
||||
return email_object
|
||||
|
||||
def fetch_list(self, data):
|
||||
uid_list = data[0].split()
|
||||
|
||||
messages_list = []
|
||||
for uid in uid_list:
|
||||
messages_list.append(self.fetch_by_uid(uid))
|
||||
yield self.fetch_by_uid(uid)
|
||||
|
||||
def messages(self, *args, **kwargs):
|
||||
|
||||
def get_all(self):
|
||||
message, data = self.connection.uid('search', None, "ALL")
|
||||
query = "ALL"
|
||||
|
||||
# Parse keyword arguments
|
||||
unread = kwargs.get('unread', False)
|
||||
folder = kwargs.get('folder', False)
|
||||
sent_from = kwargs.get('sent_from', False)
|
||||
sent_to = kwargs.get('sent_to', False)
|
||||
|
||||
if unread != False:
|
||||
query = "UNSEEN"
|
||||
|
||||
message, data = self.connection.uid('search', None, query)
|
||||
|
||||
return self.fetch_list(data)
|
||||
|
||||
|
||||
def get_unread(self):
|
||||
message, data = self.connection.uid('search', None, "UNSEEN")
|
||||
|
||||
return self.fetch_list(data)
|
||||
|
||||
34
mailbox/parser.py
Normal file
34
mailbox/parser.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import email
|
||||
from email.header import decode_header
|
||||
|
||||
def decode_mail_header(value, default_charset='us-ascii'):
    """
    Decode a header value into a unicode string.

    ``value`` is split into fragments with ``decode_header``; each byte
    fragment is decoded with its declared charset and the pieces are
    concatenated.

    :param value: raw header value, possibly containing RFC 2047
        encoded-words such as ``=?utf-8?q?Caf=C3=A9?=``.
    :param default_charset: charset used for byte fragments that declare
        no charset, and as the fallback when the declared charset is
        unknown to Python.
    :returns: the decoded header as a unicode string. Undecodable bytes
        are replaced rather than raising.
    """
    # Imported explicitly instead of relying on ``email.errors`` having
    # been loaded as a side effect of another import.
    from email.errors import HeaderParseError

    try:
        fragments = decode_header(value)
    except HeaderParseError:
        # Unparseable header: fall back to a lossy ASCII rendering.
        if isinstance(value, bytes):
            return value.decode('us-ascii', 'replace')
        return value.encode('us-ascii', 'replace').decode('us-ascii')

    decoded = []
    for text, charset in fragments:
        if not isinstance(text, bytes):
            # decode_header may hand back text that is already unicode
            # (e.g. headers without encoded-words on Python 3).
            decoded.append(text)
            continue
        try:
            decoded.append(text.decode(charset or default_charset, 'replace'))
        except LookupError:
            # if the charset is unknown, force default
            decoded.append(text.decode(default_charset, 'replace'))

    return u"".join(decoded)
|
||||
|
||||
|
||||
def get_mail_addresses(message, header_name):
    """
    Retrieve all email addresses from one message header.

    :param message: object providing ``get_all(name, default)``, such as
        the result of ``email.message_from_string``.
    :param header_name: name of the header to read, e.g. ``'from'`` or
        ``'to'``.
    :returns: a list with one ``{'name': ..., 'email': ...}`` dictionary
        per address; the name is passed through ``decode_mail_header``.
        The list is empty when the header is absent.
    """
    # Imported explicitly: a bare ``import email`` does not guarantee that
    # the ``email.utils`` submodule has been loaded.
    from email.utils import getaddresses

    raw_values = message.get_all(header_name, [])
    return [
        {'name': decode_mail_header(display_name), 'email': address}
        for display_name, address in getaddresses(raw_values)
    ]
|
||||
|
||||
2
setup.py
2
setup.py
@@ -1,7 +1,7 @@
|
||||
from setuptools import setup
|
||||
import os
|
||||
|
||||
version = '0.3.3'
|
||||
version = '0.4'
|
||||
|
||||
|
||||
def read(filename):
|
||||
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/parser_tests.py
Normal file
0
tests/parser_tests.py
Normal file
Reference in New Issue
Block a user