0.4, parser updates, docs updated
This commit is contained in:
68
README.md
68
README.md
@@ -26,42 +26,56 @@ Usage
|
|||||||
password='password',
|
password='password',
|
||||||
ssl=True)
|
ssl=True)
|
||||||
|
|
||||||
|
|
||||||
# Gets all unread messages
|
|
||||||
unread_messages = mailbox.get_unread()
|
|
||||||
|
|
||||||
# Gets all messages
|
# Gets all messages
|
||||||
all_messages = mailbox.get_all()
|
all_messages = mailbox.messages()
|
||||||
|
|
||||||
|
# Unread messages
|
||||||
|
unread_messages = mailbox.messages(unread=True)
|
||||||
|
|
||||||
|
|
||||||
for message in all_messages:
|
for message in all_messages:
|
||||||
........
|
........
|
||||||
|
# Every message is an object with the following keys
|
||||||
|
|
||||||
# Every message is converted to a dictionary with the following keys:
|
message.sent_from
|
||||||
|
message.sent_to
|
||||||
|
message.subject
|
||||||
|
message.headers
|
||||||
|
getattr(message, 'message-id')  # hyphenated key: not reachable with dot syntax
|
||||||
|
message.date
|
||||||
|
message.text_body
|
||||||
|
|
||||||
{
|
# To check all available keys
|
||||||
'MesssageID': '22c74902-a0c1-4511-804f2-341342852c90',
|
print message.keys()
|
||||||
'From': {
|
|
||||||
'Name': 'John Doe',
|
|
||||||
'Email': 'jonhdoe@email.com'
|
|
||||||
},
|
|
||||||
'To': {
|
|
||||||
'Name': 'Martin Rusev',
|
|
||||||
'Email': 'martin@amon.cx'
|
|
||||||
},
|
|
||||||
'Date': 'Mon, 22 Jul 2013 23:21:39 +0000 (UTC)',
|
|
||||||
'TextBody': ['ASCII'],
|
|
||||||
'Subject': 'This is a message'
|
|
||||||
|
|
||||||
'Headers': [{
|
|
||||||
'Name': 'Received-SPF',
|
# To check the whole object, just write
|
||||||
'Value': 'pass (google.com: domain of bounces+......;'
|
|
||||||
}, {
|
print message
|
||||||
'Name': 'MIME-Version',
|
|
||||||
'Value': '1.0'
|
{
|
||||||
|
'headers':
|
||||||
|
[{
|
||||||
|
'Name': 'Received-SPF',
|
||||||
|
'Value': 'pass (google.com: domain of martinrusev@zoho.com designates 72.5.230.95 as permitted sender) client-ip=72.5.230.95;'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'Name': 'MIME-Version',
|
||||||
|
'Value': '1.0'
|
||||||
|
}],
|
||||||
|
'text_body': ['ASCII'],
|
||||||
|
'date': u'Fri, 26 Jul 2013 10:56:26 +0300',
|
||||||
|
'message-id': u'51F22BAA.1040606',
|
||||||
|
'sent_from': [{
|
||||||
|
'name': u'Martin Rusev',
|
||||||
|
'email': 'martin@amon.cx'
|
||||||
}],
|
}],
|
||||||
|
'sent_to': [{
|
||||||
}
|
'name': u'John Doe',
|
||||||
|
'email': 'john@gmail.com'
|
||||||
|
}],
|
||||||
|
'subject': u'Hello John, How are you today'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,18 @@
|
|||||||
import email
|
import email
|
||||||
from mailbox.imap import ImapTransport
|
from mailbox.imap import ImapTransport
|
||||||
|
from mailbox.parser import get_mail_addresses, decode_mail_header
|
||||||
|
|
||||||
|
|
||||||
|
class Struct(object):
|
||||||
|
def __init__(self, **entries):
|
||||||
|
self.__dict__.update(entries)
|
||||||
|
|
||||||
|
def keys(self):
|
||||||
|
return self.__dict__.keys()
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return str(self.__dict__)
|
||||||
|
|
||||||
|
|
||||||
class MailBox(object):
|
class MailBox(object):
|
||||||
|
|
||||||
@@ -10,10 +23,9 @@ class MailBox(object):
|
|||||||
|
|
||||||
|
|
||||||
def parse_email(self, raw_email):
|
def parse_email(self, raw_email):
|
||||||
|
parsed_email = {}
|
||||||
|
|
||||||
email_message = email.message_from_string(raw_email)
|
email_message = email.message_from_string(raw_email)
|
||||||
|
|
||||||
|
|
||||||
maintype = email_message.get_content_maintype()
|
maintype = email_message.get_content_maintype()
|
||||||
|
|
||||||
text_body = []
|
text_body = []
|
||||||
@@ -25,70 +37,61 @@ class MailBox(object):
|
|||||||
elif maintype == 'text':
|
elif maintype == 'text':
|
||||||
text_body.append(email_message.get_payload(decode=True))
|
text_body.append(email_message.get_payload(decode=True))
|
||||||
|
|
||||||
|
parsed_email['text_body'] = text_body
|
||||||
|
|
||||||
email_dict = dict(email_message.items())
|
email_dict = dict(email_message.items())
|
||||||
|
|
||||||
from_dict = {}
|
parsed_email['sent_from'] = get_mail_addresses(email_message, 'from')
|
||||||
from_ = email.utils.parseaddr(email_dict['From'])
|
parsed_email['sent_to'] = get_mail_addresses(email_message, 'to')
|
||||||
if len(from_) == 2:
|
|
||||||
from_dict = {'Name': from_[0], 'Email': from_[1]}
|
|
||||||
|
|
||||||
to_dict = {}
|
value_headers_keys = ['Subject', 'Date','Message-ID']
|
||||||
to_ = email.utils.parseaddr(email_dict['To'])
|
key_value_header_keys = ['Received-SPF',
|
||||||
if len(to_) == 2:
|
'MIME-Version',
|
||||||
to_dict = {'Name': to_[0], 'Email': to_[1]}
|
'X-Spam-Status',
|
||||||
|
'X-Spam-Score',
|
||||||
|
'Content-Type']
|
||||||
|
|
||||||
subject = email_dict.get('Subject', None)
|
parsed_email['headers'] = []
|
||||||
date = email_dict.get('Date', None)
|
for key, value in email_dict.iteritems():
|
||||||
message_id = email_dict.get('Message-ID', None)
|
|
||||||
|
|
||||||
# Get the headers
|
if key in value_headers_keys:
|
||||||
headers = []
|
valid_key_name = key.lower()
|
||||||
headers_keys = ['Received-SPF',
|
parsed_email[valid_key_name] = decode_mail_header(value)
|
||||||
'MIME-Version',
|
|
||||||
'X-Spam-Status',
|
|
||||||
'X-Spam-Score']
|
|
||||||
|
|
||||||
for key in headers_keys:
|
if key in key_value_header_keys:
|
||||||
header_value = email_dict.get(key)
|
parsed_email['headers'].append({'Name': key,
|
||||||
|
'Value': value})
|
||||||
|
|
||||||
if header_value:
|
return Struct(**parsed_email)
|
||||||
headers.append({'Name': key,
|
|
||||||
'Value': header_value})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'MesssageID': message_id,
|
|
||||||
'From': from_dict,
|
|
||||||
'To': to_dict,
|
|
||||||
'Subject': subject,
|
|
||||||
'Date': date,
|
|
||||||
'TextBody': text_body,
|
|
||||||
'Headers': headers
|
|
||||||
}
|
|
||||||
|
|
||||||
def fetch_by_uid(self, uid):
|
def fetch_by_uid(self, uid):
|
||||||
message, data = self.connection.uid('fetch', uid, '(RFC822)')
|
message, data = self.connection.uid('fetch', uid, '(BODY.PEEK[])') # Don't mark the messages as read
|
||||||
|
|
||||||
raw_email = data[0][1]
|
raw_email = data[0][1]
|
||||||
|
|
||||||
email_metadata = self.parse_email(raw_email)
|
email_object = self.parse_email(raw_email)
|
||||||
|
|
||||||
return email_metadata
|
return email_object
|
||||||
|
|
||||||
def fetch_list(self, data):
|
def fetch_list(self, data):
|
||||||
uid_list = data[0].split()
|
uid_list = data[0].split()
|
||||||
|
|
||||||
messages_list = []
|
|
||||||
for uid in uid_list:
|
for uid in uid_list:
|
||||||
messages_list.append(self.fetch_by_uid(uid))
|
yield self.fetch_by_uid(uid)
|
||||||
|
|
||||||
|
def messages(self, *args, **kwargs):
|
||||||
|
|
||||||
def get_all(self):
|
query = "ALL"
|
||||||
message, data = self.connection.uid('search', None, "ALL")
|
|
||||||
|
# Parse keyword arguments
|
||||||
|
unread = kwargs.get('unread', False)
|
||||||
|
folder = kwargs.get('folder', False)
|
||||||
|
sent_from = kwargs.get('sent_from', False)
|
||||||
|
sent_to = kwargs.get('sent_to', False)
|
||||||
|
|
||||||
|
if unread != False:
|
||||||
|
query = "UNSEEN"
|
||||||
|
|
||||||
|
message, data = self.connection.uid('search', None, query)
|
||||||
|
|
||||||
return self.fetch_list(data)
|
return self.fetch_list(data)
|
||||||
|
|
||||||
|
|
||||||
def get_unread(self):
|
|
||||||
message, data = self.connection.uid('search', None, "UNSEEN")
|
|
||||||
|
|
||||||
return self.fetch_list(data)
|
|
||||||
|
|||||||
34
mailbox/parser.py
Normal file
34
mailbox/parser.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import email
|
||||||
|
from email.header import decode_header
|
||||||
|
|
||||||
|
def decode_mail_header(value, default_charset='us-ascii'):
|
||||||
|
"""
|
||||||
|
Decode a header value into a unicode string.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
headers=email.header.decode_header(value)
|
||||||
|
except email.errors.HeaderParseError:
|
||||||
|
return value.encode('us-ascii', 'replace').decode('us-ascii')
|
||||||
|
else:
|
||||||
|
for i, (text, charset) in enumerate(headers):
|
||||||
|
try:
|
||||||
|
headers[i]=text.decode(charset or 'us-ascii', 'replace')
|
||||||
|
except LookupError:
|
||||||
|
# if the charset is unknown, force default
|
||||||
|
headers[i]=text.decode(default_charset, 'replace')
|
||||||
|
|
||||||
|
return u"".join(headers)
|
||||||
|
|
||||||
|
|
||||||
|
def get_mail_addresses(message, header_name):
|
||||||
|
"""
|
||||||
|
retrieve all email addresses from one message header
|
||||||
|
|
||||||
|
"""
|
||||||
|
addresses = email.utils.getaddresses(h for h in message.get_all(header_name, []))
|
||||||
|
|
||||||
|
for i, (address_name, address) in enumerate(addresses):
|
||||||
|
addresses[i]={'name': decode_mail_header(address_name), 'email': address}
|
||||||
|
|
||||||
|
return addresses
|
||||||
|
|
||||||
2
setup.py
2
setup.py
@@ -1,7 +1,7 @@
|
|||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
import os
|
import os
|
||||||
|
|
||||||
version = '0.3.3'
|
version = '0.4'
|
||||||
|
|
||||||
|
|
||||||
def read(filename):
|
def read(filename):
|
||||||
|
|||||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/parser_tests.py
Normal file
0
tests/parser_tests.py
Normal file
Reference in New Issue
Block a user