From fa13f3a2d26bf54c5310672fdabd88b3c9e1a980 Mon Sep 17 00:00:00 2001 From: Martin Rusev Date: Fri, 26 Jul 2013 15:17:03 +0300 Subject: [PATCH] 0.4, parser updates, docs updated --- README.md | 76 +++++++++++++++++------------- mailbox/__init__.py | 105 ++++++++++++++++++++++-------------------- mailbox/parser.py | 34 ++++++++++++++ setup.py | 2 +- tests/__init__.py | 0 tests/parser_tests.py | 0 6 files changed, 134 insertions(+), 83 deletions(-) create mode 100644 mailbox/parser.py create mode 100644 tests/__init__.py create mode 100644 tests/parser_tests.py diff --git a/README.md b/README.md index 31c29d0..921c537 100644 --- a/README.md +++ b/README.md @@ -25,43 +25,57 @@ Usage username='username', password='password', ssl=True) - - - # Gets all unread messages - unread_messages = mailbox.get_unread() - + # Gets all messages - all_messages = mailbox.get_all() + all_messages = mailbox.messages() + + # Unread messages + unread_messages = mailbox.messages(unread=True) for message in all_messages: ........ - - # Every message is converted to a dictionary with the following keys: - - { - 'MesssageID': '22c74902-a0c1-4511-804f2-341342852c90', - 'From': { - 'Name': 'John Doe', - 'Email': 'jonhdoe@email.com' - }, - 'To': { - 'Name': 'Martin Rusev', - 'Email': 'martin@amon.cx' - }, - 'Date': 'Mon, 22 Jul 2013 23:21:39 +0000 (UTC)', - 'TextBody': ['ASCII'], - 'Subject': 'This is a message' - - 'Headers': [{ - 'Name': 'Received-SPF', - 'Value': 'pass (google.com: domain of bounces+......;' - }, { - 'Name': 'MIME-Version', - 'Value': '1.0' - }], + # Every message is an object with the following keys - } + message.sent_from + message.sent_to + message.subject + message.headers + message.message-id + message.date + message.text_body + + # To check all available keys + print message.keys() + + + # To check the whole object, just write + + print message + + { + 'headers': + [{ + 'Name': 'Received-SPF', + 'Value': 'pass (google.com: domain of martinrusev@zoho.com designates 72.5.230.95 as permitted sender) client-ip=72.5.230.95;' + }, + { + 'Name': 'MIME-Version', + 'Value': '1.0' + }], + 'text_body': ['ASCII'], + 'date': u 'Fri, 26 Jul 2013 10:56:26 +0300', + 'message-id': u '51F22BAA.1040606', + 'sent_from': [{ + 'name': u 'Martin Rusev', + 'email': 'martin@amon.cx' + }], + 'sent_to': [{ + 'name': u 'John Doe', + 'email': 'john@gmail.com' + }], + 'subject': u 'Hello John, How are you today' + } diff --git a/mailbox/__init__.py b/mailbox/__init__.py index c494c06..5685739 100644 --- a/mailbox/__init__.py +++ b/mailbox/__init__.py @@ -1,5 +1,18 @@ import email from mailbox.imap import ImapTransport +from mailbox.parser import get_mail_addresses, decode_mail_header + + +class Struct(object): + def __init__(self, **entries): + self.__dict__.update(entries) + + def keys(self): + return self.__dict__.keys() + + def __repr__(self): + return str(self.__dict__) + class MailBox(object): @@ -10,10 +23,9 @@ class MailBox(object): def parse_email(self, raw_email): + parsed_email = {} email_message = email.message_from_string(raw_email) - - maintype = email_message.get_content_maintype() text_body = [] @@ -25,70 +37,61 @@ class MailBox(object): elif maintype == 'text': text_body.append(email_message.get_payload(decode=True)) + parsed_email['text_body'] = text_body + email_dict = dict(email_message.items()) - from_dict = {} - from_ = email.utils.parseaddr(email_dict['From']) - if len(from_) == 2: - from_dict = {'Name': from_[0], 'Email': from_[1]} + parsed_email['sent_from'] = get_mail_addresses(email_message, 'from') + parsed_email['sent_to'] = get_mail_addresses(email_message, 'to') - to_dict = {} - to_ = email.utils.parseaddr(email_dict['To']) - if len(to_) == 2: - to_dict = {'Name': to_[0], 'Email': to_[1]} + value_headers_keys = ['Subject', 'Date','Message-ID'] + key_value_header_keys = ['Received-SPF', + 'MIME-Version', + 'X-Spam-Status', + 'X-Spam-Score', + 'Content-Type'] - subject = email_dict.get('Subject', None) - date = email_dict.get('Date', None) - message_id = email_dict.get('Message-ID', None) + parsed_email['headers'] = [] + for key, value in email_dict.iteritems(): + + if key in value_headers_keys: + valid_key_name = key.lower() + parsed_email[valid_key_name] = decode_mail_header(value) + + if key in key_value_header_keys: + parsed_email['headers'].append({'Name': key, + 'Value': value}) - # Get the headers - headers = [] - headers_keys = ['Received-SPF', - 'MIME-Version', - 'X-Spam-Status', - 'X-Spam-Score'] - - for key in headers_keys: - header_value = email_dict.get(key) - - if header_value: - headers.append({'Name': key, - 'Value': header_value}) - - return { - 'MesssageID': message_id, - 'From': from_dict, - 'To': to_dict, - 'Subject': subject, - 'Date': date, - 'TextBody': text_body, - 'Headers': headers - } + return Struct(**parsed_email) def fetch_by_uid(self, uid): - message, data = self.connection.uid('fetch', uid, '(RFC822)') - + message, data = self.connection.uid('fetch', uid, '(BODY.PEEK[])') # Don't mark the messages as read raw_email = data[0][1] - email_metadata = self.parse_email(raw_email) + email_object = self.parse_email(raw_email) - return email_metadata + return email_object def fetch_list(self, data): uid_list = data[0].split() - messages_list = [] for uid in uid_list: - messages_list.append(self.fetch_by_uid(uid)) - + yield self.fetch_by_uid(uid) - def get_all(self): - message, data = self.connection.uid('search', None, "ALL") - - return self.fetch_list(data) - - - def get_unread(self): - message, data = self.connection.uid('search', None, "UNSEEN") + def messages(self, *args, **kwargs): + + query = "ALL" + + # Parse keyword arguments + unread = kwargs.get('unread', False) + folder = kwargs.get('folder', False) + sent_from = kwargs.get('sent_from', False) + sent_to = kwargs.get('sent_to', False) + + if unread != False: + query = "UNSEEN" + + message, data = self.connection.uid('search', None, query) return self.fetch_list(data) + diff --git a/mailbox/parser.py b/mailbox/parser.py new file mode 100644 index 0000000..49c15ae --- /dev/null +++ b/mailbox/parser.py @@ -0,0 +1,34 @@ +import email +from email.header import decode_header + +def decode_mail_header(value, default_charset='us-ascii'): + """ + Decode a header value into a unicode string. + """ + try: + headers=email.header.decode_header(value) + except email.errors.HeaderParseError: + return value.encode('us-ascii', 'replace').decode('us-ascii') + else: + for i, (text, charset) in enumerate(headers): + try: + headers[i]=text.decode(charset or 'us-ascii', 'replace') + except LookupError: + # if the charset is unknown, force default + headers[i]=text.decode(default_charset, 'replace') + + return u"".join(headers) + + +def get_mail_addresses(message, header_name): + """ + retrieve all email addresses from one message header + + """ + addresses = email.utils.getaddresses(h for h in message.get_all(header_name, [])) + + for i, (address_name, address) in enumerate(addresses): + addresses[i]={'name': decode_mail_header(address_name), 'email': address} + + return addresses + \ No newline at end of file diff --git a/setup.py b/setup.py index 2e3d9f7..f08e1c8 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup import os -version = '0.3.3' +version = '0.4' def read(filename): diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/parser_tests.py b/tests/parser_tests.py new file mode 100644 index 0000000..e69de29