simple pep8 compliance fixes
This commit is contained in:
248
imbox/parser.py
248
imbox/parser.py
@@ -1,160 +1,170 @@
|
|||||||
import re
|
import re
|
||||||
import StringIO
|
import StringIO
|
||||||
import email
|
import email
|
||||||
import base64, quopri
|
import base64
|
||||||
import time
|
import quopri
|
||||||
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from email.header import Header, decode_header
|
from email.header import decode_header
|
||||||
|
|
||||||
|
|
||||||
class Struct(object):
    """Simple attribute container built from keyword arguments.

    Every keyword passed to the constructor is stored in the instance
    ``__dict__`` and is therefore readable as a plain attribute.
    """

    def __init__(self, **entries):
        self.__dict__.update(entries)

    def keys(self):
        """Return the names of the attributes stored on this instance."""
        return self.__dict__.keys()

    def __repr__(self):
        # Same text as the repr of the underlying attribute dictionary.
        return str(self.__dict__)
|
||||||
|
|
||||||
|
|
||||||
def decode_mail_header(value, default_charset='us-ascii'):
    """
    Decode a header value into a unicode string.

    ``value`` is split into fragments with ``email.header.decode_header``.
    Each byte fragment is decoded with its declared charset, or with
    ``default_charset`` when none is declared or the declared one is unknown.
    Undecodable bytes are replaced rather than raising.

    If the header cannot be parsed at all, ``value`` itself is returned after
    a round trip through ``default_charset`` with replacement.
    """
    try:
        fragments = decode_header(value)
    except email.errors.HeaderParseError:
        return value.encode(default_charset, 'replace').decode(default_charset)

    decoded = []
    for text, charset in fragments:
        if not isinstance(text, bytes):
            # decode_header may hand back fragments that are already text
            # (no encoded-word present); they need no further decoding.
            decoded.append(text)
            continue
        try:
            decoded.append(text.decode(charset or default_charset, 'replace'))
        except LookupError:
            # if the charset is unknown, force default
            decoded.append(text.decode(default_charset, 'replace'))

    return u"".join(decoded)
|
||||||
|
|
||||||
|
|
||||||
def get_mail_addresses(message, header_name):
    """
    Retrieve all email addresses from one message header.

    Returns a list with one ``{'name': ..., 'email': ...}`` dictionary per
    address found in every occurrence of ``header_name``; the display name
    is passed through ``decode_mail_header``. The list is empty when the
    header is absent.
    """
    # `import email` alone does not guarantee the `utils` submodule is
    # loaded, so import it explicitly before use.
    import email.utils

    raw_headers = message.get_all(header_name, [])
    return [
        {'name': decode_mail_header(address_name),
         'email': address_email}
        for address_name, address_email in email.utils.getaddresses(raw_headers)
    ]
|
||||||
|
|
||||||
|
|
||||||
def decode_param(param):
    """
    Split a ``name=value`` header parameter and decode encoded words in it.

    The value is examined line by line. Every line that contains an
    ``=?charset?Q?...?=`` or ``=?charset?B?...?=`` word is decoded
    (quoted-printable or base64, then ``charset``), and the decoded pieces
    are concatenated to form the returned value. When no line contains such
    a word the value is returned untouched.

    Returns a ``(name, value)`` tuple. ``name`` is returned exactly as it
    appears before the first ``=`` (it is not stripped). A parameter with no
    ``=`` at all yields ``(param, '')`` instead of raising.
    """
    name, separator, v = param.partition('=')
    if not separator:
        return param, ''

    decoded_lines = []
    for line in v.split('\n'):
        # Charset names commonly contain hyphens (utf-8, iso-8859-1), and
        # the encoding flag may be written in either case.
        match = re.search(r'=\?([\w-]+)\?([QqBb])\?(.+)\?=', line)
        if not match:
            continue
        charset, kind, payload = match.groups()
        if kind in 'Qq':
            # header=True applies the header variant of quoted-printable,
            # where "_" stands for a space.
            raw = quopri.decodestring(payload, header=True)
        else:
            raw = base64.b64decode(payload)
        decoded_lines.append(raw.decode(charset))

    if decoded_lines:
        v = ''.join(decoded_lines)
    return name, v
|
||||||
|
|
||||||
|
|
||||||
def parse_attachment(message_part):
    """
    Build an attachment description from one message part.

    Returns ``None`` unless the part carries a ``Content-Disposition`` header
    whose first token is ``attachment`` or ``inline`` and the part has a
    decodable payload. Otherwise returns a dictionary with:

    * ``content-type`` -- the part's content type,
    * ``size`` -- length of the decoded payload,
    * ``content`` -- a ``StringIO`` wrapping the decoded payload,
    * ``filename`` / ``create-date`` -- present only when the matching
      disposition parameter is found.
    """
    # Check again if this is a valid attachment
    content_disposition = message_part.get("Content-Disposition", None)
    if content_disposition is None:
        return None

    dispositions = content_disposition.strip().split(";")
    # Tolerate whitespace between the disposition type and the first ";".
    if dispositions[0].strip().lower() not in ("attachment", "inline"):
        return None

    file_data = message_part.get_payload(decode=True)
    if file_data is None:
        # get_payload(decode=True) returns None for multipart containers;
        # there is no payload to expose, so this is not a usable attachment.
        return None

    attachment = {
        'content-type': message_part.get_content_type(),
        'size': len(file_data),
        'content': StringIO.StringIO(file_data)
    }

    for param in dispositions[1:]:
        if '=' not in param:
            # e.g. the empty piece left behind by a trailing ";"
            continue
        name, value = decode_param(param)

        if 'file' in name:
            attachment['filename'] = value

        if 'create-date' in name:
            attachment['create-date'] = value

    return attachment
|
|
||||||
|
|
||||||
def parse_email(raw_email):
    """
    Parse a raw message string into a ``Struct``.

    The returned object exposes:

    * ``raw_email`` -- the input string,
    * ``body`` -- ``{'plain': [...], 'html': [...]}`` with decoded payloads,
    * ``attachments`` -- list of dictionaries from ``parse_attachment``,
    * ``sent_from`` / ``sent_to`` -- address lists from ``get_mail_addresses``,
    * ``headers`` -- ``{'Name': ..., 'Value': ...}`` entries for a fixed set
      of header names,
    * ``subject`` / ``date`` / ``message_id`` -- decoded header values, each
      present only when the header exists,
    * ``parsed_date`` -- set only when a ``date`` header exists; a
      ``datetime`` or ``None`` when the date cannot be interpreted.
    """
    email_message = email.message_from_string(raw_email)
    maintype = email_message.get_content_maintype()
    parsed_email = {}

    parsed_email['raw_email'] = raw_email

    body = {
        "plain": [],
        "html": []
    }
    attachments = []

    if maintype == 'multipart':
        for part in email_message.walk():
            content = part.get_payload(decode=True)
            content_type = part.get_content_type()
            content_disposition = part.get('Content-Disposition', None)

            # Only an absent disposition or the bare value "inline" counts
            # as body text; anything else is handed to parse_attachment.
            is_inline = content_disposition is None \
                or content_disposition == "inline"
            if content_type == "text/plain" and is_inline:
                body['plain'].append(content)
            elif content_type == "text/html" and is_inline:
                body['html'].append(content)
            elif content_disposition:
                attachment = parse_attachment(part)
                if attachment:
                    attachments.append(attachment)

    elif maintype == 'text':
        payload = email_message.get_payload(decode=True)
        body['plain'].append(payload)

    parsed_email['attachments'] = attachments

    parsed_email['body'] = body
    # dict() keeps a single value per header name.
    email_dict = dict(email_message.items())

    parsed_email['sent_from'] = get_mail_addresses(email_message, 'from')
    parsed_email['sent_to'] = get_mail_addresses(email_message, 'to')

    value_headers_keys = ['subject', 'date', 'message-id']
    key_value_header_keys = ['received-spf',
                             'mime-version',
                             'x-spam-status',
                             'x-spam-score',
                             'content-type']

    parsed_email['headers'] = []
    # items() behaves the same as iteritems() here and exists on every
    # Python version.
    for key, value in email_dict.items():

        if key.lower() in value_headers_keys:
            valid_key_name = key.lower().replace('-', '_')
            parsed_email[valid_key_name] = decode_mail_header(value)

        if key.lower() in key_value_header_keys:
            parsed_email['headers'].append({'Name': key,
                                            'Value': value})

    if parsed_email.get('date'):
        timetuple = email.utils.parsedate(parsed_email['date'])
        parsed_date = None
        if timetuple:
            try:
                parsed_date = datetime.fromtimestamp(time.mktime(timetuple))
            except (OverflowError, ValueError):
                # A syntactically valid but out-of-range date must not
                # abort parsing of the whole message.
                parsed_date = None
        parsed_email['parsed_date'] = parsed_date

    return Struct(**parsed_email)
|
||||||
|
|||||||
Reference in New Issue
Block a user