From 55f64a1922dbae1aef537fd464f57d6503d7c2f0 Mon Sep 17 00:00:00 2001 From: zevav Date: Wed, 25 Jul 2018 08:46:59 -0400 Subject: [PATCH 1/4] removed some unused args, renamed to avoid shadowing a name in the global scope. --- .idea/workspace.xml | 305 ++++++++++++++++++++++++++++++++++++++++++++ imbox/__init__.py | 7 +- 2 files changed, 308 insertions(+), 4 deletions(-) create mode 100644 .idea/workspace.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..dd6bb44 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,305 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - 1532438596665 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/imbox/imap.py b/imbox/imap.py index 6142739..6474eab 100644 --- a/imbox/imap.py +++ b/imbox/imap.py @@ -10,22 +10,16 @@ class ImapTransport: def __init__(self, hostname, port=None, ssl=True, ssl_context=None, starttls=False): self.hostname = hostname - self.port = port - kwargs = {} if ssl: - self.transport = IMAP4_SSL - if not self.port: - self.port = 993 + self.port = port or 993 if ssl_context is None: ssl_context = pythonssllib.create_default_context() - kwargs["ssl_context"] = ssl_context + self.server = IMAP4_SSL(self.hostname, self.port, ssl_context=ssl_context) else: - self.transport = IMAP4 - if not self.port: - self.port = 143 + self.port = port or 143 + self.server = IMAP4(self.hostname, self.port) - self.server = self.transport(self.hostname, self.port, **kwargs) if starttls: 
self.server.starttls() logger.debug("Created IMAP4 transport for {host}:{port}" diff --git a/imbox/parser.py b/imbox/parser.py index 58e10bf..fdc061f 100644 --- a/imbox/parser.py +++ b/imbox/parser.py @@ -66,7 +66,7 @@ def decode_param(param): values = v.split('\n') value_results = [] for value in values: - match = re.search(r'=\?((?:\w|-)+)\?(Q|B)\?(.+)\?=', value) + match = re.search(r'=\?((?:\w|-)+)\?([QB])\?(.+)\?=', value) if match: encoding, type_, code = match.groups() if type_ == 'Q': @@ -140,9 +140,7 @@ def parse_email(raw_email, policy=None): except UnicodeEncodeError: email_message = email.message_from_string(raw_email.encode('utf-8'), **email_parse_kwargs) maintype = email_message.get_content_maintype() - parsed_email = {} - - parsed_email['raw_email'] = raw_email + parsed_email = {'raw_email': raw_email} body = { "plain": [], diff --git a/imbox/utils.py b/imbox/utils.py index 46d881d..e82deb8 100644 --- a/imbox/utils.py +++ b/imbox/utils.py @@ -1,14 +1,16 @@ import logging logger = logging.getLogger(__name__) + def str_encode(value='', encoding=None, errors='strict'): logger.debug("Encode str {} with and errors {}".format(value, encoding, errors)) return str(value, encoding, errors) + def str_decode(value='', encoding=None, errors='strict'): if isinstance(value, str): return bytes(value, encoding, errors).decode('utf-8') elif isinstance(value, bytes): return value.decode(encoding or 'utf-8', errors=errors) else: - raise TypeError( "Cannot decode '{}' object".format(value.__class__) ) + raise TypeError("Cannot decode '{}' object".format(value.__class__)) From 8e26e92a399126a21562efe8fa0ffb3a04c13442 Mon Sep 17 00:00:00 2001 From: zevav Date: Wed, 25 Jul 2018 11:48:19 -0400 Subject: [PATCH 3/4] Moved `query_uids` from `Imbox` to `Messages`. Moved `fetch_list` to `Messages` and renamed it to `fetch_email_list`, moving assignment of uid_list to the constructor of `Messages`. Moved `fetch_by_uid` from `Imbox` to a standalone function, `fetch_email_by_uid`, in the `parser` module. 
Replaced call to `Imbox.fetch_list` with a call to `Messages` instead. created `Messages` class, which encapsulates the generator `Messages.fetch_email_list`, while also implementing `__len__` to show how many emails match a given query, and `__iter__` to refresh the `fetch_email_list` generator when it's exhausted. It also implements `__getitem__` to support indexing of the emails matching a query. Messages requires the `connection` and `parser_policy` established in `Imbox` and accepts arbitrary keyword arguments, which it uses in the IMAP query as well as in the `__repr__`. --- imbox/__init__.py | 86 +++++++++++++++++++++++++++++++++-------------- imbox/parser.py | 13 ++++++- imbox/query.py | 3 +- 3 files changed, 74 insertions(+), 28 deletions(-) diff --git a/imbox/__init__.py b/imbox/__init__.py index e5fb0f3..d94b277 100644 --- a/imbox/__init__.py +++ b/imbox/__init__.py @@ -1,8 +1,9 @@ from imbox.imap import ImapTransport -from imbox.parser import parse_email +from imbox.parser import parse_email, fetch_email_by_uid from imbox.query import build_search_query import logging + logger = logging.getLogger(__name__) __version_info__ = (0, 9, 5) @@ -36,29 +37,6 @@ class Imbox: logger.info("Disconnected from IMAP Server {username}@{hostname}".format( hostname=self.hostname, username=self.username)) - def query_uids(self, **kwargs): - query_ = build_search_query(**kwargs) - message, data = self.connection.uid('search', None, query_) - if data[0] is None: - return [] - return data[0].split() - - def fetch_by_uid(self, uid): - message, data = self.connection.uid('fetch', uid, '(BODY.PEEK[])') - logger.debug("Fetched message for UID {}".format(int(uid))) - raw_email = data[0][1] - - email_object = parse_email(raw_email, policy=self.parser_policy) - - return email_object - - def fetch_list(self, **kwargs): - uid_list = self.query_uids(**kwargs) - logger.debug("Fetch all messages for UID in {}".format(uid_list)) - - for uid in uid_list: - yield (uid, 
self.fetch_by_uid(uid)) - def mark_seen(self, uid): logger.info("Mark UID {} with \\Seen FLAG".format(int(uid))) self.connection.uid('STORE', uid, '+FLAGS', '(\\Seen)') @@ -89,7 +67,65 @@ class Imbox: msg = " from folder '{}'".format(folder) logger.info("Fetch list of messages{}".format(msg)) - return self.fetch_list(**kwargs) + return Messages(connection=self.connection, + parser_policy=self.parser_policy, + **kwargs) def folders(self): return self.connection.list() + + +class Messages: + + def __init__(self, + connection, + parser_policy, + **kwargs): + + self.connection = connection + self.parser_policy = parser_policy + self.kwargs = kwargs + self.uid_list = self.query_uids(**kwargs) + + logger.debug("Fetch all messages for UID in {}".format(self.uid_list)) + + def fetch_email(self, uid): + return fetch_email_by_uid(uid=uid, + connection=self.connection, + parser_policy=self.parser_policy) + + def query_uids(self, **kwargs): + query_ = build_search_query(**kwargs) + message, data = self.connection.uid('search', None, query_) + if data[0] is None: + return [] + return data[0].split() + + def fetch_email_list(self): + for uid in self.uid_list: + yield uid, self.fetch_email(uid) + + def __repr__(self): + if len(self.kwargs) > 0: + return 'Messages({})'.format('\n'.join('{}={}'.format(key, value) + for key, value in self.kwargs.items())) + return 'Messages(ALL)' + + def __iter__(self): + return self.fetch_email_list() + + def __next__(self): + return self + + def __len__(self): + return len(self.uid_list) + + def __getitem__(self, index): + uids = self.uid_list[index] + + if not isinstance(uids, list): + uid = uids + return uid, self.fetch_email(uid) + + return [(uid, self.fetch_email(uid)) + for uid in uids] diff --git a/imbox/parser.py b/imbox/parser.py index fdc061f..68749fc 100644 --- a/imbox/parser.py +++ b/imbox/parser.py @@ -9,6 +9,7 @@ from email.header import decode_header from imbox.utils import str_encode, str_decode import logging + logger = 
logging.getLogger(__name__) @@ -127,6 +128,16 @@ def decode_content(message): return content +def fetch_email_by_uid(uid, connection, parser_policy): + message, data = connection.uid('fetch', uid, '(BODY.PEEK[])') + logger.debug("Fetched message for UID {}".format(int(uid))) + raw_email = data[0][1] + + email_object = parse_email(raw_email, policy=parser_policy) + + return email_object + + def parse_email(raw_email, policy=None): if isinstance(raw_email, bytes): raw_email = str_encode(raw_email, 'utf-8', errors='ignore') @@ -160,7 +171,7 @@ def parse_email(raw_email, policy=None): content = decode_content(part) is_inline = content_disposition is None \ - or content_disposition.startswith("inline") + or content_disposition.startswith("inline") if content_type == "text/plain" and is_inline: body['plain'].append(content) elif content_type == "text/html" and is_inline: diff --git a/imbox/query.py b/imbox/query.py index 3e7c91b..cd4fe7f 100644 --- a/imbox/query.py +++ b/imbox/query.py @@ -8,8 +8,7 @@ logger = logging.getLogger(__name__) def format_date(date): if isinstance(date, datetime.date): return date.strftime('%d-%b-%Y') - else: - return date + return date def build_search_query(**kwargs): From 73fafcb368cb07e7c91559800210e3ebfbcb0761 Mon Sep 17 00:00:00 2001 From: zevav Date: Wed, 25 Jul 2018 12:11:59 -0400 Subject: [PATCH 4/4] made Messages methods private, as well as uid_list. 
--- imbox/__init__.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/imbox/__init__.py b/imbox/__init__.py index d94b277..375e2a1 100644 --- a/imbox/__init__.py +++ b/imbox/__init__.py @@ -85,25 +85,25 @@ class Messages: self.connection = connection self.parser_policy = parser_policy self.kwargs = kwargs - self.uid_list = self.query_uids(**kwargs) + self._uid_list = self._query_uids(**kwargs) - logger.debug("Fetch all messages for UID in {}".format(self.uid_list)) + logger.debug("Fetch all messages for UID in {}".format(self._uid_list)) - def fetch_email(self, uid): + def _fetch_email(self, uid): return fetch_email_by_uid(uid=uid, connection=self.connection, parser_policy=self.parser_policy) - def query_uids(self, **kwargs): + def _query_uids(self, **kwargs): query_ = build_search_query(**kwargs) message, data = self.connection.uid('search', None, query_) if data[0] is None: return [] return data[0].split() - def fetch_email_list(self): - for uid in self.uid_list: - yield uid, self.fetch_email(uid) + def _fetch_email_list(self): + for uid in self._uid_list: + yield uid, self._fetch_email(uid) def __repr__(self): if len(self.kwargs) > 0: @@ -112,20 +112,20 @@ class Messages: return 'Messages(ALL)' def __iter__(self): - return self.fetch_email_list() + return self._fetch_email_list() def __next__(self): return self def __len__(self): - return len(self.uid_list) + return len(self._uid_list) def __getitem__(self, index): - uids = self.uid_list[index] + uids = self._uid_list[index] if not isinstance(uids, list): uid = uids - return uid, self.fetch_email(uid) + return uid, self._fetch_email(uid) - return [(uid, self.fetch_email(uid)) + return [(uid, self._fetch_email(uid)) for uid in uids]