Merge branch 'master' into unicode-decode-fix

This commit is contained in:
Andrey
2017-09-26 14:43:21 +03:00
committed by GitHub
12 changed files with 211 additions and 154 deletions

View File

@@ -5,8 +5,7 @@ python:
- "3.4" - "3.4"
- "3.5" - "3.5"
- "3.6" - "3.6"
- "3.6-dev"
- "3.7-dev" - "3.7-dev"
install: install:
- python setup.py -q install - python setup.py -q install
script: nosetests script: nosetests -v

View File

@@ -1,3 +1,12 @@
## 0.9 (18 September 2017)
IMPROVEMENTS:
* Permissively Decode Emails ([#78](https://github.com/martinrusev/imbox/pull/78))
* "With" statement for automatic cleanup/logout ([#92](https://github.com/martinrusev/imbox/pull/92))
## 0.8.6 (6 December 2016) ## 0.8.6 (6 December 2016)
IMPROVEMENTS: IMPROVEMENTS:

View File

@@ -1,3 +1,5 @@
include LICENSE include LICENSE
include MANIFEST.in include MANIFEST.in
include README.md include README.rst
include CHANGELOG.md
graft tests

9
Makefile Normal file
View File

@@ -0,0 +1,9 @@
upload_to_pypi:
pip install twine setuptools
rm -rf dist/*
rm -rf build/*
python setup.py sdist build
twine upload dist/*
test:
nosetests -v

116
README.md
View File

@@ -1,116 +0,0 @@
Imbox - Python IMAP for Humans
=======
[![Build Status](https://travis-ci.org/martinrusev/imbox.svg?branch=master)](https://travis-ci.org/martinrusev/imbox)
Python library for reading IMAP mailboxes and converting email content to machine readable data
Requirements
============
Python (3.2, 3.3, 3.4, 3.5, 3.6)
Installation
============
pip install imbox
Usage
=====
```python
from imbox import Imbox
# SSL Context docs https://docs.python.org/2/library/ssl.html#ssl.create_default_context
imbox = Imbox('imap.gmail.com',
username='username',
password='password',
ssl=True,
ssl_context=None)
# Gets all messages
all_messages = imbox.messages()
# Unread messages
unread_messages = imbox.messages(unread=True)
# Messages sent FROM
messages_from = imbox.messages(sent_from='martin@amon.cx')
# Messages sent TO
messages_from = imbox.messages(sent_to='martin@amon.cx')
# Messages received before specific date
messages_from = imbox.messages(date__lt='31-July-2013')
# Messages received after specific date
messages_from = imbox.messages(date__gt='30-July-2013')
# Messages from a specific folder
messages_folder = imbox.messages(folder='Social')
for uid, message in all_messages:
........
# Every message is an object with the following keys
message.sent_from
message.sent_to
message.subject
message.headers
message.message_id
message.date
message.body.plain
message.body.html
message.attachments
# To check all available keys
print message.keys()
# To check the whole object, just write
print message
{
'headers':
[{
'Name': 'Received-SPF',
'Value': 'pass (google.com: domain of ......;'
},
{
'Name': 'MIME-Version',
'Value': '1.0'
}],
'body': {
'plain: ['ASCII'],
'html': ['HTML BODY']
},
'attachments': [{
'content': <StringIO.StringIO instance at 0x7f8e8445fa70>,
'filename': "avatar.png",
'content-type': 'image/png',
'size': 80264
}],
'date': u 'Fri, 26 Jul 2013 10:56:26 +0300',
'message_id': u '51F22BAA.1040606',
'sent_from': [{
'name': u 'Martin Rusev',
'email': 'martin@amon.cx'
}],
'sent_to': [{
'name': u 'John Doe',
'email': 'john@gmail.com'
}],
'subject': u 'Hello John, How are you today'
}
```
# [Changelog](https://github.com/martinrusev/imbox/blob/master/CHANGELOG.md)

117
README.rst Normal file
View File

@@ -0,0 +1,117 @@
Imbox - Python IMAP for Humans
==============================
.. image:: https://travis-ci.org/martinrusev/imbox.svg?branch=master
:target: https://travis-ci.org/martinrusev/imbox
:alt: Build Status
Python library for reading IMAP mailboxes and converting email content to machine-readable data.
Requirements
------------
Python (3.2, 3.3, 3.4, 3.5, 3.6)
Installation
------------
``pip install imbox``
Usage
-----
.. code:: python
from imbox import Imbox
# SSL Context docs https://docs.python.org/3/library/ssl.html#ssl.create_default_context
with Imbox('imap.gmail.com',
username='username',
password='password',
ssl=True,
ssl_context=None) as imbox:
# Gets all messages
all_messages = imbox.messages()
# Unread messages
unread_messages = imbox.messages(unread=True)
# Messages sent FROM
messages_from = imbox.messages(sent_from='martin@amon.cx')
# Messages sent TO
messages_from = imbox.messages(sent_to='martin@amon.cx')
# Messages received before specific date
messages_from = imbox.messages(date__lt='31-July-2013')
# Messages received after specific date
messages_from = imbox.messages(date__gt='30-July-2013')
# Messages from a specific folder
messages_folder = imbox.messages(folder='Social')
for uid, message in all_messages:
# Every message is an object with the following keys
message.sent_from
message.sent_to
message.subject
message.headers
message.message_id
message.date
message.body.plain
message.body.html
message.attachments
# To check all available keys
print(message.keys())
# To check the whole object, just write
print(message)
{
'headers':
[{
'Name': 'Received-SPF',
'Value': 'pass (google.com: domain of ......;'
},
{
'Name': 'MIME-Version',
'Value': '1.0'
}],
'body': {
'plain': ['ASCII'],
'html': ['HTML BODY']
},
'attachments': [{
'content': <StringIO.StringIO instance at 0x7f8e8445fa70>,
'filename': "avatar.png",
'content-type': 'image/png',
'size': 80264
}],
'date': u 'Fri, 26 Jul 2013 10:56:26 +0300',
'message_id': u '51F22BAA.1040606',
'sent_from': [{
'name': u 'Martin Rusev',
'email': 'martin@amon.cx'
}],
'sent_to': [{
'name': u 'John Doe',
'email': 'john@gmail.com'
}],
'subject': u 'Hello John, How are you today'
}
`Changelog <https://github.com/martinrusev/imbox/blob/master/CHANGELOG.md>`_

View File

@@ -6,7 +6,7 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class Imbox(object): class Imbox:
def __init__(self, hostname, username=None, password=None, ssl=True, def __init__(self, hostname, username=None, password=None, ssl=True,
port=None, ssl_context=None, policy=None): port=None, ssl_context=None, policy=None):
@@ -21,6 +21,12 @@ class Imbox(object):
logger.info("Connected to IMAP Server with user {username} on {hostname}{ssl}".format( logger.info("Connected to IMAP Server with user {username} on {hostname}{ssl}".format(
hostname=hostname, username=username, ssl=(" over SSL" if ssl else ""))) hostname=hostname, username=username, ssl=(" over SSL" if ssl else "")))
def __enter__(self):
return self
def __exit__(self, type, value, traceback):
self.logout()
def logout(self): def logout(self):
self.connection.close() self.connection.close()
self.connection.logout() self.connection.logout()

View File

@@ -6,7 +6,7 @@ import ssl as pythonssllib
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class ImapTransport(object): class ImapTransport:
def __init__(self, hostname, port=None, ssl=True, ssl_context=None): def __init__(self, hostname, port=None, ssl=True, ssl_context=None):
self.hostname = hostname self.hostname = hostname

View File

@@ -1,6 +1,4 @@
from __future__ import unicode_literals import io
from six import BytesIO, binary_type
import re import re
import email import email
import base64 import base64
@@ -14,7 +12,7 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class Struct(object): class Struct:
def __init__(self, **entries): def __init__(self, **entries):
self.__dict__.update(entries) self.__dict__.update(entries)
@@ -71,7 +69,7 @@ def decode_param(param):
if type_ == 'Q': if type_ == 'Q':
value = quopri.decodestring(code) value = quopri.decodestring(code)
elif type_ == 'B': elif type_ == 'B':
value = base64.decodestring(code) value = base64.decodebytes(code.encode())
value = str_encode(value, encoding) value = str_encode(value, encoding)
value_results.append(value) value_results.append(value)
if value_results: if value_results:
@@ -92,7 +90,7 @@ def parse_attachment(message_part):
attachment = { attachment = {
'content-type': message_part.get_content_type(), 'content-type': message_part.get_content_type(),
'size': len(file_data), 'size': len(file_data),
'content': BytesIO(file_data) 'content': io.BytesIO(file_data)
} }
filename = message_part.get_param('name') filename = message_part.get_param('name')
if filename: if filename:
@@ -122,7 +120,7 @@ def decode_content(message):
def parse_email(raw_email, policy=None): def parse_email(raw_email, policy=None):
if isinstance(raw_email, binary_type): if isinstance(raw_email, bytes):
raw_email = str_encode(raw_email, 'utf-8', errors='ignore') raw_email = str_encode(raw_email, 'utf-8', errors='ignore')
if policy is not None: if policy is not None:
email_parse_kwargs = dict(policy=policy) email_parse_kwargs = dict(policy=policy)

View File

@@ -1,24 +1,14 @@
from __future__ import unicode_literals
from six import PY3
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
if PY3: def str_encode(value='', encoding=None, errors='strict'):
def str_encode(value='', encoding=None, errors='strict'): logger.debug("Encode str {} with and errors {}".format(value, encoding, errors))
logger.debug("Encode str {} with and errors {}".format(value, encoding, errors)) return str(value, encoding, errors)
return str(value, encoding, errors)
def str_decode(value='', encoding=None, errors='strict'): def str_decode(value='', encoding=None, errors='strict'):
if isinstance(value, str): if isinstance(value, str):
return bytes(value, encoding, errors).decode('utf-8') return bytes(value, encoding, errors).decode('utf-8')
elif isinstance(value, bytes): elif isinstance(value, bytes):
return value.decode(encoding or 'utf-8', errors=errors) return value.decode(encoding or 'utf-8', errors=errors)
else: else:
raise TypeError( "Cannot decode '{}' object".format(value.__class__) ) raise TypeError( "Cannot decode '{}' object".format(value.__class__) )
else:
def str_encode(string='', encoding=None, errors='strict'):
return unicode(string, encoding, errors)
def str_decode(value='', encoding=None, errors='strict'):
return value.decode(encoding, errors)

View File

@@ -1,7 +1,7 @@
from setuptools import setup from setuptools import setup
import os import os
version = '0.8.5' version = '0.9'
def read(filename): def read(filename):
@@ -11,7 +11,7 @@ setup(
name='imbox', name='imbox',
version=version, version=version,
description="Python IMAP for Human beings", description="Python IMAP for Human beings",
long_description=read('README.md'), long_description=read('README.rst'),
keywords='email, IMAP, parsing emails', keywords='email, IMAP, parsing emails',
author='Martin Rusev', author='Martin Rusev',
author_email='martin@amon.cx', author_email='martin@amon.cx',
@@ -20,12 +20,13 @@ setup(
packages=['imbox'], packages=['imbox'],
package_dir={'imbox': 'imbox'}, package_dir={'imbox': 'imbox'},
zip_safe=False, zip_safe=False,
install_requires=['six'],
classifiers=( classifiers=(
'Programming Language :: Python', 'Programming Language :: Python',
'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6'
), ),
test_suite='tests',
) )

View File

@@ -1,11 +1,9 @@
# Encoding: utf-8
from __future__ import unicode_literals
import unittest import unittest
from imbox.parser import * from imbox.parser import *
import os import os
import sys import sys
if sys.version_info.major < 3 or sys.version_info.minor < 3: if sys.version_info.minor < 3:
SMTP = False SMTP = False
else: else:
from email.policy import SMTP from email.policy import SMTP
@@ -86,6 +84,46 @@ Content-Transfer-Encoding: quoted-printable
""" """
raw_email_encoded_bad_multipart = b"""Delivered-To: receiver@example.com
Return-Path: <sender@example.com>
From: sender@example.com
To: "Receiver" <receiver@example.com>, "Second\r\n Receiver" <recipient@example.com>
Subject: Re: Looking to connect with you...
Date: Thu, 20 Apr 2017 15:32:52 +0000
Message-ID: <BN6PR16MB179579288933D60C4016D078C31B0@BN6PR16MB1795.namprd16.prod.outlook.com>
Content-Type: multipart/related;
boundary="_004_BN6PR16MB179579288933D60C4016D078C31B0BN6PR16MB1795namp_";
type="multipart/alternative"
MIME-Version: 1.0
--_004_BN6PR16MB179579288933D60C4016D078C31B0BN6PR16MB1795namp_
Content-Type: multipart/alternative;
boundary="_000_BN6PR16MB179579288933D60C4016D078C31B0BN6PR16MB1795namp_"
--_000_BN6PR16MB179579288933D60C4016D078C31B0BN6PR16MB1795namp_
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
SGkgRGFuaWVsbGUsDQoNCg0KSSBhY3R1YWxseSBhbSBoYXBweSBpbiBteSBjdXJyZW50IHJvbGUs
Y3J1aXRlciB8IENoYXJsb3R0ZSwgTkMNClNlbnQgdmlhIEhhcHBpZQ0KDQoNCg==
--_000_BN6PR16MB179579288933D60C4016D078C31B0BN6PR16MB1795namp_
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: base64
PGh0bWw+DQo8aGVhZD4NCjxtZXRhIGh0dHAtZXF1aXY9IkNvbnRlbnQtVHlwZSIgY29udGVudD0i
CjwvZGl2Pg0KPC9kaXY+DQo8L2JvZHk+DQo8L2h0bWw+DQo=
--_000_BN6PR16MB179579288933D60C4016D078C31B0BN6PR16MB1795namp_--
--_004_BN6PR16MB179579288933D60C4016D078C31B0BN6PR16MB1795namp_
Content-Type: image/png; name="=?utf-8?B?T3V0bG9va0Vtb2ppLfCfmIoucG5n?="
Content-Description: =?utf-8?B?T3V0bG9va0Vtb2ppLfCfmIoucG5n?=
Content-Disposition: inline;
filename="=?utf-8?B?T3V0bG9va0Vtb2ppLfCfmIoucG5n?="; size=488;
creation-date="Thu, 20 Apr 2017 15:32:52 GMT";
modification-date="Thu, 20 Apr 2017 15:32:52 GMT"
Content-ID: <254962e2-f05c-40d1-aa11-0d34671b056c>
Content-Transfer-Encoding: base64
iVBORw0KGgoAAAANSUhEUgAAABMAAAATCAYAAAByUDbMAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
cvED9AIR3TCAAAMAqh+p+YMVeBQAAAAASUVORK5CYII=
--_004_BN6PR16MB179579288933D60C4016D078C31B0BN6PR16MB1795namp_--
"""
class TestParser(unittest.TestCase): class TestParser(unittest.TestCase):
def test_parse_email(self): def test_parse_email(self):
@@ -106,6 +144,10 @@ class TestParser(unittest.TestCase):
parsed_email = parse_email(open(os.path.join(TEST_DIR, '8422.msg'), 'rb').read()) parsed_email = parse_email(open(os.path.join(TEST_DIR, '8422.msg'), 'rb').read())
self.assertEqual("Following up Re: Looking to connect, let's schedule a call!", parsed_email.subject) self.assertEqual("Following up Re: Looking to connect, let's schedule a call!", parsed_email.subject)
def test_parse_email_bad_multipart(self):
parsed_email = parse_email(raw_email_encoded_bad_multipart)
self.assertEqual("Re: Looking to connect with you...", parsed_email.subject)
def test_parse_email_ignores_header_casing(self): def test_parse_email_ignores_header_casing(self):
self.assertEqual('one', parse_email('Message-ID: one').message_id) self.assertEqual('one', parse_email('Message-ID: one').message_id)
self.assertEqual('one', parse_email('Message-Id: one').message_id) self.assertEqual('one', parse_email('Message-Id: one').message_id)