Created
October 31, 2016 09:13
-
-
Save ph87/8d6e249fece7ed318235a70a30609542 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import logging | |
from logging import handlers as logging_handlers | |
import email | |
from email.parser import Parser | |
from email.header import decode_header | |
from email.utils import parseaddr | |
from StringIO import StringIO | |
# Module-level logging: records go to a StreamHandler (stderr by default),
# formatted as "<time> - <LEVEL>: <message>", at INFO level and above.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
def decode_str(s):
    """Decode an RFC 2047 encoded header value into text.

    Every chunk returned by ``decode_header`` is decoded and the chunks are
    joined, so headers that mix plain text with encoded-words (for example
    ``Re: =?utf-8?b?...?=``) or that use several charsets are returned in
    full rather than truncated to their first chunk.

    :param s: raw header value; may be ``None`` or empty.
    :return: the decoded text, or ``s`` unchanged when it is falsy.
    :raises LookupError: if an encoded-word names an unknown charset.
    :raises UnicodeError: if bytes are invalid for their declared charset.
    """
    if not s:
        return s

    # Function-scope import: the module header only imports decode_header.
    from email.header import make_header

    # unicode on Python 2, str on Python 3 -- converting the Header object to
    # text joins all decoded chunks with the spacing rules of the email
    # package.
    text_type = type(u'')
    return text_type(make_header(decode_header(s)))
def decode_addr(s):
    """Render an address header as ``'<decoded name> <address>'``.

    The value is split with ``parseaddr``; the display name is passed through
    ``decode_str`` and the address part is used as-is.

    :param s: raw address header value; may be ``None`` or empty.
    :return: the formatted string, or ``s`` unchanged when it is falsy.
    """
    if s:
        display_name, address = parseaddr(s)
        return '%s <%s>' % (decode_str(display_name), address)
    return s
def _build_attachment(message, content_type):
    """Wrap a non-text, non-multipart part in a file-like attachment object.

    The decoded payload is stored in a ``StringIO`` whose extra attributes
    (``content_type``, ``size``, ``name``, ``create_date``, ``modify_date``,
    ``read_date``) describe the attachment. The name and dates are only filled
    in when the part's Content-Disposition type is ``attachment``.
    """
    # Function-scope import: the module header only imports parseaddr.
    from email.utils import collapse_rfc2231_value

    file_data = message.get_payload(decode=True)
    if file_data is None:
        # A part without a body: keep going with empty data rather than
        # failing on len(None).
        file_data = b''

    attachment = StringIO(file_data)
    attachment.content_type = content_type
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.modify_date = None
    attachment.read_date = None

    disposition = message.get('Content-Disposition', '')
    if disposition.split(';', 1)[0].strip().lower() != 'attachment':
        return attachment

    # (header parameter, attachment attribute). 'creation-date' is the name
    # defined by RFC 2183; the legacy 'create-date' spelling is still read
    # first so the standard parameter wins when both are present.
    param_to_attr = (
        ('filename', 'name'),
        ('create-date', 'create_date'),
        ('creation-date', 'create_date'),
        ('modification-date', 'modify_date'),
        ('read-date', 'read_date'),
    )
    for param, attr in param_to_attr:
        # get_param copes with quoted and unquoted values and with ';'
        # inside a quoted string.
        value = message.get_param(param, header='content-disposition')
        if value is None:
            continue
        if isinstance(value, tuple):
            # RFC 2231 encoded value: (charset, language, text).
            value = collapse_rfc2231_value(value)
        setattr(attachment, attr, decode_str(value))
    return attachment


def parse_payload(message):
    """Convert an ``email`` message (or message part) into a nested dict.

    :param message: a parsed message object or one of its sub-parts.
    :return: dict with the keys

        * ``from`` / ``to`` -- address headers via ``decode_addr``
        * ``subject`` -- subject via ``decode_str``
        * ``content_type`` -- the part's content type
        * ``content`` -- decoded body for ``text/plain`` and ``text/html``
          leaf parts, otherwise ``None``
        * ``payloads`` -- list of dicts for the sub-parts of a multipart
          message (empty for leaf parts)
        * ``attachment`` -- file-like object for a non-text leaf part whose
          Content-Disposition is ``attachment`` with a non-empty filename,
          otherwise ``None``
    """
    from_ = decode_addr(message.get('From'))
    to_ = decode_addr(message.get('To'))
    subject = decode_str(message.get('Subject'))
    content_type = message.get_content_type()

    content = None
    attachment = None
    payloads = []

    if message.is_multipart():
        payloads = [parse_payload(part) for part in message.get_payload()]
    elif content_type in ('text/plain', 'text/html'):
        content = message.get_payload(decode=True)
    else:
        attachment = _build_attachment(message, content_type)

    # Only named attachments are exposed to callers.
    has_named_attachment = attachment is not None and attachment.name
    return {
        'from': from_,
        'to': to_,
        'subject': subject,
        'content_type': content_type,
        'content': content,
        'payloads': payloads,
        'attachment': attachment if has_named_attachment else None,
    }
def parse_message_str(content):
    """Parse raw message text and return its ``parse_payload`` dict.

    :param content: the full message source as a string.
    :return: the nested dict produced by ``parse_payload``.
    """
    # email.message_from_string(s) is the package's shorthand for
    # Parser().parsestr(s).
    return parse_payload(email.message_from_string(content))
def filter_attachment(payload):
    """Collect every attachment found in a payload tree.

    The tree is walked depth-first, parent before children and children in
    list order, and each truthy ``'attachment'`` value is collected.

    :param payload: dict with ``'attachment'`` and ``'payloads'`` keys, as
        returned by ``parse_payload``.
    :return: list of attachments in traversal order.
    """
    found = []
    pending = [payload]
    while pending:
        node = pending.pop()
        item = node['attachment']
        if item:
            found.append(item)
        # Push children reversed so the first child is popped first,
        # which keeps the pre-order sequence.
        pending.extend(reversed(list(node['payloads'])))
    return found
def save_attachment(attachment, name):
    """Write an attachment's remaining data to the file ``name``.

    The file is opened in binary mode so attachment bytes are written
    verbatim, with no newline translation on any platform.

    :param attachment: file-like object; everything from its current read
        position is written.
    :param name: path of the file to create or overwrite.
    :return: ``name``.
    """
    with open(name, 'wb') as f:
        f.write(attachment.read())
    return name
if __name__ == '__main__':
    # Script entry point: parse the message stored in ./email_content and
    # save each named attachment into the current directory.
    import os

    with open('email_content') as f:
        content = f.read()

    payload = parse_message_str(content)
    for atta in filter_attachment(payload):
        # The filename comes from an untrusted header: drop any directory
        # components so a crafted name cannot write outside the working
        # directory.
        filename = os.path.basename(atta.name.replace('\\', '/'))
        save_attachment(atta, filename or 'attachment')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment