Skip to content

Instantly share code, notes, and snippets.

@ph87
Created October 31, 2016 09:13
Show Gist options
  • Save ph87/8d6e249fece7ed318235a70a30609542 to your computer and use it in GitHub Desktop.
import re
import logging
from logging import handlers as logging_handlers
import email
from email.parser import Parser
from email.header import decode_header
from email.utils import parseaddr
from StringIO import StringIO
# Module-level logging: one stream handler with a timestamped format is
# attached to this module's logger, which reports INFO and above.
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
def decode_str(s):
    """Decode an RFC 2047 encoded header value into text.

    ``decode_header`` may split one header into several fragments, for
    example plain text followed by an encoded word, or encoded words that
    use different charsets.  Every fragment is decoded and the results are
    concatenated, so no part of the header is dropped.

    :param s: raw header value; falsy values (``None``, ``''``) are
        returned unchanged.
    :returns: the decoded header text.
    :raises LookupError: if a fragment names an unknown charset.
    :raises UnicodeDecodeError: if a fragment is not valid in its charset.
    """
    if not s:
        return s
    fragments = decode_header(s)
    if len(fragments) == 1:
        # Single fragment: decode only when a charset was declared,
        # otherwise hand the value back exactly as decode_header gave it.
        value, charset = fragments[0]
        if charset:
            value = value.decode(charset)
        return value
    decoded = []
    for value, charset in fragments:
        if isinstance(value, bytes):
            if charset:
                value = value.decode(charset)
            else:
                # Fragments without a charset are the unencoded parts of
                # the header; treat them as ASCII and never fail on them.
                value = value.decode('ascii', 'replace')
        decoded.append(value)
    return u''.join(decoded)
def decode_addr(s):
    """Format an address header as ``'<decoded name> <<address>>'``.

    The header is split with ``parseaddr`` and the display-name part is
    passed through ``decode_str``.  Falsy input is returned unchanged.
    """
    if s:
        display_name, address = parseaddr(s)
        return '%s <%s>' % (decode_str(display_name), address)
    return s
def _parse_disposition_params(message):
    """Return ``(disposition, params)`` from the Content-Disposition header.

    ``disposition`` is the lower-cased disposition type (``''`` when the
    header is missing).  ``params`` maps lower-cased parameter names to
    their decoded values; a parameter with no usable value maps to
    ``None`` and is reported through the module logger.
    """
    from email.utils import collapse_rfc2231_value

    # get_params() does the ';' splitting and unquoting itself, so quoted
    # values containing ';' and unquoted values are both handled.
    raw_params = message.get_params(header='content-disposition')
    if not raw_params:
        return '', {}
    disposition = raw_params[0][0].strip().lower()
    params = {}
    for name, value in raw_params[1:]:
        if isinstance(value, tuple):
            # RFC 2231 value (charset, language, text): already carries
            # its own charset, so it must not go through decode_str.
            value = collapse_rfc2231_value(value)
        elif value:
            value = decode_str(value)
        else:
            logger.error('parse disposition value error')
            value = None
        params[name.strip().lower()] = value
    return disposition, params


def _build_attachment(message, content_type):
    """Wrap the decoded body of a non-text part in a file-like object.

    The returned ``StringIO`` carries ``content_type``, ``size``, ``name``,
    ``create_date``, ``modify_date`` and ``read_date`` attributes.  The
    last four stay ``None`` unless the part is declared as an attachment.
    """
    file_data = message.get_payload(decode=True)
    if file_data is None:
        # A part without a body would otherwise crash on len().
        file_data = b''
    attachment = StringIO(file_data)
    attachment.content_type = content_type
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.modify_date = None
    attachment.read_date = None
    disposition, params = _parse_disposition_params(message)
    if disposition == 'attachment':
        attachment.name = params.get('filename')
        # RFC 2183 spells this parameter 'creation-date'; 'create-date' is
        # still accepted because earlier versions matched that spelling.
        attachment.create_date = params.get(
            'creation-date', params.get('create-date'))
        attachment.modify_date = params.get('modification-date')
        attachment.read_date = params.get('read-date')
    return attachment


def parse_payload(message):
    """Recursively convert an email message into a nested dict.

    :param message: a parsed message object (as produced by
        ``email.parser.Parser``).
    :returns: a dict with the keys ``from``, ``to``, ``subject``,
        ``content_type``, ``content``, ``payloads`` and ``attachment``:

        * multipart messages have ``content`` and ``attachment`` set to
          ``None`` and ``payloads`` holding one parsed dict per sub-part;
        * ``text/plain`` and ``text/html`` parts carry their decoded body
          in ``content``;
        * any other part is exposed as ``attachment`` (a file-like
          object), but only when it is declared as an attachment with a
          file name; otherwise ``attachment`` is ``None``.
    """
    from_ = decode_addr(message.get('From'))
    to_ = decode_addr(message.get('To'))
    subject = decode_str(message.get('Subject'))
    content_type = message.get_content_type()
    content = None
    attachment = None
    payloads = []
    if message.is_multipart():
        payloads = [parse_payload(part) for part in message.get_payload()]
    elif content_type in ('text/plain', 'text/html'):
        # NOTE(review): text parts are always treated as body content,
        # even when their Content-Disposition says "attachment".
        content = message.get_payload(decode=True)
    else:
        attachment = _build_attachment(message, content_type)
    if attachment is not None and not attachment.name:
        # Unnamed binary parts are not reported as attachments.
        attachment = None
    return {
        'from': from_,
        'to': to_,
        'subject': subject,
        'content_type': content_type,
        'content': content,
        'payloads': payloads,
        'attachment': attachment,
    }
def parse_message_str(content):
    """Parse raw message text and return its ``parse_payload`` dict."""
    return parse_payload(Parser().parsestr(content))
def filter_attachment(payload):
    """Collect every attachment found in a parsed payload tree.

    The tree is walked depth-first: a node's own attachment comes before
    those of its ``payloads`` children, and children are visited in order.
    Returns the attachments as a list.
    """
    found = []
    pending = [payload]
    while pending:
        node = pending.pop()
        if node['attachment']:
            found.append(node['attachment'])
        # Push children reversed so the leftmost child is popped first.
        pending.extend(reversed(list(node['payloads'])))
    return found
def save_attachment(attachment, name=None):
    """Write an attachment's data to disk and return the path used.

    :param attachment: file-like object whose ``read()`` yields the data.
    :param name: destination path.  When omitted, the attachment's own
        ``name`` attribute is used, reduced to its final path component
        because that name comes from the (untrusted) email.
    :returns: the path the data was written to.
    :raises ValueError: if no name is given and the attachment has none.
    """
    import os.path

    if name is None:
        suggested = getattr(attachment, 'name', None)
        if suggested:
            # Drop any directory part, including Windows-style separators.
            name = os.path.basename(suggested.replace('\\', '/'))
        if not name:
            raise ValueError('no file name given and attachment has none')
    # Binary mode: attachment data must be written without newline
    # translation.
    with open(name, 'wb') as f:
        f.write(attachment.read())
    return name
if __name__ == '__main__':
    import os.path

    # Script entry point: parse the message stored in ./email_content and
    # save each named attachment into the current directory.
    with open('email_content') as f:
        content = f.read()
    payload = parse_message_str(content)
    attachments = filter_attachment(payload)
    for atta in attachments:
        # The file name comes from the email itself, so keep only its last
        # path component to avoid writing outside the working directory.
        save_attachment(atta, os.path.basename(atta.name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment