Skip to content

Instantly share code, notes, and snippets.

@kjaymiller
Last active May 3, 2016 13:53
Show Gist options
  • Save kjaymiller/804b33f3ba9fb8174457f6826e24e9e6 to your computer and use it in GitHub Desktop.
Save kjaymiller/804b33f3ba9fb8174457f6826e24e9e6 to your computer and use it in GitHub Desktop.
File
"""
Takes the .eml files in a directory, extracts the HTML section of each one, and merges them into a single HTML document
created: 20160503
by: Kevin 'Jay' Miller
kjaymiller@gmail.com
Place this script in the directory that contains the .eml files and run it from there.
"""
from os import listdir
import re
def merge_emails(path='.'):
    """Merge the HTML sections of every ``.eml`` file in *path* into one document.

    Files are processed in sorted filename order. For each message an
    ``<h1>`` heading holding its ``Subject:`` header line is emitted,
    followed by the text found between its ``<html>`` and ``</html>`` tags
    (the last such section when there are several).

    A message without a ``Subject:`` line gets the heading ``(no subject)``.
    A message without a matched pair of html tags contributes only its
    heading.

    Args:
        path: Directory to scan for ``.eml`` files. Defaults to the current
            working directory.

    Returns:
        A string holding all headings and bodies concatenated and wrapped in
        a single ``<html>`` element.
    """
    # Local import keeps this function self-contained; the module itself only
    # imports ``listdir`` from ``os``.
    from os.path import join

    html_tag = re.compile(r'\<\/*html\>', re.M)
    subject_line = re.compile(r'^Subject: .+$', re.M)

    merger = []
    for name in sorted(listdir(path)):
        # Filter before opening so sub-directories and unrelated files are
        # never touched.
        if not name.endswith('.eml'):
            continue

        # listdir() yields bare names, so they must be joined with *path* for
        # directories other than the current one to work.
        # errors='replace' keeps one badly-encoded message from aborting the
        # whole merge.
        with open(join(path, name), 'r', errors='replace') as f:
            message = f.read()

        found = subject_line.search(message)
        subject = found.group(0) if found else '(no subject)'
        merger.append('<h1>{}</h1>'.format(subject))

        # Splitting on the tags gives [before, body, after] for a well-formed
        # message; anything shorter has no complete html section to take.
        parts = html_tag.split(message)
        if len(parts) >= 3:
            merger.append(parts[-2])

    return '<html>\n{}</html>'.format(''.join(merger))
def create_html(html, filename='merged_emails.pdf'):
    """Write *html* to *filename*, replacing any existing file.

    Args:
        html: The text to write.
        filename: Destination path. Defaults to ``merged_emails.pdf`` in the
            current working directory, which was the previously hard-coded
            location.
    """
    # NOTE(review): the default name ends in .pdf although the data written
    # is HTML text -- confirm whether a .html name was intended.
    # Plain 'w' is enough: the file is only written, never read back.
    with open(filename, 'w') as out:
        out.write(html)
def run():
    """Merge the emails using ``merge_emails()`` defaults and save the result."""
    # The merged document is handed directly to the writer.
    create_html(merge_emails())
# Run the merge only when this file is executed as a script, not on import.
if __name__ == '__main__':
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment