Skip to content

Instantly share code, notes, and snippets.

@eliask
Created November 19, 2016 08:17
Show Gist options
  • Save eliask/3dddfe40a3133d4913b829b36b191419 to your computer and use it in GitHub Desktop.
Save eliask/3dddfe40a3133d4913b829b36b191419 to your computer and use it in GitHub Desktop.
Playing with gmail mbox dump from Google Takeout (writes all attachments as separate files etc under /tmp)
#! /usr/bin/env python3
# Usage: mbox_stuff.py All\ mail\ Including\ Spam\ and\ Trash.mbox
import mailbox
import traceback
import sys
from itertools import chain
# Running counter used to give every dumped part a unique file name.
num = 1


def get_parts(msg, n=0, out_dir='/tmp'):
    """Dump every leaf part of *msg* to a file and yield its text parts.

    Each non-multipart part is written to ``<out_dir>/msgNNNNNN.<subtype>``
    where ``NNNNNN`` comes from the module-level counter ``num`` and
    ``<subtype>`` is the second half of the part's content type.

    Args:
        msg: An ``email.message.Message`` (e.g. an item of ``mailbox.mbox``).
        n: Ignored. Kept only so existing callers that pass a depth still
            work; the traversal no longer recurses.
        out_dir: Directory the part files are written to.

    Yields:
        ``(payload_bytes, charset)`` for every ``text/*`` part, where
        ``charset`` is the charset declared in the part's Content-Type
        header, or ``None`` when none is declared.
    """
    import os  # local import: the file's import block does not provide it

    global num
    # walk() already visits the whole tree depth-first (including msg
    # itself), so no recursion is needed -- recursing on walked parts is
    # what made the earlier version loop.
    for part in msg.walk():
        if part.is_multipart():
            continue  # containers carry no payload of their own
        payload = part.get_payload(decode=True)
        if payload is None:
            continue  # nothing to write or decode
        num += 1
        type_ = part.get_content_type()
        ext = type_.split('/')[-1]
        path = os.path.join(out_dir, 'msg%06d.%s' % (num, ext))
        # Write before yielding so the dump happens even if the consumer
        # stops iterating early.
        with open(path, 'wb') as fh:
            fh.write(payload)
        if type_.startswith('text/'):
            # get_content_charset() reads the charset= parameter of the
            # Content-Type header; get_charset() is None for parsed mail.
            yield payload, part.get_content_charset()
def get_body_parts(msg):
    """Yield each text part of *msg* decoded to ``str``, once per part.

    For every ``(payload, charset)`` pair produced by ``get_parts`` the
    payload is decoded with the first charset that works, trying in order:
    the part's own charset, every charset declared anywhere in the message,
    and finally UTF-8. If none of them decodes cleanly the payload is
    decoded as UTF-8 with undecodable bytes replaced, so one string is
    still produced for the part.

    Args:
        msg: An ``email.message.Message``.

    Yields:
        One decoded ``str`` per text part.
    """
    # dict.fromkeys de-duplicates while keeping header order, so the
    # attempts are deterministic (a set would iterate in arbitrary order).
    all_charsets = list(dict.fromkeys(x for x in msg.get_charsets() if x))
    for raw, declared in get_parts(msg):
        if raw is None:
            continue
        # str() also accepts an email.charset.Charset object, whose string
        # form is its charset name.
        own = [str(declared)] if declared else []
        for charset in chain(own, all_charsets, ['utf-8']):
            try:
                text = raw.decode(charset)
            except (LookupError, UnicodeDecodeError):
                # Unknown codec name or wrong charset: try the next one.
                continue
            break  # stop at the first success so the body is yielded once
        else:
            text = raw.decode('utf-8', errors='replace')
        yield text
# Script body: print a banner, the Subject/From headers and every decoded
# text part of each message in the mbox file named on the command line.
if len(sys.argv) < 2:
    sys.exit('Usage: %s MBOXFILE' % sys.argv[0])
mboxfile = sys.argv[1]
mbox = mailbox.mbox(mboxfile)
try:
    for i, mail in enumerate(mbox):
        print('=================', i, '=====================')
        try:
            # Both fields are positional; a named '{from}' field with no
            # keyword argument raises KeyError on every message.
            header = '''Subject: {}
From: {}
'''.format(mail['subject'], mail['From'])
        except Exception:
            # Stay best-effort for odd headers, but report the failure
            # instead of silently dropping the header.
            traceback.print_exc(file=sys.stderr)
        else:
            print(header)
        for body in get_body_parts(mail):
            print(body)
finally:
    # Release the file handle held by the mailbox.
    mbox.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment