Skip to content

Instantly share code, notes, and snippets.

@dodger487
Created January 30, 2020 04:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dodger487/3e5843918f318770e39b570840595ce3 to your computer and use it in GitHub Desktop.
Save dodger487/3e5843918f318770e39b570840595ce3 to your computer and use it in GitHub Desktop.
"""Print out text of emails from MBox file."""
from collections import Counter
import mailbox
import sys
import re
# Open the mailbox archive that every later step of the script reads from.
mbox = mailbox.mbox("friends-001.mbox")

# Tally how many messages carry each distinct "From" header value, then show
# the ten most frequent senders.
c = Counter()
c.update(m["from"] for m in mbox)
print(c.most_common(10))
def remove_r(text):
    """Return *text* with every carriage-return character removed."""
    # Splitting on "\r" and re-joining with nothing drops each occurrence.
    pieces = text.split("\r")
    return "".join(pieces)
def strip_replies(text):
    """Drop empty lines and quoted-reply lines from *text*.

    The text is split on "\\n"; any line that is empty or whose first
    character is ">" is discarded, and the remaining lines are joined back
    together with "\\n".
    """
    kept = []
    for line in text.split("\n"):
        if not line:
            # Empty lines are removed entirely.
            continue
        if line.startswith(">"):
            # Only a ">" in the very first column marks a quoted line.
            continue
        kept.append(line)
    return "\n".join(kept)
def strip_footer(text):
    """Cut reply attributions and the Google Groups footer out of *text*.

    Two patterns are applied one after the other, each with ``re.DOTALL`` so
    that ``.`` also matches newlines. Both patterns end in ``.*``, so a match
    removes everything from where it starts through the end of the text.
    """
    footer_patterns = (
        # "On <weekday>, ..., 20xx at ... @gmail.com ... wrote ..." attribution.
        "On (Sun|Mon|Tue|Wed|Thu|Fri|Sat),.*, 20.. at.*@gmail.com.*wrote.*",
        # Subscription boilerplate appended to the message.
        "You received this message because you are subscribed to the Google Groups.*",
    )
    for pattern in footer_patterns:
        text = re.sub(pattern, "", text, flags=re.DOTALL)
    return text
def get_core_text(msg):
    """Return the body text of *msg* after all clean-up steps.

    The text is obtained with ``get_text`` and then passed, in order, through
    ``remove_r``, ``strip_replies`` and ``strip_footer``.
    """
    body = get_text(msg)
    # Order matters: carriage returns are removed before the line-based
    # filters run, and the footer is cut last.
    for clean in (remove_r, strip_replies, strip_footer):
        body = clean(body)
    return body
def get_member_emails(mbox, sender_list, limit=100, recipient="dunghole"):
    """Collect messages from selected senders that were addressed to the group.

    Args:
        mbox: Iterable of messages supporting header lookup through
            ``msg["from"]`` and ``msg["to"]``.
        sender_list: Container of exact "From" header values to accept.
        limit: Maximum number of messages to return. ``None`` means no limit.
        recipient: Substring that must occur in the "To" header for a message
            to be kept.

    Returns:
        A list of the matching messages in iteration order, never longer
        than ``limit`` when a limit is given.
    """
    msgs = []
    if limit is not None and limit <= 0:
        # Nothing may be collected; avoid scanning the mailbox at all.
        return msgs

    for msg in mbox:
        to_header = msg["to"]
        if to_header is None or msg["from"] not in sender_list:
            continue
        # str() keeps the substring test valid when the header value is not a
        # plain str (for example an email.header.Header instance).
        if recipient not in str(to_header):
            continue
        msgs.append(msg)
        # ">=" stops at exactly `limit` messages; the previous ">" check let
        # one extra message through.
        if limit is not None and len(msgs) >= limit:
            break
    return msgs
def get_text(msg):
    """Return the decoded text of the first leaf part of *msg*.

    Multipart containers are descended by always taking their first child.
    NOTE(review): this presumes the first part is the plain-text body; confirm
    for the mail being processed.

    The leaf payload is decoded according to its Content-Transfer-Encoding
    (quoted-printable, base64, ...) and then converted to ``str`` using the
    part's declared charset, falling back to UTF-8.

    Args:
        msg: An ``email.message.Message``-like object.

    Returns:
        The body text as ``str``; an empty string when there is no payload.
    """
    while msg.is_multipart():
        parts = msg.get_payload()
        if not parts:
            # A multipart container without children has no text to offer.
            return ""
        msg = parts[0]

    if msg.get_payload() is None:
        return ""

    # decode=True undoes the transfer encoding and yields bytes.
    raw = msg.get_payload(decode=True)
    if raw is None:
        return ""

    charset = msg.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The message declared a charset Python does not know.
        return raw.decode("utf-8", errors="replace")
def print_msgs(msg_list, f=None):
    """Write each message's subject and cleaned body to a text stream.

    Args:
        msg_list: Iterable of messages; each must support ``msg["subject"]``
            and be accepted by ``get_core_text``.
        f: Writable text stream. ``None`` (the default) means whatever
            ``sys.stdout`` is at call time, so redirecting standard output
            after this function was defined is honoured.
    """
    out = sys.stdout if f is None else f
    for msg in msg_list:
        # Separator line between consecutive messages.
        print("-------------------------------------------------------------------", file=out)
        print("Subject:", msg["subject"], file=out)
        print("", file=out)
        print(get_core_text(msg), file=out)
        print("", file=out)
# Exact "From" header values whose messages should be exported.
george_email_addr = [
    "George <george@example.com>",
    "George <georgeS@college.edu>",
]

# limit=None: collect every matching message rather than stopping early.
george_emails = get_member_emails(mbox, george_email_addr, limit=None)

# An explicit UTF-8 encoding keeps the export from failing on non-ASCII text
# when the platform's default encoding cannot represent it.
with open("george_emails.txt", "w", encoding="utf-8") as f:
    print_msgs(george_emails, f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment