Skip to content

Instantly share code, notes, and snippets.

@rjpower
Created March 20, 2012 21:58
Show Gist options
  • Save rjpower/2141702 to your computer and use it in GitHub Desktop.
Save rjpower/2141702 to your computer and use it in GitHub Desktop.
Extract email sent times from a mailbox
#!/usr/bin/env python
import collections, glob, os, re
from dateutil.parser import *
def read_lines(paths):
    """Return the lines of every file in ``paths`` as one flat list.

    Each file is read whole and split on the newline character, so a file
    that ends with a newline contributes a trailing empty string.

    Args:
        paths: iterable of file paths to read.

    Returns:
        A list of text lines, in the order the files were given.
    """
    # Local import so this block does not rely on a new file-level import.
    import io

    collected = []
    for path in paths:
        # latin-1 maps every byte to a character, so arbitrary message bytes
        # cannot raise a decode error.  newline='\n' turns off newline
        # translation so the split below sees the file's own line breaks.
        # The ``with`` block closes the file even if reading fails.
        with io.open(path, encoding='latin-1', newline='\n') as handle:
            collected.extend(handle.read().split('\n'))
    return collected


def extract_dates(lines, pattern):
    """Return capture group 1 of ``pattern`` for each line it matches.

    Args:
        lines: iterable of strings to scan.
        pattern: compiled regular expression with at least one group.

    Returns:
        A list of the captured strings, in input order.
    """
    # Search each line once and reuse the match object.
    matches = (pattern.search(line) for line in lines)
    return [match.group(1) for match in matches if match]


def hour_shares(keys, pdates):
    """Compute per-key totals and each hour's share within its key.

    Args:
        keys: sequence of tuples, one grouping key per entry of ``pdates``.
        pdates: sequence of objects exposing an ``hour`` attribute,
            parallel to ``keys``.

    Returns:
        A ``(totals, shares)`` pair.  ``totals`` maps each key to the number
        of entries carrying it.  ``shares`` maps ``key + (hour,)`` to the
        fraction of that key's entries that fall in that hour.
    """
    totals = collections.Counter(keys)
    shares = collections.defaultdict(int)
    for key, stamp in zip(keys, pdates):
        shares[tuple(key) + (stamp.hour,)] += 1.0 / totals[key]
    return totals, shares


def format_row(key, value):
    """Return one output row: comma-joined key fields, ' , ', then value."""
    return ",".join(str(field) for field in key) + " , " + str(value)


sentmail_mbox = glob.glob(os.path.expanduser('~/.thunderbird/*/ImapMail/*/*/Sent Mail'))
lines = read_lines(sentmail_mbox)

# The greedy group stops just before the last "00" on the line, so a header
# ending in "-0700" is captured up to "-07".
# NOTE(review): a header whose offset does not end in "00" (e.g. "+0530") is
# cut at an earlier "00" or not matched at all -- confirm this is intended.
c = re.compile('^Date: (.*)00')
dates = extract_dates(lines, c)
# ``parse`` is provided by the file-level ``from dateutil.parser import *``.
pdates = [parse(d) for d in dates]

# Group by year; each output row is the share of that year's mail per hour.
keys = [(p.year,) for p in pdates]
kcounts, mapping = hour_shares(keys, pdates)

for k, v in sorted(mapping.items()):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(format_row(k, v))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment