public
Last active

Store messages in a Maildir into a couchdb database.

  • Download Gist
maildirtocouch.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
#!/usr/bin/env python
"""Store messages in a Maildir into a couchdb database."""
 
import couchdb
from mailbox import Maildir
from optparse import OptionParser
import os
from pprint import pprint
import sys
from uuid import uuid4
 
 
def error(msg):
    """Report a fatal error and terminate the program.

    Writes ``msg`` followed by a newline to standard error, then exits
    with status 1 by raising ``SystemExit``; this function never returns.
    """
    # Wrap msg in a 1-tuple so a tuple-valued message is formatted as a
    # whole instead of being unpacked as several format arguments.
    sys.stderr.write('%s\n' % (msg,))
    sys.exit(1)
 
 
def _to_text(value):
    """Return ``value`` as text, decoding byte strings as UTF-8.

    ``None`` becomes an empty string and undecodable bytes are dropped.
    Values that are already text are returned unchanged.
    """
    if value is None:
        value = b''
    if isinstance(value, bytes):
        return value.decode('utf-8', 'ignore')
    return value


def _build_document(fname, message):
    """Build the CouchDb document (a plain dict) for one maildir message."""
    # NOTE(review): presumably `message` is an rfc822.Message (the Python 2
    # Maildir default factory), whose getaddr() returns (None, None) when
    # the header is missing -- hence the None-tolerant conversions below.
    fromname, fromaddr = message.getaddr('From')
    data = {
        'type': 'email',
        'fromname': _to_text(fromname),
        'fromaddr': fromaddr or '',
        'fname': fname,
        'fullpath': message.fp._file.name,
    }
    for header in ('delivered-to', 'subject', 'date', 'status', 'sender',
                   'message-id', 'in-reply-to', 'references'):
        data[header] = _to_text(message.get(header, ''))
    for header in ('to', 'cc'):
        data[header] = message.getaddrlist(header)
    return data


def insertmessages(database, maildir, verbose):
    """Put each maildir message in CouchDb.

    Iterates over ``maildir.iteritems()`` and, for every message whose key
    is not yet a document id in ``database``, stores a document with the
    sender, selected headers and file path, then attaches ``message.fp``
    to it as ``message/rfc822``.  Messages already present are skipped.

    When ``verbose`` is true a line is printed for every message stored.
    A progress line is printed every 100 messages and a summary at the
    end.  If storing a document raises, the document is dumped with
    ``pprint`` and the program exits through ``error()``.
    """
    inserted = 0
    ignored = 0
    for count, (fname, message) in enumerate(maildir.iteritems(), 1):
        # is it already in there?
        if database.get(fname):
            print('%s already in db; skipping' % fname)
            ignored += 1
        else:
            data = _build_document(fname, message)
            # The maildir key doubles as the document id so that a re-run
            # can detect (and skip) messages that were already imported.
            doc_id = fname
            if verbose:
                print('putting %s...' % data['fullpath'])
            try:
                database[doc_id] = data
            except Exception as exc:
                pprint(data)
                error('exception for %s:, %s' % (fname, str(exc)))
            doc = database.get(doc_id)
            if doc:
                # TODO: handle failure here
                # NOTE(review): message.fp may already be positioned past
                # the headers (consumed while parsing), in which case only
                # the body is uploaded -- confirm, and rewind if the full
                # message is intended.
                database.put_attachment(doc, message.fp, fname,
                                        content_type='message/rfc822')
            inserted += 1

        # Report progress for every 100th message, skipped ones included.
        if count % 100 == 0:
            print('messages processed: %d' % count)

    print('inserted messages: %d ignored: %d' % (inserted, ignored))
 
def main():
    """Parse the command line, validate the inputs and run the import.

    Options: ``-d/--database`` (required), ``-s/--server`` (CouchDb URI),
    ``-m/--maildir`` (defaults to ``$HOME/Maildir/``, falling back to the
    current directory when HOME is unset) and ``-v/--verbose``.

    Exits through ``parser.error`` when the database name is missing or
    the maildir (or its ``new`` subdirectory) is not accessible, and
    through ``error()`` when the server or the database cannot be reached.
    """
    home = os.environ.get('HOME', os.getcwd())

    parser = OptionParser('usage: %prog -d dbname [other options]')
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      help='enable loquacious mode. like, duh.')
    parser.add_option('-d', '--database', dest='database',
                      help='mail database in CouchDb [required]')
    parser.add_option('-s', '--server', dest='server',
                      help='CouchDb URI [default: %default]',
                      default='http://localhost:5984/')
    parser.add_option('-m', '--maildir', dest='maildir',
                      help='path to the maildir to import [default: %default]',
                      default='%s/Maildir/' % home)

    opts, _args = parser.parse_args()

    if not opts.database:
        parser.error('a database name is required.')

    # A usable maildir must exist and contain a "new" subdirectory.
    newdir = os.path.join(opts.maildir, 'new')
    if not (os.access(opts.maildir, os.F_OK) and
            os.access(newdir, os.F_OK)):
        parser.error('cannot access maildir %s' % opts.maildir)
    maildir = Maildir(opts.maildir)

    server = couchdb.Server(opts.server)
    try:
        # Touch the server once so connection problems surface here.
        # NOTE(review): assumes `version` is a property that issues a
        # request -- confirm for the installed couchdb-python release.
        _version = server.version
    except (AttributeError, EnvironmentError):
        # AttributeError is what the original code expected on failure;
        # socket/OS errors are reported the same way, not as a traceback.
        error('cannot connect to %s' % opts.server)

    try:
        database = server[opts.database]
    except couchdb.client.ResourceNotFound:
        error('invalid database name: %s' % opts.database)

    insertmessages(database, maildir, opts.verbose)
 
# Run the importer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
 
# eof

https://github.com/nborwankar/sproingg might be of interest to you; it is similar in nature to this gist.

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.