Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Store the messages from a Maildir in a CouchDB database.

View maildirtocouch.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
#!/usr/bin/env python
"""Store messages in a Maildir into a couchdb database."""
 
import couchdb
from mailbox import Maildir
from optparse import OptionParser
import os
from pprint import pprint
import sys
from uuid import uuid4
 
 
def error(msg):
    """Report a fatal problem and stop the program.

    The message is written to standard error followed by a newline, then
    the process exits with status 1.  This function never returns.
    """
    line = '%s\n' % msg
    sys.stderr.write(line)
    raise SystemExit(1)
 
 
def insertmessages(database, maildir, verbose):
    """Put each maildir message in CouchDb.

    Every message yielded by ``maildir.iteritems()`` is stored as a document
    whose id is the maildir key, and the message's file object is then
    uploaded as a ``message/rfc822`` attachment of that document.  Keys that
    are already present in ``database`` are skipped, so the import can be
    run repeatedly over the same maildir.

    Arguments:
    database -- database handle supporting ``get()``, item assignment and
                ``put_attachment()``.
    maildir  -- mailbox object providing ``iteritems()``.
    verbose  -- when true, print the path of each message before storing it.

    A summary line with the inserted/ignored totals is printed at the end.
    If storing a document raises, the document is dumped with ``pprint`` and
    the program terminates through ``error()``.
    """

    inserted = 0
    ignored = 0
    count = 0
    for fname, message in maildir.iteritems():
        count += 1
        # The maildir key doubles as the document id, so a key that is
        # already in the database was imported by an earlier run.
        if database.get(fname):
            print('%s already in db; skipping' % fname)
            ignored += 1
            continue

        # getaddr() may return (None, None) when the message has no From
        # header (rfc822.Message behaviour); guard the decode so a single
        # such message does not abort the whole import.
        (fromname, fromaddr) = message.getaddr('From')
        data = {'type': 'email',
                'fromname': (fromname.decode('utf-8', 'ignore')
                             if fromname else u''),
                'fromaddr': fromaddr,
                'fname': fname,
                'fullpath': message.fp._file.name}
        for header in ('delivered-to', 'subject', 'date', 'status', 'sender',
                       'message-id', 'in-reply-to', 'references'):
            data[header] = message.get(header, '').decode('utf-8', 'ignore')
        for header in ('to', 'cc'):
            data[header] = message.getaddrlist(header)

        doc_id = fname
        if verbose:
            print('putting %s...' % data['fullpath'])
        try:
            database[doc_id] = data
        except Exception as exc:
            # Show the offending document before bailing out.
            pprint(data)
            error('exception for %s:, %s' % (fname, str(exc)))

        # Re-read the stored document before attaching the raw message.
        doc = database.get(doc_id)
        if doc:
            # TODO: handle failure here
            # NOTE(review): message.fp may already be positioned past the
            # headers at this point -- confirm the attachment really holds
            # the complete message.
            database.put_attachment(doc, message.fp, fname,
                                    content_type='message/rfc822')
        inserted += 1

        if count % 100 == 0:
            print('messages processed: %d' % count)

    print('inserted messages: %d ignored: %d' % (inserted, ignored))
 
def main():
    """Parse the command line, check the inputs and run the import.

    A database name (-d) is mandatory.  The maildir must exist and contain a
    ``new`` entry.  Invalid options are reported through the option parser;
    server and database problems are reported through ``error()``.
    """

    # Fall back to the current directory when HOME is not set.
    default_maildir = '%s/Maildir/' % os.environ.get('HOME', os.getcwd())

    parser = OptionParser('usage: %prog -d dbname [other options]')
    parser.add_option(
        '-v', '--verbose',
        action='store_true',
        dest='verbose',
        help='enable loquacious mode. like, duh.')
    parser.add_option(
        '-d', '--database',
        dest='database',
        help='mail database in CouchDb [required]')
    parser.add_option(
        '-s', '--server',
        dest='server',
        default='http://localhost:5984/',
        help='CouchDb URI [default: %default]')
    parser.add_option(
        '-m', '--maildir',
        dest='maildir',
        default=default_maildir,
        help='path to the maildir to import [default: %default]')

    (opts, _args) = parser.parse_args()

    if not opts.database:
        parser.error('a database name is required.')

    # Both the maildir itself and its "new" entry have to be present.
    new_entry = os.sep.join([opts.maildir, 'new'])
    if (not os.access(opts.maildir, os.F_OK) or
            not os.access(new_entry, os.F_OK)):
        parser.error('cannot access maildir %s' % opts.maildir)
    maildir = Maildir(opts.maildir)

    server = couchdb.Server(opts.server)
    try:
        # Reading the version attribute is used as a connectivity probe.
        _probe = server.version
    except AttributeError:
        error('cannot connect to %s' % opts.server)

    try:
        database = server[opts.database]
    except couchdb.client.ResourceNotFound:
        error('invalid database name: %s' % opts.database)

    insertmessages(database, maildir, opts.verbose)
 
# Run the importer only when this file is executed as a script.
if __name__ == '__main__':
    main()
 
# eof

https://github.com/nborwankar/sproingg might be of interest to you; it is similar in nature to this gist.

Owner

Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.