Created
February 1, 2011 22:02
-
-
Save faried/806797 to your computer and use it in GitHub Desktop.
Store messages in a Maildir into a couchdb database.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Store messages in a Maildir into a couchdb database.""" | |
import couchdb | |
from mailbox import Maildir | |
from optparse import OptionParser | |
import os | |
from pprint import pprint | |
import sys | |
from uuid import uuid4 | |
def error(msg):
    """Report a fatal problem: write msg to stderr and exit with status 1."""
    line = '%s\n' % msg
    sys.stderr.write(line)
    # Equivalent to sys.exit(1): terminates the program with exit status 1.
    raise SystemExit(1)
def _to_text(value):
    """Return value as text.

    Byte strings are decoded as UTF-8 with undecodable bytes dropped, None
    (a missing header or display name) becomes an empty string, and any
    other value is returned unchanged.
    """
    if value is None:
        return u''
    if isinstance(value, bytes):
        return value.decode('utf-8', 'ignore')
    return value


def _build_document(fname, message):
    """Build the CouchDb document body describing one maildir message."""
    # getaddr() hands back (None, None) when there is no From header, so the
    # display name goes through _to_text() instead of a bare .decode() call.
    (fromname, fromaddr) = message.getaddr('From')
    data = {'type': 'email',
            'fromname': _to_text(fromname),
            'fromaddr': fromaddr,
            'fname': fname,
            # NOTE(review): reads a private attribute of the file wrapper to
            # recover the on-disk path of the message.
            'fullpath': message.fp._file.name}
    for header in ('delivered-to', 'subject', 'date', 'status', 'sender',
                   'message-id', 'in-reply-to', 'references'):
        data[header] = _to_text(message.get(header, ''))
    for header in ('to', 'cc'):
        data[header] = message.getaddrlist(header)
    return data


def insertmessages(database, maildir, verbose):
    """Put each maildir message in CouchDb.

    Every (key, message) pair from ``maildir.iteritems()`` becomes one
    document whose id is the maildir key, with the message file attached
    under the same name as ``message/rfc822``.  Keys that already exist in
    the database are skipped.  If storing a document raises, the document
    is pretty-printed and the program exits through error().

    database -- mapping-like CouchDb database (get, item assignment and
                put_attachment are used)
    maildir  -- object providing iteritems() over (key, message) pairs
    verbose  -- when true, print the path of each message being stored
    """
    inserted = 0
    ignored = 0
    count = 0

    for fname, message in maildir.iteritems():
        count += 1

        # is it already in there?
        if database.get(fname):
            print('%s already in db; skipping' % fname)
            ignored += 1
            continue

        data = _build_document(fname, message)

        # The maildir key doubles as the document id; that is what makes the
        # "already in db" lookup above work on a re-run.
        doc_id = fname

        if verbose:
            print('putting %s...' % data['fullpath'])

        try:
            database[doc_id] = data
        except Exception as exc:
            pprint(data)
            error('exception for %s:, %s' % (fname, str(exc)))

        doc = database.get(doc_id)
        if doc:
            # Parsing the headers leaves the file positioned after them;
            # rewind so the attachment holds the whole message, matching the
            # message/rfc822 content type.
            rewind = getattr(message.fp, 'seek', None)
            if rewind is not None:
                rewind(0)
            # TODO: handle failure here
            database.put_attachment(doc, message.fp, fname,
                                    content_type='message/rfc822')

        inserted += 1

        if count % 100 == 0:
            print('messages processed: %d' % count)

    print('inserted messages: %d ignored: %d' % (inserted, ignored))
def main():
    """Parse the command line, open the maildir and database, run the import.

    Exits through the option parser when the database name is missing or
    the maildir (or its 'new' subdirectory) cannot be accessed, and through
    error() when the server probe or the database lookup fails.
    """
    fallback_home = os.environ.get('HOME', os.getcwd())

    parser = OptionParser('usage: %prog -d dbname [other options]')
    parser.add_option('-v', '--verbose',
                      action='store_true',
                      dest='verbose',
                      help='enable loquacious mode. like, duh.')
    parser.add_option('-d', '--database',
                      dest='database',
                      help='mail database in CouchDb [required]')
    parser.add_option('-s', '--server',
                      dest='server',
                      default='http://localhost:5984/',
                      help='CouchDb URI [default: %default]')
    parser.add_option('-m', '--maildir',
                      dest='maildir',
                      default='%s/Maildir/' % fallback_home,
                      help='path to the maildir to import [default: %default]')

    opts, _ignored = parser.parse_args()

    if not opts.database:
        parser.error('a database name is required.')

    # Both the maildir itself and its 'new' subdirectory must exist; the
    # second check only runs when the first one succeeds.
    new_subdir = opts.maildir + os.sep + 'new'
    maildir_ok = os.access(opts.maildir, os.F_OK)
    if not maildir_ok or not os.access(new_subdir, os.F_OK):
        parser.error('cannot access maildir %s' % opts.maildir)

    maildir = Maildir(opts.maildir)
    server = couchdb.Server(opts.server)

    # Touch the version attribute as a connectivity probe.
    # NOTE(review): presumably an unreachable server surfaces here as
    # AttributeError with the couchdb client in use; confirm.
    try:
        _probe = server.version
    except AttributeError:
        error('cannot connect to %s' % opts.server)

    try:
        database = server[opts.database]
    except couchdb.client.ResourceNotFound:
        error('invalid database name: %s' % opts.database)

    insertmessages(database, maildir, opts.verbose)
# Run the importer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# eof |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://github.com/nborwankar/sproingg might be of interest to you; it is similar in nature to this gist.