Created
May 9, 2011 09:00
-
-
Save pklaus/962257 to your computer and use it in GitHub Desktop.
mb2md: convert mailbox to Maildir. See <http://www.gerg.ca/hacks/mb2md/>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""addtomaildir | |

Reads an RFC 822 message (possibly with a leading "From " line) from
stdin, or from a file named on the command line, and adds it to a
Maildir.  The exact details of where it lands and what it's called in
the Maildir depend on various header values in the input message:

  * if there is no "Status" header, the message goes in "new",
    otherwise in "cur"
  * if "Status" is "O" (old), the filename has no info field
  * if "Status" is "RO" (read old), the filename has ":2,S" appended
    as its info field
  * the mtime of the file will be the delivery time of the message,
    if we can figure out the delivery time.  Tries the "Delivery-date"
    header first, then the "From " line; if neither exists or can
    be parsed, leaves the mtime alone.
""" | |
# | |
# Found on <http://www.gerg.ca/hacks/mb2md/>. | |
# Referred to by Philipp Klaus on <http://blog.philippklaus.de/2010/02/convert-mbox-to-maildir/> | |
# | |
import sys, os, re | |
import socket, errno | |
from time import time, mktime, strptime, ctime, sleep | |
from rfc822 import Message, parsedate_tz, mktime_tz | |
class Error(Exception):
    """Failure reported by this script; main() turns it into an exit message."""
def warn(msg):
    """Write `msg` to stderr as a single line prefixed with "warning: "."""
    line = "warning: %s\n" % msg
    sys.stderr.write(line)
def maildir_open(maildir):
    """Create a new, uniquely named file under "tmp/" and open it for writing.

    Assumes the current working directory is already the Maildir, so the
    returned name is relative ("tmp/<time><pid>.<hostname>").  `maildir`
    itself is only used in the error message.

    Returns a (name, fd) tuple, where fd is an OS-level file descriptor
    opened write-only on the freshly created file (mode 0600).

    Raises Error if a unique file still could not be created after several
    attempts.  Any OSError other than "file exists" (missing "tmp"
    directory, permissions, ...) is propagated unchanged.
    """
    hostname = socket.gethostname()
    pid = os.getpid()
    num_tries = 0
    max_tries = 5
    while 1:
        name = "tmp/%.6f%05d.%s" % (time(), pid, hostname)
        num_tries += 1
        try:
            # O_EXCL makes the create itself the uniqueness test; checking
            # with stat() first and opening afterwards would leave a window
            # in which another process could claim the same name.
            fd = os.open(name, os.O_WRONLY | os.O_EXCL | os.O_CREAT, 0o600)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise
            if num_tries > max_tries:
                raise Error("error: could not create temporary file in %s/tmp"
                            % maildir)
            sleep(2)                    # name is taken: wait and try again
        else:
            return (name, fd)
def grok_status(msg):
    """Map the message's "Status" header to a Maildir placement.

    Returns a (subdirectory, info) tuple: the Maildir subdirectory the
    message belongs in and the info suffix to append to its filename.
    """
    placements = {
        "O": ("cur", ""),           # seen by MUA, but not read by user
        "RO": ("cur", ":2,S"),      # read by user
    }
    # Header not there, empty, or an unknown value: treat as a new message.
    return placements.get(msg.get("Status"), ("new", ""))
def get_delivery_time(msg):
    """Work out when `msg` was delivered.

    Tries the "Delivery-date" header first; if that header is missing or
    cannot be parsed, falls back to the date on the "From " line
    (msg.unixfrom).

    Returns the delivery time in seconds since the epoch, or None if it
    cannot be determined.
    """
    if "Delivery-date" in msg:
        # eg. "Thu, 12 Jul 2001 08:47:20 -0400" to 994942040 (seconds
        # since epoch in UTC)
        parsed = parsedate_tz(msg["Delivery-date"])
        if parsed is not None:
            return mktime_tz(parsed)
        # parsedate_tz() returns None for a header it cannot parse; fall
        # through to the "From " line instead of crashing in mktime_tz().

    if not msg.unixfrom:
        return None

    # Parse eg.
    #   "From python-dev-admin@python.org Thu Jul 12 08:47:20 2001"
    # -- this is the "From " line format used by Exim; hopefully other
    # MTAs do the same!
    m = re.match(r'^From (\S+) +(\w{3} \w{3}\s+\d\d? \d\d:\d\d:\d\d \d{4})$',
                 msg.unixfrom)
    if not m:
        # warn() adds the "warning: " prefix itself.
        warn("could not parse \"From \" line: %s" % msg.unixfrom)
        return None
    dtime_str = m.group(2)

    # The date uses the fixed layout produced by ctime(); spell it out
    # rather than relying on "%c", whose field order depends on the locale.
    ctime_format = "%a %b %d %H:%M:%S %Y"

    # Eg. "Thu Jul 12 08:47:20 2001" -> 994945640 -- note that this might
    # be different from what we would get from a "Delivery-date" header
    # for the same moment, because this string carries no timezone.  Sigh.
    try:
        dtime = mktime(strptime(dtime_str, ctime_format))
    except (ValueError, OverflowError):
        # Matched the regex but is not a real date (eg. day 99).
        warn("could not parse date on \"From \" line: %s" % msg.unixfrom)
        return None

    # Attempt to detect and correct for DST differences.
    # (This works if we parsed a summer time during the winter;
    # what about the inverse?)
    dtime_str_curtz = ctime(dtime)
    if dtime_str_curtz != dtime_str:
        dtime_curtz = mktime(strptime(dtime_str_curtz, ctime_format))
        diff = dtime_curtz - dtime
        dtime -= diff
    return dtime
def write_message(msg, msg_file, out_fd):
    """Write a complete message (headers, then body) to `out_fd`.

    msg      -- the parsed message; str(msg) supplies the header text
    msg_file -- the file object the message was read from, expected to be
                positioned at the start of the body
    out_fd   -- OS-level file descriptor of the temp file; it is synced
                and closed before returning

    Raises Error on a short write, or if the file cannot be synced or
    closed.  The temp file is not removed here: this function only has
    the descriptor, not the name, so removal is left to the caller.
    """
    # Write the headers to the temp file, followed by the blank line that
    # separates them from the body.
    headers = str(msg) + "\n"
    n = os.write(out_fd, headers)
    if n != len(headers):
        raise Error("failed to write headers (%d/%d bytes written)"
                    % (n, len(headers)))

    # Copy the body from msg_file to the temp file.
    chunk = 16*1024
    while 1:
        data = msg_file.read(chunk)
        if not data:
            break
        n = os.write(out_fd, data)
        if n != len(data):
            raise Error("failed to write chunk of body (%d/%d bytes written)"
                        % (n, len(data)))

    # Sync and close the temp file.  The descriptor is closed even if
    # fsync() fails, so it is never left open.
    try:
        try:
            os.fsync(out_fd)
        finally:
            os.close(out_fd)
    except OSError as err:
        raise Error("unable to fsync() or close() temp file: %s" % err)
def finish_message(tmp_name, dir, info, dtime):
    """Hard-link the temp file into place under its final name.

    The final name is the temp file's base name with `info` appended,
    inside directory `dir`.  If `dtime` is not None, it becomes the new
    file's modification time (the access time is kept as it was).  The
    temp name itself is left in place.

    Returns the final name.
    """
    final_name = os.path.join(dir, os.path.basename(tmp_name) + info)
    os.link(tmp_name, final_name)
    if dtime is None:
        # Delivery time unknown: leave the timestamps alone.
        return final_name
    current_atime = os.stat(final_name).st_atime
    os.utime(final_name, (current_atime, dtime))
    return final_name
def add(msg_file, maildir):
    """Deliver the message readable from `msg_file` into `maildir`.

    Changes into `maildir` for the duration of the delivery and always
    restores the original working directory afterwards.  On success the
    final name of the message (relative to `maildir`) is printed on
    stdout.  The temp file is removed whether or not delivery succeeds.
    """
    start_dir = os.getcwd()
    os.chdir(maildir)
    try:
        # First reserve a place in the maildir (ie. open the file in tmp).
        (tmp_name, out_fd) = maildir_open(maildir)
        fd_open = True
        try:
            msg = Message(msg_file)
            (subdir, info) = grok_status(msg)
            dtime = get_delivery_time(msg)
            write_message(msg, msg_file, out_fd)
            fd_open = False             # write_message() closed it
            dst_name = finish_message(tmp_name, subdir, info, dtime)
        finally:
            if fd_open:
                # Something failed before write_message() finished; make
                # a best effort not to leak the descriptor.  It may have
                # been closed already, hence the tolerated OSError.
                try:
                    os.close(out_fd)
                except OSError:
                    pass
            os.unlink(tmp_name)
    finally:
        os.chdir(start_dir)
    print(dst_name)
def main():
    """Command-line entry point.

    Usage:
        <prog> maildir              (message read from stdin)
        <prog> msg_file maildir     (message read from msg_file)

    Exits with an error message if the arguments are wrong, if `maildir`
    lacks the "tmp", "cur" and "new" subdirectories, or if delivery fails
    with Error.
    """
    prog = os.path.basename(sys.argv[0])
    args = sys.argv[1:]
    opened_here = False
    if len(args) == 1:
        maildir = args[0]
        msg_file = sys.stdin
    elif len(args) == 2:
        (msg_filename, maildir) = args
        msg_file = open(msg_filename)
        opened_here = True
    else:
        # Both "%s" placeholders take the program name.
        sys.exit("usage: %s maildir\n"
                 " %s msg_file maildir\n"
                 "\n"
                 "error: incorrect number of arguments\n" % (prog, prog))

    try:
        if not (os.path.isdir(maildir) and
                os.path.isdir(os.path.join(maildir, "tmp")) and
                os.path.isdir(os.path.join(maildir, "cur")) and
                os.path.isdir(os.path.join(maildir, "new"))):
            sys.exit("error: not a maildir: %s" % maildir)

        try:
            add(msg_file, maildir)
        except Error as err:
            sys.exit(str(err))
    finally:
        # Only close what we opened ourselves; leave stdin alone.
        if opened_here:
            msg_file.close()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Convert an mbox mail file to a maildir.
# Requires formail (from procmail) or reformail (from maildrop)
# and my addtomaildir script.
# Idea stolen from
# http://www.nb.net/~lbudney/linux/software/safecat/one-liners.html
#
# by Greg Ward, 2002/01/22
#
# Usage:
#   mb2md mbox maildir
# where mbox must exist, and maildir must not exist.
#
# NOTE: addtomaildir is run as "python addtomaildir", so it is looked up
# in the current working directory.
#
# Found on <http://www.gerg.ca/hacks/mb2md/>.
# Referred to by Philipp Klaus on <http://blog.philippklaus.de/2010/02/convert-mbox-to-maildir/>
#

if [ "$#" -ne 2 ] ; then
    echo "usage: $0 mbox maildir" >&2
    exit 1
fi

mbox=$1
maildir=$2

if [ -e "$maildir" ] ; then
    echo "error: $maildir already exists" >&2
    exit 1
fi
if [ ! -e "$mbox" ] ; then
    echo "error: $mbox does not exist" >&2
    exit 1
fi

# Pick the first available mbox splitter.  Start from an empty value so a
# "helper" variable inherited from the environment is never used.
helper=
for p in formail reformail ; do
    pp=`command -v "$p" 2>/dev/null`
    if [ -n "$pp" ] && [ -x "$pp" ] ; then
        helper=$pp
        break
    fi
done
if [ -z "$helper" ] ; then
    echo "error: either formail or reformail is required" >&2
    exit 1
fi

# Create the three subdirectories that make up a maildir.
for d in cur new tmp ; do
    mkdir -p "$maildir/$d" || exit 1
done

# Split the mbox and run addtomaildir once per message.
"$helper" -s python addtomaildir "$maildir" < "$mbox"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment