Skip to content

Instantly share code, notes, and snippets.

@vsajip
Last active November 17, 2020 18:53
Show Gist options
  • Save vsajip/da1aad66e8a96cb5a868d72fccf1eee9 to your computer and use it in GitHub Desktop.
Save vsajip/da1aad66e8a96cb5a868d72fccf1eee9 to your computer and use it in GitHub Desktop.
Python 3.6+ scripts for synchronizing IMAP mailboxes using mbsync and imapsync
#
# Copyright (C) 2020 Red Dove Consultants Limited
#
# License: GPL v2
#
#
# This script uses imapsync to synchronize IMAP mailboxes. It uses as input a CSV file
# with columns oldhost,olduser,oldpwd,newhost,newuser,newpwd,migrate. The first line
# is expected to be those column names. oldXXX columns refer to the source mailbox
# information and newXXX columns refer to the target mailbox information. The migrate
# column, if not empty, triggers a migration.
#
# You can use --dry to just do a dry run. Memory usage seems to top out at around
# 350 MB for a mailbox with > 22000 messages.
#
import argparse
import csv
import os
import subprocess
import sys
# When true, the top-level error handler in the ``__main__`` section prefixes
# the "Failed:" message with the exception's type name.
PRINT_EXC_TYPE = False
def map_row(row, header):
    """Return a dict mapping each column name in *header* to its value in *row*.

    Args:
        row: sequence of field values for one CSV record.
        header: sequence of column names, in the same order as the fields.

    Returns:
        A dict with one entry per name in *header*. Columns missing from a
        short row (including the empty list produced for a blank line) map
        to ``''`` so that callers can index every column safely. Values
        beyond the end of *header* are ignored.
    """
    # A negative count yields an empty list, so long rows get no padding.
    padding = [''] * (len(header) - len(row))
    return dict(zip(header, list(row) + padding))
def main():
    """Run imapsync for every CSV row whose ``migrate`` column is non-empty.

    Command-line arguments:
      INPUT   CSV file whose first line names the columns (oldhost, olduser,
              oldpwd, newhost, newuser, newpwd, migrate).
      --dry   Append ``--dry`` to each imapsync command line.

    Arguments that argparse does not recognise are appended verbatim to each
    imapsync command line.

    Returns:
        0 once every selected row has been processed.

    Raises:
        ValueError: if INPUT has no header line, or if an imapsync run exits
            with a non-zero status (remaining rows are then not processed).
    """
    adhf = argparse.ArgumentDefaultsHelpFormatter
    parser = argparse.ArgumentParser(formatter_class=adhf)
    aa = parser.add_argument
    aa('input', metavar='INPUT', help='Input file')
    aa('--dry', default=False, action='store_true', help='Do a dry run')
    options, args = parser.parse_known_args()
    # The csv module asks for files opened with newline='' so that newlines
    # embedded in quoted fields are parsed correctly.
    with open(options.input, newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            raise ValueError('input file %s is empty' % options.input)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to migrate.
                continue
            row = map_row(row, header)
            if not row['migrate']:
                continue
            # NOTE(review): the passwords are passed as command-line
            # arguments and are therefore visible in the process list while
            # imapsync runs. Consider imapsync's password-file options
            # instead -- TODO confirm against the installed version.
            cmd = ['imapsync', '--tmpdir', 'tmp', '--host1', row['oldhost'],
                   '--user1', row['olduser'], '--password1', row['oldpwd'],
                   '--host2', row['newhost'], '--user2', row['newuser'],
                   '--password2', row['newpwd'], '--ssl2'] + args
            if options.dry:
                cmd.append('--dry')
            try:
                subprocess.check_call(cmd, shell=False)
            except subprocess.CalledProcessError as e:
                # Chain the original error so a traceback shows the cause.
                raise ValueError(
                    f'imapsync failed with return code {e.returncode}') from e
    return 0
if __name__ == '__main__':
    # Exit status: whatever main() returns on success, 2 when interrupted
    # from the keyboard, 1 on any other exception.
    try:
        rc = main()
    except KeyboardInterrupt:
        rc = 2
    except Exception as e:
        rc = 1
        # Optionally show the exception class name in front of its message.
        s = ' %s:' % type(e).__name__ if PRINT_EXC_TYPE else ''
        sys.stderr.write('Failed:%s %s\n' % (s, e))
        # Setting PY_DEBUG in the environment enables a full traceback.
        if 'PY_DEBUG' in os.environ:
            import traceback
            traceback.print_exc()
    sys.exit(rc)
#
# Copyright (C) 2020 Red Dove Consultants Limited
#
# License: GPL v2
#
#
# This script uses mbsync to synchronize IMAP mailboxes. It uses as input a CSV file
# with columns oldhost,olduser,oldpwd,newhost,newuser,newpwd,migrate. The first line
# is expected to be those column names. oldXXX columns refer to the source mailbox
# information and newXXX columns refer to the target mailbox information. The migrate
# column, if not empty, triggers a migration.
#
# You can use --dry to just print the mbsync command that would be run, without actually
# running it. The script creates a configuration file for each source mailbox as well
# as a place for holding the migration state for each source mailbox (this can be used
# to resync mailboxes with repeated runs). While mbsync is seemingly a memory hog, which
# might cause problems with quotas, this script might be useful in some scenarios.
#
import argparse
import csv
import os
import subprocess
import sys
# When true, the top-level error handler in the ``__main__`` section prefixes
# the "Failed:" message with the exception's type name.
PRINT_EXC_TYPE = False
def map_row(row, header):
    """Return a dict mapping each column name in *header* to its value in *row*.

    Args:
        row: sequence of field values for one CSV record.
        header: sequence of column names, in the same order as the fields.

    Returns:
        A dict with one entry per name in *header*. Columns missing from a
        short row (including the empty list produced for a blank line) map
        to ``''`` so that callers can index every column safely. Values
        beyond the end of *header* are ignored.
    """
    # A negative count yields an empty list, so long rows get no padding.
    padding = [''] * (len(header) - len(row))
    return dict(zip(header, list(row) + padding))
# Template for a per-mailbox mbsync configuration file. It is filled in with
# %-formatting from a mapping that provides oldhost, olduser, oldpwd, newhost,
# newuser, newpwd and syncstate.
#
# The empty lines are significant: mbsync ends a configuration section at an
# empty line (or end of file), so each account, store and channel definition
# must be separated from the next one.
MBSYNC_CONFIG = '''
IMAPAccount oldhost
SSLType IMAPS
Host %(oldhost)s
User %(olduser)s
Pass %(oldpwd)s

IMAPStore oldhost
Account oldhost

IMAPAccount newhost
SSLType IMAPS
Host %(newhost)s
User %(newuser)s
Pass %(newpwd)s

IMAPStore newhost
Account newhost

Channel main
Master :oldhost:
Slave :newhost:
Patterns *
Create Slave
SyncState %(syncstate)s/
Sync Pull
'''.strip()
def sync_row(options, args, row):
    """Write an mbsync configuration for one mailbox and run mbsync on it.

    The configuration is written to ``<olduser>.conf`` in the current
    directory, and the sync state directory is ``syncstates/<olduser>`` below
    the current directory. The mbsync command line is always printed; the
    state directory is created and mbsync is run only when this is not a dry
    run. A failing mbsync run is reported but does not raise, so the caller
    can carry on with the next mailbox.

    Args:
        options: parsed command-line options; only ``options.dry`` is read.
        args: extra command-line arguments, appended to the mbsync command.
        row: mapping providing oldhost, olduser, oldpwd, newhost, newuser
            and newpwd for the mailbox.
    """
    data = dict(row)
    curdir = os.getcwd()
    olduser = data['olduser']
    statepath = os.path.abspath(os.path.join(curdir, 'syncstates', olduser))
    data['syncstate'] = statepath
    if not options.dry:
        os.makedirs(statepath, exist_ok=True)
    config = MBSYNC_CONFIG % data
    cfn = os.path.join(curdir, '%s.conf' % olduser)
    # The configuration contains both passwords in plain text, so make sure
    # only the owner can read it. The mode given to os.open() applies only
    # when the file is created, hence the explicit chmod for existing files.
    fd = os.open(cfn, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w', encoding='utf-8') as f:
        os.chmod(cfn, 0o600)
        f.write(config + '\n')
    cmd = ['mbsync', '-ac', cfn] + list(args)
    print(' '.join(cmd))
    if not options.dry:
        try:
            subprocess.check_call(cmd, shell=False)
        except subprocess.CalledProcessError as e:
            print('mbsync failed with return code %s' % e.returncode)
def main():
    """Run sync_row() for every CSV row whose ``migrate`` column is non-empty.

    Command-line arguments:
      INPUT   CSV file whose first line names the columns (oldhost, olduser,
              oldpwd, newhost, newuser, newpwd, migrate).
      --dry   Do a dry run (passed on to sync_row() via the options object).

    Arguments that argparse does not recognise are collected and passed to
    sync_row() as its ``args`` parameter.

    Returns:
        0 once every selected row has been processed.

    Raises:
        ValueError: if INPUT has no header line.
    """
    adhf = argparse.ArgumentDefaultsHelpFormatter
    parser = argparse.ArgumentParser(formatter_class=adhf)
    aa = parser.add_argument
    aa('input', metavar='INPUT', help='Input file')
    aa('--dry', default=False, action='store_true', help='Do a dry run')
    options, args = parser.parse_known_args()
    # The csv module asks for files opened with newline='' so that newlines
    # embedded in quoted fields are parsed correctly.
    with open(options.input, newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            raise ValueError('input file %s is empty' % options.input)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to migrate.
                continue
            row = map_row(row, header)
            if row['migrate']:
                sync_row(options, args, row)
    return 0
if __name__ == '__main__':
    # Exit status: whatever main() returns on success, 2 when interrupted
    # from the keyboard, 1 on any other exception.
    try:
        rc = main()
    except KeyboardInterrupt:
        rc = 2
    except Exception as e:
        rc = 1
        # Optionally show the exception class name in front of its message.
        s = ' %s:' % type(e).__name__ if PRINT_EXC_TYPE else ''
        sys.stderr.write('Failed:%s %s\n' % (s, e))
        # Setting PY_DEBUG in the environment enables a full traceback.
        if 'PY_DEBUG' in os.environ:
            import traceback
            traceback.print_exc()
    sys.exit(rc)
@kesavanm
Copy link

Wow! Thanks for the script; You made my day!
Migrated 80K mails with 18GB in ~8+ Hours

++++ Statistics
Transfer started on               : Tue Nov 17 06:49:21 2020
Transfer ended on                 : Tue Nov 17 15:40:56 2020
Transfer time                     : 31895.9 sec
Folders synced                    : 109/109 synced
Messages transferred              : 79650
Messages skipped                  : 12764
Messages found duplicate on host1 : 153
Messages found duplicate on host2 : 3029
Messages void (noheader) on host1 : 3
Messages void (noheader) on host2 : 0
Messages deleted on host1         : 0
Messages deleted on host2         : 0
Total bytes transferred           : 19817090024 (18.456 GiB)
Total bytes duplicate host1       : 31018509 (29.582 MiB)
Total bytes duplicate host2       : 433065682 (413.004 MiB)
Total bytes skipped               : 1083346307 (1.009 GiB)
Total bytes error                 : 0 (0.000 KiB)
Message rate                      : 2.5 messages/s
Average bandwidth rate            : 606.7 KiB/s
Memory consumption                : 692.5 MiB
Biggest message                   : 35531110 bytes (33.885 MiB)
Memory/biggest message ratio      : 20.4
Start difference host2 - host1    : -76773 messages, -19414666500 bytes (-18.081 GiB)
Final difference host2 - host1    : 2866 messages, 401007141 bytes (382.430 MiB)
Detected 0 errors

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment