Skip to content

Instantly share code, notes, and snippets.

@vadimii
Last active September 7, 2018 16:41
Show Gist options
  • Save vadimii/5002455 to your computer and use it in GitHub Desktop.
Save vadimii/5002455 to your computer and use it in GitHub Desktop.
Dump MongoDB databases to zip archives and copy them to the output folder.
# -*- coding: utf-8 -*-
'''Dump MongoDB databases to zip archives
and copy them to the output folder.
Usage: python mongozip.py
'''
import os
import os.path
import datetime
import tempfile
import shutil
import subprocess
import sys
from zipfile import ZipFile, ZIP_DEFLATED
from pymongo import MongoClient
from pymongo.read_preferences import ReadPreference
# Host and port of the MongoDB server that is queried and dumped.
MONGO_HOST = 'm2.example.com'
MONGO_PORT = 27017

# Location of the mongodump executable (Windows-style path).
MONGODUMP = 'C:\\mongodb\\bin\\mongodump.exe'

# Folder that receives the finished archives; the relative path is
# resolved against the working directory when the module is loaded.
OUTPUT_DIR = os.path.abspath('./some/secure/published/folder')

# One entry per database to back up:
#   database - name of the database to dump
#   ignore   - collection names that must not be dumped
#   login    - user name used for authentication
#   password - password used for authentication
# NOTE(review): the credentials are kept in plain text in this file.
MONGO_TARGETS = [
    {
        'database': 'database1',
        'ignore': ['fs.chunks'],
        'login': 'database1',
        'password': 'Jfhgd76eHHs',
    },
    {
        'database': 'database2',
        'ignore': [],
        'login': 'database2',
        'password': 'Jdhg613TFg',
    },
]
def get_target_collections(target):
    '''Get list of collections except system collections
    and collections which are configured as ignored.

    ``target`` is a mapping with the keys 'database', 'ignore', 'login'
    and 'password'. Returns a list with the names of the collections
    that should be dumped.
    '''
    dbname = target['database']
    ignores = target['ignore']
    login = target['login']
    password = target['password']
    mongo = MongoClient(MONGO_HOST, MONGO_PORT)
    try:
        database = mongo[dbname]
        database.authenticate(login, password)
        database.read_preference = ReadPreference.SECONDARY
        return [
            col for col in database.collection_names()
            if not (col.startswith('system.') or col in ignores)
        ]
    finally:
        # The client is only needed to list the collections; close it so
        # that a connection is not left open for every processed target.
        mongo.close()
def dump_collection(target, collection, temp_dir):
    '''Dump target collection to the temporary folder.

    Runs the mongodump executable for ``collection`` of the database
    described by ``target`` and writes the result below ``temp_dir``.
    Raises RuntimeError when mongodump exits with a non-zero status, so
    a failed dump cannot end up in an archive that looks complete.
    '''
    dbname = target['database']
    login = target['login']
    password = target['password']
    # NOTE(review): the password is passed as a command-line argument.
    args = [
        MONGODUMP,
        '-h', MONGO_HOST,
        '--port', str(MONGO_PORT),
        '-u', login,
        '-p', password,
        '-d', dbname,
        '-c', collection,
        '-o', temp_dir,
    ]
    status = subprocess.call(args)
    if status != 0:
        raise RuntimeError(
            'mongodump failed for {db}.{col} (exit code {code})'.format(
                db=dbname, col=collection, code=status))
def zip_db_dump(dbname, temp_dir):
    '''Zip database dump folder.

    Packs every file found below ``temp_dir/dbname`` into the archive
    ``temp_dir/<dbname>.<YYYY-MM-DD>.zip``. Entries are stored with
    their path relative to ``temp_dir``, so they start with ``dbname``.
    Returns the path of the created archive.
    '''
    source_zip = os.path.join(temp_dir, dbname)
    date_stamp = datetime.date.today().isoformat()
    zip_name = '{dbname}.{date}.zip'.format(dbname=dbname, date=date_stamp)
    target_zip = os.path.join(temp_dir, zip_name)
    with ZipFile(target_zip, 'w', ZIP_DEFLATED) as myzip:
        for root, _, files in os.walk(source_zip):
            for fname in files:
                absfn = os.path.join(root, fname)
                # relpath stays correct when temp_dir ends with a path
                # separator; slicing by len(temp_dir) would then cut off
                # the first character of the entry name.
                myzip.write(absfn, os.path.relpath(absfn, temp_dir))
    return target_zip
def write_to_output(dbname, abszipfn):
    '''Publish an archive in the output folder.

    Moves ``abszipfn`` into OUTPUT_DIR (creating the folder when it is
    missing) and then copies it to ``<dbname>.latest.zip`` in the same
    folder.
    '''
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    # Move the dated archive out of the temporary folder.
    published = os.path.join(OUTPUT_DIR, os.path.basename(abszipfn))
    shutil.move(abszipfn, published)

    # Refresh the "latest" copy from the archive that was just moved.
    latest_name = '{dbname}.latest.zip'.format(dbname=dbname)
    shutil.copy2(published, os.path.join(OUTPUT_DIR, latest_name))
def main():
    '''The entry point of the script.

    For every entry of MONGO_TARGETS the wanted collections are dumped
    into a temporary folder, the dump is zipped and the archive is
    written to the output folder. Returns 0 on success and 1 when an
    error occurred; the error message is written to stderr. The
    temporary folder is removed in both cases.
    '''
    temp_dir = tempfile.mkdtemp()
    try:
        for target in MONGO_TARGETS:
            cols = get_target_collections(target)
            for col in cols:
                dump_collection(target, col, temp_dir)
            dbname = target['database']
            abszipfn = zip_db_dump(dbname, temp_dir)
            write_to_output(dbname, abszipfn)
    except Exception as err:
        # ``StandardError`` and ``print >> sys.stderr`` exist only in
        # Python 2; this form reports the error on Python 2 and 3 alike.
        sys.stderr.write(str(err) + '\n')
        return 1
    finally:
        shutil.rmtree(temp_dir)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment