Skip to content

Instantly share code, notes, and snippets.

@bkreider
Created December 7, 2018 20:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bkreider/e728257bb8b903ac6012411817a469f6 to your computer and use it in GitHub Desktop.
Save bkreider/e728257bb8b903ac6012411817a469f6 to your computer and use it in GitHub Desktop.
Test Mongo Replication
"""
Generally used like this:
1. fab show_status # shows mongo replication status
2. fab kill_master # kills the current master - only if the replicaset has no failures, see "safe" flag
3. fab start_down_mongo # starts mongod on the first machine found down
"""
import os.path
import subprocess
from tempfile import NamedTemporaryFile
from fabric.api import env, local, run, sudo, cd, hide, put, settings, task, put, execute, show
from fabric.contrib.files import append, exists, sed
# Turn off fabric's 'everything' output group so remote command output is
# hidden; the tasks below print their own short summaries instead.
from fabric.state import output
output['everything'] = False
# NOTE(review): presumably lets fabric resolve host aliases and keys from the
# user's SSH config (see the HOST alias below) - confirm against fabric docs.
env.use_ssh_config = True
# todo: use "local" command with fabric
def _run_command(cmd):
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
output = p.stdout.read()
retval = p.wait()
# todo: error checking
return output
def _terraform_user():
    """Return the output of ``terraform output ssh_user``.

    Surrounding whitespace is stripped so the trailing newline emitted by the
    command does not end up in the user name (matching how
    ``_terraform_ips`` cleans its output).
    """
    return _run_command("terraform output ssh_user").strip()
def _terraform_ips():
    """Return the whitespace-separated tokens printed by
    ``terraform output public_ips`` as a list."""
    return _run_command("terraform output public_ips").strip().split()
# Both values are resolved once, at import time, by shelling out to terraform.
# NOTE(review): USER is not referenced anywhere else in the visible code -
# confirm whether it is still needed.
USER = _terraform_user()
# Hosts the tasks below iterate over when probing each mongod.
HOSTS = _terraform_ips()
# TODO: HOST is a hard-coded alias; per the original note it relies on a
# matching entry in .ssh/config to supply the key - replace with a real lookup.
HOST = "cdh-server"
env.hosts = [HOST]
@task
def show_status():
    """Print ``rs.status()`` as reported by the first host whose mongod is up.

    Prints a short notice and returns without running anything when no host
    reports a running mongod.
    """
    def _get_status():
        return run("""echo 'printjson(rs.status())' | mongo --quiet""")

    host = find_down_mongo(up=True)
    # find_down_mongo returns "" when nothing matched; do not hand an empty
    # host string to execute().
    if not host:
        print("No running mongod found on any host")
        return
    res = execute(_get_status, hosts=[host])
    # execute() returns a dict keyed by host; only one host was targeted.
    print(list(res.values())[0])
def find_master():
    """Return the first host in HOSTS that reports itself as master.

    Each host is asked for ``db.isMaster()['ismaster']`` through the mongo
    shell; "" is returned when no host answers "true".
    """
    def _is_master():
        return run("""echo "printjson(db.isMaster()['ismaster'])" | mongo --quiet""")

    for index, host in enumerate(HOSTS):
        # warn_only: a failing probe on one host must not abort the scan
        with settings(warn_only=True):
            reply = execute(_is_master, hosts=[host])
        if list(reply.values())[0] == "true":
            print("master: %s:%s" % (index, host))
            return host
    return ""
@task
def kill_master(safe=True):
    """Kill the master by stopping mongod on that machine.

    Args:
        safe: When true (the default), refuse to act if some member is
            already down, so two members are never dead at the same time.
            Values given on the fab command line arrive as strings, so
            "false", "0", "no", "n", "off" and "" (any case) are treated
            as False.

    Returns:
        The ``execute`` result of the stop command, or "" when nothing was
        stopped (safety check refused, or no master was found).
    """
    def _stop_mongod():
        return sudo("""/etc/init.d/mongod stop""")

    # `fab kill_master:safe=False` passes the *string* "False", which is
    # truthy; normalise the common spellings of "off" before testing it.
    if str(safe).strip().lower() in ("false", "0", "no", "n", "off", ""):
        safe = False

    if safe:
        down_host = find_down_mongo()
        if down_host != "":
            print("Refusing to kill a 2nd machine (dead: %s)" % (down_host,))
            return ""

    host = find_master()
    # find_master returns "" when no host claims to be master; do not hand
    # an empty host string to execute().
    if not host:
        print("No master found; nothing to kill")
        return ""
    return execute(_stop_mongod, hosts=[host])
def find_down_mongo(up=False):
    """
    Scan HOSTS in order and return the first one in the requested state.

    With up=False (default) the first host whose ``mongod status`` exits
    with code 3 is returned; with up=True the first host whose status exits
    with code 0. Returns "" when no host matches.
    """
    def _mongod_status():
        return sudo("""/etc/init.d/mongod status""")

    for index, host in enumerate(HOSTS):
        # warn_only: a non-zero status exit is an answer, not a failure
        with settings(warn_only=True):
            result = execute(_mongod_status, hosts=[host])
        code = list(result.values())[0].return_code
        if code == 0 and up:
            print("Up: %s:%s" % (index, host))
            return host
        if code == 3 and not up:
            print("Down: %s:%s" % (index, host))
            return host
    return ""
@task
def start_down_mongo():
    """Start mongod on the first host reported down.

    Returns the ``execute`` result of the start command, or False when
    find_down_mongo finds no down host.
    """
    def _launch_mongod():
        return sudo("/etc/init.d/mongod start")

    target = find_down_mongo()
    if target != "":
        return execute(_launch_mongod, hosts=[target])
    # No down host found - maybe everything is already up.
    return False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment