Skip to content

Instantly share code, notes, and snippets.

@lavagetto
Last active Apr 21, 2016
Embed
What would you like to do?
#!/usr/bin/python
# Example cluster data files for this script:
#   memcached.yaml: https://gist.github.com/lavagetto/f03d6c342dcdd9e718347b11937da9b7
#   jobqueue.yaml:  https://gist.github.com/lavagetto/5a8be60410fdc9e988a80052535835b2
import logging
import argparse
import os
import subprocess
import yaml
# Environment handed to the ssh subprocesses: a private copy of the current
# process environment with the ssh agent socket pinned.
# Use your own agent here
env = dict(os.environ, SSH_AUTH_SOCK='/run/user/1000/ssh-agent-prod.socket')

# Module-wide logger; messages go through the root configuration below.
log = logging.getLogger('switch-redis')

# Send everything from DEBUG upwards to a stream handler, timestamped.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.DEBUG,
)
class RedisProcessor(object):
    """Inspect and switch Redis replication by running redis-cli over ssh.

    Every command is executed as ``ssh <exec_host> redis-cli ...`` using the
    module-level ``env`` environment. Redis instances are described by dicts
    with a ``'host'`` key and an integer ``'port'`` key.
    """

    # Host the ssh commands are run on. It is read at call time, so it can be
    # overridden on the class (before instantiation) or on an instance.
    exec_host = 'rdb1001.eqiad.wmnet'

    def __init__(self, dry_run):
        """Fetch the redis password and remember the dry-run flag.

        Note that constructing an instance performs an ssh call (see
        ``get_pass``), using whatever ``exec_host`` is set at that moment.
        """
        self.passwd = self.get_pass()
        self.dry_run = dry_run

    def get_pass(self):
        """Obtain the redis password.

        Greps ``requirepass`` out of /etc/redis/tcp_6379.conf on ``exec_host``
        and returns the last space-separated token of the output.
        """
        log.info("Fetching the redis password")
        # universal_newlines=True makes check_output return text instead of
        # bytes, so the string handling below works on Python 2 and 3 alike.
        output = subprocess.check_output(
            ['ssh', self.exec_host,
             'sudo grep requirepass /etc/redis/tcp_6379.conf'],
            env=env, universal_newlines=True)
        return output.strip().split(" ")[-1]

    def cmd(self, data, command, dry_run=False):
        """Execute a redis-cli command against the instance described by data.

        Returns the command output as text, or None when ``dry_run`` is true
        (in which case the command is only logged, not executed).
        Raises subprocess.CalledProcessError if ssh exits non-zero.
        """
        # NOTE(review): the password is embedded in the command line, so it is
        # visible in the dry-run log line below, and the string is interpreted
        # by the remote shell (special characters are not quoted).
        command = "redis-cli -h %s -p %d -a %s %s" % (
            data['host'], data['port'], self.passwd, command)
        if dry_run:
            log.info("Would have executed: %s", command)
            return None
        return subprocess.check_output(
            ['ssh', self.exec_host, command],
            env=env, universal_newlines=True)

    @staticmethod
    def parse_info(raw):
        """Do a very coarse-grained parsing of the INFO command output.

        Returns a dict mapping each ``key:value`` line to its stripped value;
        ``#`` section headers and lines without a colon are ignored.
        """
        info = {}
        for line in raw.splitlines():
            if line.startswith('#'):
                continue
            try:
                k, v = line.split(':', 1)
            except ValueError:
                # No ':' on this line (e.g. a blank line): nothing to record.
                continue
            info[k] = v.strip()
        return info

    def check_replication_status(self, master, slave):
        """Verify that ``slave`` is replicating from ``master``.

        Logs every mismatch found and raises Exception if the roles or the
        replica's configured master do not match expectations.
        """
        master_info = RedisProcessor.parse_info(
            self.cmd(master, 'INFO REPLICATION')
        )
        slave_info = RedisProcessor.parse_info(
            self.cmd(slave, 'INFO REPLICATION')
        )
        is_ok = True
        # .get() is used throughout: an instance in the wrong role does not
        # report every field, and that must produce the diagnostic below
        # rather than a KeyError.
        if master_info.get('role') != 'master':
            log.error('%s:%d should be role:master, is %s',
                      master['host'],
                      master['port'],
                      master_info.get('role'))
            is_ok = False
        if slave_info.get('role') != 'slave':
            log.error('%s:%d should be role:slave, is %s',
                      slave['host'],
                      slave['port'],
                      slave_info.get('role'))
            is_ok = False
        if slave_info.get('master_host') != master['host'] or \
                slave_info.get('master_port') != str(master['port']):
            is_ok = False
            log.error("Master should be %s:%d, is %s:%s",
                      master['host'],
                      master['port'],
                      slave_info.get('master_host'),
                      slave_info.get('master_port'))
        if not is_ok:
            raise Exception("Replication is broken or incorrect")
        log.info("Replication is correct before the switchover")

    def stop_slave(self, data):
        """Stop replication on the instance described by data.

        Raises Exception if redis does not answer ``OK``. In dry-run mode the
        command is only logged by ``cmd`` and nothing is verified.
        """
        # SLAVEOF NO ONE is the command that actually detaches a replica;
        # it answers "OK" on success, which is what is checked below.
        res = self.cmd(data, 'SLAVEOF NO ONE', dry_run=self.dry_run)
        if self.dry_run:
            return
        if res.strip() != 'OK':
            log.error("Could not stop the replica on %s:%d: %s",
                      data['host'], data['port'], res)
            raise Exception("Could not stop the replication")
        log.info("Replication stopped successfully on %s:%d",
                 data['host'], data['port'])

    def start_slave(self, data, master):
        """Make the instance described by data a replica of ``master``.

        Raises Exception if redis does not answer ``OK``. In dry-run mode the
        command is only logged by ``cmd`` and nothing is verified.
        """
        res = self.cmd(data,
                       'SLAVEOF %s %d' % (master['host'], master['port']),
                       dry_run=self.dry_run)
        if self.dry_run:
            return
        if res.strip() != 'OK':
            log.error("Could not start the replica on %s:%d: %s",
                      data['host'], data['port'], res)
            raise Exception("Could not start the slave")
        log.info("Replication started successfully on %s:%d; master %s:%d",
                 data['host'], data['port'],
                 master['host'], master['port'])

    def check_master(self, data):
        """Verify that the instance described by data reports role:master.

        Skipped entirely in dry-run mode; raises Exception otherwise if the
        reported role is anything other than ``master``.
        """
        if self.dry_run:
            log.info("Skipping verification as it's a dry run")
            return
        master_info = RedisProcessor.parse_info(
            self.cmd(data, 'INFO REPLICATION')
        )
        if master_info.get('role') != 'master':
            log.error('%s:%d should be role:master, is %s',
                      data['host'],
                      data['port'],
                      master_info.get('role'))
            raise Exception("Not switched to master")
        log.info("%s:%d is now a master", data['host'], data['port'])
def main():
    """Command-line entry point: switch redis replication between datacenters.

    Reads the cluster description from ``cluster_file`` and, for every shard
    listed under ``dc_from``, verifies replication, promotes the matching
    ``dc_to`` instance to master and re-attaches the ``dc_from`` instance as
    its replica. A failure on one shard is logged and the loop continues
    with the next shard.
    """
    p = argparse.ArgumentParser(
        description="Tool to switch redis replica at the WMF")
    p.add_argument('--exec-host', default=None,
                   help="host from which to execute the commands")
    p.add_argument('cluster_file',
                   help="The file with the information on the cluster")
    p.add_argument('dc_from', help="The datacenter to switch FROM",
                   choices=('eqiad', 'codfw'))
    p.add_argument('dc_to', help="The datacenter to switch TO",
                   choices=('eqiad', 'codfw'))
    p.add_argument('--dry-run', action='store_true', default=False)
    args = p.parse_args()
    if args.dc_from == args.dc_to:
        # Switching a datacenter onto itself can only fail the checks below.
        p.error("dc_from and dc_to must be different datacenters")

    if args.exec_host is not None:
        # Must happen BEFORE instantiation: the constructor already fetches
        # the redis password over ssh from exec_host.
        RedisProcessor.exec_host = args.exec_host
    redis = RedisProcessor(args.dry_run)

    # Read the data file
    with open(args.cluster_file, 'r') as f:
        # safe_load instead of yaml.load: plain yaml.load can construct
        # arbitrary Python objects and requires an explicit Loader on recent
        # PyYAML. NOTE(review): assumes the cluster file holds plain
        # mappings and scalars only - confirm.
        shards = yaml.safe_load(f)

    for label, data in shards[args.dc_from].items():
        try:
            log.info("Acting on shard %s", label)
            data_to = shards[args.dc_to][label]
            log.info("Checking replication status before transition")
            redis.check_replication_status(data, data_to)
            log.info("Switching replica")
            redis.stop_slave(data_to)
            redis.check_master(data_to)
            redis.start_slave(data, data_to)
            if not args.dry_run:
                redis.check_replication_status(data_to, data)
        except Exception:
            # Best effort per shard, but do not trap KeyboardInterrupt or
            # SystemExit: Ctrl-C must abort the whole run.
            log.critical("Error occurred", exc_info=True)
# Only run the switchover when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment