/redis_switch_datacenter.py Secret
Last active
April 21, 2016 12:07
Star
You must be signed in to star a gist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# Data files are here:
# memcached.yaml https://gist.github.com/lavagetto/f03d6c342dcdd9e718347b11937da9b7
# jobqueue.yaml https://gist.github.com/lavagetto/5a8be60410fdc9e988a80052535835b2
#!/usr/bin/python | |
import logging | |
import argparse | |
import os | |
import subprocess | |
import yaml | |
# Environment handed to every ssh subprocess: a private copy of the current
# process environment, with the ssh agent socket pinned.
env = dict(os.environ)
# Use your own agent here
env['SSH_AUTH_SOCK'] = '/run/user/1000/ssh-agent-prod.socket'

# Log everything (DEBUG and up) through a single stream handler.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.DEBUG,
    handlers=[logging.StreamHandler()],
)
log = logging.getLogger('switch-redis')
class RedisProcessor(object):
    """Drive a redis master/replica switchover by running redis-cli over ssh.

    Every command is executed as ``ssh <exec_host> redis-cli ...`` using the
    module-level ``env`` as the subprocess environment. Instances are
    described by dicts with a ``'host'`` (str) and a ``'port'`` (int) key.
    """

    # Host on which the ssh commands are run; may be overridden by callers.
    exec_host = 'rdb1001.eqiad.wmnet'

    def __init__(self, dry_run):
        """Fetch the redis password and record the dry-run setting.

        The password is fetched from ``exec_host`` as it is at construction
        time, so any override of ``exec_host`` must happen before this runs.

        :param dry_run: when true, state-changing commands are only logged.
        """
        self.passwd = self.get_pass()
        self.dry_run = dry_run

    def get_pass(self):
        """Obtain the redis password.

        Greps the ``requirepass`` directive on ``exec_host`` and returns the
        last whitespace-separated token of the output.

        :raises subprocess.CalledProcessError: if the remote command fails.
        :raises Exception: if the remote command printed nothing usable.
        """
        log.info("Fetching the redis password")
        # universal_newlines=True yields text (not bytes) on Python 3 and is
        # harmless on Python 2, so the string handling below works on both.
        output = subprocess.check_output(
            ['ssh', self.exec_host,
             'sudo grep requirepass /etc/redis/tcp_6379.conf'],
            env=env, universal_newlines=True)
        tokens = output.split()
        if not tokens:
            raise Exception("Could not find the redis password on %s"
                            % self.exec_host)
        return tokens[-1]

    def cmd(self, data, command, dry_run=False):
        """Executes a command on the remote redis server.

        :param data: dict with the ``'host'`` and ``'port'`` of the instance.
        :param command: the redis command line to pass to ``redis-cli``.
        :param dry_run: when true, only log the command and return ``None``.
        :returns: the textual output of ``redis-cli``, or ``None`` on dry run.
        :raises subprocess.CalledProcessError: if the remote command fails.
        """
        command = "redis-cli -h %s -p %d -a %s %s" % (
            data['host'], data['port'], self.passwd, command)
        if dry_run:
            log.info("Would have executed: %s", command)
            return None
        # Text mode so callers can compare against 'OK' and split on "\n"
        # regardless of the Python major version.
        return subprocess.check_output(['ssh', self.exec_host, command],
                                       env=env, universal_newlines=True)

    @staticmethod
    def parse_info(raw):
        """Do a very coarse-grained parsing of the INFO command output.

        Lines starting with ``#`` and lines without a ``:`` separator are
        skipped; everything else becomes a ``key -> stripped value`` entry.

        :param raw: the text returned by a redis ``INFO`` command.
        :returns: dict mapping field names to their (string) values.
        """
        info = {}
        for line in raw.split("\n"):
            if line.startswith('#'):
                continue
            try:
                k, v = line.split(':', 1)
            except ValueError:
                # Not a "key:value" line (blank line, stray text): ignore it.
                continue
            info[k] = v.strip()
        return info

    def check_replication_status(self, master, slave):
        """Verify that ``slave`` is replicating from ``master``.

        Always queries the servers (``INFO REPLICATION`` is read-only), even
        when the processor is in dry-run mode.

        :param master: dict describing the expected master instance.
        :param slave: dict describing the expected replica instance.
        :raises Exception: if any role or master address is not as expected.
        """
        master_info = RedisProcessor.parse_info(
            self.cmd(master, 'INFO REPLICATION')
        )
        slave_info = RedisProcessor.parse_info(
            self.cmd(slave, 'INFO REPLICATION')
        )
        # .get() throughout: an instance that is not a replica reports no
        # master_host/master_port, and that must produce the diagnostics
        # below rather than an unrelated KeyError.
        master_role = master_info.get('role')
        slave_role = slave_info.get('role')
        seen_host = slave_info.get('master_host')
        seen_port = slave_info.get('master_port')

        is_ok = True
        if master_role != 'master':
            log.error('%s:%d should be role:master, is %s',
                      master['host'],
                      master['port'],
                      master_role)
            is_ok = False
        if slave_role != 'slave':
            log.error('%s:%d should be role:slave, is %s',
                      slave['host'],
                      slave['port'],
                      slave_role)
            is_ok = False
        # INFO values are strings, hence the str() on the configured port.
        if seen_host != master['host'] or seen_port != str(master['port']):
            is_ok = False
            log.error("Master should be %s:%d, is %s:%s",
                      master['host'],
                      master['port'],
                      seen_host,
                      seen_port)
        if not is_ok:
            raise Exception("Replication is broken or incorrect")
        log.info("Replication is correct before the switchover")

    def stop_slave(self, data):
        """Stop replication on an instance, promoting it to master.

        :param data: dict describing the replica to detach.
        :raises Exception: if the server does not answer ``OK``.
        """
        # SLAVEOF NO ONE is the command that detaches a replica and answers
        # 'OK'; the previous 'INFO' could never satisfy the check below.
        res = self.cmd(data, 'SLAVEOF NO ONE', dry_run=self.dry_run)
        if not self.dry_run and res.strip() != 'OK':
            log.error("Could not stop the replica on %s:%d: %s",
                      data['host'], data['port'], res)
            raise Exception("Could not stop the replication")
        log.info("Replication stopped successfully on %s:%d",
                 data['host'], data['port'])

    def start_slave(self, data, master):
        """Make an instance a replica of ``master``.

        :param data: dict describing the instance that becomes a replica.
        :param master: dict describing the instance to replicate from.
        :raises Exception: if the server does not answer ``OK``.
        """
        res = self.cmd(data,
                       'SLAVEOF %s %d' % (master['host'], master['port']),
                       dry_run=self.dry_run)
        if not self.dry_run and res.strip() != 'OK':
            log.error("Could not start the replica on %s:%d: %s",
                      data['host'], data['port'], res)
            raise Exception("Could not start the slave")
        log.info("Replication started successfully on %s:%d; master %s:%d",
                 data['host'], data['port'],
                 master['host'], master['port'])

    def check_master(self, data):
        """Verify that an instance now reports itself as a master.

        Skipped entirely on dry runs, since nothing was actually switched.

        :param data: dict describing the instance to verify.
        :raises Exception: if the instance role is not ``master``.
        """
        if self.dry_run:
            log.info("Skipping verification as it's a dry run")
            return
        master_info = RedisProcessor.parse_info(
            self.cmd(data, 'INFO REPLICATION')
        )
        role = master_info.get('role')
        if role != 'master':
            log.error('%s:%d should be role:master, is %s',
                      data['host'],
                      data['port'],
                      role)
            raise Exception("Not switched to master")
        log.info("%s:%d is now a master", data['host'], data['port'])
def main():
    """Parse the command line and switch every shard from one DC to the other.

    For each shard listed under ``dc_from`` in the cluster file, the matching
    shard in ``dc_to`` is detached from its master, verified to be a master,
    and the old master is then made its replica. A failure on one shard is
    logged and the remaining shards are still processed.
    """
    p = argparse.ArgumentParser(
        description="Tool to switch redis replica at the WMF")
    p.add_argument('--exec-host', default=None,
                   help="host from which to execute the commands")
    p.add_argument('cluster_file',
                   help="The file with the information on the cluster")
    p.add_argument('dc_from', help="The datacenter to switch FROM",
                   choices=('eqiad', 'codfw'))
    p.add_argument('dc_to', help="The datacenter to switch TO",
                   choices=('eqiad', 'codfw'))
    p.add_argument('--dry-run', action='store_true', default=False)
    args = p.parse_args()

    if args.dc_from == args.dc_to:
        # Switching a datacenter onto itself can only fail shard by shard;
        # reject it up front with a clear message instead.
        p.error("dc_from and dc_to must be different datacenters")

    # The constructor already contacts exec_host to fetch the password, so
    # the override has to be in place before the instance is created.
    if args.exec_host is not None:
        RedisProcessor.exec_host = args.exec_host
    redis = RedisProcessor(args.dry_run)

    # Read the data file; safe_load avoids constructing arbitrary objects.
    with open(args.cluster_file, 'r') as f:
        shards = yaml.safe_load(f)

    for label, data in shards[args.dc_from].items():
        try:
            log.info("Acting on shard %s", label)
            data_to = shards[args.dc_to][label]
            log.info("Checking replication status before transition")
            redis.check_replication_status(data, data_to)
            log.info("Switching replica")
            redis.stop_slave(data_to)
            redis.check_master(data_to)
            redis.start_slave(data, data_to)
            if not args.dry_run:
                redis.check_replication_status(data_to, data)
        except Exception:
            # Best effort: report the failure and move on to the next shard.
            # KeyboardInterrupt/SystemExit are deliberately not caught so an
            # operator can still abort the run.
            log.critical("Error occurred", exc_info=True)
# Only run the switchover when executed as a script, never on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment