Skip to content

Instantly share code, notes, and snippets.

@tjake
Last active January 2, 2016 01:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tjake/8230602 to your computer and use it in GitHub Desktop.
Save tjake/8230602 to your computer and use it in GitHub Desktop.
Script to slowly repair the primary vnode ranges for a node. If it fails, you can restart from the last successful range. It waits between ranges to avoid a buildup of compactions.
#!/usr/bin/env python
import os, sys, re, subprocess, time
def usage():
    '''Print the command-line usage and terminate with exit status 1.'''
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Usage:")
    print(" repair hostname keyspace [offset]\n")
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and is absent under `python -S` or in embedded interpreters.
    sys.exit(1)
def get_hostid(host):
    '''Grab the hostid guid from nodetool info.

    host: value passed to ``nodetool --host``.

    Returns the first token matched after an ``ID :`` label in the output.
    Raises RuntimeError("Missing hostid") when no such line is printed.
    '''
    p = subprocess.Popen(
        ["nodetool", "--host", host, "info"],
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        # Text mode so the str regex below also works on Python 3.
        universal_newlines=True,
    )
    # communicate() drains both pipes and reaps the child; iterating stdout
    # alone can block forever once the child fills the unread stderr pipe.
    out, _ = p.communicate()
    for line in out.splitlines():
        match = re.search(r"id\s+:\s+([a-f0-9\-]+)", line, re.I)
        if match:
            return match.group(1)
    raise RuntimeError("Missing hostid")
def get_ip(hostid, nodetool_host=None):
    '''Grab the ip matching the hostid in nodetool status.

    hostid: host ID string to look for in the status output.
    nodetool_host: value passed to ``nodetool --host``. When omitted, the
        module-level ``host`` variable is used, which is what this function
        always relied on before the parameter existed.

    Returns the first dotted-quad address on the first line that contains
    *hostid*. Raises RuntimeError("Missing ip for hostid") when no line
    carries both the host ID and an address.
    '''
    # `host` is only evaluated when no explicit host was supplied.
    target = host if nodetool_host is None else nodetool_host
    p = subprocess.Popen(
        ["nodetool", "--host", target, "status"],
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        # Text mode so str matching also works on Python 3.
        universal_newlines=True,
    )
    # Drain both pipes and reap the child instead of reading stdout only.
    out, _ = p.communicate()
    for line in out.splitlines():
        if hostid not in line:
            continue
        # Every dot is escaped; the previous pattern left two of them as
        # "any character" wildcards.
        match = re.search(r"(\d+\.\d+\.\d+\.\d+)", line)
        if match:
            return match.group(1)
    raise RuntimeError("Missing ip for hostid")
'''Find the primary token ranges for specified ip'''
#!/usr/bin/python -u
from datetime import datetime
import os, sys, re, subprocess, time
def usage():
    '''Print the command-line usage and terminate with exit status 1.'''
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Usage:")
    print(" repair hostname keyspace [offset]\n")
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and is absent under `python -S` or in embedded interpreters.
    sys.exit(1)
def get_hostid(host):
    '''Grab the hostid guid from nodetool info.

    host: value passed to ``nodetool --host``.

    Returns the first token matched after an ``ID :`` label in the output.
    Raises RuntimeError("Missing hostid") when no such line is printed.
    '''
    p = subprocess.Popen(
        ["nodetool", "--host", host, "info"],
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        # Text mode so the str regex below also works on Python 3.
        universal_newlines=True,
    )
    # communicate() drains both pipes and reaps the child; iterating stdout
    # alone can block forever once the child fills the unread stderr pipe.
    out, _ = p.communicate()
    for line in out.splitlines():
        match = re.search(r"id\s+:\s+([a-f0-9\-]+)", line, re.I)
        if match:
            return match.group(1)
    raise RuntimeError("Missing hostid")
def get_ip(hostid, nodetool_host=None):
    '''Grab the ip matching the hostid in nodetool status.

    hostid: host ID string to look for in the status output.
    nodetool_host: value passed to ``nodetool --host``. When omitted, the
        module-level ``host`` variable is used, which is what this function
        always relied on before the parameter existed.

    Returns the first dotted-quad address on the first line that contains
    *hostid*. Raises RuntimeError("Missing ip for hostid") when no line
    carries both the host ID and an address.
    '''
    # `host` is only evaluated when no explicit host was supplied.
    target = host if nodetool_host is None else nodetool_host
    p = subprocess.Popen(
        ["nodetool", "--host", target, "status"],
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        # Text mode so str matching also works on Python 3.
        universal_newlines=True,
    )
    # Drain both pipes and reap the child instead of reading stdout only.
    out, _ = p.communicate()
    for line in out.splitlines():
        if hostid not in line:
            continue
        # Every dot is escaped; the previous pattern left two of them as
        # "any character" wildcards.
        match = re.search(r"(\d+\.\d+\.\d+\.\d+)", line)
        if match:
            return match.group(1)
    raise RuntimeError("Missing ip for hostid")
def get_tokens(ip, keyspace, nodetool_host=None):
    '''Find the primary token ranges for specified ip.

    ip: address that must be the first entry of a range's ``endpoints:[...]``.
    keyspace: keyspace passed to ``nodetool describering``.
    nodetool_host: value passed to ``nodetool --host``. When omitted, the
        module-level ``host`` variable is used, which is what this function
        always relied on before the parameter existed.

    Returns a list of ``[start_token, end_token]`` string pairs in output
    order. Raises RuntimeError when no range lists *ip* first.
    '''
    # `host` is only evaluated when no explicit host was supplied.
    target = host if nodetool_host is None else nodetool_host
    p = subprocess.Popen(
        ["nodetool", "--host", target, "describering", keyspace],
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        # Text mode so str matching also works on Python 3.
        universal_newlines=True,
    )
    # Drain both pipes and reap the child; stderr is kept for the error text.
    out, err = p.communicate()

    # Require a delimiter after the address so 10.0.0.1 does not also match
    # 10.0.0.10, and a word boundary so "rpc_endpoints:[" is not mistaken
    # for "endpoints:[".
    first_endpoint = re.compile(r"\bendpoints:\[" + re.escape(ip) + r"[,\]]")
    bounds = re.compile(r"start_token:([\-\d]+),\s+end_token:([\-\d]+)")

    tokens = []
    for line in out.splitlines():
        if not first_endpoint.search(line):
            continue
        found = bounds.search(line)
        if found:
            tokens.append([found.group(1), found.group(2)])

    if not tokens:
        # The old message appended stdout, which had already been read to
        # EOF and was therefore always empty; report stderr instead.
        detail = err.strip()
        message = "No primary tokens for ip " + ip
        if detail:
            message += ": " + detail
        raise RuntimeError(message)
    return tokens
def get_pending_compactions(host):
    '''Gets the pending compactions for a node.

    host: value passed to ``nodetool --host``.

    Returns the digits following ``pending tasks:`` as a string, taken from
    the first output line that starts with that label. Raises
    RuntimeError("Missing pending tasks") when no such line is printed; the
    function previously fell through and returned None in that case.
    '''
    # compactionstats is invoked without a keyspace: the command reports
    # node-wide stats, and the extra argument used to be read from a
    # module-level `keyspace` global that is not a parameter of this function.
    p = subprocess.Popen(
        ["nodetool", "--host", host, "compactionstats"],
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        # Text mode so the str regex below also works on Python 3.
        universal_newlines=True,
    )
    # Drain both pipes and reap the child instead of reading stdout only.
    out, _ = p.communicate()
    for line in out.splitlines():
        match = re.search(r"^pending tasks:\s*(\d+)", line)
        if match:
            return match.group(1)
    raise RuntimeError("Missing pending tasks")
def repair(host, keyspace, offset):
    '''Kicks off repair in primary vnode ranges.

    host: value passed to ``nodetool --host``.
    keyspace: keyspace used to look up the token ranges.
    offset: index of the first range to repair; earlier ranges are skipped,
        so a failed run can resume from the last index that was printed.

    Ranges are repaired one at a time. After each one the function polls
    get_pending_compactions(host) every 10 seconds until the count is 10 or
    less. Raises RuntimeError when a ``nodetool repair`` run exits non-zero.
    '''
    ranges = get_tokens(get_ip(get_hostid(host)), keyspace)
    # enumerate(..., offset) keeps the printed index equal to the position in
    # the full range list, which is the value to pass back in as `offset`.
    for index, (start, end) in enumerate(ranges[offset:], offset):
        # NOTE(review): `keyspace` is not passed to `nodetool repair`, so the
        # command is not restricted to it - confirm whether that is intended.
        p = subprocess.Popen(
            ["nodetool", "--host", host, "repair", "-pr",
             "-st", start, "-et", end, "-local"],
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE,
            # Text mode so stderr can be concatenated into the message below
            # on Python 3 as well.
            universal_newlines=True,
        )
        print("repairing range " + str(index) + " " + str(datetime.now()))
        # communicate() instead of wait(): with stdout/stderr piped, wait()
        # deadlocks once the child writes more than the pipe buffer holds.
        _, err = p.communicate()
        if p.returncode != 0:
            raise RuntimeError("Error encountered: " + err.strip())

        # Let the compaction backlog fall before starting the next range.
        while True:
            pending = get_pending_compactions(host)
            if int(pending) <= 10:
                break
            print("waiting for compactions to drop: " + str(pending))
            time.sleep(10)
# Script entry point: repair <hostname> <keyspace> [offset]
if __name__ == "__main__":
    if len(sys.argv) < 3:
        usage()

    # These stay plain module-level names (not locals of a main() function)
    # because helpers in this file may resolve `host`/`keyspace` as globals.
    host = sys.argv[1]
    keyspace = sys.argv[2]
    offset = 0
    if len(sys.argv) == 4:
        try:
            offset = int(sys.argv[3])
        except ValueError:
            # A non-numeric offset is a usage error, not a traceback.
            usage()

    repair(host, keyspace, offset)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment