Created
October 7, 2018 23:50
-
-
Save TheJJ/c6be62e612ac4782bd0aa279d8c82197 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Ceph OSD log-overlap-missing crash fixer. | |
For a OSD that crashes, this tool finds the other OSD that causes the crash | |
and deletes (and exports :) the PG from it. | |
Supported assertions: | |
* src/osd/PGLog.cc PGLog::merge_log assert(log.head >= olog.tail && olog.head >= log.tail) | |
usage: fix_log_breakage.py $osdthatcrashes $outputfolder | |
options: | |
--logfile $name -- use this logfile instead of the default name | |
--delete-pg -- use this to automatically delete the broken pg | |
(c) 2018 Jonas Jelten <jj@stusta.net> | |
Released under GPLv3 or later. | |
""" | |
import argparse | |
import json | |
import gzip | |
import subprocess | |
import os | |
import re | |
import time | |
from pathlib import Path | |
def reverse_readline(filename): | |
with open(filename) as fh: | |
yield from iterate_lines(fh) | |
# via https://stackoverflow.com/questions/2301789/read-a-file-in-reverse-order-using-python | |
def iterate_lines(fh, buf_size=8192): | |
"""a generator that returns the lines of a file in reverse order""" | |
segment = None | |
offset = 0 | |
fh.seek(0, os.SEEK_END) | |
file_size = remaining_size = fh.tell() | |
while remaining_size > 0: | |
offset = min(file_size, offset + buf_size) | |
fh.seek(file_size - offset) | |
buffer = fh.read(min(remaining_size, buf_size)) | |
remaining_size -= buf_size | |
lines = buffer.split('\n') | |
if segment is not None: | |
if buffer[-1] is not '\n': | |
lines[-1] += segment | |
else: | |
yield segment | |
segment = lines[0] | |
for index in range(len(lines) - 1, 0, -1): | |
if len(lines[index]): | |
yield lines[index] | |
if segment is not None: | |
yield segment | |
def osd_setstate(osdid, set_active): | |
# --wait says to block until start/stop is done. | |
action = "start" if set_active else "stop" | |
print("%sing osd %d..." % (action, osdid)) | |
subprocess.run(["systemctl", action, | |
"ceph-osd@%s.service" % osdid]) | |
def osd_checkstate(osdid): | |
proc = subprocess.run(["systemctl", "is-active", | |
"ceph-osd@%s.service" % osdid], | |
stdout=subprocess.PIPE) | |
output = proc.stdout.strip() | |
if output == b"active": | |
return True | |
else: | |
return False | |
def pg_dump(): | |
result = subprocess.run(["ceph", "pg", "dump", "--format=json"], | |
stdout=subprocess.PIPE) | |
return json.loads(result.stdout) | |
def osd_dump(): | |
result = subprocess.run(["ceph", "osd", "dump", "--format=json"], | |
stdout=subprocess.PIPE) | |
return json.loads(result.stdout) | |
def find_broken_osds(): | |
brokens = list() | |
for osd_state in osd_dump()["osds"]: | |
if osd_state["up"] == 0: | |
brokens.append(osd_state["osd"]) | |
return brokens | |
def log_version_int(logversion): | |
epoch, version = logversion.split("'") | |
return (int(epoch), int(version)) | |
def find_broken_pg(osdid, logfile=None, max_line_limit=20000): | |
if not logfile: | |
logfile = "/var/log/ceph/ceph-osd.%s.log" % osdid | |
# 2018-10-07 00:44:48.411 7f75d8ab1700 -1 *** Caught signal (Aborted) ** | |
find_thread_id = re.compile(r"[0-9-_:\.]+ [0-9:\.]+ ([a-f0-9]+) -1 \*\*\* Caught signal \(Aborted\) \*\*") | |
# get crash type | |
# /home/a006619a/cephsrc/ceph-13.2.2/src/osd/PGLog.cc: 345: FAILED assert(log.head >= olog.tail && olog.head >= log.tail) | |
find_failed_assertion = re.compile(r"([^:]+): [0-9]+: FAILED (assert\([^\)]+\))") | |
# groups: 1: filename, 2: assertname (=="assert(log.head >= olog.tail && olog.head >= log.tail)") | |
# confirm the threadid | |
# -19> 2018-10-07 00:44:48.351 7f75d8ab1700 -1 /home/a006619a/cephsrc/ceph-13.2.2/src/osd/PGLog.cc: In function 'void PGLog::merge_log(pg_info_t&, pg_log_t&, pg_shard_t, pg_info_t&, PGLog::LogEntryHandler*, bool&, bool&)' thread 7f09efc39700 time 2018-10-07 00:44:48.35111 | |
find_crashing_function = re.compile(r"-?[0-9]+> [0-9-_:\.]+ [0-9:\.]+ ([a-f0-9]+) -1 ([^ ]+): In function '([^']+)' thread ([a-f0-9]+)") | |
# groups: 1: threadid, 2: filename, 3: functionname, 4: threadid | |
# then the problematic osd | |
# -670> 2018-10-07 00:44:48.343 7f75d8ab1700 10 merge_log log((3035'143528,3035'143528], crt=3035'143528) from osd.6 into log((3035'143577,3035'146408], crt=3035'146408) | |
logrange = "(\([0-9,']+\])" | |
logmatch = "%s, crt=([^ ]+)" % logrange | |
find_problem_osd = re.compile(r"-?[0-9]+> [0-9-_:\.]+ [0-9:\.]+ ([a-f0-9]+) [0-9]+ merge_log log\(%s\) from osd.([0-9]+) into log\(%s\)" % (logmatch, logmatch)) | |
# groups: 1: threadid, 2: otherosdlogrange, 3: otherosdlogrollback, 4: problemosdid, 5: currentosdlogrange, 6: currentosdlogrollback | |
# then get the problem pg | |
# -671> 2018-10-07 00:44:48.343 7f75d8ab1700 10 osd.9 pg_epoch: 13880 pg[4.10( v 3035'146408 (3035'143577,3035'146408] local-lis/les=3015/3016 n=6923 ec=140/140 lis/c 3196/3015 les/c/f 3197/3016/0 13880/13880/138 | |
find_problem_pg = re.compile(r"-?[0-9]+> [0-9-_:\.]+ [0-9:\.]+ ([a-f0-9]+) [0-9]+ osd.([0-9]+) pg_epoch: [0-9]+ pg\[([a-f0-9.]+)\( v ([0-9a-f']+) %s" % logrange) | |
# groups: 1: threadid, 2: currentosdid, 3: pgid, 4: currentosdrollback, 5: currentosdlogrange | |
skip_state_0 = False | |
state = 0 | |
crashed_thread_id = None | |
problem_osd = None | |
problem_pg = None | |
current_osd_logrange = None | |
current_osd_logrollback = None | |
lines_since_last_match = 0 | |
#import pdb | |
#pdb.set_trace() | |
if skip_state_0: | |
state = 1 | |
prev_state = state | |
for idx, line in enumerate(reverse_readline(logfile)): | |
if idx > max_line_limit: | |
raise Exception("while in state %d, could not find new match in last %d lines" % (state, max_line_limit)) | |
if prev_state != state: | |
prev_state = state | |
lines_since_last_match = 0 | |
lines_since_last_match += 1 | |
# find the crashed thread | |
if state == 0: | |
match = find_thread_id.search(line) | |
if match: | |
crashed_thread_id = match.group(1) | |
print("recorded crashing thread id: %s" % crashed_thread_id) | |
state = 1 | |
# verify the failed assertion | |
elif state == 1: | |
match = find_failed_assertion.search(line) | |
if match: | |
if (match.group(1).endswith("src/osd/PGLog.cc") and | |
match.group(2) == "assert(log.head >= olog.tail && olog.head >= log.tail)"): | |
state = 2 | |
else: | |
raise Exception("in state %d, unknown failed assertion found: %s" % (state, match.group(1))) | |
# confirm the threadid and crashing function | |
elif state == 2: | |
match = find_crashing_function.search(line) | |
if match: | |
if skip_state_0: | |
crashed_thread_id = match.group(1) | |
print("recorded crashing thread id: %s" % crashed_thread_id) | |
elif match.group(1) != crashed_thread_id: | |
print("warning: found other crash for different thread (%s)" % match.group(1)) | |
continue | |
if (match.group(2).endswith("src/osd/PGLog.cc") and | |
match.group(3).startswith("void PGLog::merge_log")): | |
if lines_since_last_match > 1: | |
raise Exception("assertion report and crash function def are separated") | |
state = 3 | |
else: | |
raise Exception("in state %d, matched unknown function: %s" % (state, match.groups())) | |
# find problematic osd id | |
elif state == 3: | |
match = find_problem_osd.search(line) | |
if match: | |
if match.group(1) != crashed_thread_id: | |
continue | |
otherosdlogrange = match.group(2) | |
otherosdlogrollback = match.group(3) | |
problemosdid = match.group(4) | |
currentosdlogrange = match.group(5) | |
currentosdlogrollback = match.group(6) | |
# verify the failed assertion: | |
s_olog_tail, s_olog_head = otherosdlogrange[1:-1].split(",") | |
s_log_tail, s_log_head = currentosdlogrange[1:-1].split(",") | |
olog_tail = log_version_int(s_olog_tail) | |
olog_head = log_version_int(s_olog_head) | |
log_tail = log_version_int(s_log_tail) | |
log_head = log_version_int(s_log_head) | |
print("log versions: osd.%s-olog: %s, osd.%d-log: %s" % (problemosdid, (olog_tail, olog_head), osdid, (log_tail, log_head))) | |
overlap = False | |
if log_head >= olog_tail: | |
if olog_head >= log_tail: | |
overlap = True | |
if overlap: | |
raise Exception("logs did overlap, but the assertion should say otherwise.") | |
elif olog_head > log_head: | |
raise Exception("the PG we would delete has a newer log version! we should now delete the PG from osd.%d instead." % (osdid)) | |
state = 4 | |
problem_osd = int(problemosdid) | |
current_osd_logrange = currentosdlogrange | |
current_osd_logrollback = currentosdlogrollback | |
# find the broken pg id | |
elif state == 4: | |
match = find_problem_pg.search(line) | |
if match: | |
if match.group(1) != crashed_thread_id: | |
continue | |
# verify that this is the log that fails: | |
if not (match.group(4) == current_osd_logrollback and | |
match.group(5) == current_osd_logrange): | |
raise Exception("in state %d, current osd log info does not match previous state findings: " | |
"(%s == %s and %s == %s) was not true" % (state, match.group(4), current_osd_logrollback, match.group(5), current_osd_logrange)) | |
problem_pg = match.group(3) | |
break | |
if not (problem_pg and problem_osd): | |
raise Exception("log file %s didn't contain required information. state machine was in state %d. do you have '[osd] debug osd = 0/10' at least?" % (logfile, state)) | |
return problem_pg, problem_osd | |
def main(): | |
cmd = argparse.ArgumentParser() | |
cmd.add_argument("osd_that_crashes", type=int, help="id of the osd that crashes") | |
cmd.add_argument("output_path", help="where to put the backup pg files") | |
cmd.add_argument("--delete-pg", action='store_true') | |
cmd.add_argument("--logfile", help="override logfile to read from instead of default location") | |
args = cmd.parse_args() | |
if not Path(args.output_path).is_dir(): | |
raise Exception("must be a directory: %s" % args.output_path) | |
broken_osds = find_broken_osds() | |
print("offline osds are: %s" % broken_osds) | |
print("will try to fix startup of %d" % args.osd_that_crashes) | |
pgid, osdid = find_broken_pg(args.osd_that_crashes, args.logfile) | |
print("found the problem with pg=%s stored on osd.%s." % (pgid, osdid)) | |
if not args.delete_pg: | |
print(" run with --delete-pg to export and delete the pg.") | |
return | |
print("checking osd.%s service state..." % osdid) | |
broken_osd_running = osd_checkstate(osdid) | |
if broken_osd_running: | |
# stop the offending osd | |
osd_setstate(osdid, False) | |
print("deleting pg %s from osd.%d..." % (pgid, osdid)) | |
delpgcmd = ["ceph-objectstore-tool", "--op", "export-remove", | |
"--data-path", "/var/lib/ceph/osd/ceph-%s" % osdid, | |
"--pgid", pgid, | |
"--file", "%s/bupdata-pg%s-osd%d" % (args.output_path, pgid, osdid)] | |
dellog = "%s/buplog-pg%s-osd%d" % (args.output_path, pgid, osdid) | |
with open(dellog, "wb") as logfile: | |
print("writing log of export to %s..." % dellog) | |
delresult = subprocess.run(delpgcmd, check=True, stdout=logfile, stderr=logfile) | |
if broken_osd_running: | |
# start the offending osd again | |
osd_setstate(osdid, True) | |
print("done!") | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment