Skip to content

Instantly share code, notes, and snippets.

@TheJJ
Created October 7, 2018 23:50
Show Gist options
  • Save TheJJ/c6be62e612ac4782bd0aa279d8c82197 to your computer and use it in GitHub Desktop.
Save TheJJ/c6be62e612ac4782bd0aa279d8c82197 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Ceph OSD log-overlap-missing crash fixer.
For a OSD that crashes, this tool finds the other OSD that causes the crash
and deletes (and exports :) the PG from it.
Supported assertions:
* src/osd/PGLog.cc PGLog::merge_log assert(log.head >= olog.tail && olog.head >= log.tail)
usage: fix_log_breakage.py $osdthatcrashes $outputfolder
options:
--logfile $name -- use this logfile instead of the default name
--delete-pg -- use this to automatically delete the broken pg
(c) 2018 Jonas Jelten <jj@stusta.net>
Released under GPLv3 or later.
"""
import argparse
import json
import gzip
import subprocess
import os
import re
import time
from pathlib import Path
def reverse_readline(filename):
with open(filename) as fh:
yield from iterate_lines(fh)
# via https://stackoverflow.com/questions/2301789/read-a-file-in-reverse-order-using-python
def iterate_lines(fh, buf_size=8192):
"""a generator that returns the lines of a file in reverse order"""
segment = None
offset = 0
fh.seek(0, os.SEEK_END)
file_size = remaining_size = fh.tell()
while remaining_size > 0:
offset = min(file_size, offset + buf_size)
fh.seek(file_size - offset)
buffer = fh.read(min(remaining_size, buf_size))
remaining_size -= buf_size
lines = buffer.split('\n')
if segment is not None:
if buffer[-1] is not '\n':
lines[-1] += segment
else:
yield segment
segment = lines[0]
for index in range(len(lines) - 1, 0, -1):
if len(lines[index]):
yield lines[index]
if segment is not None:
yield segment
def osd_setstate(osdid, set_active):
# --wait says to block until start/stop is done.
action = "start" if set_active else "stop"
print("%sing osd %d..." % (action, osdid))
subprocess.run(["systemctl", action,
"ceph-osd@%s.service" % osdid])
def osd_checkstate(osdid):
proc = subprocess.run(["systemctl", "is-active",
"ceph-osd@%s.service" % osdid],
stdout=subprocess.PIPE)
output = proc.stdout.strip()
if output == b"active":
return True
else:
return False
def pg_dump():
result = subprocess.run(["ceph", "pg", "dump", "--format=json"],
stdout=subprocess.PIPE)
return json.loads(result.stdout)
def osd_dump():
result = subprocess.run(["ceph", "osd", "dump", "--format=json"],
stdout=subprocess.PIPE)
return json.loads(result.stdout)
def find_broken_osds():
brokens = list()
for osd_state in osd_dump()["osds"]:
if osd_state["up"] == 0:
brokens.append(osd_state["osd"])
return brokens
def log_version_int(logversion):
epoch, version = logversion.split("'")
return (int(epoch), int(version))
def find_broken_pg(osdid, logfile=None, max_line_limit=20000):
if not logfile:
logfile = "/var/log/ceph/ceph-osd.%s.log" % osdid
# 2018-10-07 00:44:48.411 7f75d8ab1700 -1 *** Caught signal (Aborted) **
find_thread_id = re.compile(r"[0-9-_:\.]+ [0-9:\.]+ ([a-f0-9]+) -1 \*\*\* Caught signal \(Aborted\) \*\*")
# get crash type
# /home/a006619a/cephsrc/ceph-13.2.2/src/osd/PGLog.cc: 345: FAILED assert(log.head >= olog.tail && olog.head >= log.tail)
find_failed_assertion = re.compile(r"([^:]+): [0-9]+: FAILED (assert\([^\)]+\))")
# groups: 1: filename, 2: assertname (=="assert(log.head >= olog.tail && olog.head >= log.tail)")
# confirm the threadid
# -19> 2018-10-07 00:44:48.351 7f75d8ab1700 -1 /home/a006619a/cephsrc/ceph-13.2.2/src/osd/PGLog.cc: In function 'void PGLog::merge_log(pg_info_t&, pg_log_t&, pg_shard_t, pg_info_t&, PGLog::LogEntryHandler*, bool&, bool&)' thread 7f09efc39700 time 2018-10-07 00:44:48.35111
find_crashing_function = re.compile(r"-?[0-9]+> [0-9-_:\.]+ [0-9:\.]+ ([a-f0-9]+) -1 ([^ ]+): In function '([^']+)' thread ([a-f0-9]+)")
# groups: 1: threadid, 2: filename, 3: functionname, 4: threadid
# then the problematic osd
# -670> 2018-10-07 00:44:48.343 7f75d8ab1700 10 merge_log log((3035'143528,3035'143528], crt=3035'143528) from osd.6 into log((3035'143577,3035'146408], crt=3035'146408)
logrange = "(\([0-9,']+\])"
logmatch = "%s, crt=([^ ]+)" % logrange
find_problem_osd = re.compile(r"-?[0-9]+> [0-9-_:\.]+ [0-9:\.]+ ([a-f0-9]+) [0-9]+ merge_log log\(%s\) from osd.([0-9]+) into log\(%s\)" % (logmatch, logmatch))
# groups: 1: threadid, 2: otherosdlogrange, 3: otherosdlogrollback, 4: problemosdid, 5: currentosdlogrange, 6: currentosdlogrollback
# then get the problem pg
# -671> 2018-10-07 00:44:48.343 7f75d8ab1700 10 osd.9 pg_epoch: 13880 pg[4.10( v 3035'146408 (3035'143577,3035'146408] local-lis/les=3015/3016 n=6923 ec=140/140 lis/c 3196/3015 les/c/f 3197/3016/0 13880/13880/138
find_problem_pg = re.compile(r"-?[0-9]+> [0-9-_:\.]+ [0-9:\.]+ ([a-f0-9]+) [0-9]+ osd.([0-9]+) pg_epoch: [0-9]+ pg\[([a-f0-9.]+)\( v ([0-9a-f']+) %s" % logrange)
# groups: 1: threadid, 2: currentosdid, 3: pgid, 4: currentosdrollback, 5: currentosdlogrange
skip_state_0 = False
state = 0
crashed_thread_id = None
problem_osd = None
problem_pg = None
current_osd_logrange = None
current_osd_logrollback = None
lines_since_last_match = 0
#import pdb
#pdb.set_trace()
if skip_state_0:
state = 1
prev_state = state
for idx, line in enumerate(reverse_readline(logfile)):
if idx > max_line_limit:
raise Exception("while in state %d, could not find new match in last %d lines" % (state, max_line_limit))
if prev_state != state:
prev_state = state
lines_since_last_match = 0
lines_since_last_match += 1
# find the crashed thread
if state == 0:
match = find_thread_id.search(line)
if match:
crashed_thread_id = match.group(1)
print("recorded crashing thread id: %s" % crashed_thread_id)
state = 1
# verify the failed assertion
elif state == 1:
match = find_failed_assertion.search(line)
if match:
if (match.group(1).endswith("src/osd/PGLog.cc") and
match.group(2) == "assert(log.head >= olog.tail && olog.head >= log.tail)"):
state = 2
else:
raise Exception("in state %d, unknown failed assertion found: %s" % (state, match.group(1)))
# confirm the threadid and crashing function
elif state == 2:
match = find_crashing_function.search(line)
if match:
if skip_state_0:
crashed_thread_id = match.group(1)
print("recorded crashing thread id: %s" % crashed_thread_id)
elif match.group(1) != crashed_thread_id:
print("warning: found other crash for different thread (%s)" % match.group(1))
continue
if (match.group(2).endswith("src/osd/PGLog.cc") and
match.group(3).startswith("void PGLog::merge_log")):
if lines_since_last_match > 1:
raise Exception("assertion report and crash function def are separated")
state = 3
else:
raise Exception("in state %d, matched unknown function: %s" % (state, match.groups()))
# find problematic osd id
elif state == 3:
match = find_problem_osd.search(line)
if match:
if match.group(1) != crashed_thread_id:
continue
otherosdlogrange = match.group(2)
otherosdlogrollback = match.group(3)
problemosdid = match.group(4)
currentosdlogrange = match.group(5)
currentosdlogrollback = match.group(6)
# verify the failed assertion:
s_olog_tail, s_olog_head = otherosdlogrange[1:-1].split(",")
s_log_tail, s_log_head = currentosdlogrange[1:-1].split(",")
olog_tail = log_version_int(s_olog_tail)
olog_head = log_version_int(s_olog_head)
log_tail = log_version_int(s_log_tail)
log_head = log_version_int(s_log_head)
print("log versions: osd.%s-olog: %s, osd.%d-log: %s" % (problemosdid, (olog_tail, olog_head), osdid, (log_tail, log_head)))
overlap = False
if log_head >= olog_tail:
if olog_head >= log_tail:
overlap = True
if overlap:
raise Exception("logs did overlap, but the assertion should say otherwise.")
elif olog_head > log_head:
raise Exception("the PG we would delete has a newer log version! we should now delete the PG from osd.%d instead." % (osdid))
state = 4
problem_osd = int(problemosdid)
current_osd_logrange = currentosdlogrange
current_osd_logrollback = currentosdlogrollback
# find the broken pg id
elif state == 4:
match = find_problem_pg.search(line)
if match:
if match.group(1) != crashed_thread_id:
continue
# verify that this is the log that fails:
if not (match.group(4) == current_osd_logrollback and
match.group(5) == current_osd_logrange):
raise Exception("in state %d, current osd log info does not match previous state findings: "
"(%s == %s and %s == %s) was not true" % (state, match.group(4), current_osd_logrollback, match.group(5), current_osd_logrange))
problem_pg = match.group(3)
break
if not (problem_pg and problem_osd):
raise Exception("log file %s didn't contain required information. state machine was in state %d. do you have '[osd] debug osd = 0/10' at least?" % (logfile, state))
return problem_pg, problem_osd
def main():
cmd = argparse.ArgumentParser()
cmd.add_argument("osd_that_crashes", type=int, help="id of the osd that crashes")
cmd.add_argument("output_path", help="where to put the backup pg files")
cmd.add_argument("--delete-pg", action='store_true')
cmd.add_argument("--logfile", help="override logfile to read from instead of default location")
args = cmd.parse_args()
if not Path(args.output_path).is_dir():
raise Exception("must be a directory: %s" % args.output_path)
broken_osds = find_broken_osds()
print("offline osds are: %s" % broken_osds)
print("will try to fix startup of %d" % args.osd_that_crashes)
pgid, osdid = find_broken_pg(args.osd_that_crashes, args.logfile)
print("found the problem with pg=%s stored on osd.%s." % (pgid, osdid))
if not args.delete_pg:
print(" run with --delete-pg to export and delete the pg.")
return
print("checking osd.%s service state..." % osdid)
broken_osd_running = osd_checkstate(osdid)
if broken_osd_running:
# stop the offending osd
osd_setstate(osdid, False)
print("deleting pg %s from osd.%d..." % (pgid, osdid))
delpgcmd = ["ceph-objectstore-tool", "--op", "export-remove",
"--data-path", "/var/lib/ceph/osd/ceph-%s" % osdid,
"--pgid", pgid,
"--file", "%s/bupdata-pg%s-osd%d" % (args.output_path, pgid, osdid)]
dellog = "%s/buplog-pg%s-osd%d" % (args.output_path, pgid, osdid)
with open(dellog, "wb") as logfile:
print("writing log of export to %s..." % dellog)
delresult = subprocess.run(delpgcmd, check=True, stdout=logfile, stderr=logfile)
if broken_osd_running:
# start the offending osd again
osd_setstate(osdid, True)
print("done!")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment