Skip to content

Instantly share code, notes, and snippets.

@JodiTheTigger
Created January 14, 2014 01:52
Show Gist options
  • Save JodiTheTigger/8411686 to your computer and use it in GitHub Desktop.
Save JodiTheTigger/8411686 to your computer and use it in GitHub Desktop.
gdbBacktraceToJson.py parses the output of the command "thread apply all bt full" and turns it into a json array. Useful for automating the analysis of coredump files generated when an application crashes. Use the tokenised json to search a database of crashes for similar crashes or make a nice web interface for viewing back traces. You could ma…
#!/usr/bin/python2
#
# gdbBacktraceToJson.py. Parses gdb backtraces into json.
# Copyright (C) 2014 Richard Maxwell <jodi.the.tigger@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
#
# Description:
# gdbBacktraceToJson.py parses the output of the command "thread apply all bt full" and turns it into a json array.
# Useful for automating the analysis of coredump files generated when an application crashes. Use the tokenised json
# to search a database of crashes for similar crashes or make a nice web interface for viewing back traces. You could
# make a backtrace diff tool. It's much easier to use and write tools using a standard data format.
# Usage:
# python2 gdbBacktraceToJson.py <backtrace file>
# It will parse the file and output the backtrace as a json array to std out.
# you can get the backtrace file from a core dump file by running gdb in the following way:
# gdb [app with debug symbols] [core file] --eval-command "thread apply all bt full" --eval-command "quit" > mybacktrace.txt
import sys
import os
import re
import json
import string
import datetime
def parseLocals(lines):
    """Parse the local-variable lines of one 'bt full' frame into a dict.

    Every line of the form '<name> = <value>' becomes one entry. A value
    whose opening brace is only closed on a later line is parsed recursively
    into a nested dict. A value whose braces balance on its own line, e.g.
    'buf = {1, 2, 3}', is stored as plain text.

    Args:
        lines: list of strings, one per output line, without line endings.

    Returns:
        dict mapping each variable name to its value text, or to a nested
        dict for a multi-line aggregate. Lines that are not assignments
        (e.g. 'No locals.') are ignored. A multi-line brace block whose
        opening line has no '<name> =' is skipped together with its body.
        If an opened brace is never closed, an error is written to stderr
        and the entries parsed so far are returned.
    """
    # Lazy name group: split on the FIRST ' = ' so that values which
    # themselves contain ' = ' (strings, one-line structs) stay intact.
    assignment = re.compile(r'\s*(.*?)\s=\s(.*)')
    result = {}
    total = len(lines)
    index = 0
    while index < total:
        line = lines[index]
        found = assignment.match(line)
        # Net brace depth opened by this line. Counting both kinds of brace
        # keeps one-line aggregates from being mistaken for block openers.
        # NOTE: braces inside quoted strings are counted as well.
        depth = line.count('{') - line.count('}')
        if depth <= 0:
            if found:
                result[found.group(1)] = found.group(2).strip().strip(',')
            index += 1
            continue

        # Multi-line aggregate: look for the line that brings depth back to 0.
        closing = index + 1
        while closing < total:
            depth += lines[closing].count('{') - lines[closing].count('}')
            if depth < 1:
                break
            closing += 1
        if closing >= total:
            # stderr, so the message cannot end up inside the JSON on stdout.
            sys.stderr.write("*ERROR* Coreline: parseLocals: Can't find closing brace.\n")
            return result

        # deal with nested braces using recursion.
        if found:
            result[found.group(1)] = parseLocals(lines[index + 1:closing])
        index = closing + 1
    return result
def coreLinesToObject(coreLine):
    """Turn the text of one stack frame into a dict.

    Args:
        coreLine: the frame's '#N ...' header line followed by any further
            lines belonging to the frame, joined with '\\n'.

    Returns:
        dict with the keys 'frame', 'address' (None when the header has no
        address), 'function', 'source' (None when not found) and
        'arguments' (dict of name -> value text). When more than one line
        follows the header split, 'locals' holds the result of parseLocals.
        If the header does not match the expected format, an error is
        written to stderr and an empty dict is returned.
    """
    coreObject = {}

    # line format is:
    # #frame [0x12345678] in (<function>) [from|at] [library|file]
    # (?:....) means don't capture that group (?:)
    # NOTE: the argument-list group is greedy and may span several lines.
    matchResult = re.match(
        r'\#(\d+)\s+(?:(0x(?:[0-9A-F])*) in |)(\S+) (\((?:.|\n|\r)*\))(?: (?:at|from) (.*)|$)',
        coreLine, re.I | re.M)
    if not matchResult:
        # stderr, so the message cannot end up inside the JSON on stdout.
        sys.stderr.write("*ERROR* Coreline mismatch:  %s\n" % coreLine)
        return coreObject

    # matches are:
    # 1: frame
    # 2: address or no match
    # 3: function name
    # 4: argument list (including braces)
    # 5: source / library
    coreObject['frame'] = matchResult.group(1)
    coreObject['address'] = matchResult.group(2)
    coreObject['function'] = matchResult.group(3)
    coreObject['source'] = matchResult.group(5)

    # arguments can have the @ symbol in them 'this@entry=0x12345678'
    coreObject['arguments'] = dict(
        re.findall(r'([\w@]+)=(\w+|<optimized out>)', matchResult.group(4), re.I | re.M))

    # 'bt full' output: an optional wrapped 'at|from <source>' line, then
    # the frame's local variables.
    extraLines = coreLine.split('\n')[1:]
    if len(extraLines) > 1:
        firstLocal = 0
        if coreObject['source'] is None:
            # (.*\S) keeps the whole source text and only trims trailing
            # whitespace; the previous pattern cut off the last character.
            sourceMatch = re.match(r'\s+(?:at|from) (.*\S)', extraLines[0], re.I)
            if sourceMatch:
                coreObject['source'] = sourceMatch.group(1)
                # Only this consumed line is skipped; otherwise the first
                # extra line is already a local and must be parsed.
                firstLocal = 1
        coreObject['locals'] = parseLocals(extraLines[firstLocal:])
    return coreObject
def textToList(filePath, fileText):
    """Convert the lines of a gdb backtrace dump into one dict.

    Args:
        filePath: path of the backtrace file; stored in the result and used
            to derive 'fileName' (base name without extension).
        fileText: iterable of lines, normally with their line endings (as
            returned by readlines()).

    Returns:
        dict with 'filePath', 'fileName', 'jsonCreationTimeUtc' and
        'threads'. 'commandLine' and 'coreReason' are added when the
        matching 'Core was generated by' / 'Program terminated with' lines
        are present. Each thread is a dict with 'stackTrace' (list of
        coreLinesToObject results), 'threadId' and, when the thread header
        could be parsed, 'threadNumber'.
    """
    core = {
        'filePath': filePath,
        'fileName': os.path.splitext(os.path.basename(filePath))[0],
        'threads': [],
        'jsonCreationTimeUtc': datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
    }
    stackTrace = None   # frame list of the thread currently being read
    pendingFrame = ""   # text of the frame collected so far

    for line in fileText:
        # Search for core dump global meta
        # (command line and termination reason)
        # Core was generated by `.....'.
        # Program terminated with ...
        # ---------------------------------------
        if 'commandLine' not in core and line.startswith("Core was generated by"):
            quoted = re.match(r"Core was generated by `(.*)'\.", line)
            if quoted:
                core['commandLine'] = quoted.group(1)
            else:
                # Unexpected quoting: fall back to the fixed offsets.
                core['commandLine'] = line[23:-3]
        if 'coreReason' not in core and line.startswith("Program terminated with"):
            core['coreReason'] = line.rstrip('\r\n')

        # Parse core dumps per thread.
        # ---------------------------------------
        if line.startswith("Thread"):
            # A new thread starts: emit the last frame of the previous one.
            if pendingFrame:
                stackTrace.append(coreLinesToObject(pendingFrame))
                pendingFrame = ""

            threadId = line.rstrip('\r\n')
            if threadId.endswith(':'):
                threadId = threadId[:-1]

            thread = {'stackTrace': []}
            stackTrace = thread['stackTrace']
            core['threads'].append(thread)

            # Accepts 'Thread N (LWP n)' and the newer
            # 'Thread N (Thread 0x... (LWP n))' header.
            threadResult = re.match(
                r'Thread\s+(\d+)\s+\((?:Thread\s+\S+\s+\()?LWP\s+(\d+)\)',
                threadId, re.I | re.M)
            if threadResult:
                thread['threadId'] = threadResult.group(2)
                thread['threadNumber'] = threadResult.group(1)
            else:
                # stderr, so the message cannot end up inside the JSON.
                sys.stderr.write("*ERROR* ThreadId mismatch:  %s\n" % threadId)
                thread['threadId'] = threadId
        elif stackTrace is not None and line:
            if line[0] == '#':
                # A new frame header closes the frame collected so far.
                if pendingFrame:
                    stackTrace.append(coreLinesToObject(pendingFrame))
                pendingFrame = line
            elif pendingFrame:
                pendingFrame += line

    # End of input: the last frame has no following header to trigger it.
    if pendingFrame:
        stackTrace.append(coreLinesToObject(pendingFrame))
    return core
# the filename is the name of the textual output of gdb's "thread apply all bt"
def process(argList):
    """Read a gdb backtrace file and print it as JSON on stdout.

    Args:
        argList: argument vector in sys.argv layout; argList[1] is the path
            of the text file holding the gdb backtrace output.

    Raises:
        SystemExit: with a usage message when no file path was given.
    """
    if len(argList) < 2:
        sys.exit("Usage: python2 gdbBacktraceToJson.py <backtrace file>")
    fileName = argList[1]

    # 'with' closes the file even when reading fails.
    with open(fileName, 'r') as coreDump:
        lines = coreDump.readlines()

    coreDumpObject = textToList(fileName, lines)

    # right, dump the json
    print(json.dumps(coreDumpObject, sort_keys=True, indent=4))
# Script entry point: decode the first passed filename.
# process() takes the backtrace file path from sys.argv[1] and prints the JSON.
# NOTE(review): this call is not guarded by `if __name__ == "__main__":`, so it
# also runs when the file is imported -- confirm that is intended.
process(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment