Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Read SQL server deadlock tracefile (exported to XML format) in Python and convert it to Pandas
import timeit
from lxml import etree
from pandas import DataFrame
from uuid import uuid4
# List of process attributes to retrieve
def analyze_deadlock(elem, data_locks, data_procs):
# Generate a UUID to identify all processes belonging to the same deadlock event
dl_uuid = uuid4().hex
databases = set()
data_lock = {}
data_lock['dl_uuid'] = dl_uuid
data_lock['victim_id'] = elem.get('victim')
for proc in elem.find('./process-list').iterchildren(tag='process'):
data_proc = {}
data_proc['dl_uuid'] = dl_uuid
# Add all process XML attributes
# Look for the SQL statement in the executionStack/frame element
# Disregard SQL statements with placeholders (e.g. @1)
# Also, only consider frames with adhoc as procname (not unknown or anything else)
for frame in proc.find('./executionStack').iterchildren(tag='frame'):
if (frame.get('procname') == 'adhoc') and ('@1' not in frame.text):
data_proc['sql'] = frame.text.strip()
# If no SQL statement could be found, take the SQL statement from the
# inputbuf element
data_proc['sql'] = proc.find('./inputbuf').text.strip()
data_lock['db_count'] = len(databases)
def fast_iter(context):
data_locks = []
data_procs = []
for event, elem in context:
analyze_deadlock(elem, data_locks, data_procs)
while elem.getprevious() is not None:
del elem.getparent()[0]
del context
return (data_locks, data_procs)
# Parse the XML file
infile = 'hugetrace.xdl'
start_time = timeit.default_timer()
context = etree.iterparse(infile, events=('end',), tag=('deadlock'))
data_locks, data_procs = fast_iter(context)
end_time = timeit.default_timer()
print('Analyzed in {} seconds'.format(end_time - start_time))
# Convert to DataFrames
df_procs = DataFrame(data_procs)
df_locks = DataFrame(data_locks)
df_procs.fillna('', inplace=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment