@kspeeckaert
Created March 14, 2016 14:32
Read a SQL Server deadlock trace file (exported to XML format) in Python and convert it to Pandas DataFrames.
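# Rough shape of the input this script expects, sketched purely from the
# element and attribute names used below (a real .xdl export contains many
# more attributes and may wrap several <deadlock> events in a parent element):
#
#   <deadlock victim="...">
#     <process-list>
#       <process currentdb="..." ...>
#         <executionStack>
#           <frame procname="adhoc">SELECT ...</frame>
#         </executionStack>
#         <inputbuf>SELECT ...</inputbuf>
#       </process>
#     </process-list>
#   </deadlock>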
import timeit
from lxml import etree
from pandas import DataFrame
from uuid import uuid4

# List of process attributes to retrieve
def analyze_deadlock(elem, data_locks, data_procs):
    # Generate a UUID to identify all processes belonging to the same deadlock event
    dl_uuid = uuid4().hex
    databases = set()
    data_lock = {}
    data_lock['dl_uuid'] = dl_uuid
    data_lock['victim_id'] = elem.get('victim')
    for proc in elem.find('./process-list').iterchildren(tag='process'):
        data_proc = {}
        data_proc['dl_uuid'] = dl_uuid
        # Add all process XML attributes
        data_proc.update(proc.attrib)
        # Look for the SQL statement in the executionStack/frame elements.
        # Disregard SQL statements with placeholders (e.g. @1) and only
        # consider frames with adhoc as procname (not unknown or anything else)
        for frame in proc.find('./executionStack').iterchildren(tag='frame'):
            if (frame.get('procname') == 'adhoc') and ('@1' not in frame.text):
                data_proc['sql'] = frame.text.strip()
                break
        else:
            # If no SQL statement could be found, take the SQL statement
            # from the inputbuf element
            data_proc['sql'] = proc.find('./inputbuf').text.strip()
        data_procs.append(data_proc)
        databases.add(proc.get('currentdb'))
    data_lock['db_count'] = len(databases)
    data_locks.append(data_lock)

def fast_iter(context):
    data_locks = []
    data_procs = []
    for event, elem in context:
        analyze_deadlock(elem, data_locks, data_procs)
        # Free memory: clear the element once it has been processed and
        # delete any already-handled preceding siblings
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]
    del context
    return (data_locks, data_procs)

# Parse the XML file
infile = 'hugetrace.xdl'
start_time = timeit.default_timer()
context = etree.iterparse(infile, events=('end',), tag='deadlock')
data_locks, data_procs = fast_iter(context)
end_time = timeit.default_timer()
print('Analyzed in {} seconds'.format(end_time - start_time))
# Convert to DataFrames
df_procs = DataFrame(data_procs)
df_locks = DataFrame(data_locks)
df_procs.fillna('', inplace=True)
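
# Possible follow-up analysis (not part of the original gist): link each
# process row back to its deadlock event and flag the victim. The 'id'
# column is an assumption: SQL Server deadlock graphs typically give each
# <process> an id attribute that the deadlock's victim attribute refers to,
# so the check below only runs when that column is actually present.
df_all = df_procs.merge(df_locks, on='dl_uuid', how='left')
if 'id' in df_all.columns:
    df_all['is_victim'] = df_all['id'] == df_all['victim_id']
    # Number of victim processes seen per database
    print(df_all.groupby('currentdb')['is_victim'].sum())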