Skip to content

Instantly share code, notes, and snippets.

@flaviut
Created October 5, 2023 15:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save flaviut/8efa404088f658ec509baba808c7d4e1 to your computer and use it in GitHub Desktop.
Save flaviut/8efa404088f658ec509baba808c7d4e1 to your computer and use it in GitHub Desktop.
Parses an strace output file to determine how long each file spends open, and makes a treemap. This may (depending on your program) be equivalent to how long it takes to read each file.
#!/usr/bin/env python
# use with `strace --absolute-timestamps=format:unix,precision:us -o strace.log yourprogram`.
import re
import csv
import os
from collections import defaultdict
import plotly.express as px
def extract_data_from_log(log_lines):
# Regular expressions to match the desired lines
open_pattern = re.compile(r'(\d+\.\d+) openat\(AT_FDCWD, "(.+)", .+\) = (\d+)')
close_pattern = re.compile(r'(\d+\.\d+) close\((\d+)\)')
open_timestamps = {}
results = []
for line in log_lines:
# Check for openat line
open_match = open_pattern.match(line)
if open_match:
timestamp, filename, fd = open_match.groups()
open_timestamps[fd] = (timestamp, filename)
continue
# Check for close line
close_match = close_pattern.match(line)
if close_match:
close_timestamp, fd = close_match.groups()
if fd in open_timestamps:
open_timestamp, filename = open_timestamps[fd]
time_taken_ms = (float(close_timestamp) - float(open_timestamp)) * 1000
results.append((time_taken_ms, filename))
del open_timestamps[fd]
return results
# Parse the log written by the strace invocation documented at the top of
# this script; the file object is itself an iterable of lines.
with open('./strace.log') as log_file:
    extracted_data = extract_data_from_log(log_file)
def write_to_csv(data, filename):
    """Write ``(time_ms, path)`` rows to *filename* with a header row."""
    with open(filename, 'w', newline='') as out:
        writer = csv.writer(out)
        writer.writerow(["timeTakenMs", "filename"])
        writer.writerows(data)
# Persist the raw per-file durations for later inspection.
write_to_csv(extracted_data, './processed.csv')
# Normalise paths, then charge each file's open-duration to every ancestor
# path prefix so per-directory totals can be charted.
normalized_data = [(ms, os.path.normpath(p)) for ms, p in extracted_data]

aggregated_times = defaultdict(float)
for ms, p in normalized_data:
    prefix = ''
    for part in p.split('/'):
        prefix = os.path.join(prefix, part) if prefix else part
        # NOTE(review): for absolute paths the first component is '', so the
        # leading '/' is dropped and the '' key accumulates the grand total,
        # acting as the tree root — confirm this matches the intended layout.
        aggregated_times[prefix] += ms
# Flatten the aggregate into the parallel name/parent/value lists that the
# plotly treemap API expects: each node's parent is its dirname, and the
# root node (whose dirname equals itself, i.e. '') gets an empty parent.
labels = []
parents = []
values = []
for node, total in aggregated_times.items():
    parent_dir = os.path.dirname(node)
    labels.append(node)
    parents.append("" if parent_dir == node else parent_dir)
    values.append(total)
# Render the treemap of time-spent-open per directory subtree.
fig = px.treemap(
    names=labels,
    parents=parents,
    values=values,
    # Each node's aggregated value already includes all of its descendants,
    # so the values are subtree totals.  Plotly's default
    # branchvalues="remainder" would add a parent's value on top of its
    # children, double-counting every directory's time as you move up the
    # tree — "total" renders the intended proportions.
    branchvalues="total",
)
fig.update_traces(root_color="lightgrey")
fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
fig.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment