Created
April 9, 2015 20:12
-
-
Save tylerneylon/086003f632093e469b77 to your computer and use it in GitHub Desktop.
Tool to isolate bottleneck mutex locks in C-family code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- time_lock_inj.lua | |
-- | |
local usage_str = [[ | |
Usage: | |
lua time_lock_inj.lua <list_of_input_source_files> | |
Injects lock timing code for all the given files. | |
This assumes you're locking things with calls to | |
dbgcheck__named_lock. | |
In a C-based repo, I suggest using the glob syntax | |
*.{c,cpp,m} to capture all source files in a given dir, | |
using only the suffixes that exist; so, drop the m if | |
you have no objective-C files, for example. | |
]] | |
------------------------------------------------------------------------------- | |
-- Internal globals. | |
------------------------------------------------------------------------------- | |
local timelock_h_lines = {[[ | |
// timelock.h | |
// | |
// This file has been autogenerated by time_lock_inj.lua. | |
// | |
void add_interval_to_timer(double interval, int timer_index); | |
]] | |
} | |
local timelock_h_suffix = [[ | |
]] | |
local timelock_c_lines = {[[ | |
// timelock.c | |
// | |
// This file has been autogenerated by time_lock_inj.lua. | |
// | |
#include "timelock.h" | |
#include "oswrap/oswrap.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
// Inserted lines below will vaguely look like this: | |
// #define num_timers 34 | |
// double timelock_timers[num_timers] = {0, 0, 0, ..., 0}; | |
// const char *lock_names[num_timers] = {"filename.c:123", "other_file.c:234"}; | |
// | |
]] | |
} | |
timelock_c_suffix = [[ | |
// When the first timer reach this many seconds, we print out the current | |
// lock timing stats. | |
#define timer_trigger_sec 0.05 | |
typedef struct { | |
double perc; | |
double time; | |
int index; | |
const char * lock_name; | |
} TimerInfo; | |
static double start_time = 0; | |
// Compare two TimerInfos given via pointers. | |
// For use as a qsort callback. | |
static int cmp(const void *vp1, const void *vp2) { | |
// We want to return an int with the opposite sign as diff. | |
// (Opposite sign gives us a largest-to-smallest sorting.) | |
TimerInfo *ti1 = (TimerInfo *)vp1; | |
TimerInfo *ti2 = (TimerInfo *)vp2; | |
double diff = ti1->perc - ti2->perc; | |
if (diff < 0) return 1; | |
if (diff > 0) return -1; | |
return 0; | |
} | |
static void print_stats() { | |
// Get the total time. | |
double total = 0; | |
for (int i = 0; i < num_timers; ++i) { | |
total += timelock_timers[i]; | |
} | |
// Print out the percent of total time used. | |
double total_wall_time = now() - start_time; | |
dbg__printf("\n %7s %9s %9s\n", "lock%", "lock time", "wall time"); | |
dbg__printf("%7.4f%% %9.4fs %9.4fs\n", (total / total_wall_time) * 100, | |
total, total_wall_time); | |
// Compute percentages and sort. | |
TimerInfo info[num_timers]; | |
for (int i = 0; i < num_timers; ++i) { | |
info[i].perc = timelock_timers[i] / total; | |
info[i].time = timelock_timers[i]; | |
info[i].index = i; | |
info[i].lock_name = lock_names[i]; | |
} | |
qsort(info, num_timers, sizeof(TimerInfo), cmp); | |
// Print out the stats. | |
dbg__printf("\n%5s %5s %5s location\n", "index", "perc", "time"); | |
for (int i = 0; i < num_timers; ++i) { | |
TimerInfo *ti = info + i; | |
dbg__printf("%5d %5.2f%% %5.3fs %s\n", | |
ti->index, ti->perc * 100, ti->time, ti->lock_name); | |
} | |
} | |
static int done_timing = 0; | |
void add_interval_to_timer(double interval, int timer_index) { | |
if (done_timing) return; | |
if (start_time == 0) start_time = now(); | |
timelock_timers[timer_index] += interval; | |
if (timelock_timers[timer_index] >= timer_trigger_sec) { | |
// This use of done_timing isn't 100% thread-proof, but it avoids needing to | |
// check any lock behavior within this function, which is important because | |
// we want to minimize interference with the true timing data. | |
// In addition, in practice, the resulting inaccuracies are likely to | |
// be small or zero so that the data is still quite useful. | |
done_timing = 1; | |
print_stats(); | |
} | |
} | |
]] | |
local first_line_suffix = ' // This file was modified by time_lock_inj.lua.' | |
local num_timers = 0 | |
local lock_names = {} | |
------------------------------------------------------------------------------- | |
-- Internal functions. | |
------------------------------------------------------------------------------- | |
local function modify_file(filename) | |
assert(filename) | |
local did_modify = false | |
-- Read in the file as an array of lines. | |
local lines = {} | |
local header_injected = false | |
local line_num = 1 | |
for line in io.lines(filename) do | |
if line_num == 1 then | |
line = line .. first_line_suffix | |
end | |
if line:match('^//') == nil and not header_injected then | |
lines[#lines + 1] = '#include "timelock.h"' | |
header_injected = true | |
end | |
-- The optional slash at the end can help handle some cases where a | |
-- lock is being used within a multiline macro definition. | |
ind, lock, slash = line:match('^(%s*)dbgcheck__named_lock(%b());%s*(\\?)') | |
if ind and lock then | |
num_timers = num_timers + 1 | |
lock_names[#lock_names + 1] = filename .. ':' .. line_num | |
-- Surround the lock call by timing lines. | |
tmp_name = 'tmp_' .. line_num | |
local fmt = '%sdouble %s = now();%s' | |
lines[#lines + 1] = string.format(fmt, ind, tmp_name, slash); | |
lines[#lines + 1] = line | |
local fmt = '%sadd_interval_to_timer(now() - %s, %d);%s' | |
lines[#lines + 1] = string.format(fmt, ind, tmp_name, num_timers, slash); | |
did_modify = true | |
else | |
lines[#lines + 1] = line | |
end | |
line_num = line_num + 1 | |
end | |
if not did_modify then return end | |
-- Write out the modified version. | |
print(string.format('Modifying %s', filename)) | |
f = io.open(filename, 'w') | |
for _, line in ipairs(lines) do | |
f:write(line, '\n') -- TODO Ok for windows? | |
end | |
f:close() | |
end | |
local function write_out_timelock_hc() | |
-- Add timer definition to timelock.c. | |
table.insert(timelock_c_lines, '#define num_timers ' .. num_timers) | |
local timer_defn = 'double timelock_timers[num_timers] = {' | |
if num_timers > 0 then | |
timer_defn = timer_defn .. '0' .. string.rep(', 0', num_timers - 1) | |
end | |
timer_defn = timer_defn .. '};' | |
table.insert(timelock_c_lines, timer_defn) | |
names_defn = 'const char *lock_names[num_timers] = {' | |
for i, name in ipairs(lock_names) do | |
if i > 1 then names_defn = names_defn .. ', ' end | |
names_defn = names_defn .. '"' .. name .. '"' | |
end | |
names_defn = names_defn .. '};' | |
table.insert(timelock_c_lines, names_defn) | |
-- Write out the files. | |
local filenames = { 'timelock.h', 'timelock.c' } | |
local line_sets = { timelock_h_lines, timelock_c_lines } | |
local suffixes = { timelock_h_suffix, timelock_c_suffix } | |
for i = 1, 2 do | |
f = io.open(filenames[i], 'w') | |
for _, line in ipairs(line_sets[i]) do | |
f:write(line, '\n') | |
end | |
f:write(suffixes[i]) | |
f:close() | |
end | |
end | |
------------------------------------------------------------------------------- | |
-- Main. | |
------------------------------------------------------------------------------- | |
if #arg == 0 then | |
io.write(usage_str) | |
os.exit(0) | |
end | |
for _, filename in ipairs(arg) do | |
modify_file(filename) | |
end | |
write_out_timelock_hc() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It can be tricky to measure which mutex locks may be a bottleneck in your code. Your code is typically not running when a mutex is waiting, so user time is not accumulating, making lag due to these look similar to slow system calls, which you can't do anything about. It's also difficult to measure since you're working with multithreaded code, and need to measure the times involved in a thread-safe way, but without adding any measurable overhead from new locks. This script gives you a way to isolate mutex lock speed bottlenecks in spite of those challenges.
Use this code like so from the directory of the code you want to measure:
Then build and run your code, also building and linking in the generated
time_lock.c
file. When you run your app, a print-out like the following will be sent to eitherstdout
on mac/linux or to the debug console in Visual Studio:The first line shows the total percentage of time used waiting for any mutex lock. If this percentage is low, then you have no bottlenecks in your use of mutex locks - good job! Otherwise, look at the top line of the index/perc/time table to see which mutex lock has used up the most time so far.
This script modifies your source files, so after you're done measuring things you can run:
To drop the modifications.
This script makes two depedency assumptions that you can avoid if needed:
dbgcheck__named_lock
macro indbgcheck
to lock mutexes. You can really use any lock command you want; modify theind, lock, slash = line:match(...)
line to match your preferred locking mechanism.oswrap
, which provides the usefuldbg__printf
andnow
functions. You can replace these with anything you like that provides bothprintf
-like funcitonality and a way to measure time passing at high resolution.To be extra clear, if you want to avoid those dependencies, you have to modify the script, but in theory it's not a difficult modification.