Skip to content

Instantly share code, notes, and snippets.

@jpastuszek
Last active November 6, 2019 13:13
Show Gist options
  • Save jpastuszek/10a6fbcd20fe022ff983393ab6a950d7 to your computer and use it in GitHub Desktop.
Save jpastuszek/10a6fbcd20fe022ff983393ab6a950d7 to your computer and use it in GitHub Desktop.
Groups GDB stack traces
#!/usr/bin/env denim
/* Cargo.toml
[package]
name = "tracer"
version = "0.1.0"
authors = ["Anonymous"]
edition = "2018"
[dependencies]
cotton = "0.0.5"
structopt = "0.3.2"
simhash = "*"
petgraph = "*"
*/
use cotton::prelude::*;
use simhash::simhash_stream;
use simhash::hamming_distance;
use petgraph::graphmap::UnGraphMap;
use petgraph::algo::tarjan_scc;
/// Groups GDB stack traces
#[derive(Debug, StructOpt)]
struct Cli {
    // NOTE: structopt turns `///` doc comments on this struct and its fields
    // into `--help` text, so maintainer-only notes here use plain `//`.

    // Logging options flattened into this CLI; passed to `init_logger` in `main`.
    #[structopt(flatten)]
    logging: LoggingOpt,

    // Dry-run options flattened into this CLI.
    // NOTE(review): `main` never reads this field -- confirm whether it is still needed.
    #[structopt(flatten)]
    dry_run: DryRunOpt,

    // `main` links two threads only when their distance is strictly below this value.
    /// Hamming Simhash distance to consider similar
    #[structopt(short, long, default_value = "5")]
    group_threshold: u32,

    // When set, groups with more members than this are not printed.
    /// Skip groups with lots of threads
    #[structopt(short, long)]
    skip_long_groups: Option<usize>,

    // Print one representative trace per group instead of every member.
    /// Show only single thread from the group
    #[structopt(short = "S", long)]
    single: bool,
}
/// One thread section of the input, borrowed from the input text.
///
/// Field order matters: the derived `Ord`/`PartialOrd` compare `name` first
/// and then `trace`.
#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct Thread<'s> {
    /// Text of the section before the first `":\n"` separator.
    name: &'s str,
    /// Remainder of the section after that separator.
    trace: &'s str,
}
fn main() -> FinalResult {
let args = Cli::from_args();
init_logger(&args.logging, vec![module_path!()]);
let data = read_stdin();
let threads = data
.split("\n\n")
.skip(1) // skip header
.map(|t| {
let mut parts = t.splitn(2, ":\n");
Thread {
name: parts.next().unwrap(),
trace: parts.next().unwrap(),
}
})
.collect::<Vec<_>>();
let hashes = threads.iter()
.map(|t| {
let call_sites = t.trace.split("\n").map(|t| t.splitn(2, " at ").skip(1).next().unwrap_or("??"));
(simhash_stream(call_sites), t)
}).collect::<Vec<_>>();
let dist_pairs = hashes.iter()
.combinations(2).map(|v| v.into_iter().collect_tuple().unwrap())
.map(|(a, b)| (hamming_distance(a.0, b.0), a, b)).collect::<Vec<_>>();
let mut graph = UnGraphMap::default();
for (dist, a, b) in &dist_pairs {
if *dist < args.group_threshold {
graph.add_edge(a, b, *dist);
}
}
let mut groups = tarjan_scc(&graph);
groups.sort_by_key(|group| group.len());
let mut threads_ungroupped = threads.clone();
for group in groups {
for (_, thread) in &group {
threads_ungroupped.retain(|t| t.name != thread.name)
}
if let Some(skip_long_groups) = args.skip_long_groups {
if group.len() > skip_long_groups {
info!("Skipping {} long group", group.len());
continue
}
}
if args.single {
println!("\nGroup of {} threads similar to:\n", group.len());
} else {
println!("\nGroup of {} threads:\n", group.len());
}
for (_, thread) in group {
if args.single {
println!("{}\n", thread.trace);
break
} else {
println!("{}\n{}\n", thread.name, thread.trace);
}
}
}
println!("\nRemaining {} threads not part of any group:\n", threads_ungroupped.len());
for thread in threads_ungroupped {
println!("{}\n{}\n", thread.name, thread.trace);
}
Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment