// Gist tingley/7014806, created by @tingley on October 16, 2013.
// Scala script to parse and analyze large numbers of jstack dumps in order to create an extremely basic profiler. This is currently set up to be used with GlobalSight, but it could be adapted for other uses as well.
import java.io.File
import scala.collection.mutable
// A tool to digest and visualize jstack traces, written poorly in scala.
// Usage
// scala jstack.scala -t [regex] [files]
// where
// [regex] is a regular expression applied to thread names in the jstack
// dumps. Matching threads will be processed. The -t option
// can be specified multiple times.
// [files] is one or more files or directories to process. Files should be
// jstack dumps; directories are expected to contain only jstack
// dumps.
// Output is written to stdout. It will be very wide, so capturing via
// redirection is recommended.
//
// The stack data will be merged into a call tree that tracks how often
// each execution point is seen in a stack. The options below
// (not exposed via the UI) allow you to trim the tree in both directions
// to only capture calls bracketed by a certain codebase (in this case,
// com.globalsight). The entire tree is dumped as output. Following that,
// leaf nodes with more than a configurable percentage of the observed
// data will be identified as "hot spots".
//
// This could go a lot further. In particular:
// - Find out how better to merge stacks at the bottom as well
// - Figure out how to identify hot nodes in the middle of the tree
// - I probably need a better trimming mechanism.
// - Expose options via command line
// - etc
// Tuning knobs. Edit them here: the argument parser in this script only
// understands -t and file names, so none of these can be set from the
// command line.
// When true, parse() passes each collected stack through trimFrames().
val OPTION_TRIM_FROM_BOTTOM = true // Trim from the bottom?
// When true, parse() passes each stack through cleanThread() and discards
// threads that are left with no frames.
val OPTION_CLEAN_FROM_TOP = true // Clean from the top?
// When true, parse() prints the lines it does not recognize.
val WARN_UNRECOGNIZED = false // for debugging
// dumpHotLeaves() multiplies the count it is given by this fraction to get the
// minimum hit count a leaf needs in order to be reported.
val HOTSPOT_THRESHOLD = 0.02 // threshold for hot spots (% / 100)
// Method-name prefix that trimFrames() and cleanThread() use to decide where
// the interesting part of a stack begins and ends.
val PACKAGE_PREFIX = "com.globalsight" // Code we care about
/** One thread taken from a jstack dump.
  *
  * @param n the thread name as written in the dump's thread header
  */
class StackThread(n : String) {
  /** The thread name supplied at construction. */
  val name : String = n

  /** The frames recorded for this thread; null until a caller assigns them. */
  var stackFrames : List[StackFrame] = null

  override def toString() = s"Thread '$name'"
}
/** A single frame of a stack trace.
  *
  * @param method   the text captured before the opening parenthesis of a frame line
  * @param location the text captured between the parentheses of a frame line
  */
case class StackFrame(method : String, location : String) {
  /** Renders as "method:suffix", where suffix is whatever follows the first
    * ':' in `location` (e.g. "42" for "Foo.java:42"), or the whole location
    * when it contains no colon.
    */
  override def toString() = {
    val suffix = location.indexOf(":") match {
      case -1    => location
      case colon => location.substring(colon + 1)
    }
    s"$method:$suffix"
  }
}
/** Parser states used by parse() while walking through one dump file. */
object State extends Enumeration {
  type State = Value

  /** Nothing read yet; the first line moves the parser to SeenDate. */
  val Initial = Value
  /** First line consumed; the next line moves the parser to Body. */
  val SeenDate = Value
  /** Between threads; the next non-blank line is taken as a thread header. */
  val Body = Value
  /** Thread header read; the following line is consumed without inspection. */
  val SeenHeader = Value
  /** Collecting stack lines until a blank line ends the stack. */
  val InThread = Value
  /** Stack finished; waiting for the "Locked ownable synchronizers:" line. */
  val SeenThread = Value
  /** Inside the synchronizers section; a blank line returns to Body. */
  val Synchronizers = Value
}
import State._
// Thread header regexes
// Matches a thread header line. Groups: (1) the quoted thread name, (2) the
// tid value, (3) the remainder of the line after nid.
// NOTE(review): the pattern requires "prio=" directly after the name (or the
// optional "daemon " flag); headers that carry extra fields before it would
// not match. Confirm against the dumps you feed in.
val ThreadHeaderRegex = """"(.+)" (?:daemon )?prio=\d+ tid=(0x[0-9a-f]+) nid=0x[0-9a-f]+ (.*)""".r
// Matches the "java.lang.Thread.State: X" line, capturing X. Not referenced by
// the rest of this script as shown.
val ThreadStateRegex = """\s*java.lang.Thread.State: (\w+)""".r
// Thread body regexes
// A stack frame line. Groups: (1) the method (everything before '('),
// (2) the text between the parentheses.
val StackFrameRegex = """\s*at ([^(]+)\((.+)\)""".r
// Lock/monitor annotations that can appear between frames; parse() recognizes
// lines matching these and ignores them.
val WaitingRegex = """\s+- waiting on (.*)""".r
val LockedRegex = """\s+- locked (.*)""".r
val ParkingRegex = """\s+- parking (.*)""".r
// Every thread kept by parse(). parse() prepends, so the most recently parsed
// thread comes first.
var threads : List[StackThread] = List()
/** Reads each jstack dump in `files` and prepends every thread that survives
  * filtering to the script-level `threads` list.
  *
  * Each file is walked line by line with a small state machine (see `State`):
  * the first two lines are consumed unconditionally, after which the parser
  * expects repeated groups of
  *   - a thread header line,
  *   - one further line (consumed without inspection),
  *   - stack lines terminated by a blank line,
  *   - a "Locked ownable synchronizers:" section terminated by a blank line.
  * A thread is only recorded once that final blank line has been seen.
  *
  * The underlying file handle is always closed, including when parsing fails.
  *
  * @param files the dump files to read
  * @throws MatchError if a non-blank line found where a thread header is
  *                    expected does not match ThreadHeaderRegex
  */
def parse(files : Seq[File]) : Unit = {
  // Lines inside a stack that describe locks/monitors rather than frames.
  val annotationRegexes = List(WaitingRegex, LockedRegex, ParkingRegex)

  def warnUnrecognized(line : String) : Unit =
    if (WARN_UNRECOGNIZED) println("* Unrecognized: [" + line + "]")

  // Stores the (optionally trimmed/cleaned) frames on the thread and decides
  // whether the thread is kept. None means "pretend this thread never happened".
  def finishStack(thread : StackThread,
                  collected : List[StackFrame]) : Option[StackThread] = {
    thread.stackFrames =
      if (OPTION_TRIM_FROM_BOTTOM) trimFrames(collected) else collected
    if (!OPTION_CLEAN_FROM_TOP) Some(thread)
    else {
      thread.stackFrames = cleanThread(thread.stackFrames)
      thread.stackFrames.lastOption match {
        // Nothing left after cleaning.
        case None => None
        // Special case: strip automatic import threads that are sleeping.
        case Some(last)
            if last.method.contains("AutomaticImportMonitor.sleepUntilNextPoll") => None
        case Some(_) => Some(thread)
      }
    }
  }

  for (file <- files) {
    var currentThread : Option[StackThread] = None
    var state : State = Initial
    // Frames are prepended as they are read, so this list is in reverse file order.
    var frames : List[StackFrame] = List()
    val source = scala.io.Source.fromFile(file)
    try {
      for (line <- source.getLines()) {
        state match {
          case Initial =>
            state = _state(SeenDate, line)

          case SeenDate =>
            state = _state(Body, line)
            frames = List()

          case Body =>
            if (line.trim != "") {
              // Deliberately strict: anything that is not a header here aborts
              // the run with a MatchError.
              val ThreadHeaderRegex(name, _, _) = line
              state = _state(SeenHeader, line)
              require(currentThread.isEmpty)
              // TODO: filter by name so we don't collect useless data
              currentThread = Some(new StackThread(name))
            }

          case SeenHeader =>
            // The line after the header is skipped (thread state is not parsed).
            state = _state(InThread, line)

          case InThread =>
            if (line.trim == "") {
              state = _state(SeenThread, line)
              currentThread = currentThread.flatMap(finishStack(_, frames))
              frames = List()
            }
            else if (StackFrameRegex.findFirstIn(line) != None) {
              // findFirstIn matches anywhere in the line, whereas makeStackFrame
              // needs the whole line to match and returns null otherwise. Never
              // store that null: it would blow up later in trimFrames/cleanThread.
              val frame = makeStackFrame(line)
              if (frame != null) frames ::= frame
              else warnUnrecognized(line)
            }
            else if (!annotationRegexes.exists(_.findFirstIn(line).isDefined)) {
              warnUnrecognized(line)
            }

          case SeenThread =>
            val trimmed = line.trim
            if (trimmed == "Locked ownable synchronizers:") {
              state = _state(Synchronizers, line)
            }
            else if (trimmed != "") {
              warnUnrecognized(line)
            }

          case Synchronizers =>
            // TODO: handle cases where this is not trivial
            if (line.trim == "") {
              state = _state(Body, line)
              currentThread.foreach(t => threads ::= t)
              currentThread = None
            }
        }
      }
    } finally {
      // Release the file handle even if a line failed to parse.
      source.close()
    }
  }
}
/** Drops frames from the front of `frames` until the first one whose method
  * starts with PACKAGE_PREFIX. That frame and everything after it are kept.
  *
  * @param frames the frames to clean
  * @return the remaining frames; empty when no frame has the prefix
  */
def cleanThread(frames : List[StackFrame]) : List[StackFrame] =
  frames.dropWhile(frame => !frame.method.startsWith(PACKAGE_PREFIX))
/** Drops frames from the end of `frames` until the last one whose method
  * starts with PACKAGE_PREFIX. That frame and everything before it are kept,
  * in their original order.
  *
  * @param frames the frames to trim
  * @return the remaining frames; empty when no frame has the prefix
  */
def trimFrames(frames : List[StackFrame]) : List[StackFrame] = {
  // Walk from the far end, discarding until the first frame we care about.
  val keptBackwards =
    frames.reverse.dropWhile(frame => !frame.method.startsWith(PACKAGE_PREFIX))
  keptBackwards.reverse
}
/** Hook through which parse() routes every state transition; returns `s`
  * unchanged. To trace transitions while debugging, print `s` and `l` here.
  *
  * @param s the state being entered
  * @param l the input line that caused the transition (unused)
  * @return `s`
  */
def _state(s : State, l : String) : State = s
/** Builds a StackFrame from a line that matches StackFrameRegex in full.
  *
  * @param line a line of a stack trace
  * @return the frame, or null when the whole line does not match the pattern
  */
def makeStackFrame(line : String) : StackFrame = line match {
  case StackFrameRegex(method, location) => StackFrame(method, location)
  case _ => null
}
/** A node of the merged call tree.
  *
  * @param f the frame this node stands for (analyzeThreads passes null for the root)
  */
class StackTreeNode(f : StackFrame) {
  /** The frame supplied at construction. */
  val stackFrame = f

  /** Number of stacks seen passing through this node; a new node starts at 1. */
  var count : Int = 1

  /** Child nodes, keyed by their frame. */
  var children = mutable.Map.empty[StackFrame, StackTreeNode]

  override def toString() = s"[$stackFrame, $count]"
}
/** Merges the stacks of all selected threads into one call tree.
  *
  * A thread is selected when its whole name matches at least one of the
  * regular expressions in `threadNames`. Each selected thread is merged
  * exactly once, even if several patterns match it, so node counts and the
  * returned thread count are not inflated by overlapping patterns.
  *
  * @param threads     the parsed threads; `stackFrames` must be set on each
  * @param threadNames Java regular expressions applied to thread names;
  *                    an empty set selects nothing
  * @return the root of the tree (a node with a null frame) and the number of
  *         threads that were merged into it
  * @throws java.util.regex.PatternSyntaxException if a pattern is not a valid regex
  */
def analyzeThreads(threads : Iterable[StackThread],
                   threadNames : Set[String]) : (StackTreeNode, Int) = {
  // Compile each pattern once instead of once per thread.
  val patterns = threadNames.toList.map(_.r.pattern)
  def selected(t : StackThread) : Boolean =
    patterns.exists(_.matcher(t.name).matches())

  val root = new StackTreeNode(null)
  var threadCount = 0
  for (t <- threads if selected(t)) {
    threadCount += 1
    var node = root
    for (f <- t.stackFrames) {
      node = node.children.get(f) match {
        case Some(existing) =>
          existing.count += 1
          existing
        case None =>
          // A freshly created node already carries a count of 1.
          val created = new StackTreeNode(f)
          node.children += (f -> created)
          created
      }
    }
  }
  (root, threadCount)
}
/** Prints a thread followed by each of its frames on a tab-indented line.
  *
  * @param t the thread to print; its `stackFrames` must be set
  */
def dumpThread(t : StackThread) : Unit = {
  println(t)
  t.stackFrames.foreach(frame => println("\t" + frame))
}
/** Prints `n` and, recursively, its whole subtree, one node per line, with
  * children indented further than their parent.
  *
  * @param n      the node to print
  * @param indent the prefix written before this node's line
  */
def dump(n : StackTreeNode, indent : String) : Unit = {
  println(indent + " " + n.stackFrame + " [" + n.count + "]")
  val childIndent = indent + " "
  n.children.values.foreach(child => dump(child, childIndent))
}
/** Prints every childless node under `n` (including `n` itself if it has no
  * children) whose count reaches the hot-spot cut-off.
  *
  * The cut-off is `leafCount * HOTSPOT_THRESHOLD`, truncated to an Int.
  *
  * @param n         the root of the tree to search
  * @param leafCount the total used to derive the cut-off
  */
def dumpHotLeaves(n : StackTreeNode, leafCount : Int) : Unit = {
  val minHits = (leafCount * HOTSPOT_THRESHOLD).toInt
  println(s"$leafCount leaf nodes; printing those with >= $minHits hits")

  def visit(node : StackTreeNode) : Unit =
    if (node.children.isEmpty) {
      if (node.count >= minHits) println(node)
    }
    else node.children.values.foreach(visit)

  visit(n)
}
//
// Here's the actual program
// Notes:
// - My scala sucks
// - Threadnames can be (Java) regexes
// - items in the file list can be directories
// Help text shown when the script is started without arguments.
val usage = """
Usage: ProcessJStack [-t thread1 -t thread2 ...] [files]
"""
// Nothing to do without arguments: show the help text and stop.
if (argv.isEmpty) {
  println(usage)
  sys.exit()
}
/** Splits command-line arguments into thread-name patterns and file names.
  *
  * Every "-t" that is followed by another argument contributes that argument
  * as a pattern; every other argument (including a trailing "-t" with nothing
  * after it) is treated as a file name. New entries are prepended, so both
  * result lists are in reverse order of appearance.
  *
  * @param threads patterns collected so far
  * @param files   file names collected so far
  * @param list    the arguments still to be processed
  * @return the accumulated (patterns, file names)
  */
def parseArg(threads : List[String], files : List[String],
             list : List[String]) : (List[String], List[String]) = {
  var patterns = threads
  var paths = files
  var remaining = list
  while (remaining.nonEmpty) {
    remaining match {
      case "-t" :: pattern :: rest =>
        patterns = pattern :: patterns
        remaining = rest
      case path :: rest =>
        paths = path :: paths
        remaining = rest
      case Nil => () // excluded by the loop condition
    }
  }
  (patterns, paths)
}
// Split the command line into thread-name patterns and input paths.
val (threadNames, fileNames) = parseArg(List(), List(), argv.toList)

// Expand the input paths: a directory contributes the regular files directly
// inside it, any other path is used as given.
var fileList = List[File]()
for (fileName <- fileNames) {
  val f = new File(fileName)
  if (f.isDirectory()) {
    // listFiles() returns null when the directory cannot be read; treat that
    // as an empty directory instead of failing with a NullPointerException.
    val entries = Option(f.listFiles()).map(_.toList).getOrElse {
      Console.err.println("Warning: unable to list directory " + f)
      List[File]()
    }
    // Skip sub-directories and other non-regular entries: parse() can only
    // read plain files.
    fileList ++= entries.filter(_.isFile)
  }
  else fileList +:= f
}

// Parse every dump, merge the selected threads into a call tree, then print
// the tree followed by its hot leaves.
parse(fileList)
println("Found " + threads.size + " threads")
val (root, threadCount) = analyzeThreads(threads, threadNames.toSet)
for (n <- root.children.values)
  dump(n, "")
dumpHotLeaves(root, threadCount)
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment