Skip to content

Instantly share code, notes, and snippets.

@bishabosha
Last active November 30, 2023 09:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bishabosha/379f324d542bf433b1c75e7b6e0a26a2 to your computer and use it in GitHub Desktop.
Save bishabosha/379f324d542bf433b1c75e7b6e0a26a2 to your computer and use it in GitHub Desktop.
Parse the CSV files written by the Scala 3 compiler's -Yprofile-destination option
//> using toolkit latest
import profileparser.*
// Profile files to analyse: every regular file directly under ./input.
// Sub-directories are filtered out because `parseFile` opens each entry as a
// text file and would fail on a directory.
val sources = os.list(os.pwd / "input").filter(entry => os.isFile(entry))
println("data:")
/** Extracts the project (target) name from a profile file name.
  *
  * File names of the form `compile-<target>-profile<anything>` yield `<target>`;
  * any other name yields `"<unknown>"`.
  *
  * @param source path of the profile file; only its last segment is inspected
  */
def project(source: os.Path) =
  val fileName = source.last
  fileName match
    case s"compile-$name-profile$suffix" => name
    case _                               => "<unknown>"
/** Labels which compiler worker a profile file belongs to, based on its file name.
  *
  *  - `compile-<target>-profile.csv` yields `"main"`
  *  - `compile-<target>-profile-worker-<id>.csv` yields `"worker-<id>"`
  *  - anything else yields `"<unknown>"`
  *
  * @param source path of the profile file; only its last segment is inspected
  */
def worker(source: os.Path) =
  val fileName = source.last
  fileName match
    // The plain profile pattern is tried before the worker pattern; keep this order.
    case s"compile-$ignoredTarget-profile.csv"               => "main"
    case s"compile-$ignoredTarget-profile-worker-$index.csv" => s"worker-$index"
    case _                                                   => "<unknown>"
/** Shortens a phase name for display.
  *
  * A name of the form `MegaPhase{a b c}` is abbreviated to `[a..c]`, i.e. the first
  * and last of the space-separated names between the braces. Every other name is
  * returned unchanged.
  *
  * @param phase phase name as found in the profile row
  */
def simplePhase(phase: String) =
  phase match
    case s"MegaPhase{$inner}" =>
      val names = inner.split(" ")
      val first = names.head
      val last = names.last
      s"[$first..$last]"
    case plain =>
      plain
/** One row of the generated Gantt CSV, describing a single profile run.
  *
  * @param taskId         label of the run; the script builds it as `"<project> (<worker>)"`
  * @param start          `startNs` of the first phase row of the run (raw value, not yet normalised)
  * @param phaseDurations `(phase name, endNs - startNs)` for each phase row, in the order the rows were read
  */
case class GanttRow(taskId: String, start: Long, phaseDurations: List[(String, Long)])
// Collect one GanttRow per profile run found in the input files, printing the
// duration of every phase along the way.
val tasks = List.newBuilder[GanttRow]
for
  source <- sources
  ProfileRun(id, _, events) <- parseFile(source.toString)
do
  val proj = project(source)
  val wrk = worker(source)
  println("=========================================")
  println(s"profile run $id for $proj ($wrk)")
  val taskId = s"$proj ($wrk)"
  // Only phase rows contribute to the chart; GC rows are ignored.
  val phases = events.collect {
    case Event.PhaseRow(_, startNs, endNs, _, _, phase, _, _, _, _, _, _, _, _, _, _) =>
      (simplePhase(phase), startNs, endNs)
  }
  val phaseDurations = phases.map((phaseName, startNs, endNs) => phaseName -> (endNs - startNs))
  for (phaseName, duration) <- phaseDurations do
    println(f"$phaseName: ${duration.toDouble / 1_000_000}%.3f ms")
  phases.headOption match
    case Some((_, firstStartNs, _)) =>
      // The start of the first phase row anchors the whole run on the time axis.
      tasks += GanttRow(taskId, firstStartNs, phaseDurations)
    case None =>
      // A run without phase rows has no start time. Emitting it with a sentinel
      // start would become the global minimum and shift every other row.
      println("no phase rows in this run, omitted from gantt output")
// Freeze the collected rows and derive the values needed to lay out the chart.
val rowsRaw = tasks.result()
// `minOption` / `maxOption` keep the script from crashing on `empty.min` /
// `empty.max` when no rows were collected; the defaults lead to a header-only CSV.
val initialStart = rowsRaw.map(_.start).minOption.getOrElse(0L)
val maxPhaseCount = rowsRaw.map(_.phaseDurations.length).maxOption.getOrElse(0)
// (phase name -> task id) pairs and their grouping by phase name; these are only
// consumed by the commented-out phase-patching analysis further down.
val allPhases = rowsRaw.flatMap(r => r.phaseDurations.map(_(0) -> r.taskId))
val phasesToIds = allPhases.groupMap((phase, task) => phase)((phase, task) => task)
println("=========================================")
println("ANALYSIS")
println("=========================================")
// val taskIds = rowsRaw.map(r => r.taskId).toSet
// val taskPhases = rowsRaw.map(r => r.taskId -> r.phaseDurations).toMap
// var taskToPatchedPhases = taskPhases
// phasesToIds.foreach { case (phase, tasks) =>
// if tasks.length != taskIds.size then
// println(s"$phase appears only in: ${tasks.mkString(", ")}")
// val unseen = taskIds -- tasks
// unseen.foreach(task =>
// val taskWithPhase = tasks.head
// val phases0 = taskToPatchedPhases(taskWithPhase)
// val (_ :+ (prePhase, _), _) = phases0.span(_(0) != phase): @unchecked
// val (pre0, post0) = taskToPatchedPhases(task).span(_(0) != prePhase): @unchecked
// val patched =
// if post0.isEmpty then
// ???
// else
// val (pre1 :: post) = post0: @unchecked
// (pre0 ::: pre1 :: (phase, 0L) :: post)
// println(s"patching $task to add $phase, now ${patched.map(_(0)).mkString(", ")}")
// taskToPatchedPhases += task -> patched
// )
// }
// val patchedSizes = taskToPatchedPhases.map((task, phases) => phases.length -> task).groupMap(_._1)(_._2)
// assert(patchedSizes.size == 1, s"not all tasks have the same number of phases: ${patchedSizes.map((s, ps) => s"[$s](${ps.mkString(",")})").mkString(", ")}")
// val maxPhaseCount0 = taskToPatchedPhases.map(_(1).length).max
// assert(maxPhaseCount0 == maxPhaseCount, s"extra phases were added, see ${taskToPatchedPhases.filter(_._2.length != maxPhaseCount).map(_(0)).mkString("\n")}")
// Render rowsRaw as CSV: each row's start is made relative to initialStart and its
// list of durations is padded with zeros up to maxPhaseCount columns.
val header =
  List("Task ID", "Start") ++ List.tabulate(maxPhaseCount)(i => s"Phase ${i + 1} Duration")
val rows =
  for row <- rowsRaw yield
    val relativeStart = row.start - initialStart
    // val phaseDurations = taskToPatchedPhases(row.taskId)
    val durations = row.phaseDurations.map((_, duration) => duration).padTo(maxPhaseCount, 0L)
    List(row.taskId, relativeStart.toString) ++ durations.map(_.toString)
val asCSV = (header +: rows).map(_.mkString(",")).mkString("\n")
os.write.over(os.pwd / "output" / "gantt.csv", asCSV, createFolders = true)
package profileparser
import scala.util.Using
import scala.io.Source
/** One profiling run read from a profile CSV file by `parseFile`.
  *
  * @param id     run id, read from column 1 of an `info,` line
  * @param target target name, read from column 5 of an `info,` line
  * @param events the parsed data rows belonging to the run, in file order
  */
case class ProfileRun(id: Int, target: String, events: List[Event])
/** Kind of thread a phase row was recorded for: `Main` for rows tagged
  * `EventType(main)`, `Background` for rows tagged `EventType(background)`.
  */
enum ThreadState:
case Main, Background
/** A single data row of a profile CSV, as produced by `parseLine`.
  *
  * Field order mirrors the CSV column order: column 0 is the event tag and the
  * remaining columns map onto the fields one by one.
  * NOTE(review): the `Ns` / `Ms` suffixes presumably mean nanoseconds / milliseconds;
  * confirm against the compiler's profiler output.
  */
enum Event:
/** Row tagged `EventType(GC)`; fields are read from columns 1 to 8. */
case GCRow(
startNs: Long,
endNs: Long,
startMs: Long,
endMs: Long,
name: String,
action: String,
cause: String,
threads: Int
)
/** Row tagged `EventType(main)` or `EventType(background)`; `state` records which
  * of the two tags was seen and the other fields are read from columns 1 to 15.
  */
case PhaseRow(
state: ThreadState,
startNs: Long,
endNs: Long,
runId: Int,
phaseId: Int,
phaseName: String,
purpose: String,
taskCount: Int,
threadId: Int,
threadName: String,
runNs: Long,
idleNs: Long,
cpuTimeNs: Long,
userTimeNs: Long,
allocatedByte: Long,
heapSize: Long
)
/** Parses one comma-separated data row of a profile CSV into an [[Event]].
  *
  * Column 0 selects the row kind: `EventType(GC)` produces an `Event.GCRow`, while
  * `EventType(main)` and `EventType(background)` produce an `Event.PhaseRow` tagged
  * with the matching [[ThreadState]].
  *
  * @param line a single data row (not an `info,` or `header(` line)
  * @throws scala.MatchError if column 0 is none of the three known tags
  * @throws java.lang.NumberFormatException if a numeric column cannot be parsed
  * @throws java.lang.ArrayIndexOutOfBoundsException if the row has too few columns
  */
private def parseLine(line: String): Event =
  val cols = line.split(",")
  def longAt(i: Int): Long = cols(i).toLong
  def intAt(i: Int): Int = cols(i).toInt

  // Main and background rows share one column layout and differ only in the tag.
  def phaseRow(state: ThreadState): Event =
    Event.PhaseRow(
      state = state,
      startNs = longAt(1),
      endNs = longAt(2),
      runId = intAt(3),
      phaseId = intAt(4),
      phaseName = cols(5),
      purpose = cols(6),
      taskCount = intAt(7),
      threadId = intAt(8),
      threadName = cols(9),
      runNs = longAt(10),
      idleNs = longAt(11),
      cpuTimeNs = longAt(12),
      userTimeNs = longAt(13),
      allocatedByte = longAt(14),
      heapSize = longAt(15)
    )

  def gcRow: Event =
    Event.GCRow(
      startNs = longAt(1),
      endNs = longAt(2),
      startMs = longAt(3),
      endMs = cols(4).trim.toLong, // dotty adds an extra space here
      name = cols(5),
      action = cols(6),
      cause = cols(7),
      threads = intAt(8)
    )

  cols(0) match
    case "EventType(GC)"         => gcRow
    case "EventType(main)"       => phaseRow(ThreadState.Main)
    case "EventType(background)" => phaseRow(ThreadState.Background)
/** Reads a profile CSV file and splits it into its profile runs.
  *
  * The file is expected to consist of one or more sections, each made of an `info,`
  * line (carrying the run id, the literal `version`, the format version `2` and the
  * target name), optional `header(` lines, and then the data rows of that run.
  * Blank lines are skipped. Sections that contain no data rows produce no run.
  *
  * Each run is emitted with the id and target of the `info,` line that precedes its
  * data rows: the pending run is flushed as soon as the next `info,` line is seen,
  * before that line's values replace the current ones.
  *
  * @param filename path of the CSV file to read
  * @return the runs in the order they appear in the file
  * @throws java.lang.AssertionError if the section structure or the format version is unexpected
  * @throws scala.MatchError if a data row carries an unknown event tag
  */
def parseFile(filename: String): List[ProfileRun] =
  Using.resource(Source.fromFile(filename)) { source =>
    val runs = List.newBuilder[ProfileRun]
    val events = collection.mutable.ListBuffer.empty[Event]
    var inHeader = false
    var runId = -1
    var target = ""

    // Emits the rows gathered so far as one run, labelled with the current id/target.
    def flushRun(): Unit =
      if events.nonEmpty then
        runs += ProfileRun(runId, target, events.toList)
        events.clear()

    for line <- source.getLines() do
      if line.startsWith("info,") then
        assert(!inHeader)
        // Close the previous run before its id and target are overwritten below.
        flushRun()
        inHeader = true
        val header = line.split(",").map(_.trim())
        runId = header(1).toInt
        assert(header(2) == "version", "unexpected format of csv file")
        assert(header(3).toInt == 2, "unexpected format of csv file")
        target = header(5)
      else if line.startsWith("header(") then
        assert(inHeader)
      else if line.trim().isEmpty then
        () // tolerate blank lines (e.g. a trailing newline) instead of failing in parseLine
      else
        // First data row of a section ends its header part.
        inHeader = false
        events += parseLine(line.trim())

    flushRun()
    runs.result()
  }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment