-
-
Save bishabosha/379f324d542bf433b1c75e7b6e0a26a2 to your computer and use it in GitHub Desktop.
Parse -Yprofile-destination csv from Scala 3 compiler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//> using toolkit latest | |
import profileparser.* | |
// Every entry found directly under ./input is treated as a profile file to analyse.
val inputDir = os.pwd / "input"
val sources = os.list(inputDir)

println("data:")
/** Name of the project a profile file belongs to.
  *
  * The last path segment is expected to look like `compile-<target>-profile…`;
  * the `<target>` part is returned. Any other file name yields `"<unknown>"`.
  */
def project(source: os.Path) =
  val fileName = source.last
  fileName match
    case s"compile-$target-profile$suffix" => target
    case _                                 => "<unknown>"
/** Label of the worker that produced a profile file, derived from its file name.
  *
  *  - `compile-<target>-profile.csv`             gives `"main"`
  *  - `compile-<target>-profile-worker-<n>.csv`  gives `"worker-<n>"`
  *  - anything else                              gives `"<unknown>"`
  */
def worker(source: os.Path) =
  val fileName = source.last
  fileName match
    case s"compile-$target-profile.csv"            => "main"
    case s"compile-$target-profile-worker-$n.csv"  => s"worker-$n"
    case _                                         => "<unknown>"
/** Shortens a phase name for display.
  *
  * A name of the form `MegaPhase{p1 p2 … pn}` is abbreviated to `[p1..pn]`, where
  * the members are obtained by splitting the text between the braces on single
  * spaces. Every other name is returned unchanged.
  */
def simplePhase(phase: String) =
  val prefix = "MegaPhase{"
  val suffix = "}"
  if phase.startsWith(prefix) && phase.endsWith(suffix) then
    val members = phase.substring(prefix.length, phase.length - suffix.length).split(" ")
    s"[${members.head}..${members.last}]"
  else
    phase
/** One line of the Gantt chart: a task, when it started, and how long each of its phases took.
  *
  * @param taskId         label identifying the task (built from project and worker)
  * @param start          start value of the first phase seen for the task, or `-1` if none was seen
  * @param phaseDurations `(phase name, duration)` pairs in the order the phases were recorded
  */
case class GanttRow(
  taskId: String,
  start: Long,
  phaseDurations: List[(String, Long)]
)
// Collects one GanttRow per profile run found in the input files.
val tasks = List.newBuilder[GanttRow]

for
  source <- sources
  ProfileRun(id, _, events) <- parseFile(source.toString)
do
  val proj = project(source)
  val wrk = worker(source)
  println("=========================================")
  println(s"profile run $id for $proj ($wrk)")

  // Only phase rows contribute to the chart; other event kinds are ignored here.
  val phaseRows = events.collect { case row: Event.PhaseRow => row }

  // Report each phase as it is processed and keep its (name, duration in ns) pair.
  val timings = phaseRows.map { row =>
    val name = simplePhase(row.phaseName)
    val nanos = row.endNs - row.startNs
    println(f"$name: ${nanos.toDouble / 1_000_000}%.3f ms")
    name -> nanos
  }

  // -1 doubles as the "no start recorded" marker, so the first start that is not -1 wins.
  val firstStart = phaseRows.iterator.map(_.startNs).find(_ != -1L).getOrElse(-1L)

  tasks += GanttRow(s"$proj ($wrk)", firstStart, timings)
// Snapshot of every collected row, plus the statistics needed to lay out the CSV.
val rowsRaw = tasks.result()

// `min` / `max` throw on an empty collection. When no run was parsed at all
// (for example an empty input directory) fall back to neutral values, so the
// script still produces a header-only CSV instead of crashing.
val initialStart = rowsRaw.map(_.start).minOption.getOrElse(0L)
val maxPhaseCount = rowsRaw.map(_.phaseDurations.length).maxOption.getOrElse(0)

// (phase name, task id) pairs and the index phase name -> task ids that recorded it.
// These are only consumed by the currently disabled analysis below.
val allPhases = rowsRaw.flatMap(r => r.phaseDurations.map(_(0) -> r.taskId))
val phasesToIds = allPhases.groupMap((phase, task) => phase)((phase, task) => task)

println("=========================================")
println("ANALYSIS")
println("=========================================")
// val taskIds = rowsRaw.map(r => r.taskId).toSet | |
// val taskPhases = rowsRaw.map(r => r.taskId -> r.phaseDurations).toMap | |
// var taskToPatchedPhases = taskPhases | |
// phasesToIds.foreach { case (phase, tasks) => | |
// if tasks.length != taskIds.size then | |
// println(s"$phase appears only in: ${tasks.mkString(", ")}") | |
// val unseen = taskIds -- tasks | |
// unseen.foreach(task => | |
// val taskWithPhase = tasks.head | |
// val phases0 = taskToPatchedPhases(taskWithPhase) | |
// val (_ :+ (prePhase, _), _) = phases0.span(_(0) != phase): @unchecked | |
// val (pre0, post0) = taskToPatchedPhases(task).span(_(0) != prePhase): @unchecked | |
// val patched = | |
// if post0.isEmpty then | |
// ??? | |
// else | |
// val (pre1 :: post) = post0: @unchecked | |
// (pre0 ::: pre1 :: (phase, 0L) :: post) | |
// println(s"patching $task to add $phase, now ${patched.map(_(0)).mkString(", ")}") | |
// taskToPatchedPhases += task -> patched | |
// ) | |
// } | |
// val patchedSizes = taskToPatchedPhases.map((task, phases) => phases.length -> task).groupMap(_._1)(_._2) | |
// assert(patchedSizes.size == 1, s"not all tasks have the same number of phases: ${patchedSizes.map((s, ps) => s"[$s](${ps.mkString(",")})").mkString(", ")}") | |
// val maxPhaseCount0 = taskToPatchedPhases.map(_(1).length).max | |
// assert(maxPhaseCount0 == maxPhaseCount, s"extra phases were added, see ${taskToPatchedPhases.filter(_._2.length != maxPhaseCount).map(_(0)).mkString("\n")}") | |
// Render rowsRaw as CSV. Each row's start is expressed relative to initialStart and its
// list of durations is right-padded with zeros so that every row has maxPhaseCount columns.
val header =
  "Task ID" :: "Start" :: List.tabulate(maxPhaseCount)(i => s"Phase ${i + 1} Duration")

val rows =
  for row <- rowsRaw yield
    val relativeStart = row.start - initialStart
    val durations = row.phaseDurations.map(_(1)).padTo(maxPhaseCount, 0L)
    // val phaseDurations = taskToPatchedPhases(row.taskId)
    row.taskId :: relativeStart.toString :: durations.map(_.toString)

val asCSV = (header :: rows).map(_.mkString(",")).mkString("\n")

os.write.over(os.pwd / "output" / "gantt.csv", asCSV, createFolders = true)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package profileparser | |
import scala.util.Using | |
import scala.io.Source | |
/** All events recorded for one run inside a profile CSV file.
  *
  * @param id     run id taken from the `info` header line of the run
  * @param target target string taken from the `info` header line of the run
  * @param events the parsed data lines of the run, in file order
  */
case class ProfileRun(
  id: Int,
  target: String,
  events: List[Event]
)
/** Which kind of phase line an event came from: `EventType(main)` or `EventType(background)`. */
enum ThreadState:
  case Main
  case Background
/** A single parsed data line of a profile CSV.
  *
  * For both cases the parameter order mirrors the column order of the CSV line
  * (the first column, the event type, selects the case and is not stored).
  */
enum Event:

  /** A line whose event type is `EventType(GC)`. */
  case GCRow(
    startNs: Long, endNs: Long,
    startMs: Long, endMs: Long,
    name: String, action: String, cause: String,
    threads: Int
  )

  /** A line whose event type is `EventType(main)` or `EventType(background)`;
    * `state` records which of the two it was.
    */
  case PhaseRow(
    state: ThreadState,
    startNs: Long, endNs: Long,
    runId: Int,
    phaseId: Int, phaseName: String,
    purpose: String,
    taskCount: Int,
    threadId: Int, threadName: String,
    runNs: Long, idleNs: Long,
    cpuTimeNs: Long, userTimeNs: Long,
    allocatedByte: Long,
    heapSize: Long
  )
/** Parses one comma-separated data line of a profile CSV into an [[Event]].
  *
  * The first column selects the event kind:
  *  - `EventType(GC)`          gives [[Event.GCRow]] (9 columns)
  *  - `EventType(main)`        gives [[Event.PhaseRow]] with [[ThreadState.Main]] (16 columns)
  *  - `EventType(background)`  gives [[Event.PhaseRow]] with [[ThreadState.Background]] (16 columns)
  *
  * @param line a single data line (not an `info,` or `header(` line)
  * @return the parsed event
  * @throws IllegalArgumentException if the event type is unknown or the line has too few columns
  * @throws NumberFormatException    if a numeric column does not hold a number
  */
private def parseLine(line: String): Event = {
  val fields = line.split(",")

  // Numeric columns are trimmed before conversion: dotty is known to emit a stray
  // space in at least one of them (the GC end-ms column), so be lenient everywhere.
  def long(i: Int): Long = fields(i).trim.toLong
  def int(i: Int): Int = fields(i).trim.toInt

  // Fail with a message that names the offending line instead of a bare index error.
  def requireColumns(n: Int): Unit =
    if fields.length < n then
      throw new IllegalArgumentException(
        s"expected at least $n columns but found ${fields.length} in profile line: $line"
      )

  def parseMainBackground(state: ThreadState) = {
    requireColumns(16)
    Event.PhaseRow(
      state,
      long(1),
      long(2),
      int(3),
      int(4),
      fields(5),
      fields(6),
      int(7),
      int(8),
      fields(9),
      long(10),
      long(11),
      long(12),
      long(13),
      long(14),
      long(15)
    )
  }

  fields(0) match {
    case "EventType(GC)" =>
      requireColumns(9)
      Event.GCRow(
        long(1),
        long(2),
        long(3),
        long(4),
        fields(5),
        fields(6),
        fields(7),
        int(8)
      )
    case "EventType(main)" =>
      parseMainBackground(ThreadState.Main)
    case "EventType(background)" =>
      parseMainBackground(ThreadState.Background)
    case other =>
      // Previously this fell through as an uninformative MatchError.
      throw new IllegalArgumentException(s"unknown event type '$other' in profile line: $line")
  }
}
/** Reads a profile CSV file and groups its events into runs.
  *
  * Line kinds, decided by prefix:
  *  - `info,…`   starts the header of a new run; column 1 is the run id, column 2 must be
  *               the literal `version`, column 3 must be `2`, and column 5 is the target
  *  - `header(…` column-description lines, only allowed while inside a run header
  *  - blank      ignored
  *  - otherwise  an event line, handed to `parseLine`
  *
  * A run is only emitted if it has at least one event. Event lines that appear before
  * any `info,` line are emitted as a run with id `-1` and an empty target.
  *
  * @param filename path of the CSV file to read
  * @return the runs in file order
  * @throws AssertionError if the header structure or csv version is not as expected;
  *                        I/O and parse exceptions are propagated after the file is closed
  */
def parseFile(filename: String): List[ProfileRun] =
  Using.resource(Source.fromFile(filename)) { source =>
    val runs = List.newBuilder[ProfileRun]
    val events = collection.mutable.ListBuffer.empty[Event]
    var inHeader = false
    var runId = -1
    var target = ""

    // Emits the events collected so far (if any) under the current run id / target.
    def flushRun(): Unit =
      if events.nonEmpty then
        runs += ProfileRun(runId, target, events.toList)
        events.clear()

    for line <- source.getLines() do
      if line.startsWith("info,") then
        assert(!inHeader)
        // The previous run must be emitted BEFORE its id and target are overwritten;
        // otherwise it would be labelled with the metadata of the run that follows it.
        flushRun()
        inHeader = true
        val header = line.split(",").map(_.trim())
        runId = header(1).toInt
        val _ = header(2).ensuring(_ == "version", "unexpected format of csv file")
        val _ = header(3).toInt.ensuring(_ == 2, "unexpected format of csv file")
        target = header(5)
      else if line.startsWith("header(") then
        assert(inHeader)
      else if line.trim().nonEmpty then
        // The first event line ends the header section of the current run.
        inHeader = false
        events += parseLine(line.trim())

    flushRun()
    runs.result()
  }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment