Skip to content

Instantly share code, notes, and snippets.

@TheAngryByrd
Created November 3, 2023 18:30
Show Gist options
  • Save TheAngryByrd/caa24856e0bae16e36b55c709682fd7e to your computer and use it in GitHub Desktop.
Save TheAngryByrd/caa24856e0bae16e36b55c709682fd7e to your computer and use it in GitHub Desktop.
f# graph based parser
open System
open System.IO
open System.Collections.Generic
let root = __SOURCE_DIRECTORY__
let srcDir = Path.Combine(root, "src")
// Add <OtherFlags>$(OtherFlags) --test:DumpGraphChecking --times:timings.csv</OtherFlags> to the .fsproj or Directory.Build.props
let allTimingsFiles =
Directory.GetFiles(srcDir, "timings.csv", SearchOption.AllDirectories)
let allGraphFiles =
Directory.GetFiles(srcDir, "*.graph.md", SearchOption.AllDirectories)
// --- Parse timings files
let checkLines = "CheckOneImplFile"
// Name,StartTime,EndTime,Duration(s),Id,ParentId,RootId,fileName,project,qualifiedNameOfFile,userOpName,length,cache,cpuDelta(s),realDelta(s),gc0,gc1,gc2,outputDllFile,buildPhase
type Timings =
{
Name: string
StartTime: string
EndTime: string
Duration: float
Id: string
ParentId: string
RootId: string
FileName: string
Project: string
QualifiedNameOfFile: string
TimingsFile: string
}
static member Parse(timingsFile, line: string) =
let parts = line.Split(',')
{
Name = parts.[0]
StartTime = parts.[1]
EndTime = parts.[2]
Duration = float parts.[3]
Id = parts.[4]
ParentId = parts.[5]
RootId = parts.[6]
FileName = parts.[7].Trim('"')
Project = parts.[8]
QualifiedNameOfFile = parts.[9]
TimingsFile = timingsFile
}
let allCheckTimings =
allTimingsFiles
|> Array.map (fun x -> (x, File.ReadAllLines(x)))
|> Array.collect (fun (name, lines) ->
lines
|> Array.filter (fun x -> x.Contains(checkLines))
|> Array.map (fun x -> (name, x))
)
|> Array.map (Timings.Parse)
allCheckTimings
|> Array.sortByDescending (fun x -> x.Duration)
|> Array.iter (fun x -> printfn "%s %f - %s" x.FileName x.Duration x.TimingsFile)
// --- Parse graph files
allGraphFiles
|> Seq.head
type Node =
{
GraphFile: string
Index: int
FileName: string
Dependents: Set<int>
Dependencies: Set<int>
}
member x.Summary() =
printfn
$"{x.Index} - {x.FileName} - Dependencies:{x.Dependencies.Count} - Dependents:{x.Dependents.Count}"
member x.Details(allNodes) =
printfn
$"{x.Index} - {x.FileName} - Dependencies:{x.Dependencies.Count} - Dependents:{x.Dependents.Count}"
printfn "Dependencies"
x.Dependencies
|> Set.iter (fun x -> printfn $" {x} - {allNodes[x]}")
printfn "Dependents"
x.Dependents
|> Set.iter (fun x -> printfn $" {x} - {allNodes[x]}")
let indexToNameRegex =
System.Text.RegularExpressions.Regex(@"(?<index>\d+)\[(?<filename>.*)\]")
let edgesRegex =
System.Text.RegularExpressions.Regex(@"(?<index>\d+)\s-->\s(?<dependency>\d+)")
let contains (t: string) (x: string) = x.Contains t
// let badFiles = [
// contains "AssemblyAttributes.fs"
// contains "AssemblyInfo.fs"
// ]
let nodesPerFile =
allGraphFiles
|> Array.map (fun x -> (x, File.ReadAllLines(x)))
|> Array.map (fun (graphFile, lines) ->
let nodes =
lines
|> Array.map (fun x -> indexToNameRegex.Match(x))
|> Array.choose (fun m ->
let filename = m.Groups.["filename"].Value.Trim('"')
let index = m.Groups.["index"].Value
// if badFiles |> Seq.exists(fun f -> f filename) then
// None
// else
try
let index = int index
Some(
index,
{
GraphFile = graphFile
Index = int index
FileName = filename
Dependents = Set.empty
Dependencies = Set.empty
}
)
with e ->
// eprintfn "Error %s - %s" e.Message index
None
)
|> Map.ofArray
let edges =
lines
|> Array.map (fun x -> edgesRegex.Match(x))
|> Array.choose (fun m ->
let index = m.Groups.["index"].Value
let dependency = m.Groups.["dependency"].Value
try
(int index, int dependency)
|> Some
with e ->
// eprintfn "Error %s - %s" e.Message source
None
)
(nodes, edges)
||> Array.fold (fun state (index, dependency) ->
let node = Map.tryFind index state
let dependencyNode = Map.tryFind dependency state
match node, dependencyNode with
| Some node, Some dependencyNode ->
let node =
{ node with
Dependencies = Set.add dependency node.Dependencies
}
let dependencyNode =
{ dependencyNode with
Dependents = Set.add index dependencyNode.Dependents
}
state
|> Map.add index node
|> Map.add dependency dependencyNode
| _ -> state
)
)
let topologicalSort =
nodesPerFile
|> Array.map (fun nodes ->
let stack = Stack<Node>()
let rec visit node =
if
not (
stack
|> Seq.exists (fun n -> n.Index = node.Index)
)
then
stack.Push node
node.Dependents
|> Seq.iter (fun x -> visit (Map.find x nodes))
let nodesWithNoDependencies =
nodes
|> Seq.filter (fun (KeyValue(_, x)) -> x.Dependencies.Count = 0)
|> Seq.map (fun (KeyValue(_, x)) -> x)
nodesWithNoDependencies
|> Seq.iter visit
stack
|> Seq.toList
)
// --- Determine best files for signatures
let firstFileNodes =
topologicalSort
|> Seq.head
|> Seq.rev
// |> Seq.head
// |> Map.find 4
firstFileNodes
|> Seq.length
for node in firstFileNodes do
node.GraphFile
|> printfn "%s"
node.Summary()
// let timing =
// allCheckTimings
// |> List.filter(fun timing ->
// let result = timing.FileName.ToLower().Contains (node.FileName.ToLower())
// // printfn $"{timing.FileName.ToLower()} = {node.FileName.ToLower()} -> {result}"
// result
// )
// timing |> List.iter (fun x -> x.Duration |> printfn "%f")
// Notes
// 1. Largest path of the tree
// 2.Having a lot of dependent files
// 3. Compare timings to detect bottlenecks (run without graph and with graph)
// for example if a file is taking longer in graph, there are bottlenecks getting to it
// 4.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment