Skip to content

Instantly share code, notes, and snippets.

@dburriss
Last active October 17, 2020 08:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dburriss/643d61715e7330ff29a83f32cb51edce to your computer and use it in GitHub Desktop.
Save dburriss/643d61715e7330ff29a83f32cb51edce to your computer and use it in GitHub Desktop.
Some files for a proof of concept that makes recommendations based on code and the git history. The original uses code complexity but has a dependency on a SonarQube report. This is completely standalone but uses LoC as a proxy. Not the best but it is a PoC. It also takes things like bus factor into account in the recommendations. See comments f…
#load "StringX.fsx"
#load "Result.fsx"
open System
open System.Diagnostics
/// Commit identifier, taken from the first comma-separated field of a parsed log line.
type Commit = Commit of string

/// Commit author, taken from the second comma-separated field of a parsed log line.
type Author = Author of string

/// A list of text lines.
type Lines = string list

/// One parsed `git log` entry: which commit, by whom, and when.
type Log =
    { Commit : Commit
      Author : Author
      Date : DateTimeOffset }
/// Splits optional command output into its non-empty lines.
/// Returns an empty array when there is no output (`None`).
let private splitByNewline s =
    match s with
    // git terminates lines with "\n" regardless of platform, so splitting on
    // Environment.NewLine alone misses every line break where it is "\r\n".
    // Accept both conventions ("\r\n" listed first so it is consumed as one separator).
    | Some x -> x |> StringX.split [|"\r\n"; "\n"|]
    | None -> Array.empty
/// Builds a `Log` from the three fields of a split log line (commit, author, date).
/// Raises (via `failwithf`) when the array does not contain exactly three items.
let private arrToLog splitLine =
    match splitLine with
    | [| hash; email; date |] ->
        { Commit = Commit hash
          Author = Author email
          Date = DateTimeOffset.Parse(date) }
    | _ -> failwithf "Line did not match expected items: %A" splitLine
/// Parses one comma-separated log line into `Some Log`.
/// Null, empty or whitespace-only lines yield `None`.
let private lineToLog line =
    match line with
    | blank when String.IsNullOrWhiteSpace blank -> None
    | text -> text |> StringX.split [|","|] |> arrToLog |> Some
/// Fold step: parses `line` and prepends the (optional) result to the accumulator,
/// so the accumulated list ends up in reverse input order.
let private folder logs line =
    lineToLog line :: logs
/// Parses multi-line log output into a list of optional `Log` entries.
/// Because each parsed line is prepended, the result is in reverse line order.
let private parseLogs (s:string) =
    s
    |> StringX.splitLines
    |> Array.fold folder []
/// Runs `git <command>` with `repository` as the working directory.
///
/// Returns:
///   - `Ok (Some output)` when git exits with code 0 and wrote something to stdout,
///   - `Ok None` when git exits with code 0 and stdout is empty/whitespace,
///   - `Error message` (including git's stderr text) when git exits with a non-zero code.
///
/// `repository` must be a full path (no `~/`).
let runGitCommand repository command =
    // Prefer the historical hard-coded location when it exists; otherwise let the
    // operating system resolve `git` from PATH so other installations work too.
    let gitExecutable =
        let legacyPath = "/usr/local/bin/git"
        if IO.File.Exists legacyPath then legacyPath else "git"
    let info = ProcessStartInfo()
    info.WorkingDirectory <- repository // must be full path not using ~/
    info.FileName <- gitExecutable
    info.Arguments <- command
    info.UseShellExecute <- false
    info.RedirectStandardOutput <- true
    info.RedirectStandardError <- true
    // `use` disposes the process handle when this function returns.
    use p = Process.Start(info)
    // Drain stderr asynchronously while stdout is read synchronously: reading both
    // streams to the end one after the other can deadlock once the pipe that is
    // not being read fills up.
    let errTask = p.StandardError.ReadToEndAsync()
    let output = p.StandardOutput.ReadToEnd()
    p.WaitForExit()
    let err = errTask.Result
    // Decide success by exit code: git also writes warnings to stderr on
    // successful runs, which must not be reported as failures.
    if p.ExitCode = 0 then
        if String.IsNullOrWhiteSpace(output) then Ok None
        else Ok (Some output)
    else Error (sprintf "ERROR running 'git %s' %s %s" command Environment.NewLine err)
/// Runs `git log --follow` for a single file, emitting one `hash,author-email,author-date`
/// line per commit (format placeholders `%h,%ae,%aI`).
let private gitLogOfFile repository file =
    // The path is quoted so paths containing spaces stay a single argument, and
    // `--` marks it unambiguously as a path rather than a revision or an option.
    let cmd = sprintf "log --format=format:\"%%h,%%ae,%%aI\" --follow -- \"%s\"" file
    runGitCommand repository cmd
/// Runs `git log -1` for the given revision `h`, producing a single
/// `hash,author-email,author-date` line.
let private gitLogByHash repository h =
    h
    |> sprintf "log -1 --format=format:\"%%h,%%ae,%%aI\" %s"
    |> runGitCommand repository
/// Returns the parsed log entries for `file` in `repository`.
/// No git output maps to an empty list; a git failure is passed through as `Error`.
let fileHistory repository file =
    gitLogOfFile repository file
    |> Result.map (function
        | Some output -> parseLogs output
        | None -> [])
/// Looks up the log entry of the repository's first commit.
/// Runs `git rev-list --max-parents=0 HEAD`, takes the last line of its output as the
/// commit hash, then fetches and parses that commit's log line.
/// Yields `Ok None` when either command produces no usable output.
let firstLog repository =
    let rootCommits = runGitCommand repository "rev-list --max-parents=0 HEAD"
    let rootLogLine =
        rootCommits
        |> Result.map (fun output -> output |> splitByNewline |> Array.tryLast)
        |> Result.bind (fun hash ->
            match hash with
            | None -> Ok None
            | Some h -> gitLogByHash repository h)
    rootLogLine |> Result.map (Option.bind lineToLog)
/// Looks up the log entry of the most recent commit (`git log -1`).
/// Yields `Ok None` when git prints nothing or the line is blank.
let lastLog repository =
    let headLogCommand = sprintf "log -1 --format=format:\"%%h,%%ae,%%aI\""
    match runGitCommand repository headLogCommand with
    | Ok output -> Ok (Option.bind lineToLog output)
    | Error e -> Error e
/// Returns `Ok (firstCommitDate, lastCommitDate)` for the repository.
///
/// Any git failure is returned as `Error` (the first-commit error takes precedence
/// when both lookups fail). When either lookup succeeds but yields no log entry,
/// an `Error` is returned as well instead of throwing from `Option.get`.
let repositoryRange repository =
    // Both lookups are always executed, as before.
    let first = repository |> firstLog
    let last = repository |> lastLog
    match first, last with
    | Ok (Some f), Ok (Some l) -> Ok (f.Date, l.Date)
    | Error e, _ -> Error e
    | _, Error e -> Error e
    | _ -> Error "Could not determine the first and last commit of the repository (no log entries found)."
#load "Git.fsx"
#load "Loc.fsx"
#load "Stats.fsx"
open System
// DATA TYPES

/// A git repository on disk together with the dates of its first and last commit.
type Repository =
    { Path : string
      CreatedAt : DateTimeOffset
      LastUpdatedAt : DateTimeOffset }

/// Raw measurements for a single file: its git history and line-of-code count.
type RawData =
    { Path : string
      CreatedAt : DateTimeOffset
      LastTouchedAt : DateTimeOffset
      History : Git.Log list
      LoC : int }

/// Folder within the repository that the run is scoped to.
type ProjectFolder = string

/// Repository description plus the raw data gathered for its files.
type RepositoryData =
    { Path : string
      Project : ProjectFolder
      CreatedAt : DateTimeOffset
      LastUpdatedAt : DateTimeOffset
      Data : RawData list }

/// Raw file data combined with its computed (unscaled) priority score.
type Analysis =
    { Path : string
      Raw : RawData
      PriorityScore : int64 }

/// Repository description plus the analysis of each file.
type AnalyzedRepository =
    { Path : string
      Project : ProjectFolder
      CreatedAt : DateTimeOffset
      LastUpdatedAt : DateTimeOffset
      Analysis : Analysis list }

/// Inputs the recommendation rules work from.
type RecommendationData =
    { RelativePriority : int
      //Complexity : int
      LoC : int
      History : Git.Log list }

/// Recommendation comments for one file, with the data they were derived from.
type Recommendation =
    { Path : string
      Comments : string list
      RecommendationData : RecommendationData }

/// Final report: recommendations keyed by file path.
type RecommendationReport =
    { Path : string
      Project : ProjectFolder
      CreatedAt : DateTimeOffset
      LastUpdatedAt : DateTimeOffset
      Recommendations : Map<string,Recommendation> }
// WORKFLOWS
// Function signatures describing the three pipeline stages: gather -> analyze -> recommend.
/// Collects raw per-file data for a repository within a project folder.
type GatherRepositoryData = ProjectFolder -> Repository -> RepositoryData
/// Turns gathered repository data into an analyzed repository.
type AnalyzeRepository = RepositoryData -> AnalyzedRepository
/// Turns an analyzed repository into a recommendation report.
type MakeRecommendations = AnalyzedRepository -> RecommendationReport
// HELPERS
/// Files directly inside `path`, as returned by `Directory.GetFiles`.
let private getFiles (path : string) = IO.Directory.GetFiles(path)
/// Sub-directories directly inside `path`, as returned by `Directory.GetDirectories`.
let private getDirs (path : string) = IO.Directory.GetDirectories(path)
/// Joins a (path, file) pair with `Path.Combine`.
let private combine (path : string, file : string) = IO.Path.Combine(path, file)
/// Get basic repository info: the path plus the first and last commit dates
/// reported by `Git.repositoryRange`. Raises (via `failwith`) when git reports an error.
let descRepository repoPath : Repository =
    match Git.repositoryRange repoPath with
    | Error e -> failwith e
    | Ok (start, finish) ->
        { Path = repoPath
          CreatedAt = start
          LastUpdatedAt = finish }
/// Gathers line statistics and git history for one file.
/// Returns `None` when the file has no parsed history entries.
/// Raises (via `failwith`) when the git history lookup reports an error.
let private gitFileRawData (repository : Repository) file : RawData option =
    let filePath = combine (repository.Path, file)
    let locStats = Loc.getStats filePath
    let history =
        match Git.fileHistory repository.Path filePath with
        | Ok logs -> List.choose id logs
        | Error e -> failwith e
    if List.isEmpty history then
        None
    else
        // The first history entry supplies the creation date, the last one the last-touched date.
        Some
            { Path = filePath
              CreatedAt = (List.head history).Date
              LastTouchedAt = (List.last history).Date
              History = history
              LoC = locStats.LoC }
/// Applies `f` to a `(directory, file)` pair for every file under `path`, recursing into
/// all sub-directories. Files of a directory are yielded before those of its sub-directories.
let rec private mapFiles f path =
    let subDirs = getDirs path
    let filesHere = getFiles path
    seq {
        for file in filesHere do
            yield f (path, file)
        for dir in subDirs do
            yield! mapFiles f dir
    }
/// Walks the repository directory and gathers raw data for every file whose extension
/// (including the leading dot, as reported by `FileInfo.Extension`) is in `extensionList`.
/// Files with other extensions produce `None`.
let gitRawData extensionList (repository : Repository) =
    let isIncluded (filePath : string) =
        let extension = IO.FileInfo(filePath).Extension
        List.contains extension extensionList
    let rawDataFor (path, file) =
        let filePath = combine (path, file)
        if isIncluded filePath then gitFileRawData repository filePath else None
    mapFiles rawDataFor repository.Path
/// Get all files with history and LoC.
/// `gatherRawData` produces a sequence of optional per-file data; the `None` entries are dropped.
let gatherRepositoryRawData gatherRawData projectFolder (repository : Repository) =
    let files = repository |> gatherRawData |> Seq.choose id |> List.ofSeq
    { Path = repository.Path
      Project = projectFolder
      CreatedAt = repository.CreatedAt
      LastUpdatedAt = repository.LastUpdatedAt
      Data = files }
/// Priority score of a file: for every commit in its history, the commit date's
/// coefficient (from `Stats.calculateCoeffiecient` over the repository's date range)
/// multiplied by the file's LoC, summed over the history.
let calcPriority (repository : RepositoryData) (data : RawData) =
    let recency = Stats.calculateCoeffiecient repository.CreatedAt repository.LastUpdatedAt
    // We want to do on cyclomatic complexity rather than LoC
    let size = int64 data.LoC
    data.History |> List.sumBy (fun log -> recency log.Date * size)
/// Wraps a file's raw data in an `Analysis`, scoring it with the supplied `calcPriority`.
let analyzeData calcPriority (repository : RepositoryData) (data : RawData) =
    let score = calcPriority repository data
    { Path = data.Path
      Raw = data
      PriorityScore = score }
/// Analyze the data: applies `analyzeData repository` to every gathered file and
/// copies the repository description across.
let performAnalysis analyzeData (repository : RepositoryData) =
    let analyses = List.map (analyzeData repository) repository.Data
    { Path = repository.Path
      Project = repository.Project
      CreatedAt = repository.CreatedAt
      LastUpdatedAt = repository.LastUpdatedAt
      Analysis = analyses }
/// Keeps the first log entry seen for each distinct author (returns log entries, not authors).
let distinctAuthors (history : Git.Log list) =
    List.distinctBy (fun (entry : Git.Log) -> entry.Author) history
/// Produces the list of recommendation comments for one file from its LoC,
/// relative priority and git history. The list is empty when no rule fires.
// NOTE(review): the indentation of this block is not visible here, so the nesting
// of the `if`/`else` branches below is inferred from the message texts — confirm
// against the original file before changing any rule.
let recommendations (data : RecommendationData) =
[
// Rule 1: file size (only considered above 400 LoC).
if(data.LoC > 400) then
if(data.RelativePriority >= 50 && data.History |> List.length > 5) then
yield sprintf "PRIORITY: MEDIUM | This file is large at %i lines of code and changes often. It is strongly suggested you break it up to avoid conflicting changes." data.LoC
// presumably pairs with the priority/history check above (large file, not hot) — TODO confirm
else
yield sprintf "PRIORITY: LOW | You may want to break this file up into smaller files as it is %i lines of code." data.LoC
// Rule 2: bus factor — more than 100 LoC and exactly one distinct author in the history.
if(data.LoC > 100 && data.History |> distinctAuthors |> List.length = 1) then
// NOTE(review): a RelativePriority of exactly 50 matches neither check below (`> 50` here vs `>= 50` in rule 1) — confirm intended.
if data.RelativePriority > 50 && data.RelativePriority < 80 then
yield "PRIORITY: MEDIUM | Bus factor is 1 on a significant file. Make sure covered by descriptive tests & try get spread knowledge across the team."
if data.RelativePriority >= 80 then
yield "PRIORITY: HIGH | Bus factor is 1 on a VERY significant file. Make sure covered by descriptive tests & try pair up working on this file to prioritize knowledge transfer."
// presumably the alternative to the bus-factor condition (file is small or has several authors) — TODO confirm
else
if data.RelativePriority >= 80 then
yield "PRIORITY: MEDIUM | This file seems to be significant based on complexity and changes. Make sure covered by descriptive tests & try get spread knowledge across the team."
// Disabled rule based on cyclomatic complexity (RecommendationData has no Complexity field at present).
// if(data.Complexity >= 10 && data.RelativePriority >= 20) then
// yield sprintf "PRIORITY: %i/100 | Due to cyclomatic complexity of %i and recency of changes, this should be simplified. See: http://codinghelmet.com/articles/reduce-cyclomatic-complexity-switchable-factory-methods" (data.RelativePriority) (data.Complexity)
]
/// Builds the `(file path, Recommendation)` pair for one analyzed file.
/// `shiftPriority` converts the raw priority score into the relative priority;
/// `recommendations` turns the resulting data into comments.
let analysisRecommendation recommendations shiftPriority (analysis : Analysis) =
    let raw = analysis.Raw
    let data =
        { RelativePriority = shiftPriority analysis.PriorityScore
          //Complexity = analysis.Raw.Metrics.Complexity
          LoC = raw.LoC
          History = raw.History }
    let recommendation =
        { Path = analysis.Path
          Comments = recommendations data
          RecommendationData = data }
    (raw.Path, recommendation)
/// Builds the recommendation report: applies `analysisRecommendation` to every analysis
/// and indexes the resulting `(path, recommendation)` pairs in a map.
let makeRecommendationsWith analysisRecommendation (analyzedRepository : AnalyzedRepository) =
    //let (min,max) = analyzedRepository.Analysis |> List.map (fun a -> a.PriorityScore) |> fun xs -> (xs |> List.min, xs |> List.max)
    //let shiftPriority = Stats.shiftTo100L min max >> int
    let byPath =
        analyzedRepository.Analysis
        |> Seq.map analysisRecommendation
        |> Map.ofSeq
    { Path = analyzedRepository.Path
      Project = analyzedRepository.Project
      CreatedAt = analyzedRepository.CreatedAt
      LastUpdatedAt = analyzedRepository.LastUpdatedAt
      Recommendations = byPath }
/// Prints the repository path followed by every file that has at least one comment:
/// a summary line (priority, LoC, distinct author count, last update date) and then
/// its comments. Returns the report unchanged so the call can sit in a pipeline.
let printRecommendations report =
    printfn "Repository: %s" report.Path
    let rows =
        report.Recommendations
        |> Map.toArray
        |> Array.map (fun (file, r) ->
            let history = r.RecommendationData.History
            let relativeFile = IO.Path.GetRelativePath(report.Path, file)
            let authorCount = history |> distinctAuthors |> List.length
            let lastUpdate = history |> List.map (fun x -> x.Date) |> List.max
            (relativeFile, r.RecommendationData.RelativePriority, r.RecommendationData.LoC, authorCount, lastUpdate, r.Comments))
    for (file, priority, loc, authors, lastUpdate, comments) in rows do
        if not (List.isEmpty comments) then
            printfn "%s [ Priority : %i | LoC : %i | Authors : %i | LastUpdate : %s]" file priority loc authors (lastUpdate.ToLocalTime().ToString("yyyy-MM-dd"))
            comments |> List.iter (printfn " %s")
    report
// execute
// Script inputs are read from the command line; a missing argument makes the
// corresponding index lookup fail.
// Argument 1: path of the git repository to inspect.
let testRepo = fsi.CommandLineArgs.[1]
// Argument 2: project folder, e.g. "./".
let projFolder : ProjectFolder = fsi.CommandLineArgs.[2] //"./"
// Argument 3: comma-separated list of file extensions to include, e.g. ".fs".
let includeList = fsi.CommandLineArgs.[3] |> StringX.split [|","|] |> Array.toList //[".fs"]
// Describe the repository (path plus first/last commit dates).
let repo = testRepo |> descRepository
// Gather history and LoC for every included file.
let repoData = repo |> gatherRepositoryRawData (gitRawData includeList) projFolder
// Analysis stage wired with the default priority calculation.
let analyze = performAnalysis (analyzeData calcPriority)
/// Builds the recommendation report for an analyzed repository, rescaling each raw
/// priority score relative to the lowest and highest score found in the repository.
/// An analysis with no files yields a report with no recommendations.
let recommend analyzedRepo =
    // TODO: this can be done more efficiently
    let scores = analyzedRepo.Analysis |> List.map (fun x -> x.PriorityScore)
    let shiftPriority =
        match scores with
        // List.min/List.max throw on an empty list (e.g. no file matched the
        // extension list). With no analyses the shift function is never invoked,
        // so any placeholder is sufficient.
        | [] -> fun _ -> 1
        | _ -> Stats.shiftTo100L (List.min scores) (List.max scores) >> int
    makeRecommendationsWith (analysisRecommendation recommendations shiftPriority) analyzedRepo
// Run the pipeline: analyze the gathered data, derive recommendations, print them.
repoData |> (analyze >> recommend >> printRecommendations)
open System
open System.IO
/// Line statistics for one file: its extension, total line count, and how many
/// lines were classified as code and as comments.
type Stats =
    { Ext : string
      Lines : int
      LoC : int
      CommentLines : int }
/// Classification of a single source line.
type LineType = | Comment | Code | Empty

/// Classifies one line of text:
///   - `Empty`   when it is empty or contains only whitespace,
///   - `Comment` when its first non-whitespace characters are `//`,
///   - `Code`    otherwise (including lines with a trailing `//` comment).
///
/// The previous character-by-character scan flipped a detected comment back to
/// `Code` on the first character after `//`, so only bare `//` lines were ever
/// counted as comments; it also reported a line consisting of a single `/` as `Empty`.
let inspectLine (line : string) =
    let trimmed = line.TrimStart()
    if trimmed.Length = 0 then Empty
    // Ordinal comparison: this is a syntactic marker, not culture-sensitive text.
    elif trimmed.StartsWith("//", StringComparison.Ordinal) then Comment
    else Code
/// Reads the file at `filePath`, classifies every line with `inspectLine`, and
/// returns the extension, total line count, code line count and comment line count.
let getStats filePath =
    let lineTypes = File.ReadLines(filePath) |> Seq.map inspectLine |> Array.ofSeq
    let count kind = lineTypes |> Array.sumBy (fun t -> if t = kind then 1 else 0)
    { Ext = FileInfo(filePath).Extension
      Lines = lineTypes.Length
      LoC = count Code
      CommentLines = count Comment }
module Result
/// Combines two results with `mapping` when both are `Ok`.
/// If either is an `Error`, that error is returned; when both are errors the
/// error of `r1` wins.
let map2 mapping r1 r2 =
    match r1 with
    | Error e -> Error e
    | Ok x1 ->
        match r2 with
        | Ok x2 -> Ok (mapping x1 x2)
        | Error e -> Error e
open System
/// UTC tick count of a `DateTimeOffset`.
let private ticks (dt : DateTimeOffset) : int64 = dt.UtcTicks
// https://math.stackexchange.com/questions/914823/shift-numbers-into-a-different-range/914843#914843?newreg=7a69752f1d4a4a0d8cb7ab3b0b475c0e
/// Linearly maps `t` from the range [a, b] onto the range [c, d].
/// Callers must ensure a <> b.
let inline private transform a b c d t =
    //printfn "%f + ((%f - %f)/(%f - %f)) * (%f - %f)" c d c b a t a
    c + ( ( (d - c) / (b - a) ) * (t - a) )

/// Linearly maps `value` from [fromMin, fromMax] onto [toMin, toMax]; all arguments
/// are converted to `double` and the result is a `double`.
///
/// When `fromMin` equals `fromMax` the source range is a single point and the
/// slope is undefined (the division by zero used to produce NaN, which turns into
/// a meaningless number once cast to an integer). In that case `toMin` is
/// returned, i.e. the mapping's value at the start of the source range.
let inline shift toMin toMax fromMin fromMax value =
    let a = fromMin |> double
    let b = fromMax |> double
    let c = toMin |> double
    if a = b then c
    else transform a b c (toMax |> double) (value |> double)
/// Maps `value` from [fromMin, fromMax] onto 1..100 and converts the result to `int`.
let shiftTo100 fromMin fromMax value =
    int (shift 1 100 fromMin fromMax value)
/// Maps `value` from [fromMin, fromMax] onto 1..100 and converts the result to `int64`.
let shiftTo100L fromMin fromMax value =
    int64 (shift 1L 100L fromMin fromMax value)
//let shiftP = shift100L 210L 2000L
// Only meaningful for a codebase with consistent work going on.
// If the last thing worked on was complex but a long time ago, it would still show up as meaningful.
/// Maps `nowDt` linearly onto 1..100 according to where it lies between `startDt`
/// and `endDt` (compared by UTC ticks).
let calculateCoeffiecient startDt endDt nowDt =
    // linear but should possibly be exponential
    shiftTo100L (ticks startDt) (ticks endDt) (ticks nowDt)
    // let percentage = (now - start) / (finish - start)
    // percentage * (100L - 1L) + 1L
open System
/// Splits `s` on any of the separators in `sep`, dropping empty entries.
let split (sep:string []) (s:string) = s.Split(sep, StringSplitOptions.RemoveEmptyEntries)
/// Splits `s` into its non-empty lines.
/// Recognises "\r\n", "\n" and "\r" line endings regardless of the platform's
/// Environment.NewLine, so text produced on another platform (or by tools that
/// always emit "\n") is split correctly. "\r\n" is listed first so it is consumed
/// as a single separator.
let splitLines s = split [|"\r\n"; "\n"; "\r"|] s
/// True when `s` starts with `value` (uses `String.StartsWith(string)`).
let startsWith (value : string) (s : string) : bool = s.StartsWith(value)
/// Substring of `s` beginning at index `start` with length `len`.
let sub start len (s : string) : string = s.Substring(start, len)
/// Joins the items of `xs` into one string separated by `sep`.
let join<'a> (sep : string) (xs : 'a seq) : string = String.Join(sep, xs)
@dburriss
Copy link
Author

dburriss commented Oct 17, 2020

Requirements: .NET Core 3.1 (latest patch) or higher
Usage: dotnet fsi gitory.fsx '/path/to/your/git/repository' './' '.fs,.cs'

  • dotnet - the dotnet CLI
  • fsi - fsharp interactive scripting
  • gitory.fsx - the FSharp script to execute
  • Path to your git repository
  • Path to a specific folder to look at in the repository, e.g. here it is the root
  • List of file extensions you want to be included in the scan, e.g. here it is F# and C# files

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment