Skip to content

Instantly share code, notes, and snippets.

@paulirwin
Last active August 29, 2015 14:18
Show Gist options
  • Save paulirwin/aaf2da7983cc31a54576 to your computer and use it in GitHub Desktop.
Save paulirwin/aaf2da7983cc31a54576 to your computer and use it in GitHub Desktop.
Git repo most modified files analyzer
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace LensLib
{
/// <summary>
/// Console entry point: prints the files of a git repository branch that were
/// modified by the most commits.
/// </summary>
class Program
{
    // Fallbacks used when no command-line arguments are supplied; these are the
    // values the program originally had hard-coded.
    private const string DefaultRepoPath = "...path to git repo...";
    private const string DefaultBranchName = "master";

    // Number of entries printed from the top of the ranking.
    private const int TopCount = 50;

    /// <summary>
    /// Runs the analysis and writes the ranking to the console.
    /// </summary>
    /// <param name="args">
    /// Optional: <c>args[0]</c> is the repository path and <c>args[1]</c> is the
    /// branch name. Missing arguments fall back to the built-in defaults.
    /// </param>
    static void Main(string[] args)
    {
        var repoPath = args.Length > 0 ? args[0] : DefaultRepoPath;
        var branchName = args.Length > 1 ? args[1] : DefaultBranchName;

        var analyzer = new RepoAnalyzer(repoPath, branchName);
        var mostChangedFiles = analyzer.GetFilesSortedByMostChanges();

        Console.WriteLine("Top {0} files with most changes:", TopCount);
        foreach (var file in mostChangedFiles.Take(TopCount))
        {
            Console.WriteLine(file);
        }

        // Keep the console window open until a key is pressed.
        Console.ReadKey();
    }
}
}
using LibGit2Sharp;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace LensLib
{
/// <summary>
/// Ranks the files of one branch of a git repository by the number of
/// consecutive-commit diffs in which they appear as modified.
/// </summary>
public class RepoAnalyzer
{
    private readonly string _repoPath;
    private readonly string _branchName;

    // Path suffixes to exclude from the ranking. They are matched as plain
    // suffixes rather than whole extensions, so "proj" also excludes paths
    // ending in "csproj", "vbproj", and so on.
    private static readonly string[] _ignoreExtensions = new[] { "proj", "config", "sln", "scmp", "refactorlog" };

    /// <summary>
    /// Creates an analyzer for the given repository and branch.
    /// </summary>
    /// <param name="repoPath">Path handed to the LibGit2Sharp <c>Repository</c> constructor.</param>
    /// <param name="branchName">Name of the branch whose commits are analyzed.</param>
    public RepoAnalyzer(string repoPath, string branchName)
    {
        _repoPath = repoPath;
        _branchName = branchName;
    }

    /// <summary>
    /// Diffs every pair of consecutive commits on the branch and counts, per file
    /// path, how many of those diffs list the file as modified.
    /// </summary>
    /// <returns>
    /// The modified file paths, most frequently modified first. Paths are grouped
    /// case-insensitively, and paths ending in one of the ignored suffixes are
    /// excluded. The result is fully materialized before the repository is
    /// disposed, so it can be enumerated repeatedly without recomputation.
    /// </returns>
    public IEnumerable<string> GetFilesSortedByMostChanges()
    {
        using (var repo = new Repository(_repoPath))
        {
            // NOTE(review): the diffs below are computed from several threads against a
            // single Repository instance — confirm LibGit2Sharp supports that usage.
            var treeChanges = repo.Branches
                .Where(branch => branch.Name == _branchName)
                .SelectMany(branch => branch.Commits.Zip(branch.Commits.Skip(1), Tuple.Create))
                .AsParallel()
                .Select(pair => repo.Diff.Compare<TreeChanges>(pair.Item1.Tree, pair.Item2.Tree))
                .ToList(); // force every diff to be computed while the repository is still open

            return treeChanges
                .AsParallel()
                .SelectMany(changes => changes.Modified)
                .Select(change => change.Path)
                .Where(path => !HasIgnoredSuffix(path))
                .GroupBy(path => path, StringComparer.OrdinalIgnoreCase)
                .Select(group => new { group.Key, Count = group.Count() })
                .OrderByDescending(entry => entry.Count)
                .Select(entry => entry.Key)
                .ToList(); // materialize so nothing is evaluated lazily after the repository is disposed
        }
    }

    // True when the path ends with one of the ignored suffixes. The comparison is
    // ordinal and case-insensitive so it agrees with the case-insensitive grouping
    // of paths and does not vary with the current culture.
    private static bool HasIgnoredSuffix(string path)
    {
        return _ignoreExtensions.Any(suffix => path.EndsWith(suffix, StringComparison.OrdinalIgnoreCase));
    }
}
}
namespace LensLib.Functional
open FSharp.Collections.ParallelSeq
open LibGit2Sharp
/// Functions that analyze which files of a git branch are modified most often.
[<AutoOpen>]
module public RepoAnalyzer =

    /// Path suffixes to exclude. They are matched as plain suffixes rather than
    /// whole extensions, so "proj" also excludes paths ending in "csproj" etc.
    let private ignoreExtensions = seq [ "proj"; "config"; "sln"; "scmp"; "refactorlog" ]

    /// True when the path ends with one of the ignored suffixes. The comparison is
    /// ordinal and case-insensitive so it does not vary with the current culture.
    let private isIgnored (path: string) =
        ignoreExtensions
        |> Seq.exists (fun bad -> path.EndsWith(bad, System.StringComparison.OrdinalIgnoreCase))

    /// Diffs every pair of consecutive commits on the named branch and counts, per
    /// file path, how many of those diffs list the file as modified.
    /// Returns (path, count) pairs sorted by descending count.
    let GetFilesSortedByMostCommitChanges (repo:Repository) branchName =
        repo.Branches
        // Branch selection and commit enumeration stay sequential: Seq.pairwise
        // depends on the commits arriving in commit-log order, which the parallel
        // operators do not promise to preserve.
        |> Seq.filter (fun branch -> branch.Name = branchName)
        |> Seq.collect (fun branch -> branch.Commits)
        |> Seq.pairwise
        // NOTE(review): diffs are computed from several threads against a single
        // Repository instance — confirm LibGit2Sharp supports that usage.
        |> PSeq.map (fun (prev, next) -> repo.Diff.Compare<TreeChanges>(prev.Tree, next.Tree))
        |> PSeq.collect (fun diff -> diff.Modified)
        |> PSeq.filter (fun change -> not (isIgnored change.Path))
        |> PSeq.countBy (fun change -> change.Path)
        |> PSeq.sortBy (fun (_, count) -> -count)

    /// For every pair of consecutive commits on the named branch, yields each
    /// modified, non-ignored file together with the patch text between its old and
    /// new blob. Changes whose old or new object cannot be looked up are skipped.
    /// The result is a lazy sequence, so the repository must still be open while
    /// it is enumerated.
    let GetDiffs (repo:Repository) branchName =
        repo.Branches
        |> Seq.filter (fun branch -> branch.Name = branchName)
        |> Seq.collect (fun branch -> branch.Commits)
        |> Seq.pairwise
        |> Seq.map (fun (prev, next) -> repo.Diff.Compare<TreeChanges>(prev.Tree, next.Tree))
        |> Seq.collect (fun diff -> diff.Modified)
        |> Seq.filter (fun change -> not (isIgnored change.Path))
        |> Seq.map (fun change ->
            let newobj = repo.Lookup change.Oid :?> Blob
            let oldobj = repo.Lookup change.OldOid :?> Blob
            (change, newobj, oldobj))
        |> Seq.filter (fun (_, newobj, oldobj) -> newobj <> null && oldobj <> null)
        |> Seq.map (fun (change, newobj, oldobj) -> (change, repo.Diff.Compare(oldobj, newobj)))
        |> Seq.map (fun (change, changes) -> (change, changes.Patch))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment