Skip to content

Instantly share code, notes, and snippets.

@thnk2wn
Created May 12, 2019 01:18
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save thnk2wn/39b27cfe2a8719bcecca13725a086dd2 to your computer and use it in GitHub Desktop.
Save thnk2wn/39b27cfe2a8719bcecca13725a086dd2 to your computer and use it in GitHub Desktop.
<#
.SYNOPSIS
Runs `git grep` for a list of search terms across every git repository found under a root folder.

.DESCRIPTION
Scans the repos root (up to two levels deep) for .git folders, then searches each
repository for every line in the search strings file. Per-repo matches, a transcript
and found / not-found CSV summaries are written to an "output\<search file name>"
folder next to this script.

.PARAMETER reposRoot
Existing folder that contains the git repositories to search.

.PARAMETER searchStringsFile
Existing file holding the search terms, one per line. Its name (without extension)
is used to name the output folder and the result files.

.PARAMETER pathSpec
Optional git pathspec text appended after "--" on the git grep command line.
Double quotes are stripped from the value before use.

.PARAMETER excludedRepos
Optional repository folder names to leave out of the search.

.PARAMETER threads
Value passed to git grep's --threads option. Defaults to 10.
#>
[CmdletBinding()]
param(
    [Parameter(Mandatory = $true)]
    [ValidateScript({
        # Separate checks so the caller is told which expectation was not met.
        if (-not ($_ | Test-Path)) {
            throw "repos root path does not exist"
        }
        elseif (-not ($_ | Test-Path -PathType Container)) {
            throw "Repos root must be a folder"
        }
        $true
    })]
    [System.IO.DirectoryInfo]$reposRoot,

    [Parameter(Mandatory = $true)]
    [ValidateScript({
        # Separate checks so the caller is told which expectation was not met.
        if (-not ($_ | Test-Path)) {
            throw "search strings path does not exist"
        }
        elseif (-not ($_ | Test-Path -PathType Leaf)) {
            throw "Search strings path must be a filename"
        }
        $true
    })]
    [System.IO.FileInfo]$searchStringsFile,

    [Parameter(Mandatory = $false)]
    [String]$pathSpec,

    [Parameter(Mandatory = $false)]
    [String[]]$excludedRepos = @(),

    [Parameter(Mandatory = $false)]
    [int]$threads = 10
)
# --- Output folder and transcript setup -------------------------------------

# Drop literal double quotes that may have been passed along with the pathspec.
$pathSpec = $pathSpec.Replace("`"", "")

# Name the output folder and files after the search strings file, minus its
# extension. GetFileNameWithoutExtension only strips the trailing extension and
# copes with extension-less files; String.Replace with an empty extension throws
# and would also remove the extension text from the middle of a name.
$searchFileInfo = Get-Item $searchStringsFile
$searchStringName = [System.IO.Path]::GetFileNameWithoutExtension($searchFileInfo.Name)

# Nested Join-Path avoids hard-coding a path separator.
$outputDir = Join-Path (Join-Path $PSScriptRoot "output") $searchStringName

if (Test-Path $outputDir) {
    # Clear results of a previous run. Items are piped as objects so they are
    # removed by their literal path, without changing the current location.
    # Failures (e.g. a file that is still locked) are deliberately ignored.
    Get-ChildItem -LiteralPath $outputDir |
        Remove-Item -Force -ErrorAction SilentlyContinue
}
else {
    New-Item -Path $outputDir -ItemType Directory | Out-Null
}

# Record everything the script prints in a log file inside the output folder.
$transcriptFile = Join-Path $outputDir "$searchStringName.log"
try {
    Start-Transcript $transcriptFile -ErrorAction Stop
}
catch {
    # NOTE(review): the first attempt is retried once without -ErrorAction Stop;
    # presumably a workaround for a host where the first call fails - confirm.
    Start-Transcript $transcriptFile
}
# --- Preprocessing: load search terms and locate repositories ----------------

Write-Progress -Activity "Preprocessing" -Status "Reading search strings in $searchStringsFile"
# One search term per line. Blank lines are skipped because an empty term is not
# a usable grep pattern. @() keeps the result an array even for a single line.
$searchStrings = @(Get-Content -Path $searchStringsFile | Where-Object { $_.Trim() })

Write-Progress -Activity "Preprocessing" -Status "Scanning repos"
# The location stays pushed for the rest of the script; the matching Pop-Location
# is the last location change at the end of the script.
Push-Location $reposRoot

# A repository is any folder that contains a ".git" directory, looked for up to
# two levels below the root. Matching on the directory name (instead of the
# Windows-only hidden flag and a "\.git" path suffix) works with either path
# separator and with .git folders that are not flagged hidden.
# @() guarantees an array: with exactly one repo the pipeline would otherwise
# yield a bare string, whose .Length is its character count, not a repo count.
$repoDirs = @(
    Get-ChildItem -Directory -Recurse -Depth 2 -Force |
        Where-Object { $_.Name -eq '.git' } |
        ForEach-Object { $_.Parent } |
        Where-Object { $excludedRepos -notcontains $_.Name } |
        ForEach-Object { $_.FullName }
)
Write-Progress -Activity "Preprocessing" -Completed

Write-Progress -Id 1 -Activity "Searching $($repoDirs.Count) repos for $($searchStrings.Count) terms" `
    -Status "Walking repo directories and running git grep"

# Times the whole search; read when the summary is printed.
$sw = [Diagnostics.Stopwatch]::StartNew()
# --- Search every repository for every term ----------------------------------

# Search term -> number of repositories in which git grep produced output.
$found = @{}
$repoIndex = 0

# @() makes the counts correct even when the variable holds a single string
# (a string's .Length would be its character count).
$repoCount = @($repoDirs).Count
$termCount = @($searchStrings).Count

# Turn the pathspec text into separate git arguments. A pathspec such as
# '*.cs' '*.config' held in one string variable reaches git as a single
# argument, so it is split here: single-quoted chunks are kept whole (without
# the quotes) and everything else is split on whitespace.
$pathSpecArgs = @()
if ($pathSpec) {
    $pathSpecArgs = @(
        [regex]::Matches($pathSpec, "'([^']*)'|(\S+)") |
            ForEach-Object {
                if ($_.Groups[1].Success) { $_.Groups[1].Value } else { $_.Groups[2].Value }
            } |
            Where-Object { $_ }
    )
}

# Arguments shared by every git grep call.
$gitBaseArgs = @(
    '--no-pager', 'grep', '-n', '-i', '--break', '--heading',
    '-A', '0', '-B', '0', '--threads', "$threads"
)

foreach ($repoDir in $repoDirs) {
    $repoShortName = (Get-Item -LiteralPath $repoDir).Name
    $repoIndex++

    Write-Progress -Id 2 -ParentId 1 -Activity "Searching repo $repoIndex of $repoCount $repoDir" `
        -Status "Searching $repoDir" `
        -PercentComplete ($repoIndex / $repoCount * 100)

    # All matches for this repository are appended to one text file.
    $repoMatchesFilename = Join-Path $outputDir "$repoShortName.txt"

    # Banner line separating repositories in the console / transcript output.
    "`n$repoDir ".PadRight(100, '*')

    Push-Location $repoDir
    try {
        $searchStringIndex = 0
        foreach ($searchFor in $searchStrings) {
            Write-Progress -Id 3 -ParentId 2 -Activity "Searching $termCount terms" `
                -Status "Searching for $searchFor" `
                -PercentComplete ($searchStringIndex / $termCount * 100)
            $searchStringIndex++

            # Terms are passed to git as data, never evaluated as PowerShell.
            # A term wrapped in matching quotes is unwrapped so files that quoted
            # multi-word terms keep searching for the text between the quotes.
            $pattern = $searchFor
            if ($pattern -match '^"(.*)"$' -or $pattern -match "^'(.*)'$") {
                $pattern = $Matches[1]
            }
            if ([string]::IsNullOrWhiteSpace($pattern)) {
                # An empty pattern would match every line; skip it.
                continue
            }

            # -e marks the next argument as the pattern even if it starts with "-".
            $gitArgs = $gitBaseArgs + @('-e', $pattern)
            if ($pathSpecArgs.Count -gt 0) {
                $gitArgs += @('--') + $pathSpecArgs
            }
            $output = & git @gitArgs

            if ($output) {
                # [int]$null is 0, so the first hit for a term stores 1.
                $found[$searchFor] = 1 + [int]$found[$searchFor]
                Add-Content -LiteralPath $repoMatchesFilename -Value $output

                # Echo the matches one per line; interpolating the array directly
                # would join every line with a space.
                $matchText = (@($output) -join "`n").Trim()
                "$matchText`n"
            }
        }
    }
    finally {
        # Always leave the repository folder, even if a search step failed.
        Pop-Location
    }
}
# --- Summary: timings plus found / not-found CSV reports ---------------------

$sw.Stop()
"Total search time: $($sw.Elapsed)"

# Terms that matched somewhere, with the number of repositories they matched in.
# Sorted once and reused for both the console table and the CSV file.
$foundSorted = @($found.GetEnumerator() | Sort-Object Name)
$foundSorted | Format-Table -AutoSize

$csvFilename = Join-Path $outputDir "$searchStringName-found.csv"
$foundSorted | Export-Csv $csvFilename -NoTypeInformation
"Found results written to $csvFilename"

# Terms that never produced a match in any repository.
$notFoundStrings = $searchStrings |
    Where-Object { -not $found.ContainsKey($_) } |
    Sort-Object |
    ForEach-Object { [pscustomobject]@{ SearchTerm = $_ } }

$csvFilename = Join-Path $outputDir "$searchStringName-not-found.csv"
$notFoundStrings | Export-Csv $csvFilename -NoTypeInformation
"Not found results written to $csvFilename"

# Undo the Push-Location into the repos root done before the search.
Pop-Location

# Close the transcript opened at the start so it does not keep recording the
# session (and holding the log file) after the script ends. Best effort: if no
# transcript is active, stopping it raises an error that is safe to ignore.
try {
    Stop-Transcript
}
catch {
    Write-Verbose "No transcript to stop: $($_.Exception.Message)"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment