Skip to content

Instantly share code, notes, and snippets.

@cchamberlain
Last active February 18, 2024 02:40
Show Gist options
  • Star 33 You must be signed in to star a gist
  • Fork 10 You must be signed in to fork a gist
  • Save cchamberlain/883959151aa1162e73f1 to your computer and use it in GitHub Desktop.
Save cchamberlain/883959151aa1162e73f1 to your computer and use it in GitHub Desktop.
Recursively diffs 2 directories (files and contents) using MD5 hashes - Includes validation for paths and optional summary export. Requires PowerShell 4.0 or later (the script relies on the Get-FileHash cmdlet, which was introduced in 4.0).
#########################################################################
### USAGE: rdiff path/to/left,path/to/right [-s path/to/summary/dir] ###
### ADD LOCATION OF THIS SCRIPT TO PATH ###
#########################################################################
# Script-level parameters. This block must stay the first statement of the script.
[CmdletBinding()]
param (
# Working directory at launch (defaults to $PWD); SafeExit switches back to it before exiting.
[parameter(HelpMessage="Stores the execution working directory.")]
[string]$ExecutionDirectory=$PWD,
# The directories to compare, given as a comma-separated pair (left,right).
# The program logic below rejects anything other than exactly two entries.
[parameter(Position=0,HelpMessage="Compare two directories recursively for differences.")]
[alias("c")]
[string[]]$Compare,
# Optional output directory; when supplied, summary.txt, left.csv and right.csv are written into it.
[parameter(HelpMessage="Export a summary to path.")]
[alias("s")]
[string]$ExportSummary
)
### FUNCTION DEFINITIONS ###
# SETS WORKING DIRECTORY FOR .NET #
function SetWorkDir($PathName, $TestPath) {
    # Resolve the target once, then move both the PowerShell location and the
    # .NET process working directory to it so that cmdlets and [System.IO]
    # calls agree on what "current directory" means.
    $Target = NormalizePath -PathName $PathName -TestPath $TestPath
    Set-Location -Path $Target
    [System.IO.Directory]::SetCurrentDirectory($Target)
}
# RESTORES THE EXECUTION WORKING DIRECTORY AND EXITS #
function SafeExit() {
    # Return to the directory the script was launched from (script parameter
    # $ExecutionDirectory), then terminate.
    SetWorkDir -PathName "/path/to/execution/directory" -TestPath $ExecutionDirectory
    exit
}
# Writes a message to the host, colored by severity, with optional blank-line
# padding, and optionally terminates the script afterwards.
#   -Message      Text to print (mandatory, position 0).
#   -s / -w / -e / -f   Success (green), warning (yellow), error (red),
#                       fatal (red on black). Each flag that is set prints the
#                       message once.
#   -i            Plain info output; this is the default when no severity flag is set.
#   -b / -a       Number of blank lines to print before / after the message.
#   -x            Call SafeExit after printing.
function Print {
    [CmdletBinding()]
    param (
        [parameter(Mandatory=$TRUE,Position=0,HelpMessage="Message to print.")]
        [string]$Message,
        [parameter(HelpMessage="Specifies a success.")]
        [alias("s")]
        [switch]$SuccessFlag,
        [parameter(HelpMessage="Specifies a warning.")]
        [alias("w")]
        [switch]$WarningFlag,
        [parameter(HelpMessage="Specifies an error.")]
        [alias("e")]
        [switch]$ErrorFlag,
        [parameter(HelpMessage="Specifies a fatal error.")]
        [alias("f")]
        [switch]$FatalFlag,
        [parameter(HelpMessage="Specifies a info message.")]
        [alias("i")]
        [switch]$InfoFlag = (!$SuccessFlag -and !$WarningFlag -and !$ErrorFlag -and !$FatalFlag),
        [parameter(HelpMessage="Specifies blank lines to print before.")]
        [alias("b")]
        [int]$LinesBefore=0,
        [parameter(HelpMessage="Specifies blank lines to print after.")]
        [alias("a")]
        [int]$LinesAfter=0,
        [parameter(HelpMessage="Specifies if program should exit.")]
        [alias("x")]
        [switch]$ExitAfter
    )
    PROCESS {
        # A counted loop prints exactly $LinesBefore lines. The range 0..N has
        # N+1 elements (and counts downwards for negative N), which printed one
        # line too many.
        for ($i = 0; $i -lt $LinesBefore; $i++) { Write-Host "" }

        if ($InfoFlag)    { Write-Host "$Message" }
        if ($SuccessFlag) { Write-Host "$Message" -ForegroundColor "Green" }
        # "Orange" is not a System.ConsoleColor member and made Write-Host fail
        # to bind -ForegroundColor; Yellow is the conventional warning color.
        if ($WarningFlag) { Write-Host "$Message" -ForegroundColor "Yellow" }
        if ($ErrorFlag)   { Write-Host "$Message" -ForegroundColor "Red" }
        if ($FatalFlag)   { Write-Host "$Message" -ForegroundColor "Red" -BackgroundColor "Black" }

        for ($i = 0; $i -lt $LinesAfter; $i++) { Write-Host "" }

        if ($ExitAfter) { SafeExit }
    }
}
# VALIDATES STRING MIGHT BE A PATH #
function ValidatePath($PathName, $TestPath) {
    # Only checks that a value was supplied at all; existence is not tested here.
    # $PathName is the label used in the error message.
    $IsBlank = [string]::IsNullOrWhiteSpace($TestPath)
    if ($IsBlank) {
        Print "$PathName is not a path" -FatalFlag -ExitAfter
    }
}
# NORMALIZES RELATIVE OR ABSOLUTE PATH TO ABSOLUTE PATH #
function NormalizePath($PathName, $TestPath) {
    # $PathName  Label used in the error message when $TestPath is blank.
    # $TestPath  Relative or absolute path; it does not have to exist.
    # Returns the absolute, normalized file-system path as a string.
    $Candidate = "$TestPath"
    ValidatePath "$PathName" $Candidate

    # Resolve through PowerShell's own path machinery rather than
    # Path.Combine((pwd).Path, ...). This interprets the input the way cmdlets
    # do: relative to the current PowerShell location, with PSDrive names and
    # provider-qualified paths (e.g. "Microsoft.PowerShell.Core\FileSystem::\\server\share",
    # which Resolve-Path produces for UNC paths) converted to a native path.
    # GetFullPath threw "The given path's format is not supported" on such input.
    $NativePath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($Candidate)

    # Final pass collapses any remaining "." / ".." segments.
    return [System.IO.Path]::GetFullPath($NativePath)
}
# VALIDATES STRING MIGHT BE A PATH AND RETURNS ABSOLUTE PATH #
function ResolvePath($PathName, $TestPath) {
    # Reject blank input (using $PathName as the label in the error message),
    # then hand back the absolute form. The target is not required to exist.
    ValidatePath "$PathName" "$TestPath"
    return (NormalizePath $PathName $TestPath)
}
# VALIDATES STRING RESOLVES TO A PATH AND RETURNS ABSOLUTE PATH #
function RequirePath($PathName, $TestPath, $PathType) {
    # $PathName  Label used in error messages.
    # $TestPath  Path that must already exist.
    # $PathType  Value for Test-Path -PathType (e.g. container, leaf).
    # Returns the resolved absolute path as a plain string; prints a fatal
    # message and exits when the path is blank or missing.
    ValidatePath $PathName $TestPath
    If(!(Test-Path $TestPath -PathType $PathType)) {
        Print -x -f "$PathName ($TestPath) does not exist as a $PathType"
    }
    # Return ProviderPath instead of the PathInfo object: for UNC paths the
    # PathInfo stringifies to "Microsoft.PowerShell.Core\FileSystem::\\server\...",
    # which downstream .NET path APIs reject.
    return (Resolve-Path $TestPath).ProviderPath
}
# Like mkdir -p -> creates a directory recursively if it doesn't exist #
function MakeDirP {
    [CmdletBinding()]
    param (
        [parameter(Mandatory=$TRUE,Position=0,HelpMessage="Path create.")]
        [string]$Path
    )
    PROCESS {
        # -Force is passed so an already-existing directory is accepted; the
        # resulting item is discarded so nothing is written to the pipeline.
        $null = New-Item -ItemType Directory -Path $Path -Force
    }
}
# GETS ALL FILES IN A PATH RECURSIVELY #
function GetFiles {
    [CmdletBinding()]
    param (
        [parameter(Mandatory=$TRUE,Position=0,HelpMessage="Path to get files for.")]
        [string]$Path
    )
    PROCESS {
        # Walk the whole tree under $Path and emit only non-container items.
        Get-ChildItem $Path -Recurse | Where-Object { -not $_.PSIsContainer }
    }
}
# GETS ALL FILES WITH CALCULATED HASH PROPERTY RELATIVE TO A ROOT DIRECTORY RECURSIVELY #
# RETURNS LIST OF @{RelativePath, Hash, FullName}
function GetFilesWithHash {
    [CmdletBinding()]
    param (
        [parameter(Mandatory=$TRUE,Position=0,HelpMessage="Path to get directories for.")]
        [string]$Path,
        [parameter(HelpMessage="The hash algorithm to use.")]
        [string]$Algorithm="MD5"
    )
    PROCESS {
        # Emits one object per file under $Path with:
        #   RelativePath - path relative to $Path (e.g. ".\sub\file.txt")
        #   Hash         - content hash computed with $Algorithm
        #   FullName     - absolute path of the file
        $OriginalPath = (Get-Location).ProviderPath

        # Make the root absolute BEFORE changing directory; otherwise a relative
        # $Path would be re-resolved against the new location and point at
        # "<root>\<root>".
        $Root = NormalizePath path/to/diff $Path
        SetWorkDir path/to/diff $Root
        try {
            # -LiteralPath keeps file names containing wildcard characters
            # such as [ and ] from being treated as patterns, which would
            # leave their hash / relative path empty.
            GetFiles $Root | Select-Object @{N="RelativePath";E={Resolve-Path -LiteralPath $_.FullName -Relative}},
                @{N="Hash";E={(Get-FileHash -LiteralPath $_.FullName -Algorithm $Algorithm).Hash}},
                FullName
        }
        finally {
            # Always restore the caller's working directory, even if
            # enumeration or hashing throws.
            SetWorkDir path/to/original $OriginalPath
        }
    }
}
# COMPARE TWO DIRECTORIES RECURSIVELY #
# RETURNS LIST OF @{RelativePath, Hash, FullName}
function DiffDirectories {
    [CmdletBinding()]
    param (
        [parameter(Mandatory=$TRUE,Position=0,HelpMessage="Directory to compare left.")]
        [alias("l")]
        [string]$LeftPath,
        [parameter(Mandatory=$TRUE,Position=1,HelpMessage="Directory to compare right.")]
        [alias("r")]
        [string]$RightPath
    )
    PROCESS {
        # Emits one object per differing file with RelativePath, Hash and
        # SideIndicator ("<=" only/different on the left, "=>" on the right).

        # @() guarantees an array even when a directory yields zero or one file.
        $LeftHash = @(GetFilesWithHash $LeftPath)
        $RightHash = @(GetFilesWithHash $RightPath)

        # Compare-Object refuses a null/empty -ReferenceObject or
        # -DifferenceObject, so an empty directory used to abort the whole
        # comparison. Handle those cases by hand: every file on the non-empty
        # side is a difference.
        if ($LeftHash.Count -eq 0 -and $RightHash.Count -eq 0) {
            return
        }
        if ($LeftHash.Count -eq 0) {
            $RightHash | Select-Object RelativePath,Hash,@{N="SideIndicator";E={"=>"}}
            return
        }
        if ($RightHash.Count -eq 0) {
            $LeftHash | Select-Object RelativePath,Hash,@{N="SideIndicator";E={"<="}}
            return
        }

        Compare-Object -ReferenceObject $LeftHash -DifferenceObject $RightHash -Property RelativePath,Hash
    }
}
### END FUNCTION DEFINITIONS ###
### PROGRAM LOGIC ###
# Exactly two directories are required.
if ($Compare.length -ne 2) {
    Print "Compare requires passing exactly 2 path parameters separated by comma, you passed $($Compare.length)." -FatalFlag -ExitAfter
}

Print "Comparing $($Compare[0]) to $($Compare[1])..." -LinesAfter 1

# Both sides must exist as directories before anything is hashed.
$LeftPath = RequirePath path/to/left $Compare[0] container
$RightPath = RequirePath path/to/right $Compare[1] container

# Split the combined diff into per-side lists using the side indicator.
$Diff = DiffDirectories $LeftPath $RightPath
$LeftDiff = $Diff | Where-Object { $_.SideIndicator -eq "<=" } | Select-Object RelativePath,Hash
$RightDiff = $Diff | Where-Object { $_.SideIndicator -eq "=>" } | Select-Object RelativePath,Hash

if ($ExportSummary) {
    # Make sure the output directory exists, then derive the three file names.
    $ExportSummary = ResolvePath path/to/summary/dir $ExportSummary
    MakeDirP $ExportSummary
    $SummaryPath = Join-Path $ExportSummary summary.txt
    $LeftCsvPath = Join-Path $ExportSummary left.csv
    $RightCsvPath = Join-Path $ExportSummary right.csv

    $LeftMeasure = $LeftDiff | Measure-Object
    $RightMeasure = $RightDiff | Measure-Object

    # Header lines first (overwriting any previous summary), then the diff table appended.
    $SummaryLines = @(
        "== DIFF SUMMARY ==",
        "",
        "-- DIRECTORIES --",
        "`tLEFT -> $LeftPath",
        "`tRIGHT -> $RightPath",
        "",
        "-- DIFF COUNT --",
        "`tLEFT -> $($LeftMeasure.Count)",
        "`tRIGHT -> $($RightMeasure.Count)",
        ""
    )
    $SummaryLines | Out-File $SummaryPath
    $Diff | Format-Table | Out-File $SummaryPath -Append

    $LeftDiff | Export-Csv $LeftCsvPath -Force
    $RightDiff | Export-Csv $RightCsvPath -Force
}

# Emit the diff to the pipeline, then restore the starting directory and exit.
$Diff
SafeExit
@AnnieBrownTX
Copy link

Hi,

I am a newbie, so please bear with me...

How do I use this? I have already copied thousands of folders from one directory to another, and merged many along the way. I would now like to check the new and old folders, to make sure that nothing was missed in the copy. I can't do it by file count, because a new folder may have had files from 50 folders placed in it. I would like to make sure the new folder has everything that is in each of the old folders, so I can delete the old folders. I would also like the results to be exported, so I can keep track of it all.

Please help! :-)
Annie

@cchamberlain
Copy link
Author

@AnnieBrownTX - I'm sure it's probably too late now, but I didn't receive any notification.

I wrote this to compare two directories, you basically just need to put it in a directory in your environment path or powershell profile scripts directory, or call it from a relative directory.

USAGE: rdiff path/to/left,path/to/right [-s path/to/summary/dir]

If you pass the -s path option, it will export a summary file and two csv's (left and right) that show files found that had differences on each side. It uses MD5 hashes of the files contents for the comparison so you know they are exact matches or if diffs were found.

@adanecito
Copy link

I am trying to use this but get the following error:
Any ideas how to fix? Or just a simple file name & date compare would be fine since it looks like this has an issue with some files.

Compare-Object : Cannot bind argument to parameter 'DifferenceObject' because it is null.
At C:\Users\tane461\test.ps1:183 char:55

  • ... diff -ReferenceObject $LeftHash -DifferenceObject $RightHash -Proper ...
  •                                                    ~~~~~~~~~~
    
    • CategoryInfo : InvalidData: (:) [Compare-Object], ParameterBindingValidationException
    • FullyQualifiedErrorId : ParameterArgumentValidationErrorNullNotAllowed,Microsoft.PowerShell.Commands.CompareObje
      ctCommand

@adanecito
Copy link

Ok the issue I ran into is if you have no files in the right side directory it gives this error. I was comparing the left side to the right side and the left side has 5 files and the right side directory has none. I also notice when I have one file in the right side directory it does not list it on the resulting summary.

@giulioturetta
Copy link

I guess this script only works as expected on PowerShell version 4.0 or later (not 3.0 as stated in description) so Get-FileHash cmdlet is supported otherwise hash value of files won't be computed.

@dotdoom
Copy link

dotdoom commented Apr 14, 2019

A PowerShell noob here. I have this script that does something similar. Of course it's not as powerful as the one in the gist. Used it on AppVeyor CI to detect where cache changes are coming from.

$LHS = Get-ChildItem –Path C:\path1 -Recurse | foreach {Get-FileHash –Path $_.FullName}
$RHS = Get-ChildItem –Path C:\path2 -Recurse | foreach {Get-FileHash –Path $_.FullName}
Compare-Object -ReferenceObject $LHS -DifferenceObject $RHS -Property hash -PassThru | Format-List

Output for one changed file (I think it may be fine tuned to print a nice table instead):

Algorithm     : SHA256
Hash          : A82DA24A8B9AE319CC8580A75EC655E5480CB45D4F316ADDCC870AB1372AB71D
Path          : C:\path1\bin\flutter
SideIndicator : =>

Algorithm     : SHA256
Hash          : 46D675B69D58B7B50C1C3214BAFDD69AD952701D7A2881971B7C7C7B45B58A72
Path          : C:\path2\bin\flutter
SideIndicator : <=

@jceddy
Copy link

jceddy commented Apr 19, 2019

When I pass in UNC paths to this, I get "The given path's format is not supported." error in GetFullPath(), but if I pass either of the paths to GetFullPath() directly, it works fine...any idea what the issue could be?

Here is an example:

PS C:\Users\UserG> c:\temp\rdiff.ps1 \\machine.net.path.local\D\Staging\test\dev\ApplicationEF\TS,\\machine.net.path.local\D\webVisionCore\root\test\dev\ApplicationEF
Comparing \\machine.net.path.local\D\Staging\test\dev\ApplicationEF\TS to \\machine.net.path.local\D\webVisionCore\root\test\dev\ApplicationEF...

Exception calling "GetFullPath" with "1" argument(s): "The given path's format is not supported."
At C:\temp\rdiff.ps1:99 char:3
+   $NormalizedPath = [System.IO.Path]::GetFullPath($TestPath)
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    + CategoryInfo          : NotSpecified: (:) [], MethodInvocationException
    + FullyQualifiedErrorId : NotSupportedException

Set-Location : Cannot process argument because the value of argument "path" is null. Change the value of argument
"path" to a non-null value.
At C:\temp\rdiff.ps1:24 char:3
+   Set-Location $AbsPath
+   ~~~~~~~~~~~~~~~~~~~~~
    + CategoryInfo          : InvalidArgument: (:) [Set-Location], PSArgumentNullException
    + FullyQualifiedErrorId : ArgumentNull,Microsoft.PowerShell.Commands.SetLocationCommand

Exception calling "SetCurrentDirectory" with "1" argument(s): "Path cannot be the empty string or all whitespace."
At C:\temp\rdiff.ps1:25 char:3
+   [System.IO.Directory]::SetCurrentDirectory($AbsPath)
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    + CategoryInfo          : NotSpecified: (:) [], MethodInvocationException
    + FullyQualifiedErrorId : ArgumentException

PS C:\Users\UserG> [System.IO.Path]::GetFullPath("\\machine.net.path.local\D\Staging\test\dev\ApplicationEF\TS")
\\machine.net.path.local\D\Staging\test\dev\ApplicationEF\TS
PS C:\Users\UserG> [System.IO.Path]::GetFullPath("\\machine.net.path.local\D\webVisionCore\root\test\dev\ApplicationEF")
\\machine.net.path.local\D\webVisionCore\root\test\dev\ApplicationEF
PS C:\Users\UserG>

Edit: Ah, the issue seems to be that Resolve-Path sticks "Microsoft.PowerShell.Core\FileSystem::" on the front of UNC paths.

@viktor
Copy link

viktor commented Jan 9, 2020

Pretty good script. (y)

@jeejee123
Copy link

This is really nice Powershell script. Good work!

@trobinson794
Copy link

Very helpful, thanks for posting this!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment