Skip to content

Instantly share code, notes, and snippets.

@ecapuano
Last active December 19, 2024 02:36
Show Gist options
  • Save ecapuano/ac7c8b37e4723f872e6f56076e8e694c to your computer and use it in GitHub Desktop.
Save ecapuano/ac7c8b37e4723f872e6f56076e8e694c to your computer and use it in GitHub Desktop.
interactive tutorial to better understand the difference between static hashes and fuzzy hashes
#################################
# Script: HashMorpher.ps1
# Author: Eric Capuano
# Source: https://blog.ecapuano.com/p/the-role-of-fuzzy-hashes-in-security
# Social: https://bsky.app/profile/eric.zip
######### What is this? #########
#
# This script is an interactive tutorial to better understand the difference
# between static hashes and fuzzy hashes
#
######### What does it do? ######
#
# It does absolutely nothing sketchy or harmful. It does, however, download an open source
# fuzzy hash tool called "ssdeep" to the directory it is run from.
# Learn more: https://github.com/ssdeep-project/ssdeep
#
# The script makes 9 copies of itself, slightly modifying each copy with a random GUID,
# then compares the similarities of the copies via hashing, revealing the difference between
# static hashes and fuzzy hashes.
#
#################################
# Path of the running script; the copies created later are cloned from this file
$scriptPath = $MyInvocation.MyCommand.Path
# Output files produced by the tutorial (relative to the current directory)
$csvPath = "all-hashes.csv"
$ssdeepHashFile = "ssdeep-hashes.txt"
# Where the ssdeep executable lives, the archive it ships in, and where to fetch it
$ssdeepUri = "https://github.com/ssdeep-project/ssdeep/releases/download/release-2.14.1/ssdeep-2.14.1-win32-binary.zip"
$ssdeepZip = ".\ssdeep-2.14.1-win32-binary.zip"
$ssdeepPath = ".\ssdeep-2.14.1\ssdeep.exe"
# Cleanup: delete the output files of a previous run, if they exist
foreach ($staleFile in @($csvPath, $ssdeepHashFile)) {
    if (Test-Path $staleFile) {
        Remove-Item -Path $staleFile -Force
    }
}
# Cleanup: delete the script copies generated by a previous run
Get-ChildItem -Path . -Filter "HashMorpher_Copy*" -File | Remove-Item -Force
# End of cleanup
# Compute the static (cryptographic) hashes of a file.
#   filePath - path of the file to hash
# Emits one object per algorithm (MD5, SHA1, SHA256, in that order), each with
# an Algorithm property and a Hash property.
function Get-Hashes {
    param (
        [string]$filePath
    )
    foreach ($algorithm in "MD5", "SHA1", "SHA256") {
        $digest = Get-FileHash -Path $filePath -Algorithm $algorithm
        # Emitting the object here adds it to the function's output
        [PSCustomObject]@{
            Algorithm = $algorithm
            Hash      = $digest.Hash
        }
    }
}
# Compute the SSDEEP (fuzzy) hash of a file by invoking the ssdeep executable.
#   filePath - path of the file to hash
# Returns ssdeep's output with the "ssdeep," header line removed and the
# remaining lines concatenated. Writes an error and returns nothing when the
# executable is missing or exits with a non-zero code.
function Get-SSDeepHash {
    param (
        [string]$filePath
    )
    $toolPresent = Test-Path $ssdeepPath
    if (-Not $toolPresent) {
        Write-Error "SSDEEP executable not found at $ssdeepPath."
        return
    }
    # -s is passed to ssdeep; stderr is merged into the captured output
    $rawOutput = & $ssdeepPath -s $filePath 2>&1
    if ($LASTEXITCODE -ne 0) {
        Write-Error "SSDEEP encountered an error: $rawOutput"
        return
    }
    # Keep every line except the header line that starts with "ssdeep,"
    $keptLines = foreach ($line in ($rawOutput -split "`n")) {
        if ($line -notmatch "^ssdeep,") {
            $line
        }
    }
    return $keptLines -join ""
}
# Record all hashes (MD5, SHA1, SHA256 and SSDEEP) of one file as a row in a CSV file.
#   filePath  - file whose hashes are recorded
#   csvPath   - CSV file to create, or to append to when it already exists
#   iteration - number stored in the row's Iteration column
function Log-HashesToCSV {
    param (
        [string]$filePath,
        [string]$csvPath,
        [int]$iteration
    )
    # Index the static hashes by algorithm name for direct lookup
    $digestByAlgorithm = @{}
    foreach ($entry in (Get-Hashes -filePath $filePath)) {
        $digestByAlgorithm[$entry.Algorithm] = $entry.Hash
    }
    $fuzzyHash = Get-SSDeepHash -filePath $filePath
    $row = [PSCustomObject]@{
        Iteration = $iteration
        MD5       = $digestByAlgorithm["MD5"]
        SHA1      = $digestByAlgorithm["SHA1"]
        SHA256    = $digestByAlgorithm["SHA256"]
        SSDEEP    = $fuzzyHash
    }
    # An existing file is appended to; otherwise a new file is written
    $exportArgs = @{
        Path              = $csvPath
        NoTypeInformation = $true
    }
    if (Test-Path $csvPath) {
        $exportArgs["Append"] = $true
    } else {
        $exportArgs["Force"] = $true
    }
    $row | Export-Csv @exportArgs
}
# Append a comment line containing a freshly generated GUID to the end of a file,
# so that each copy of the script differs slightly from the others.
#   filePath - file to modify in place
function Append-GUID {
    param (
        [string]$filePath
    )
    # Comment line carrying a new random GUID
    $guidLine = "#" + [guid]::NewGuid().ToString()
    # Existing content, split into lines, followed by the GUID line
    $allLines = @((Get-Content -Path $filePath -Raw) -split "`n") + $guidLine
    # Re-join with the same separator and overwrite the file
    Set-Content -Path $filePath -Value ($allLines -join "`n")
}
# Ensure SSDEEP is available: download and extract the release archive when the
# executable is not already present. The script stops with a clear error if the
# download or extraction fails, instead of reporting success and failing later.
if (-Not (Test-Path $ssdeepPath)) {
    Write-Output "SSDEEP not found. Downloading and extracting..."
    try {
        # Older Windows PowerShell hosts may not enable TLS 1.2 by default, which can
        # make HTTPS downloads fail; add it to the enabled protocols.
        [Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12
        # -UseBasicParsing avoids relying on the Internet Explorer engine in Windows PowerShell.
        # -ErrorAction Stop turns download/extraction failures into catchable errors.
        Invoke-WebRequest -Uri $ssdeepUri -OutFile $ssdeepZip -UseBasicParsing -ErrorAction Stop
        Expand-Archive -Path $ssdeepZip -DestinationPath .\ -Force -ErrorAction Stop
    } catch {
        throw "Failed to download or extract SSDEEP from ${ssdeepUri}: $($_.Exception.Message)"
    } finally {
        # Remove the archive whether or not extraction succeeded
        if (Test-Path $ssdeepZip) {
            Remove-Item $ssdeepZip
        }
    }
    # Only report success when the expected executable actually exists
    if (-Not (Test-Path $ssdeepPath)) {
        throw "SSDEEP archive was extracted but the executable was not found at $ssdeepPath."
    }
    Write-Output "SSDEEP installed successfully."
}
# Record the SSDEEP hash of a file in a plain-text hash list.
#   filePath     - file to hash
#   hashFilePath - hash list to create (with a header line) or append to
# Writes an error and returns without touching the list when no hash is produced.
function Log-SSDeepHashToFile {
    param (
        [string]$filePath,
        [string]$hashFilePath
    )
    $fuzzyHash = Get-SSDeepHash -filePath $filePath
    if (-Not $fuzzyHash) {
        Write-Error "Failed to generate SSDEEP hash for $filePath."
        return
    }
    # Strip surrounding whitespace before writing the hash line
    $fuzzyHash = $fuzzyHash.Trim()
    if (Test-Path $hashFilePath) {
        # The list already exists: add the new hash as another line
        $fuzzyHash | Out-File -FilePath $hashFilePath -Encoding ASCII -Append
        return
    }
    # First entry: start the list with the header line followed by the hash
    $headerLine = "ssdeep,1.1--blocksize:hash:hash,filename"
    @($headerLine, $fuzzyHash) | Out-File -FilePath $hashFilePath -Encoding ASCII
}
# Run ssdeep in matching mode (-l -m) for a target file against a hash list,
# after echoing the equivalent command line to the host.
#   hashFilePath - file of known SSDEEP hashes passed to -m
#   targetFile   - file to compare against those hashes
function Run-SSDeepMatching {
    param (
        [string]$hashFilePath,
        [string]$targetFile
    )
    $toolMissing = -Not (Test-Path $ssdeepPath)
    if ($toolMissing) {
        Write-Error "SSDEEP executable not found at $ssdeepPath."
        return
    }
    # Show the user the command that is about to run (display only)
    $displayCommand = '"{0}" -l -m "{1}" "{2}"' -f $ssdeepPath, $hashFilePath, $targetFile
    Write-Host "Running: $displayCommand"
    Write-Host
    # Invoke ssdeep directly with the same arguments
    & $ssdeepPath -l -m $hashFilePath $targetFile
}
# Record the hashes of the unmodified script as iteration 0
Log-HashesToCSV -filePath $scriptPath -csvPath $csvPath -iteration 0
Clear-Host
Write-Host "Hello! This script will demonstrate the difference between static and fuzzy hashes to identify similarties in different files." -ForegroundColor Yellow
Write-Host
Write-Host "The script will copy itself 9 times, modifying each copy slightly, then allow you to compare the various hashes of each file." -ForegroundColor Yellow
Read-Host "Press Enter to continue"
# Produce copies 1 through 9: clone the script, append a GUID, then record the
# copy's hashes in the CSV and its SSDEEP hash in the hash list
Write-Host "Copying this script 9 times, slightly modifying each copy for comparison." -ForegroundColor Yellow
foreach ($copyNumber in 1..9) {
    $copyPath = "HashMorpher_Copy$copyNumber.ps1"
    Copy-Item -Path $scriptPath -Destination $copyPath -Force
    Append-GUID -filePath $copyPath
    Log-HashesToCSV -filePath $copyPath -csvPath $csvPath -iteration $copyNumber
    Log-SSDeepHashToFile -filePath $copyPath -hashFilePath $ssdeepHashFile
    Write-Output "Created $copyPath"
}
# Part 1: show the static hashes recorded for the original and all copies
Write-Host
Write-Host "To see all 10 of the static hashes, press Enter" -ForegroundColor Yellow
Read-Host "Press Enter to continue"
Clear-Host
Write-Host
Write-Host "Iteration '0' is the original script, the rest are the copied & modified versions." -ForegroundColor Yellow
Write-Host "Remember, these are the conventional 'static' hashes." -ForegroundColor Yellow
# Read the rows back through $csvPath, the same variable they were written with,
# rather than a hard-coded file name that could drift from the configured path
Import-Csv -Path $csvPath | Select-Object "Iteration","MD5","SHA256","SHA1" | Format-Table -AutoSize
Write-Host
Write-Host "Notice how the static hashes change entirely between iterations?" -ForegroundColor Yellow
Write-Host "Even the slightest change to the file generates a radically new set of hashes." -ForegroundColor Yellow
# Part 2: show the fuzzy hash next to the MD5 hash for each iteration
Write-Host
Write-Host "To see all 10 of the SSDEEP (fuzzy) hashes alongside the MD5 hashes, press Enter" -ForegroundColor Yellow
Read-Host "Press Enter to continue"
Clear-Host
Write-Host
Write-Host "Iteration '0' is the original script, the rest are the copied & modified versions." -ForegroundColor Yellow
Write-Host "Compare the two hashes for each version of the file." -ForegroundColor Yellow
# The stored SSDEEP value is cut at the first comma, so only the part before it is shown
Import-Csv -Path $csvPath |
    Select-Object "Iteration", "MD5", @{Name="SSDEEP"; Expression={($_.SSDEEP -split ',')[0]}} |
    Format-Table -AutoSize
Write-Host
Write-Host "Notice how similar the SSDEEP hashes are, despite the radical differences between the MD5 hashes?" -ForegroundColor Yellow
Write-Host "Now you see how fuzzy hashing is better for finding similar files!" -ForegroundColor Yellow
Write-Host
Write-Host "Let's take it a step further..." -ForegroundColor Yellow
# Final step: ask SSDEEP to compare the original script against the recorded
# hashes of the 9 modified copies, then explain the result
Write-Host
Write-Host "To ask SSDEEP to compare the original script to the 9 modified versions to identify likeness, press Enter" -ForegroundColor Yellow
Read-Host "Press Enter to continue"
Clear-Host
Run-SSDeepMatching -hashFilePath $ssdeepHashFile -targetFile $scriptPath
Write-Host
# Closing messages; an empty entry stands for a blank line
$closingRemarks = @(
    "Above, you can see how closely all 9 copies of the file match the original according to SSDEEP.",
    "The number in parenthesis (##) indicates the percentage match between each copy and the original.",
    "",
    "Notice how SSDEEP identifies that all of the copies are extremely similar to the original?",
    "Pretty cool, huh? :)"
)
foreach ($remark in $closingRemarks) {
    if ($remark) {
        Write-Host $remark -ForegroundColor Yellow
    } else {
        Write-Host
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment