Skip to content

Instantly share code, notes, and snippets.

@pr3sidentspence
Last active December 3, 2018 21:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pr3sidentspence/c0fc5689ab48e385e0174ac96d8a1ec6 to your computer and use it in GitHub Desktop.
Save pr3sidentspence/c0fc5689ab48e385e0174ac96d8a1ec6 to your computer and use it in GitHub Desktop.
This powershell script uses ImageMagick to find gaps between documents arranged vertically in a scanned image and crops out those images into individual files. Changed to fix BermanID output (no dash after B[0-9][A-Z]).
# A PowerShell script to use ImageMagick to find documents
# arranged vertically in scanned images
# and crop out each into separate images.
#
# Usage: ./findGapsBetweenScans.ps1 [-extension abc] [-threshold float ][-nosource] [-nodelete]
#
# Will analyze all images with extension abc (default is bmp)
# in current folder.
#
# If the -nodelete switch is used then the script will skip deleting the temp files;
# useful when testing, as the rowDev function can take a while, especially
# in a folder with many large source images.
# if -nosource switch is used then script will skip analyzing source images
# and just work from any .txt files in temp folder. Use after a -nodelete.
# Using -nosource switch will automatically activate -nodelete.
# If -threshold is provided, it changes what standard deviation of the pixels in a line
# is considered uniform enough for the line to count as part of a gap.
# **** Note this version tries to extract BermanIDs from the names.
# Script parameters. All are optional.
param (
# File extension (without the dot) of the scans to analyze; used as the
# Get-ChildItem filter and to rebuild the source image name when cropping.
[string]$extension = "bmp",
# A row whose standard deviation is below this value counts as part of a gap.
[float]$threshold = 0.1,
# When set, the temp .txt files (and the temp folder) are not deleted.
[switch]$nodelete = $false,
# When set, rowDev is skipped and existing temp .txt files are reused;
# the script also forces $nodelete to $true in this case.
[switch]$nosource = $false,
# When set, the help text is written to the host.
[switch]$help = $false
)
# Print the usage text and stop when -help is given.
# Every message is quoted: unquoted text is parsed in argument mode, where
# "(default is bmp)" is evaluated as a sub-expression and words such as
# "-nodelete" are taken as (unknown) Write-Host parameters.
if ($help) {
    Write-Host "Usage: ./findGapsBetweenScans.ps1 [-extension abc] [-threshold float] [-nosource] [-nodelete]"
    Write-Host ""
    Write-Host "Will analyze all images with extension abc (default is bmp)"
    Write-Host "in current folder."
    Write-Host ""
    Write-Host "if -nodelete switch is used then script will skip deleting of temp files"
    Write-Host "`t useful when testing as rowDev function can take a while, especially"
    Write-Host "`t in folder with many large source images."
    Write-Host "if -nosource switch is used then script will skip analyzing source images"
    Write-Host "`t and just work from any .txt files in temp folder. Use after a -nodelete."
    Write-Host "`t Using -nosource switch will automatically activate -nodelete."
    Write-Host "if -threshold is provided this changes what standard deviations of the pixels in a line"
    Write-Host "`t is considered uniform enough to be considered as a gap."
    # The previous '$host.exit' invoked nothing, so processing continued after
    # the help text. 'return' ends the script here without closing the session
    # when the script is dot-sourced.
    return
}
# Always need an output folder:
New-Item -ItemType directory -Path $PSScriptRoot/output -Force
if ($nosource) {
    # Re-using existing temp files only makes sense if they are kept afterwards.
    $nodelete = $true
    # Messages are quoted so that the commas are printed; in argument mode an
    # unquoted comma is an array separator and disappears from the output.
    Write-Host "nosource switch set, skipping $extension files and using temp .txt files."
    Write-Host "Skipping temp folder creation, because with nosource it must already exist."
    Write-Host "If you get errors that temp\XYZ.txt cannot be found then temp .txt files do not exist"
    Write-Host "and you need to run the script without the -nosource flag to create them."
}
else {
    Write-Host "Finding documents in scans in current folder with extension $extension."
    Write-Host "Creating temp folder."
    New-Item -ItemType directory -Path $PSScriptRoot/temp -Force
}
if ($nodelete) {
    Write-Host "nodelete switch is true. Will not delete temp .txt files after use."
}
# Crops every card found in one scan into its own JPEG in the output folder.
#
# Parameter:
#   $gapInfo - list describing one scan:
#                [0]        base name of the scan (no extension)
#                [1],[2]    length and end line of the first gap
#                [3],[4]... the same pair for each following gap
#              The last pair describes the gap that closes the scan.
# Reads the script-level $extension and $nodelete values.
# Side effects: runs magick.exe once per card and, unless $nodelete is set,
# removes the scan's temp .txt file afterwards.
function cropper {
    param($gapInfo)

    # Keep the scan's own name untouched: it is needed for the source image
    # and the temp file on every iteration, whatever the output is called.
    $sourceName = $gapInfo[0]
    Write-Host "The arraylist has $($gapInfo.Count) values."
    # The arraylist has one entry for the filename, and two for each gap, there is always 1 more
    # gap than there are cards. So starting with arraylist.Count (e.g. 9) take 1 off for the name (8)
    # and 2 off for the extra gap info (6), divide by two to get number of cards.
    $numCards = ($gapInfo.Count - 3) / 2
    Write-Host "That means the scan should have $numCards cards"

    # The scans are enumerated from the script folder, so resolve the source
    # image there as well instead of relying on the current directory.
    $sourceImage = Join-Path $PSScriptRoot "$sourceName.$extension"
    # <prefix>-<id1>-<id2>-<id3>-<rest>: one three-digit id per card position.
    $bermanPattern = '(B.{1,3})\-([0-9][0-9][0-9])\-([0-9][0-9][0-9])\-([0-9][0-9][0-9])\-(.*)'
    $hasBermanIds = $sourceName -match $bermanPattern

    for ($card = 0; $card -lt $numCards; $card++) {
        $i = 2 * $card
        $overGapEnd = $gapInfo[$i + 2] - 10
        $overGapStart = $overGapEnd - $gapInfo[$i + 1] + 10
        $underGapEnd = $gapInfo[$i + 4] - 10
        $height = $underGapEnd - $overGapStart
        #Write-Host crop from y $overGapStart to $underGapEnd for a height of $height
        $cropParams = "x$height+0+$overGapStart"
        $position = $card + 1

        if ($hasBermanIds -and $position -le 3) {
            # Capture group ($position + 1) holds the id belonging to this card.
            $keepPos = $position + 1
            $outputName = $sourceName -replace $bermanPattern, ('$1$' + $keepPos + '-$5')
        }
        elseif ($numCards -gt 1) {
            # No id available for this card: add the position so that the
            # cards of one scan do not overwrite each other.
            $outputName = "$sourceName-$position"
        }
        else {
            $outputName = $sourceName
        }

        $output = "$PSScriptRoot/output/$outputName.jpg"
        # Uncomment next line to see during process what it's sending to IM.
        #Write-Host magick.exe $sourceImage "-crop" $cropParams $output
        & magick.exe $sourceImage "-crop" $cropParams $output
    }

    # Remove the temp file once, after all cards were cut, using the scan's
    # original name rather than a rewritten output name.
    if (!$nodelete) {
        $tempFile = Join-Path (Join-Path $PSScriptRoot 'temp') "$sourceName.txt"
        if (Test-Path -LiteralPath $tempFile) {
            Remove-Item -LiteralPath $tempFile
        }
    }
}
# Writes, for every scan with the configured extension in the script folder,
# a text file temp/<basename>.txt holding one standard deviation value per
# pixel row of the scan after it was shrunk to 10%.
# Reads the script-level $extension value; takes no arguments.
function rowDev {
    Get-ChildItem $PSScriptRoot -Filter *.$extension |
        Foreach-Object {
            $fName = $_.BaseName
            Write-Host "Getting line by line standard deviations for $($_.Name)"
            # Pass the full path: a bare FileInfo turns into either its name or
            # its full path depending on the PowerShell version, and a bare
            # name only works when the current directory is the script folder.
            & magick $_.FullName -resize 10% -crop x1 +repage -format "%[fx:standard_deviation]\n" info: > "$PSScriptRoot/temp/$fName.txt"
        }
}
# Scans every temp/*.txt file (one standard deviation per line) for runs of
# uniform rows and hands the gaps found to cropper.
#
# A run of more than 9 consecutive values below $threshold that is followed by
# a value at or above it is recorded as a gap. Row numbers are multiplied by 10
# because the values were measured on a 10% sized copy of the scan.
# For each file a list $gapInfo is built: the file's base name followed by a
# (gapLength, gapEndLine) pair per gap. The variable keeps this name because
# cropper may look it up through the caller's scope.
function findGaps {
    $tempDir = Join-Path $PSScriptRoot 'temp'
    Get-ChildItem $tempDir -Filter *.txt |
        Foreach-Object {
            # Number of consecutive low-deviation rows seen so far.
            [int]$j = 0
            [int]$lineCounter = 0
            [System.Collections.ArrayList]$gapInfo = @()
            [void]$gapInfo.Add($_.BaseName)

            # Use the full path of the file; embedding the FileInfo object in
            # a string gives a doubled path on PowerShell versions where it
            # turns into its full name.
            foreach ($line in [System.IO.File]::ReadLines($_.FullName)) {
                [float]$stDev = $line
                if ($stDev -lt $threshold) {
                    $j++
                }
                else {
                    if ($j -gt 9) {
                        # A long enough uniform run just ended on this row.
                        $gapLength = 10 * $j
                        $gapEndLine = 10 * $lineCounter
                        #Write-Host In $_.BaseName a gap lasting $gapLength lines ended at about $gapEndLine
                        $gapInfo.AddRange(@($gapLength, $gapEndLine))
                    }
                    # Shorter runs are treated as part of a card and dropped.
                    $j = 0
                }
                $lineCounter++
            }

            # last line is not recorded as a gap end because it probably does not have a high standard deviation
            # So we have to add the last line of the scan as a gap end.
            # We will assume it's a gap end and has a gapLength of at least 1.
            $gapEndLine = 10 * $lineCounter
            $gapLength = 10 * $j
            $gapInfo.AddRange(@($gapLength, $gapEndLine))

            # Next 4 lines have useful debug info, uncomment if needed.
            # Write-Host **********************************************************************************************
            # for ($i = 0; $i -lt $gapInfo.Count; $i++) {
            # Write-Host gapInfo index $i is $gapInfo[$i]
            # }
            cropper $gapInfo
        }
}
# Measure the scans first, unless existing temp files are to be reused.
if (-not $nosource) { rowDev }

# Find the gaps in every temp file and crop the cards out.
findGaps

# Drop the whole temp folder unless it was asked to be kept.
if (-not $nodelete) {
    Remove-Item -Recurse -Path $PSScriptRoot\temp
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment