Skip to content

Instantly share code, notes, and snippets.

@pr3sidentspence
Last active December 3, 2018 18:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pr3sidentspence/dcb0ef116222fd17ff39fa10e933a663 to your computer and use it in GitHub Desktop.
Save pr3sidentspence/dcb0ef116222fd17ff39fa10e933a663 to your computer and use it in GitHub Desktop.
Same as findDocsInScanAndSeparate.ps1 but handles files named with the BermanID schema. Updated to only use BermanID naming when it detects that the sources started that way.
# A PowerShell script to use ImageMagick to find documents
# arranged vertically in scanned images
# and crop out each into separate images.
#
# Usage: ./findGapsBetweenScans.ps1 [-extension abc] [-threshold float] [-nosource] [-nodelete] [-help]
#
# Will analyze all images with extension abc (default is bmp)
# in current folder.
#
# if -nodelete switch is used then script will skip deleting of temp files
# useful when testing as rowDev function can take a while, especially
# in folder with many large source images.
# if -nosource switch is used then script will skip analyzing source images
# and just work from any .txt files in temp folder. Use after a -nodelete.
# Using -nosource switch will automatically activate -nodelete.
# if -threshold is provided it sets the standard deviation below which a line of pixels
# is considered uniform enough to be part of a gap (default is 0.1).
# **** Note this version tries to extract BermanIDs from the names.
param (
    # Extension (without the dot) of the scanned images to analyze.
    [string]$extension = "bmp",

    # A row whose standard deviation is below this value counts as part of a gap.
    [float]$threshold = 0.1,

    # Keep the temp .txt files instead of deleting them after use.
    [switch]$nodelete,

    # Skip analyzing the source images and reuse the existing temp .txt files.
    [switch]$nosource,

    # Print a description of the switches.
    [switch]$help
)
# -help: describe the switches, then stop before any folder is created or
# any image is processed.
if ($help) {
    # Every message is a quoted string. Unquoted, PowerShell treats
    # "(default is bmp)" as a command to run, turns "a, b" into an array
    # (dropping the comma), and prints "\t" literally (the tab escape is `t).
    Write-Host "Will analyze all images with extention abc (default is bmp)"
    Write-Host "in current folder."
    Write-Host ""
    Write-Host "if -nodelete switch is used then script will skip deleting of temp files"
    Write-Host "`t useful when testing as rowDev function can take a while, especially"
    Write-Host "`t in folder with many large source images."
    Write-Host "if -nosource switch is used then script will skip analyzing source images"
    Write-Host "`t and just work from any .txt files in temp folder. Use after a -nodelete."
    Write-Host "`t Using -nosource switch will automatically activate -nodelete."
    Write-Host "if -threshold is provided this changes what standard deviations of the pixes in a line"
    Write-Host "`t is considered uniform enough to be considered as a gap."
    # "$host.exit" only reads a property and never stopped the script;
    # return leaves the script scope so nothing below runs.
    return
}
# Always need an output folder (-Force: do not fail if it already exists):
New-Item -ItemType directory -Path $PSScriptRoot/output -Force

# Messages are quoted strings: an unquoted "a, b" is parsed as an array and
# Write-Host would print it without the comma.
if ($nosource) {
    # Working from existing temp files only makes sense if they are kept.
    $nodelete = $true
    Write-Host "nosource switch set, skipping $extension files and using temp .txt files."
    Write-Host "Skipping temp folder creation, because with nosource it must already exist."
    Write-Host "If you get errors that temp\XYZ.txt cannot be found then temp .txt files do not exist"
    Write-Host "and you need to run the script without the -nosource flag to create them."
}
else {
    Write-Host "Finding documents in scans in current folder with extension $extension."
    Write-Host "Creating temp folder."
    New-Item -ItemType directory -Path $PSScriptRoot/temp -Force
}

if ($nodelete) {
    Write-Host "nodelete switch is true. Will not delete temp .txt files after use."
}
# Crops every card found in one scan into its own JPEG in the output folder.
#
# Parameters:
#   $gaps - gap list for one scan: element 0 is the scan's base file name,
#           followed by one (gapLength, gapEndLine) pair per gap. Defaults to
#           the caller's $gapInfo, so both "cropper($gapInfo)" and a bare
#           "cropper" keep working.
# Also reads $extension and $nodelete from the script scope.
#
# Output naming:
#   - BermanID names (B..-NNN-NNN-NNN-rest): card 1, 2 or 3 keeps the 1st, 2nd
#     or 3rd NNN group respectively.
#   - Other names with several cards: "-<position>" is appended so the cards
#     do not overwrite each other.
#   - Other names with a single card: the base name is used unchanged.
function cropper {
    param($gaps = $gapInfo)

    $fileName = $gaps[0]
    Write-Host "The arraylist has $($gaps.Count) values."
    # One entry is the file name and every gap contributes two entries; there
    # is always one more gap than there are cards. So from Count (e.g. 9) take
    # 1 off for the name (8) and 2 off for the extra gap (6), then halve.
    $numCards = ($gaps.Count - 3) / 2
    Write-Host "That means the scan should have $numCards cards"

    # Resolve the source against the script folder, the same folder the rest
    # of the script lists images from and writes output/temp under.
    $sourceImage = Join-Path $PSScriptRoot "$fileName.$extension"
    $bermanPattern = '(B.{1,3})\-([0-9][0-9][0-9])\-([0-9][0-9][0-9])\-([0-9][0-9][0-9])\-(.*)'

    for ($i = 0; $i -lt (2 * $numCards); $i = $i + 2) {
        # Top of the crop: end of the gap above the card minus its length,
        # i.e. where that gap starts (the two 10s cancel out).
        $overGapEnd = $gaps[($i + 2)] - 10
        $overGapStart = $overGapEnd - $gaps[($i + 1)] + 10
        # Bottom of the crop: 10 rows before the end of the gap below the card.
        $underGapEnd = $gaps[($i + 4)] - 10
        $height = $underGapEnd - $overGapStart
        Write-Host "crop from y $overGapStart to $underGapEnd for a height of $height"
        $cropParams = "x$height+0+$overGapStart"
        $position = $i / 2 + 1

        # Derive the output name from the ORIGINAL base name on every pass.
        # Overwriting $fileName here used to break the source image name (and
        # the BermanID match) for every card after the first.
        if (($position -le 3) -and ($fileName -match $bermanPattern)) {
            # Regex groups 2..4 hold the three IDs; keep the one for this card.
            $keepGroup = $position + 1
            $outName = $fileName -replace $bermanPattern, ('$1-$' + $keepGroup + '-$5')
        }
        elseif ($numCards -gt 1) {
            $outName = "${fileName}-${position}"
        }
        else {
            $outName = $fileName
        }

        $output = "$PSScriptRoot/output/$outName.jpg"
        # Uncomment next line to see during process what it's sending to IM.
        #Write-Host magick.exe $sourceImage "-crop" $cropParams $output
        & magick.exe $sourceImage "-crop" $cropParams $output
    }

    # Delete the temp file once, after all cards are cropped, using the
    # original base name (inside the loop it failed from the second card on).
    if (!$nodelete) {
        Remove-Item "$PSScriptRoot\temp\$fileName.txt"
    }
}
# Writes, for every image with the chosen $extension in the script folder,
# a text file temp/<basename>.txt holding one standard deviation per pixel row
# of the image after it has been resized to 10%.
# Reads $extension from the script scope; requires the temp folder to exist.
function rowDev {
    Get-ChildItem $PSScriptRoot -Filter "*.$extension" | ForEach-Object {
        $statsFile = "$PSScriptRoot/temp/$($_.BaseName).txt"
        Write-Host "Getting line by line standard deviations for $($_.Name)"
        # Pass the full path: a FileInfo handed to a native command is
        # converted to a string, which on Windows PowerShell is only the file
        # name and therefore only resolves when the current directory happens
        # to be the script folder.
        # -crop x1 cuts the resized image into 1-pixel-high strips; -format
        # prints one standard deviation per strip.
        & magick $_.FullName -resize 10% -crop x1 +repage -format "%[fx:standard_deviation]\n" info: > $statsFile
    }
}
# Reads every temp/*.txt file (one standard deviation per line), finds the
# runs of uniform rows that separate the cards, and hands the result to
# cropper.
#
# For each file it builds $gapInfo: element 0 is the file's base name,
# followed by a (gapLength, gapEndLine) pair per gap. Both numbers are
# multiplied by 10 because rowDev measured the image at 10% size.
# Reads $threshold from the script scope. The list must stay in a variable
# named $gapInfo because cropper can pick it up from the calling scope.
function findGaps {
    Get-ChildItem "$PSScriptRoot\temp" -Filter "*.txt" | ForEach-Object {
        [int]$j = 0            # length of the current run of uniform rows
        [int]$lineCounter = 0  # index of the row being examined
        $gapInfo = @($_.BaseName)

        # Use FullName: embedding the FileInfo itself in a path string yields
        # a full path on newer PowerShell versions, giving an invalid
        # "...\temp\C:\..." path.
        foreach ($line in [System.IO.File]::ReadLines($_.FullName)) {
            [float]$stDev = $line
            if ($stDev -lt $threshold) {
                # Still inside a uniform run.
                $j++
            }
            else {
                # A non-uniform row ends the run; only runs of at least
                # 10 rows are recorded as gaps, shorter ones are ignored.
                if ($j -gt 9) {
                    [int]$gapLength = (10 * $j)
                    $gapEndLine = (10 * $lineCounter)
                    $gapInfo += ($gapLength, $gapEndLine)
                }
                $j = 0
            }
            $lineCounter++
        }

        # last line is not recorded as a gap end because it probably does not
        # have a high standard deviation, so the end of the scan is always
        # added as the final gap end, whatever the length of the final run.
        $gapEndLine = (10 * $lineCounter)
        $gapLength = (10 * $j)
        $gapInfo += ($gapLength, $gapEndLine)

        # Next 3 lines have useful debug info, uncomment if needed.
        # for ($i = 0; $i -lt $gapInfo.Count; $i++) {
        #     Write-Host gapInfo index $i is $gapInfo[$i]
        # }
        cropper($gapInfo)
    }
}
# Main flow: measure the scans (unless -nosource), crop out the cards, then
# remove the temp folder (unless -nodelete).
if (-not $nosource) {
    rowDev
}

findGaps

if (-not $nodelete) {
    Remove-Item -Path "$PSScriptRoot\temp" -Recurse
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment