# Same as findDocsInScanAndSeparate.ps1 but handles files named with the BermanID schema. Updated to only use BermanID naming when it detects that the sources started that way.
# A PowerShell script to use ImageMagick to find documents
# arranged vertically in scanned images
# and crop out each into separate images.
# Usage: ./findGapsBetweenScans.ps1 [-extension abc] [-threshold float] [-nosource] [-nodelete] [-help]
# Will analyze all images with extension abc (default is bmp)
# in current folder.
# If the -nodelete switch is used then the script will skip deleting the temp files;
# useful when testing, as the rowDev function can take a while, especially
# in a folder with many large source images.
# if -nosource switch is used then script will skip analyzing source images
# and just work from any .txt files in temp folder. Use after a -nodelete.
# Using -nosource switch will automatically activate -nodelete.
# If -threshold is provided, it changes what standard deviation of the pixels in a line
# is considered uniform enough to count as a gap.
# **** Note this version tries to extract BermanIDs from the names.
# Command-line parameters:
#   -extension  extension (without the dot) of the source scans to analyze.
#   -threshold  a row whose standard deviation is below this value is treated as part of a gap.
#   -nodelete   keep the temp .txt files after use.
#   -nosource   skip analyzing source images and work from existing temp .txt files.
#   -help       print the usage notes.
param (
    [string]$extension = "bmp",
    [float]$threshold = 0.1,
    [switch]$nodelete = $false,
    [switch]$nosource = $false,
    [switch]$help = $false
)
# -help: print the usage notes and stop before any folder is created or image processed.
if ($help) {
    # Every message is quoted. Left bare, "(default is bmp)" is parsed as a command
    # invocation, words such as -nosource are bound as (unknown) Write-Host parameters,
    # commas split the text into array elements, and \t is printed literally because
    # the PowerShell tab escape is `t.
    Write-Host "Will analyze all images with extention abc (default is bmp)"
    Write-Host "in current folder."
    Write-Host "if -nodelete switch is used then script will skip deleting of temp files"
    Write-Host "`t useful when testing as rowDev function can take a while, especially"
    Write-Host "`t in folder with many large source images."
    Write-Host "if -nosource switch is used then script will skip analyzing source images"
    Write-Host "`t and just work from any .txt files in temp folder. Use after a -nodelete."
    Write-Host "`t Using -nosource switch will automatically activate -nodelete."
    Write-Host "if -threshold is provided this changes what standard deviations of the pixes in a line"
    Write-Host "`t is considered uniform enough to be considered as a gap."
    # Asking for help should not go on to analyze and crop images.
    exit
}
# Prepare the working folders next to the script and report which switches are active.
# Messages are quoted so that commas and words starting with "-" are printed as text
# instead of being parsed as array separators / Write-Host parameters.

# Always need an output folder:
New-Item -ItemType directory -Path $PSScriptRoot/output -Force
if ($nosource) {
    # Working from existing temp files only makes sense if they are kept afterwards.
    $nodelete = $true
    Write-Host "nosource switch set, skipping $extension files and using temp .txt files."
    Write-Host "Skipping temp folder creation, because with nosource it must already exist."
    Write-Host "If you get errors that temp\XYZ.txt cannot be found then temp .txt files do not exist"
    Write-Host "and you need to run the script without the -nosource flag to create them."
}
else {
    Write-Host "Finding documents in scans in current folder with extension $extension."
    Write-Host "Creating temp folder."
    New-Item -ItemType directory -Path $PSScriptRoot/temp -Force
}
if ($nodelete) {
    Write-Host "nodelete switch is true. Will not delete temp .txt files after use."
}
# Crops every card described by $gapInfo out of its source scan and writes each one
# to the output folder as a .jpg.
# Takes no parameters; it reads these variables from the calling scope:
#   $gapInfo   - element 0 is the scan's base name, followed by one
#                (gapLength, gapEndLine) pair per gap; the values are used directly as
#                pixel rows in the crop geometry of the source scan.
#   $extension - extension of the source scan.
#   $nodelete  - when not set, the scan's temp .txt file is removed once all cards are cut.
function cropper {
    $fileName = $gapInfo[0]
    Write-Host "The arraylist has $($gapInfo.Count) values."
    # The arraylist has one entry for the filename, and two for each gap, there is always 1 more
    # gap than there are cards. So starting with arraylist.Count (e.g. 9) take 1 off for the name (8)
    # and 2 off for the extra gap info (6), divide by two to get number of cards.
    $numCards = ($gapInfo.Count - 3) / 2
    Write-Host "That means the scan should have $numCards cards"

    # BermanID schema: <B + 1-3 chars>-<id1>-<id2>-<id3>-<rest>, ids being three digits each.
    $bermanPattern = "(B.{1,3})\-([0-9][0-9][0-9])\-([0-9][0-9][0-9])\-([0-9][0-9][0-9])\-(.*)"
    # The scans are enumerated from $PSScriptRoot, so read the source from there rather
    # than from whatever the current directory happens to be.
    $sourceImage = "$PSScriptRoot/$fileName.$extension"

    for ($i = 0; $i -lt (2 * $numCards); $i = $i + 2) {
        # End of the gap above the card, and that gap's start (end minus its length).
        $overGapEnd = $gapInfo[($i + 2)] - 10
        $overGapStart = $overGapEnd - $gapInfo[($i + 1)] + 10
        # End of the gap below the card.
        $underGapEnd = $gapInfo[($i + 4)] - 10
        $height = $underGapEnd - $overGapStart
        Write-Host "crop from y $overGapStart to $underGapEnd for a height of $height"
        $cropParams = "x$height+0+$overGapStart"

        # 1-based position of this card within the scan.
        $position = $i / 2 + 1
        # Build the output name in its own variable: $fileName must keep the scan's
        # original base name, because it still identifies the source image for the
        # following cards and the temp .txt file removed below.
        $outName = $fileName
        if (($position -le 3) -and ($fileName -match $bermanPattern)) {
            # Keep the prefix, the id belonging to this card (capture group 2, 3 or 4)
            # and the trailing part of the name.
            $keepPos = $position + 1
            $replace = "`$1-`$$keepPos-`$5"
            $outName = $fileName -replace $bermanPattern, $replace
        }
        elseif ($numCards -gt 1) {
            # No BermanID available for this card: add the position so that several
            # cards cut from one scan do not overwrite each other's output file.
            $outName = "$fileName-$position"
        }
        $output = "$PSScriptRoot/output/$outName.jpg"
        # Uncomment next line to see during process what it's sending to IM.
        #Write-Host magick.exe $sourceImage "-crop" $cropParams $output
        & magick.exe $sourceImage "-crop" $cropParams $output
    }

    if (!$nodelete) {
        Remove-Item "$PSScriptRoot\temp\$fileName.txt"
    }
}
# For every image in $PSScriptRoot with the requested extension, asks ImageMagick to
# shrink it to 10%, cut it into 1-pixel-high strips and print each strip's standard
# deviation, one value per line, into temp/<BaseName>.txt.
# Takes no parameters; reads $extension from the calling scope.
function rowDev {
    Get-ChildItem $PSScriptRoot -Filter "*.$extension" |
    ForEach-Object {
        $fName = $_.BaseName
        Write-Host "Getting line by line standard deviations for $($_.Name)"
        # Pass the full path: how a file object converts to a string (bare name or
        # full path) differs between PowerShell versions.
        & magick $_.FullName -resize 10% -crop x1 +repage -format "%[fx:standard_deviation]\n" info: > "$PSScriptRoot/temp/$fName.txt"
    }
}
# Reads every temp\*.txt file (one standard deviation per line), finds the runs of
# low-deviation lines that are long enough to be a gap between cards, and hands the
# result to cropper.
# For each file $gapInfo is rebuilt as: base name, then (gapLength, gapEndLine) per gap.
# Line counts are multiplied by 10 before being stored.
# NOTE(review): the factor 10 presumably undoes the 10% resize used when the .txt
# files were produced - confirm.
# Takes no parameters; reads $threshold from the calling scope. cropper in turn reads
# $gapInfo from this function's scope.
function findGaps {
    [int]$j = 0            # length of the current run of low-deviation lines
    [int]$gapCount = 0     # gaps found so far in the current file (debug output only)
    [System.Collections.ArrayList]$gapInfo = @()
    Get-ChildItem "$PSScriptRoot\temp" -Filter *.txt |
    ForEach-Object {
        $j = 0
        [int]$lineCounter = 0
        $gapCount = 0
        $gapInfo = @()
        $gapInfo += $_.BaseName
        # Use FullName: how a file object converts to a string (bare name or full
        # path) differs between PowerShell versions.
        $lines = [System.IO.File]::ReadLines( $_.FullName )
        foreach ( $line in $lines) {
            [float]$stDev = $line
            if ( $stDev -lt $threshold ) {
                #Write-Host $_.BaseName line $lineCounter $stDev lessthan $threshold
                # Uniform line: extend the current run.
                $j++
            }
            elseif ($j -gt 9) {
                # First non-uniform line after a run of 10 or more uniform lines:
                # the run was a gap and it ends here.
                $gapCount++
                [int]$gapLength = (10 * $j)
                $gapEndLine = (10 * $lineCounter)
                #Write-Host In $_.BaseName the $gapCount"st/nd" gap, lasting $gapLength lines, ended at about $gapEndLine
                $gapInfo += ($gapLength, $gapEndLine)
                $j = 0
            }
            else {
                # Non-uniform line after a run too short to be a gap: discard the run.
                $j = 0
            }
            $lineCounter++
        }
        # last line is not recorded as a gap end because it probably does not have a high standard deviation
        # So we have to add the last line of the scan as a gap end.
        # We will assume it's a gap end and has a gapLength of at least 1.
        $gapEndLine = (10 * $lineCounter)
        $gapLength = (10 * $j)
        $gapInfo += ($gapLength, $gapEndLine)
        # Next 4 lines have useful debug info, uncomment if needed.
        # Write-Host **********************************************************************************************
        # for ($i = 0; $i -lt $gapInfo.Count; $i++) {
        #     Write-Host gapInfo index $i is $gapInfo[$i]
        # }
        # Cut the cards of this scan out of its source image.
        cropper
    }
}
# Main sequence: measure the scans (unless -nosource), find the gaps and crop the
# cards, then clean up the temp folder (unless -nodelete).
if (!$nosource) {
    rowDev
}
findGaps
if (!$nodelete) {
    Remove-Item -path $PSScriptRoot\temp -recurse
}
