Skip to content

Instantly share code, notes, and snippets.

@shoogle
Last active October 14, 2018 03:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shoogle/3dc1604e4930f07523728d3795a66ec8 to your computer and use it in GitHub Desktop.
Save shoogle/3dc1604e4930f07523728d3795a66ec8 to your computer and use it in GitHub Desktop.
Produce a PDF document from scanned images
#!/usr/bin/env bash
# pdf-from-images.sh is licensed under the MIT License (a.k.a. the "Expat
# License"), details of which are below. Comments and code contributions are
# welcome at <https://gist.github.com/shoogle/3dc1604e4930f07523728d3795a66ec8>
#
# MIT License
#
# Copyright (c) 2018 Peter Jonas
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Output page geometry in millimetres: an A4 portrait sheet with a margin on
# every side and a footer strip at the bottom that holds the caption.
readonly output_page_width_mm=210 output_page_height_mm=297
readonly output_page_margin_mm=5 output_page_footer_mm=5
# Height of the caption text; must be smaller than the footer.
readonly text_height_mm=3
# Print the usage message on stdout.
# Globals: output_page_margin_mm (read) - interpolated into the margin line.
# The here-document delimiter is unquoted on purpose so that variable expands.
function print_help() {
  cat <<EOF
pdf-from-images.sh - produce a PDF from scanned images prepared by Scan Tailor
Usage: pdf-from-images.sh title images [...]
Example: pdf-from-images.sh "My Scanned Document" scan-01.png scan-02.png
Input images are assumed to be monochrome and have no margins (i.e. tightly
cropped in Scan Tailor or similar program). The script outputs a PDF which has:
- One input image per page, with images scaled to fit A4 (no resampling).
- Title and page number displayed at the bottom of each page.
- Small ${output_page_margin_mm}mm margins for safe printing.
- Maximum lossless compression to optimize file size.
- PDF filename set to title passed in on the command line.
EOF
}
# Query an image's pixel size and resolution with ImageMagick's identify.
# Arguments: $1 - path of the image to inspect
# Globals written: size ("WxH"), width, height, density ("XxY"), densityX,
#   densityY. They are deliberately not local: calculations() reads width and
#   height after calling this function.
# Returns: 0 on success; 1 (with a message on stderr) if identify fails or its
#   output does not start with integer pixel dimensions.
function image_info() {
  local image_file="$1" info
  # Assigned separately from `local` so identify's exit status is not masked.
  if ! info="$(identify -units PixelsPerCentimeter -format "%wx%h %xx%y\n" "${image_file}")"; then
    echo >&2 "$0: could not read image information from '${image_file}'"
    return 1
  fi
  # The format string ends in a newline, so more than one line of output means
  # more than one record (presumably one per frame); only the first is used.
  info="${info%%$'\n'*}"
  # The size is the first space-delimited field and the density is the rest.
  size="${info%% *}"
  width="${size%x*}"
  height="${size#*x}"
  density="${info#* }"
  densityX="${density%x*}"
  densityY="${density#*x}"
  # Callers do integer arithmetic on width and height, so reject anything else.
  if [[ ! "${width}" =~ ^[0-9]+$ || ! "${height}" =~ ^[0-9]+$ ]]; then
    echo >&2 "$0: unexpected identify output for '${image_file}': ${info}"
    return 1
  fi
}
# Print the larger of two decimal numbers ($1, $2). bc does the comparison
# because bash arithmetic only handles integers.
function _larger_density() {
  if [[ "$(bc -l <<<"$1 > $2")" == "1" ]]; then
    echo "$1"
  else
    echo "$2"
  fi
}

# Work out the page geometry shared by every output page.
# Arguments: the input image files.
# Globals read: output_page_width_mm, output_page_height_mm,
#   output_page_margin_mm, output_page_footer_mm, text_height_mm, plus width
#   and height as set by image_info.
# Globals written (all made readonly): output_area_width, output_area_height,
#   max_width, max_height, output_page_margin_px, output_page_footer_px,
#   text_height_px, output_page_width_px, output_page_height_px,
#   output_density, output_densityX, output_densityY.
# Exits the script with status 1 if an image cannot be inspected or if no image
# with non-zero dimensions was supplied.
function calculations() {
  local img
  # Printable area in mm: the page minus the margins and the footer strip.
  readonly output_area_width=$((output_page_width_mm-2*output_page_margin_mm))
  readonly output_area_height=$((output_page_height_mm-2*output_page_margin_mm-output_page_footer_mm))
  # Find the widest and the tallest input, in pixels.
  max_width=0
  max_height=0
  for img in "$@"; do
    if ! image_info "${img}"; then
      echo >&2 "$0: cannot determine the size of '${img}'. Giving up now."
      exit 1
    fi
    if ((width > max_width)); then max_width=${width}; fi
    if ((height > max_height)); then max_height=${height}; fi
  done
  if ((max_width <= 0 || max_height <= 0)); then
    echo >&2 "$0: no images with non-zero dimensions were given."
    exit 1
  fi
  readonly max_width max_height
  # Appending "0" multiplies the pixel count by ten, so pixels / mm becomes
  # pixels per centimetre. The larger of the two densities is the one at which
  # the largest image fits the printable area in both directions.
  output_densityX="$(bc -l <<<"${max_width}0/${output_area_width}")"
  output_densityY="$(bc -l <<<"${max_height}0/${output_area_height}")"
  output_density="$(_larger_density "${output_densityX}" "${output_densityY}")"
  # Convert the mm measurements to whole pixels (bc without -l truncates the
  # division to an integer).
  readonly output_page_margin_px="$(bc <<< "${output_density} * ${output_page_margin_mm} / 10")"
  readonly output_page_footer_px="$(bc <<< "${output_density} * ${output_page_footer_mm} / 10")"
  readonly text_height_px="$(bc <<< "${output_density} * ${text_height_mm} / 10")"
  readonly output_page_width_px=$((max_width+2*output_page_margin_px))
  readonly output_page_height_px=$((max_height+2*output_page_margin_px+output_page_footer_px))
  # Recalculate density to adjust for rounding errors in height and width
  output_densityX="$(bc -l <<<"${output_page_width_px}0/${output_page_width_mm}")"
  output_densityY="$(bc -l <<<"${output_page_height_px}0/${output_page_height_mm}")"
  output_density="$(_larger_density "${output_densityX}" "${output_densityY}")"
  readonly output_density output_densityX output_densityY
}
# Turn one input image into a finished page image via ImageMagick's convert.
# Arguments: $1 - input image, $2 - output file, $3 - page number,
#   $4 - total number of pages, $5 - document title
# Globals read: output_density, max_width, max_height, output_page_footer_px,
#   text_height_px, output_page_width_px, output_page_height_px.
# The convert call goes through try, so it is retried on failure.
function image_convert() {
  local source_image="$1" page_file="$2" page_number="$3" page_count="$4"
  local title="$5"
  local caption="${title} - ${page_number} of ${page_count}"
  local -a steps=(
    -units PixelsPerCentimeter -background White
    "${source_image}" -density "${output_density}"
    # centre the image on a canvas the size of the largest input
    -gravity Center -extent "${max_width}x${max_height}"
    # add the footer strip at the bottom edge
    -gravity South -splice "0x${output_page_footer_px}"
    # render the caption and composite it onto the page
    -fill Black -size "x${text_height_px}" label:"${caption}" -composite
    # grow the canvas to the full page size
    -gravity Center -extent "${output_page_width_px}x${output_page_height_px}"
    -type BiLevel "${page_file}"
  )
  try convert "${steps[@]}"
}
# ImageMagick sometimes fails to allocate memory but succeeds when the same
# command is simply run again. try runs the command given as its arguments
# until it succeeds, pausing briefly between attempts; after the fifth failure
# it reports the problem and exits the script with status 1.
function try() {
  local failures=0 limit=5 name="$1"
  until "$@"; do
    failures=$((failures + 1))
    if ((failures >= limit)); then
      echo >&2 "$0: ${name} failed ${limit} times. Giving up now."
      exit 1
    fi
    echo >&2 "$0: ${name} failed ${failures} times, trying again... (max ${limit} times)"
    sleep 0.1 # short pause before the next attempt, memory may have been freed
  done
}
# Entry point: build "<title>.pdf" from the given images.
# Arguments: $1 - document title (also the PDF file name), $2... - input images
#   A first argument of -h, --help or similar prints the help and exits 0.
# Outputs: progress messages on stderr; the PDF in the current directory.
# Exits with status 1 when the title or the images are missing, or when the
# temporary directory cannot be created.
function main() {
  if [[ "$1" =~ ^-+h ]]; then
    print_help
    exit
  fi
  if (($# < 2)); then
    echo >&2 "$0: expected a title followed by at least one image."
    print_help >&2
    exit 1
  fi
  local title="$1" tmp
  shift
  # Assigned separately from `local` so a mktemp failure is not masked.
  if ! tmp="$(mktemp -d)"; then
    echo >&2 "$0: could not create a temporary directory."
    exit 1
  fi
  # The path is expanded now because tmp is local and may be out of scope by
  # the time the EXIT trap fires; %q keeps unusual characters in it safe.
  trap "rm -rf -- $(printf '%q' "${tmp}")" EXIT
  echo >&2 "Computing page dimensions..."
  local -a images=("$@") output=()
  local img out i=0
  calculations "${images[@]}"
  echo >&2 "Processing images..."
  for img in "${images[@]}"; do
    i=$((i + 1))
    # Name intermediate pages by position, not by input basename, so inputs
    # such as a/scan.png and b/scan.png cannot overwrite each other.
    printf -v out '%s/page-%05d.png' "${tmp}" "${i}"
    output+=("${out}")
    echo >&2 "${img} (${i} of ${#images[@]})"
    image_convert "${img}" "${out}" "${i}" "${#images[@]}" "${title}"
  done
  echo >&2 "Creating PDF..."
  try convert "${output[@]}" -type BiLevel -compress Fax "${title}.pdf"
}
# Run the script: pass every command-line argument through to main.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment