Last active
October 14, 2018 03:20
-
-
Save shoogle/3dc1604e4930f07523728d3795a66ec8 to your computer and use it in GitHub Desktop.
Produce a PDF document from scanned images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# pdf-from-images.sh is licensed under the MIT License (a.k.a. the "Expat
# License"), details of which are below. Comments and code contributions are
# welcome at <https://gist.github.com/shoogle/3dc1604e4930f07523728d3795a66ec8>
#
# MIT License
#
# Copyright (c) 2018 Peter Jonas
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Output page geometry, all in millimetres. 210 x 297 is the A4 page size
# mentioned in the help text. The margin is applied on all four sides; the
# footer strip is reserved at the bottom of the page for the title/page label.
readonly output_page_width_mm=210
readonly output_page_height_mm=297
readonly output_page_margin_mm=5
readonly output_page_footer_mm=5
readonly text_height_mm=3 # must be smaller than footer
# Print the usage summary and a description of the generated PDF to stdout.
# Globals:
#   output_page_margin_mm (read) - interpolated into the help text.
function print_help() {
  # Unquoted delimiter on purpose: ${output_page_margin_mm} must be expanded.
  cat <<EOF
pdf-from-images.sh - produce a PDF from scanned images prepared by Scan Tailor
Usage: pdf-from-images.sh title images [...]
Example: pdf-from-images.sh "My Scanned Document" scan-01.png scan-02.png
Input images are assumed to be monochrome and have no margins (i.e. tightly
cropped in Scan Tailor or similar program). The script outputs a PDF which has:
- One input image per page, with images scaled to fit A4 (no resampling).
- Title and page number displayed at the bottom of each page.
- Small ${output_page_margin_mm}mm margins for safe printing.
- Maximum lossless compression to optimize file size.
- PDF filename set to title passed in on the command line.
EOF
}
# Query an image's pixel dimensions and density with ImageMagick `identify`.
# Arguments:
#   $1 - path of the image file to inspect
# Globals (written): size ("WxH"), width, height,
#                    density ("XxY"), densityX, densityY
# Returns:
#   0 on success; non-zero if `identify` fails or prints nothing, in which
#   case the globals are left untouched.
function image_info() {
  local image_file="$1" info
  # "\n" is deliberately passed through literally; identify expands it itself.
  info="$(identify -units PixelsPerCentimeter -format "%wx%h %xx%y\n" "${image_file}")" \
    || return 1
  # identify prints one line per frame; only the first frame is used so that
  # multi-frame inputs cannot corrupt the parsing below.
  info="${info%%$'\n'*}"
  [[ -n "${info}" ]] || return 1
  # The size is the first space-delimited field. Cutting at the FIRST space
  # keeps it intact even if the density field itself contains spaces.
  size="${info%% *}"
  width="${size%x*}"
  height="${size#*x}"
  density="${info#* }"
  densityX="${density%x*}"
  densityY="${density#*x}"
}
# Print the larger of two decimal numbers (compared with bc) to stdout.
function _larger_of() {
  if [[ "$(bc -l <<<"$1 > $2")" == "1" ]]; then
    printf '%s\n' "$1"
  else
    printf '%s\n' "$2"
  fi
}

# Work out the common page size (in pixels) and density shared by all pages.
# Arguments:
#   $@ - paths of all input images (at least one)
# Globals (read):
#   output_page_width_mm, output_page_height_mm, output_page_margin_mm,
#   output_page_footer_mm, text_height_mm, plus width/height as set by
#   image_info for each image
# Globals (written, all made readonly):
#   output_area_width, output_area_height, max_width, max_height,
#   output_page_margin_px, output_page_footer_px, text_height_px,
#   output_page_width_px, output_page_height_px,
#   output_density, output_densityX, output_densityY
# Returns:
#   0 on success; 1 if no images were given, or if image_info or bc fails.
function calculations() {
  local img
  if (($# == 0)); then
    echo >&2 "$0: no input images given, cannot compute page dimensions"
    return 1
  fi

  # Printable area in mm: the page minus margins on every side and the footer.
  readonly output_area_width=$((output_page_width_mm - 2 * output_page_margin_mm))
  readonly output_area_height=$((output_page_height_mm - 2 * output_page_margin_mm - output_page_footer_mm))

  # The largest width and height over all inputs define the common canvas.
  max_width=0
  max_height=0
  for img in "$@"; do
    image_info "${img}" || return 1
    if ((width > max_width)); then max_width=${width}; fi
    if ((height > max_height)); then max_height=${height}; fi
  done
  readonly max_width max_height

  # Appending "0" multiplies the pixel count by 10, which turns px/mm into
  # px/cm (the unit passed to ImageMagick via -units PixelsPerCentimeter).
  output_densityX="$(bc -l <<<"${max_width}0/${output_area_width}")" || return 1
  output_densityY="$(bc -l <<<"${max_height}0/${output_area_height}")" || return 1
  # The larger density is the one at which both dimensions fit the area.
  output_density="$(_larger_of "${output_densityX}" "${output_densityY}")"

  # bc without -l uses scale=0, so the division truncates these to whole
  # pixels, which the shell arithmetic below requires.
  output_page_margin_px="$(bc <<<"${output_density} * ${output_page_margin_mm} / 10")" || return 1
  output_page_footer_px="$(bc <<<"${output_density} * ${output_page_footer_mm} / 10")" || return 1
  text_height_px="$(bc <<<"${output_density} * ${text_height_mm} / 10")" || return 1
  readonly output_page_margin_px output_page_footer_px text_height_px

  readonly output_page_width_px=$((max_width + 2 * output_page_margin_px))
  readonly output_page_height_px=$((max_height + 2 * output_page_margin_px + output_page_footer_px))

  # Recalculate density to adjust for rounding errors in height and width
  output_densityX="$(bc -l <<<"${output_page_width_px}0/${output_page_width_mm}")" || return 1
  output_densityY="$(bc -l <<<"${output_page_height_px}0/${output_page_height_mm}")" || return 1
  output_density="$(_larger_of "${output_densityX}" "${output_densityY}")"
  readonly output_density output_densityX output_densityY
}
# Render one input image as a finished page image via ImageMagick `convert`.
# Arguments:
#   $1 - input image path
#   $2 - output image path
#   $3 - page number of this image
#   $4 - total number of pages
#   $5 - document title
# Globals (read):
#   output_density, max_width, max_height, output_page_footer_px,
#   text_height_px, output_page_width_px, output_page_height_px
# The command is run through `try`, so it is retried on failure.
function image_convert() {
  local in="$1" out="$2" i="$3" n="$4" title="$5" label
  # Footer text, e.g. "My Scanned Document - 2 of 7".
  label="${title} - $i of $n"
  # Steps, in order: extend the image to the common canvas size, splice a
  # footer strip in with South gravity, composite the label onto the result,
  # then extend to the full page size (which adds the margins).
  # NOTE(review): ImageMagick may interpret special characters in label:
  # text (such as '%' or a leading '@') - confirm before using such titles.
  try convert -units PixelsPerCentimeter -background White \
    "${in}" -density "${output_density}" \
    -gravity Center -extent "${max_width}x${max_height}" \
    -gravity South -splice "0x${output_page_footer_px}" \
    -fill Black -size "x${text_height_px}" label:"${label}" -composite \
    -gravity Center -extent "${output_page_width_px}x${output_page_height_px}" \
    -type BiLevel "${out}"
}
# sometimes imagemagick fails to allocate memory, but works if we try again...
#
# Run a command, retrying while it fails.
# Arguments:
#   $@ - the command and its arguments ($1 is used as its name in messages)
# Outputs:
#   progress and failure messages on stderr
# Returns:
#   0 once the command succeeds. After 5 failed attempts the whole script is
#   terminated with `exit 1` (this function does not return in that case).
function try() {
  # keep trying to execute a failing command until it succeeds or we get bored
  local i=0 n=5 command="$1"
  while ! "$@"; do
    i=$((i + 1))
    if ((i < n)); then
      echo >&2 "$0: ${command} failed $i times, trying again... (max $n times)"
      sleep 0.1 # wait before trying again, maybe free some memory
    else
      echo >&2 "$0: ${command} failed $n times. Giving up now."
      exit 1
    fi
  done
}
# Entry point: convert every input image to a page and bundle them in a PDF.
# Arguments:
#   $1     - document title (also used as the PDF file name, "<title>.pdf"),
#            or a help flag such as -h / --help
#   $2...  - input image files, one per page, in page order
# Outputs:
#   progress messages on stderr; help text on stdout when a help flag is given
# Exits:
#   0 after printing help on request; 1 on wrong usage or when a step fails.
function main() {
  if [[ "$1" =~ ^-+h ]]; then
    print_help
    exit
  fi
  # A title and at least one image are required.
  if (($# < 2)); then
    print_help >&2
    exit 1
  fi

  local title="$1" tmp
  shift
  # Declared separately from the assignment so a mktemp failure is noticed
  # instead of continuing with an empty directory name.
  tmp="$(mktemp -d)" || exit 1
  # tmp is local and out of scope by the time the EXIT trap fires, so its
  # value is embedded in the trap command now, shell-quoted with %q.
  trap "rm -rf -- $(printf '%q' "${tmp}")" EXIT

  local images=("$@")
  local output=()
  local img out i=0

  echo >&2 "Computing page dimensions..."
  calculations "${images[@]}" || exit 1

  echo >&2 "Processing images..."
  for img in "${images[@]}"; do
    ((i++))
    # Name temporary pages by index rather than by the input's basename, so
    # inputs such as a/scan.png and b/scan.png cannot overwrite each other.
    printf -v out '%s/page-%06d.png' "${tmp}" "${i}"
    output+=("${out}")
    echo >&2 "${img} ($i of ${#images[@]})"
    image_convert "${img}" "${out}" "$i" "${#images[@]}" "${title}"
  done

  echo >&2 "Creating PDF..."
  try convert "${output[@]}" -type BiLevel -compress Fax "${title}.pdf"
}
# Run the script, forwarding all command-line arguments unchanged.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment