Skip to content

Instantly share code, notes, and snippets.

@Wikinaut
Last active January 25, 2023 15:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Wikinaut/b0b24981670f0e22d81b5249fff6350d to your computer and use it in GitHub Desktop.
Save Wikinaut/b0b24981670f0e22d81b5249fff6350d to your computer and use it in GitHub Desktop.
make-tifpdf: create a 600 dpi pdf with 1-bit-tiff images and with colored 300 dpi jpg front and back cover pages for use on bitsavers.org
#!/usr/bin/env bash
# make-tifpdf: create a 600 dpi pdf with 1-bit-tiff images (and, optionally,
# colored jpg front and back cover pages) for uploading scans to bitsavers.org
#
# Usage: make-tifpdf <infile.pdf> [outfile.pdf]
#
# Environment:
#   TUMBLE  path or command name of the tumble program; when unset, the
#           default location configured below is used
#
# init 20230124

# Without an input file there is nothing to do: show the usage text and stop.
if [[ $# -lt 1 ]]; then
  echo "make-tifpdf: create a 600 dpi pdf with 1-bit-tiff images for uploading scans to → bitsavers.org"
  echo "Usage: make-tifpdf <infile.pdf> [outfile.pdf]"
  echo
  exit
fi

# set to 0 to convert everything to 600dpi 1-bit tiff
# set to 1 to convert the front and back cover to colored jpg with coverresolution
coloredcover=1

# resolution in dpi for front and back cover jpg
coverresolution=300

# tumble source: https://github.com/brouhaha/tumble
# tumble is a utility by Eric Smith to construct PDF files from one or more 1-bit-tiff image files

# start time (seconds.nanoseconds); the runtime statistics are derived from it
start=$(date +%s.%N)

# Path to tumble program (the TUMBLE environment variable takes precedence)
tumble="${TUMBLE:-/home/benutzer/src/tumble/tumble}"

# Fail fast: tumble is only needed at the very end, after every page has been
# rasterized and converted, so a missing binary would waste the whole run.
if ! command -v -- "$tumble" >/dev/null 2>&1; then
  echo "make-tifpdf: tumble program not found or not executable: $tumble" >&2
  echo "make-tifpdf: set the TUMBLE environment variable to its location" >&2
  exit 1
fi

infile=$1

# ANSI color escape sequences, meant to be printed with "echo -e"
red="\e[1;31m"
yellow="\e[1;33m"
green="\e[1;32m"
cyan="\e[1;36m"
nocol="\e[0m"

# directory for resulting output files
tumbledir="tumble"
mkdir -p -- "$tumbledir" || exit 1

# directory for processed input files
donedir="done"
mkdir -p -- "$donedir" || exit 1
#######################################
# Draw a one-line text progress bar, 40 characters wide, on stdout.
# Based on https://raw.githubusercontent.com/fearside/ProgressBar/master/progressbar.sh
# (author: Teddy Skarin).
#
# The line starts with a carriage return and has no trailing newline, so
# successive calls overwrite each other on a terminal.
#
# Arguments:
#   $1 - current state (integer)
#   $2 - total state (integer)
# Outputs:
#   e.g. "Progress : [####################--------------------] 50%"
# Notes:
#   A missing, empty, zero or negative total is shown as 0% instead of
#   raising an arithmetic error; the percentage is clamped to 0..100 so the
#   bar always stays 40 characters wide.
#######################################
function ProgressBar {
  local current=${1:-0}
  local total=${2:-0}
  local percent=0
  local filled empty bar_done bar_left

  # Integer percentage; guarded so a total of 0 cannot divide by zero.
  if (( total > 0 )); then
    percent=$(( current * 100 / total ))
  fi
  if (( percent < 0 )); then
    percent=0
  elif (( percent > 100 )); then
    percent=100
  fi

  # 40 bar cells <=> 100 percent
  filled=$(( percent * 4 / 10 ))
  empty=$(( 40 - filled ))

  # Build two runs of blanks of the required lengths, then paint them.
  printf -v bar_done '%*s' "$filled" ''
  printf -v bar_left '%*s' "$empty" ''

  printf '\rProgress : [%s%s] %d%%' "${bar_done// /#}" "${bar_left// /-}" "$percent"
}
#######################################
# Derive the default output path for an input file.
# Arguments:
#   $1 - output directory
#   $2 - input file name
# Outputs:
#   "<dir>/<input without its last extension>--tumble--600dpi.pdf" on stdout.
#   A resolution tag of the form "--N00dpi" (e.g. "--300dpi") that ends the
#   input's base name is dropped, so the result does not carry two dpi tags.
#######################################
_default_outfile() {
  local candidate="$1/${2%.*}--tumble--600dpi.pdf"
  # "?" matches exactly one arbitrary character: --300dpi, --600dpi, ...
  printf '%s\n' "${candidate/--?00dpi--tumble--600dpi/--tumble--600dpi}"
}

# The output name is either derived from the input name or given as 2nd argument.
if [[ $# -eq 1 ]]; then
  outfile=$(_default_outfile "$tumbledir" "$infile")
else
  outfile=$2
fi

# Nothing to convert when the output already exists: just archive the input.
# (Quoted tests, so file names containing blanks are checked correctly.)
if [[ -f "$outfile" ]]; then
  echo "${infile}:"
  echo -e "${green}As the corresponding output file already exists, processing is skipped.${nocol}"
  if [[ -f "$infile" ]]; then
    echo "Thus we simply move the input file to"
    echo "→ $donedir/$infile.done"
    mv -- "$infile" "$donedir/$infile.done"
  fi
  echo
  exit
fi
# Scratch directory for the intermediate page images.
tmpdir=$(mktemp -d -t make-tifpdf-XXXXXXXXXX) || {
  echo "make-tifpdf: cannot create a temporary directory" >&2
  exit 1
}
# The page images are large (see below), so remove the scratch directory
# whenever the script exits, not only after a successful run.
trap 'rm -rf -- "$tmpdir"' EXIT

# Run pdftoppm with the given arguments; abort the whole script when it fails.
_rasterize() {
  if ! pdftoppm "$@"; then
    # leading newline: the progress bar line is not terminated
    printf '\nmake-tifpdf: pdftoppm failed (arguments: %s)\n' "$*" >&2
    exit 1
  fi
}

# Only the "Pages:" line of pdfinfo counts; other lines (e.g. a title that
# contains the word "Pages") must not be picked up.
numberofpages=$(pdfinfo "$infile" | awk '/^Pages:/ {print $2}')
if ! [[ "$numberofpages" =~ ^[1-9][0-9]*$ ]]; then
  echo "make-tifpdf: cannot determine the number of pages of '$infile'" >&2
  exit 1
fi

echo -e "${cyan}Processing:"
# print "<size> <name>" separated by a single blank
read -r infilesize _ < <(du -h -- "$infile")
echo "$infilesize $infile"
echo -e "$numberofpages PDF pages${nocol}"

lastbutonepage=$(( numberofpages - 1 ))

# Progress scale: rasterizing accounts for the first half of the bar,
# converting the pages to tiff for the second half. Set for both modes.
endprogress=$(( numberofpages * 2 ))
halfprogress=$numberofpages

# explode any page in the infile pdf to single 600 dpi ppm file
# each file: filesize 62 MB
if [[ "$coloredcover" -eq 1 ]]; then
  # process colored front and back cover
  ProgressBar 1 "$endprogress"
  _rasterize -f 1 -singlefile -jpeg -r "$coverresolution" "$infile" "$tmpdir/frontcover"
  # inner pages exist only in documents with at least three pages
  if (( lastbutonepage >= 2 )); then
    _rasterize -f 2 -l "$lastbutonepage" -r 600 "$infile" "$tmpdir/p"
  fi
  ProgressBar "$halfprogress" "$endprogress"
  _rasterize -f "$numberofpages" -singlefile -jpeg -r "$coverresolution" "$infile" "$tmpdir/backcover"
else
  ProgressBar 1 "$endprogress"
  _rasterize -r 600 "$infile" "$tmpdir/p"
  ProgressBar "$halfprogress" "$endprogress"
fi
# Convert every rasterized page (p*.ppm in the scratch directory) into a
# 1-bit tiff next to it. The progress bar continues from the half-way mark.
j=$halfprogress
for i in "$tmpdir"/p*.ppm; do
  # Without any ppm file the glob stays literal; there is nothing to convert then.
  [[ -e "$i" ]] || continue

  j=$(( j + 1 ))
  ProgressBar "$j" "$endprogress"

  # "-colors 2" sometimes produces all black tif pages for all white pages
  # do not use it
  # convert $i -density 600 +dither -colors 2 -colorspace gray -contrast-stretch 0 -depth 1 "${i%.*}--600dpi.tif"
  #
  # The options used below follow the 2colorthresh script
  # (http://www.fmwconcepts.com/imagemagick/2colorthresh/index.php), whose
  # stated purpose is to automatically threshold an image to binary (b/w)
  # format using an adaptive spatial subdivision color reduction technique.
  # That script is the -colors IM operator with slight modification from
  # Anthony's examples at http://www.imagemagick.org/Usage/quantize/#two_color
  # For algorithm details, see http://www.imagemagick.org/script/quantize.php
  #
  # Adapted here to create 1-bit tiff files in the desired resolution.
  if ! convert "$i" -density 600 +dither -colorspace gray -contrast-stretch 0 -depth 1 "${i%.*}--600dpi.tif"; then
    # A page that cannot be converted would silently be missing from the pdf,
    # so stop here; the input file stays where it is.
    printf '\nmake-tifpdf: converting %s to a 1-bit tiff failed\n' "$i" >&2
    if [[ -n "$tmpdir" ]]; then
      rm -rf -- "$tmpdir"
    fi
    exit 1
  fi
done
# Each uncompressed tiff file is now a 1-bit BlackWhite image with 600 dpi
# each file: filesize 2,7 MB

# Collect the page images in glob (i.e. sorted) order. An array keeps every
# path intact as one argument, whatever characters it contains.
tifpages=()
for tif in "$tmpdir"/*.tif; do
  if [[ -e "$tif" ]]; then
    tifpages+=("$tif")
  fi
done

if [[ "$coloredcover" -eq 1 ]]; then
  # colored front and back cover enclose the 1-bit pages
  tumbleargs=("$tmpdir/frontcover.jpg" "${tifpages[@]}" "$tmpdir/backcover.jpg")
else
  tumbleargs=("${tifpages[@]}")
fi

# tumble's messages are kept out of the progress display and only shown when
# building the pdf failed, i.e. tumble reported an error or left no output file.
if ! tumblelog=$("$tumble" "${tumbleargs[@]}" -o "$outfile" 2>&1) || [[ ! -s "$outfile" ]]; then
  printf '\nmake-tifpdf: tumble failed to create %s\n' "$outfile" >&2
  printf '%s\n' "$tumblelog" >&2
  if [[ -n "$tmpdir" ]]; then
    rm -rf -- "$tmpdir"
  fi
  exit 1
fi

# all done: fill the progress bar completely
j=$endprogress
ProgressBar "$j" "$endprogress"
#######################################
# Print "<size> <path>" for one file, with the size as reported by "du -h".
# Arguments:
#   $1 - path of the file
# Outputs:
#   one line on stdout; an empty line when du cannot report on the file
#######################################
_print_size() {
  local size rest
  if read -r size rest < <(du -h -- "$1"); then
    echo "$size $1"
  else
    echo
  fi
}

end=$(date +%s.%N)
# elapsed wall-clock time in seconds, two decimals
runtime=$(echo "scale=2;($end - $start)/1" | bc)

# The rates are only defined for a positive page count and a positive runtime.
pagesperminute="n/a"
secondsperpage="n/a"
if [[ "$numberofpages" =~ ^[1-9][0-9]*$ ]] && [[ "$(echo "$runtime > 0" | bc)" == 1 ]]; then
  pagesperminute=$(echo "scale=2;60 * $numberofpages / $runtime" | bc)
  secondsperpage=$(echo "scale=2;$runtime / $numberofpages" | bc)
fi

# postprocess infile
# move to $donedir
donefile="$donedir/$infile.done"
if ! mv -- "$infile" "$donefile"; then
  echo "make-tifpdf: warning: could not move '$infile' to '$donefile'" >&2
  # report the size of the file where it still is
  donefile=$infile
fi

echo
_print_size "$donefile"
_print_size "$outfile"
echo

echo -e "$yellow=== Summary ===$nocol"
if [[ "$coloredcover" -eq 1 ]]; then
  echo "Colored front/back cover: yes"
else
  echo "Colored front/back cover: no"
fi
echo "$runtime seconds processing time"
echo "PDF: $numberofpages pages"
echo "$pagesperminute pages/minute"
echo "$secondsperpage seconds/page"
echo

# cleaning up
if [[ -n "$tmpdir" ]]; then
  rm -rf -- "$tmpdir"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment