Skip to content

Instantly share code, notes, and snippets.

@mik01aj
Created February 21, 2015 17:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mik01aj/fb7ed2af4088d5937c9e to your computer and use it in GitHub Desktop.
Save mik01aj/fb7ed2af4088d5937c9e to your computer and use it in GitHub Desktop.
A script to filter out repeated partial slides in PDF presentations (especially lecture slides made in LaTeX Beamer) and generate a printer-friendly version.
#!/bin/bash
# Author: Mikołaj Dądela <mik01aj@o2.pl>
#
# Command-line handling.
#   $1  input PDF (required, must be readable)
#   $2  output PDF name -- or, when it consists only of digits, the threshold
#   $3  threshold in KB (used only when it consists only of digits)
# Sets: inputPdf, threshold, basename, outputPdf.
inputPdf="$1"
threshold="10"
if [ -z "$inputPdf" ]; then
  # Diagnostics go to stderr so they never mix with regular output.
  {
    echo "Usage: $0 input.pdf [output.pdf] [<threshold>]"
    echo ""
    echo "Default output filename for foo.pdf is foo_s.pdf."
    echo ""
    echo "Threshold is the minimal file size (in KB) of difference PNG. After"
    echo "running this script have a look at diff files to decide what value"
    echo "would be ok for you. The default is $threshold."
  } >&2
  exit 1
fi
if [ ! -r "$inputPdf" ]; then
  echo "Unable to open $inputPdf." >&2
  exit 1
fi

# Derive the default output name: foo.pdf / foo.PDF -> foo_s.pdf
basename="${inputPdf%.pdf}"
basename="${basename%.PDF}"
outputPdf="${basename}_s.pdf"

# Second argument: an all-digit value is a threshold, anything else is the
# output filename.
if [ -n "$2" ]; then
  if [[ "$2" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10: without it a value such as 08 would later be
    # rejected as invalid octal, and 010 would silently mean 8.
    threshold=$((10#$2))
  else
    outputPdf="$2"
  fi
fi

# Third argument: threshold only. A non-numeric value keeps the current
# threshold, but the user is told about it instead of being ignored silently.
if [ -n "$3" ]; then
  if [[ "$3" =~ ^[0-9]+$ ]]; then
    threshold=$((10#$3))
  else
    echo "Warning: ignoring non-numeric threshold '$3' (using $threshold)." >&2
  fi
fi
# ----------------------------------------------------------------------
# Ask pdftk for the page count: take the value from the "NumberOfPages:" line
# of its dump_data report.
numPages=$(pdftk "$inputPdf" dump_data | awk '/^NumberOfPages:/ { print $2; exit }')
# Every later phase does arithmetic on numPages, so stop right here when pdftk
# is missing, failed, or reported something that is not a positive integer.
if ! [[ "$numPages" =~ ^[0-9]+$ ]] || [ "$numPages" -eq 0 ]; then
  echo "Unable to determine the number of pages in $inputPdf (is pdftk installed?)." >&2
  exit 1
fi
echo "$inputPdf contains $numPages pages."
# 1st phase: thumbnails
# Render every page of the input PDF as a small PNG in $thumbsDir, ten pages
# per convert call. The later phases look for files named p-<index>.png with a
# zero-based page index. If p-0.png is already there, the thumbnails from a
# previous run are reused.
thumbsDir="${basename}_thumbs"
if [ ! -f "$thumbsDir/p-0.png" ]; then
  mkdir -p "$thumbsDir" || exit
  # Step through the pages in batches of ten. Looping on "start < numPages"
  # avoids the empty trailing batch (e.g. [20-19] for a 20-page file) that a
  # 0..numPages/10 loop produces when the page count is a multiple of ten.
  for ((start = 0; start < numPages; start += 10)); do
    end=$((start + 9))
    if [ "$end" -ge "$numPages" ]; then
      end=$((numPages - 1))
    fi
    echo -ne "\rGenerating thumbnails... $start-$end"
    convert -resize 200x200 "${inputPdf}[${start}-${end}]" "$thumbsDir/p.png" || exit
    # A batch holding a single page is written as plain p.png (no -N suffix);
    # give it the indexed name the following phases expect.
    if [ "$start" -eq "$end" ] && [ -f "$thumbsDir/p.png" ]; then
      mv -f "$thumbsDir/p.png" "$thumbsDir/p-$start.png" || exit
    fi
  done
  echo -e "\rGenerating thumbnails... done. (saved to $thumbsDir/p-*.png)"
else
  echo "Using found $thumbsDir/p-*.png files."
fi
# 2nd phase: diffs
# For each pair of consecutive thumbnails (p-i, p-(i+1)) write diff-i.png.
# An existing diff-0.png means a previous run already produced the diffs.
if [ -f "$thumbsDir/diff-0.png" ]; then
  echo "Using found $thumbsDir/diff-*.png files."
else
  i=0
  # Keep going for as long as there is a following page to compare against.
  while [ -f "$thumbsDir/p-$((i + 1)).png" ]; do
    echo -ne "\rComparing pages... $i "
    # Negated next page + current page, composed with Plus: only content
    # that disappears on the next slide stays visible. Whatever appears or
    # stays unchanged comes out white in the diff image.
    convert "$thumbsDir/p-$((i + 1)).png" -negate \
      "$thumbsDir/p-$i.png" \
      -compose Plus -composite \
      "$thumbsDir/diff-$i.png" || exit
    i=$((i + 1))
  done
  echo -e "\rComparing pages... done. (saved to $thumbsDir/diff-*.png)"
fi
# 3rd phase: selection
# Build $pages, the space-separated list of 1-based page numbers to keep.
# Page 1 is always kept. Page i+1 is kept when diff-i.png is larger than the
# threshold (in KB). The last page is kept as well.
i=1
pages="1"
echo -n "Selecting pages (with size threshold = ${threshold}KB)... $pages"
while [ -f "$thumbsDir/diff-$i.png" ]; do
  # wc -c is POSIX (stat -c%s is GNU-only); the arithmetic expansion strips
  # the padding some wc implementations print around the number.
  diffSize=$(($(wc -c < "$thumbsDir/diff-$i.png")))
  if [ "$diffSize" -gt $((threshold * 1024)) ]; then
    echo -n " $((i + 1))"
    pages="$pages $((i + 1))"
  fi
  i=$((i + 1))
done
# Here i is the index of the first missing diff, so i+1 is the last page --
# but only when at least one diff exists. Without diff-0.png the document has
# a single page, and adding "2" would request a page that does not exist.
if [ -f "$thumbsDir/diff-0.png" ]; then
  echo -n " $((i + 1))"
  pages="$pages $((i + 1))"
fi
echo " done."
# 4th phase: output
# Let pdftk copy the selected pages of the input into the output PDF.
printf '%s' "Writing $outputPdf... "
# $pages is left unquoted on purpose: every page number has to reach pdftk
# as a separate argument.
pdftk "$inputPdf" cat $pages output "$outputPdf" || exit
printf '%s\n' "done." "Finished. You can now remove $thumbsDir/."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment