Skip to content

Instantly share code, notes, and snippets.

@precious
Created December 24, 2011 20:05
Show Gist options
  • Save precious/1518232 to your computer and use it in GitHub Desktop.
Save precious/1518232 to your computer and use it in GitHub Desktop.
This script trims white margins out of PDF document and packs it to DjVu
#!/bin/bash
# vim: set fileencoding=utf-8 ts=2 sw=2 expandtab:
# PUBLIC DOMAIN
# This script trims white margins out of PDF document and packs it to DjVu.
# Author Sergei Astanin http://sovety.blogspot.com/
# Modified by Vsevolod K. https://github.com/precious
### Settings
# COLUMNS is used below as the script's own "number of columns" setting.
# Bash may overwrite COLUMNS with the terminal width after running an external
# command when the checkwinsize shell option is on (it is enabled by default in
# recent bash versions), which would silently switch on multi-column slicing.
# Turn the option off before assigning the setting.
shopt -u checkwinsize
DPI=150
MARGIN=10
MODE=gray # mono or gray or color
COLUMNS=1
### End of settings

# Values captured before option parsing so the help text can show the defaults.
prog=$0
dpidefault=$DPI
modedefault=$MODE

# Defaults for settings that can only be changed from the command line.
CROP=""      # crop specification (tXbXlXrX), empty = no cropping
type=djvu    # output document type: djvu or pdf
resizeopt="" # extra mogrify option built from --resize
quality=60   # image quality used when generating pdf
# Print the command-line help to stdout.
# Globals (read only): prog, modedefault, dpidefault, COLUMNS, MARGIN
# The "[default]" marker is attached to the compression mode named by
# $modedefault; any value other than "mono" or "gray" marks "color".
function usage() {
  # Keep the helpers local so calling usage does not create global variables.
  local me monodefault="" graydefault="" colordefault=""
  me=$(basename -- "$prog")
  # Quoted selector: an empty or multi-word mode no longer produces test errors.
  case "$modedefault" in
    mono) monodefault="[default]" ;;
    gray) graydefault="[default]" ;;
    *) colordefault="[default]" ;;
  esac
  cat << END
Usage: $me [options] document.pdf
Options:
-f <int> the first page to process [default: 1]
-l <int> the last page to process
-d <int> resolution in DPI [default: $dpidefault]
-q|--quality <int> quality of images for generating pdf [default: 60]
-c|--columns <int> multi-column mode [default: $COLUMNS]
-p|--crop <tXbXlXrX> crop image's top, bottom, left and/or right edges
example: $me --crop b40r10 file.pdf
-m|--margin <int> borders width [default: $MARGIN]
-t|--type <str> type of result document - pdf/djvu [default: djvu]
-r|--resize <geomet> resize pages; geometry is WIDTH or xHEIGH or WIDTHxHEIGHT
or as presented at
http://www.imagemagick.org/script/command-line-processing.php#geometry
--mono bitonal compression (black and white only) $monodefault
--gray DjVuPhoto compression (shades of gray images) $graydefault
--color DjVuPhoto compression (color images) $colordefault
-h|--help print this message
END
}
# Parse the command line with getopt(1) and store the results in the setting
# variables: frompage, topage, DPI, MODE, COLUMNS, CROP, type, quality, resize
# and MARGIN. The arguments that follow the options are collected in the
# array remaining_args.
# Prints the help and exits with status 1 when getopt rejects the command line.
parse_options() {
  local parsed
  # In a getopt long-option list the names are separated by commas and a
  # trailing ':' marks an option that requires an argument.
  if ! parsed=$(getopt \
      -l "help,mono,gray,grey,color,colour,columns:,crop:,margin:,type:,resize:,quality:" \
      -o "hf:l:d:c:p:m:t:r:q:" -- "$@") ; then
    usage
    exit 1
  fi
  # getopt prints a shell-quoted, normalized argument list in which the
  # options come first, followed by "--" and the remaining arguments;
  # eval is the documented way to load that list into "$@".
  eval set -- "$parsed"
  while [ $# -gt 0 ] ; do
    case "$1" in
      -h|--help) usage ; exit 0 ;;
      -f) frompage=$2 ; shift 2 ;;
      -l) topage=$2 ; shift 2 ;;
      -d) DPI=$2 ; shift 2 ;;
      --mono) MODE=mono ; shift ;;
      --color|--colour) MODE=color ; shift ;;
      --gray|--grey) MODE=gray ; shift ;;
      -c|--columns) COLUMNS=$2 ; shift 2 ;;
      -p|--crop) CROP=$2 ; shift 2 ;;
      -t|--type) type=$2 ; shift 2 ;;
      -q|--quality) quality=$2 ; shift 2 ;;
      -r|--resize) resize=$2 ; shift 2 ;;
      -m|--margin) MARGIN=$2 ; shift 2 ;;
      --) shift ; break ;; # end of options
      *) break ;;
    esac
  done
  remaining_args=("$@")
}
parse_options "$@"
# Leave only the non-option arguments in the script's positional parameters.
set -- "${remaining_args[@]}"
# After option parsing exactly one positional argument must be left: the
# PDF document to convert. Anything else prints the help and aborts.
case $# in
  1) ;;
  *)
    printf "Filename is required.\n"
    usage
    exit 1
    ;;
esac
# Print the absolute form of the path given as $1: an absolute path is
# returned unchanged, a relative one is resolved against the current directory.
absolute_path() {
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "$PWD/$1" ;;
  esac
}

# Absolute paths to the input PDF and to the output document (without the
# extension). They must be absolute because the pages are processed from
# inside the temporary directory.
pdf=$(absolute_path "$1")
filename="${pdf%.pdf}"

# Private scratch directory for the intermediate page images.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/pagesXXXXXX") || {
  printf 'Cannot create a temporary directory\n' >&2
  exit 1
}

if [ -z "${frompage}" ] ; then # first page not defined
  frompage=1
fi
if [ -z "${topage}" ] ; then # last page not defined: use the page count
  topage=$(LC_ALL=C pdfinfo "$pdf" | awk '/^Pages:/ {print $NF;}')
fi
# Choose, for the requested compression mode, the extra pdftoppm flag, the
# extension of the rasterized page files and the DjVu encoder command.
case "$MODE" in
  mono)
    # bitonal compression
    monoopt="-mono"
    pmext="pbm"
    compress="cjb2"
    ;;
  gray)
    # grayscale compression
    monoopt=""
    pmext="ppm"
    compress="c44 -crcbnone"
    ;;
  *)
    # DjVuPhoto compression (suitable for color and grayscale images)
    monoopt=""
    pmext="ppm"
    compress="c44"
    ;;
esac
# Translate a crop specification such as "t10b40l5r5" into mogrify options.
#   $1 - specification: any of the letters t, b, l, r, each followed by the
#        number of pixels to cut from the top, bottom, left or right edge
# Prints the option string, or nothing when the specification contains no
# recognizable "<letter><number>" pair.
crop_options() {
  local spec=$1 side pattern opts=""
  local t="" b="" l="" r=""
  for side in t b l r ; do
    pattern="${side}([0-9]+)"
    if [[ $spec =~ $pattern ]] ; then
      # store the number in the local variable named after the edge letter
      printf -v "$side" '%s' "${BASH_REMATCH[1]}"
    fi
  done
  # "-crop +X+Y" cuts from the left/top, "-crop -X-Y" from the right/bottom
  if [[ -n $l || -n $t ]] ; then opts="$opts -crop +${l:-0}+${t:-0}" ; fi
  if [[ -n $r || -n $b ]] ; then opts="$opts -crop -${r:-0}-${b:-0}" ; fi
  printf '%s' "$opts"
}

if [ -n "$CROP" ] ; then # turn the crop specification into mogrify options
  cropopt=$(crop_options "$CROP")
  if [ -z "$cropopt" ] ; then echo "invalid crop arguments" ; usage ; exit 1 ; fi
fi
# A requested page geometry becomes the matching mogrify option; without a
# request resizeopt is left as it is.
[ -z "$resize" ] || resizeopt="-resize $resize"
# Convert pages frompage..topage of "$pdf" one at a time inside "$tmpdir" and
# merge the per-page results into the final document next to the input file.
#
# Per-page pipeline:
#   rasterize:          pdftoppm -r $DPI
#   optional crop:      mogrify $cropopt +repage
#   cut margins:        mogrify ... -trim +repage
#   add narrow margin:  ... -bordercolor white -border $MARGIN
#   encode:             cjb2/c44 for DjVu, JPEG for PDF
# Progress is displayed with page numbers; pages that contain a single color
# after trimming are reported as skipped.
#
# Any failing step aborts the run: the temporary directory is kept for
# inspection and the script exits with status 2.

# Encode one prepared image in the output format selected by $type.
#   $1 - source image
#   $2 - output file name without extension
encode_image() {
  case "$type" in
    pdf) convert -quality "$quality" "$1" "$2.jpg" ;;
    # $compress stays unquoted: it may hold a command plus options
    djvu) $compress -dpi "$DPI" "$1" "$2.djvu" ;;
    *) printf 'Unsupported output type: %s\n' "$type" >&2 ; return 1 ;;
  esac
}

if (
  [ -n "$tmpdir" ] && cd "$tmpdir" &&
  (
    spinner=("/" "-" "\\" "|")
    # for every page
    i=$frompage
    while [ "$i" -le "$topage" ] ; do
      echo -n "$i"
      # Every step below leaves the subshell on failure; merely skipping the
      # page counter increment would repeat the same page forever.
      # $monoopt, $cropopt and $resizeopt are unquoted on purpose: each holds
      # zero or more separate command-line words.
      pdftoppm -r "$DPI" $monoopt -f "$i" -l "$i" "$pdf" page || exit 1
      rendered=( page-*."$pmext" )
      fname=${rendered[0]}
      [ -e "$fname" ] || exit 1
      if [ -n "$cropopt" ] ; then
        mogrify -quiet $cropopt +repage "$fname" || exit 1
      fi
      mogrify -quiet -trim +repage $resizeopt "$fname" || exit 1
      # skip empty pages: %k is the number of unique colors in the image
      if [ "$(identify -format "%k" "$fname")" -gt 1 ] ; then
        if [ "$COLUMNS" -ne 1 ] ; then # cut page in COLUMNS x n pieces
          w=$(( $(identify -format "%w" "$fname") / COLUMNS )) # crop width
          h=$(( (2*MARGIN+w)*4/3 - 2*MARGIN )) # crop height, aspect 4:3
          pageh=$(identify -format "%h" "$fname")
          n=$(( pageh/h + 1 )) # number of vertical slices
          if [ "$n" -gt 1 ] ; then
            overlap=$(( (h*n-pageh)/(n-1) ))
          else
            # a page shorter than one slice: nothing overlaps, and the
            # formula above would divide by zero
            overlap=0
          fi
          dh=$(( h-overlap ))
          for (( c = 0; c < COLUMNS; c++ )) ; do # for every column
            for (( j = 0; j < n; j++ )) ; do # for every partial image
              piece=$(printf "%06d_%d_%02d" "$i" "$c" "$j")
              convert "$fname" -crop "${w}x${h}+$((c*w))+$((j*dh))" \
                -bordercolor white -border "$MARGIN" "$piece.$pmext" || exit 1
              encode_image "$piece.$pmext" "$piece" || exit 1
              rm "$piece.$pmext"
              printf "\b%s" "${spinner[$(((c*n+j)%4))]}"
            done
          done
          printf " "
        else # keep page as is
          if [ "$MARGIN" != "0" ] ; then
            mogrify -bordercolor white -border "$MARGIN" "$fname" || exit 1
          fi
          encode_image "$fname" "$(printf "%06d" "$i")" || exit 1
          printf " "
        fi
      else
        echo -n " skiped [empty page]"
      fi
      rm page-*."$pmext" || exit 1
      i=$((i+1))
    done
  ) && (
    echo -e "\nmerging..."
    case "$type" in
      djvu) djvm -c "$filename.djvu" *.djvu ;;
      pdf) convert *.jpg "${filename}_.pdf" ;;
    esac
  )
) ; then
  rm -rf "$tmpdir"
else
  printf "Failure\nTemporary directory left: %s\n" "$tmpdir"
  exit 2
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment