Skip to content

Instantly share code, notes, and snippets.

@pm0u
Last active August 29, 2015 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pm0u/dc60aaed6dfbca328968 to your computer and use it in GitHub Desktop.
Save pm0u/dc60aaed6dfbca328968 to your computer and use it in GitHub Desktop.
PDF processing script for personal use. With some adaptation it could suit other people as well.
#!/bin/bash
# pdf scanning script
# 1/13/2015
#
# Copies scanned PDFs from an input directory into the staging directory
# ~/pdfs (optionally uniting them with pdfunite first), optionally runs
# pdfsandwich on every staged PDF, moves the staged PDFs to an output
# directory and finally moves the source PDFs into <input>/OCRdone.
#
# Run without arguments for the option summary.
#
# Exit status: 0 on success, 2 when pdfsandwich failed, 1 on any other
# fatal error (bad option, missing input, failed copy/unite/move).
bold='\033[01m' #bold
red='\033[01;31m' #error color
NC='\033[0m' # No Color

# Print the option summary.
usage() {
  echo -e "pdeffer ${bold}[ -q ] [ -i | -I${NC} input_location${bold} ] [ -o | -O${NC} output_location${bold} ] [ -f${NC} filename ${bold} ] [ -t ${NC}datestring ${bold}| -T ] [ -N ] [ -u | -U ]${NC}"
  echo -e "${bold} -d${NC}\tdefault"
  echo -e "${bold} -q${NC}\treally quiet. no terminal output at all."
  echo -e "${bold} -i${NC}\tspecify #, defaults to /run/media/${USER}/AV_SCAN/DCIM/200DOC"
  echo -e "${bold} -I${NC}\tspecify an explicit input path"
  echo -e "${bold} -o${NC}\tspecify Dropbox directory, defaults to ~/Dropbox/UNSORTED"
  echo -e "${bold} -O${NC}\tspecify an explicit output path"
  echo -e "${bold} -f${NC}\tspecify string to append to files (after datestamp)"
  echo -e "${bold} -t${NC}\tspecify date string format (see ${bold}man date${NC}) be cautious of special characters. defaults to ${bold}%Y-%m-%d_%H.%M.%S${NC}"
  echo -e "${bold} -T${NC}\tno date stamping"
  echo -e "${bold} -N${NC}\tno processing (pdfsandwich). move and apply name if applicable"
  echo -e "${bold} -u${NC}\tunite all pdfs in input directory before processing. defaults to no datestamp. uses ${bold}pdfunite${NC}"
  echo -e "${bold} -U\t-u ${NC}with a date stamp."
}

# Print a bold progress line unless -q was given.
say() {
  [[ -n $QUIET ]] || echo -e "${bold}$*${NC}"
}

# Print an error in red on stderr and exit.
# Arguments: $1 - exit status, remaining - message
die() {
  local code=$1
  shift
  echo -e "${red}$*${NC}" >&2
  exit "$code"
}

# Run a command; with -q all of its output is discarded.
# Returns the command's exit status.
run() {
  if [[ -n $QUIET ]]; then
    "$@" &> /dev/null
  else
    "$@"
  fi
}

# Entry point.
# Arguments: the script's command line.
# Globals:   INPUT, OUTPUT, FILENAME, DATESTRING, NODATE, QUIET, NOPROC,
#            UNITE (written while parsing options, read afterwards)
main() {
  if (( $# == 0 )); then
    #tell em whats up
    usage
    return 0
  fi

  #make it happen
  local opt OPTIND=1
  while getopts "i:I:o:O:f:t:TvdqNuU" opt; do
    case "$opt" in
      i) INPUT="/run/media/$USER/AV_SCAN-$OPTARG/DCIM/200DOC" ;;
      I) INPUT="$OPTARG" ;;
      o) OUTPUT="$HOME/Dropbox/$OPTARG/" ;;
      O) OUTPUT="$OPTARG" ;;
      f) FILENAME="${OPTARG}.PDF" ;;
      t) DATESTRING="$OPTARG" ;;
      T) NODATE=1 ;;
      q) QUIET=1 ;;
      N) NOPROC=1 ;;
      u)
        UNITE=1
        NODATE=1
        ;;
      U) UNITE=1 ;;
      # -d only exists so the script can run with all defaults; -v is
      # accepted for compatibility and has no effect.
      d | v) ;;
      # getopts already reported the problem; do not go on to move files
      # around based on a half-parsed command line.
      *) exit 1 ;;
    esac
  done

  #set defaults
  : "${INPUT:=/run/media/$USER/AV_SCAN/DCIM/200DOC}"
  : "${OUTPUT:=$HOME/Dropbox/UNSORTED/}"
  : "${DATESTRING:=%Y-%m-%d_%H.%M.%S}"

  local staging="$HOME/pdfs"
  local archive="$INPUT/OCRdone"

  [[ -d $INPUT ]] || die 1 ">>>>>>input directory ${INPUT} not found, aborting..."

  # Sorted base names of the PDFs to handle. NUL-delimited so names with
  # whitespace stay intact.
  local -a inputs=() originals=()
  local name
  while IFS= read -r -d '' name; do
    inputs+=("$name")
  done < <(find "$INPUT" -maxdepth 1 -name '*.PDF' -printf '%f\0' | sort -z)

  if [[ -n $UNITE ]]; then
    (( ${#inputs[@]} > 0 )) || die 1 ">>>>>>no PDFs found in ${INPUT} to unite, aborting..."
    say ">>>>>Uniting ${#inputs[@]} pdfs..."
    local -a unite_sources=()
    for name in "${inputs[@]}"; do
      unite_sources+=("$INPUT/$name")
    done
    run pdfunite "${unite_sources[@]}" "$INPUT/UNITED.PDF" \
      || die 1 ">>>>>>pdfunite encountered an error, aborting..."
    # From here on only the united file is staged; the sources are
    # remembered so they get archived together with it at the end.
    originals=("${inputs[@]}")
    inputs=("UNITED.PDF")
  fi

  say ">>>>>Copying pdfs to ~/pdfs.."
  mkdir -p -- "$staging" || die 1 ">>>>>>cannot create ${staging}, aborting..."
  local -a cp_opts=(--preserve=timestamps)
  [[ -n $QUIET ]] || cp_opts=(-v "${cp_opts[@]}")
  local stamp target
  for name in "${inputs[@]}"; do
    # Per-file target name: -f wins, otherwise each file keeps its own name.
    target=${FILENAME:-$name}
    stamp=""
    if [[ -z $NODATE ]]; then
      # Prefix built from the source file's modification time, formatted
      # with the -t string.
      stamp=$(date -r "$INPUT/$name" +"$DATESTRING") \
        || die 1 ">>>>>>cannot read timestamp of ${INPUT}/${name}, aborting..."
      stamp+="_"
    fi
    run cp "${cp_opts[@]}" -- "$INPUT/$name" "$staging/${stamp}${target}" \
      || die 1 ">>>>>>copying ${INPUT}/${name} failed, aborting..."
  done

  # Everything in the staging directory is handled, including PDFs left
  # over from an earlier run.
  local -a staged=("$staging"/*.PDF)
  [[ -e ${staged[0]} ]] || die 1 ">>>>>>no PDFs found to process, aborting..."

  local pdf_status=0
  if [[ -z $NOPROC ]]; then
    say ">>>>>pdfsandwiching them.."
    local -a sandwich_opts=()
    [[ -n $QUIET ]] && sandwich_opts=(-quiet)
    local f count=0
    for f in "${staged[@]}"; do
      (( count += 1 ))
      say ">>>>>Processing $count of ${#staged[@]} - $f"
      # Remember a failure of any file, not only of the last one.
      run pdfsandwich "${sandwich_opts[@]}" -coo "-black-threshold 70% -enhance" "$f" \
        || pdf_status=$?
    done
  fi
  (( pdf_status == 0 )) || die 2 ">>>>>>pdfsandwich encountered an error, aborting..."

  # NOTE(review): pdfsandwich presumably writes its result next to the
  # input as <name>_ocr.pdf; the *.PDF glob below would then not pick it
  # up - confirm which files are expected to reach the output directory.
  say ">>>>>Moving processed PDFs to ${OUTPUT}..."
  local -a mv_opts=()
  [[ -n $QUIET ]] || mv_opts=(-v)
  staged=("$staging"/*.PDF)
  run mv "${mv_opts[@]}" -- "${staged[@]}" "$OUTPUT" \
    || die 1 ">>>>>>moving PDFs to ${OUTPUT} failed, aborting..."

  say ">>>>>moving PDFs to completed directory on scanner..."
  # Without the directory each mv would rename its file to "OCRdone",
  # every file overwriting the previous one.
  mkdir -p -- "$archive" || die 1 ">>>>>>cannot create ${archive}, aborting..."
  for name in "${inputs[@]}" "${originals[@]}"; do
    # Best effort, as before: a failed archive move does not abort the run.
    run mv "${mv_opts[@]}" -- "$INPUT/$name" "$archive"
  done

  say ">>>>completed, please check for errors"
  exit 0
}

main "$@"
@pm0u
Copy link
Author

pm0u commented Jan 14, 2015

Updated to move the original files, as well as the united file, to the completed directory after processing.
Also added a file count / total count for progress.

@pm0u
Copy link
Author

pm0u commented Jan 14, 2015

Eliminated the verbose option; output is now fully verbose by default. Specify -q for totally silent operation.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment