Skip to content

Instantly share code, notes, and snippets.

@frennkie
Last active November 24, 2018 18:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save frennkie/e8be005ba0cd4002e582efd7aff787e0 to your computer and use it in GitHub Desktop.
Save frennkie/e8be005ba0cd4002e582efd7aff787e0 to your computer and use it in GitHub Desktop.
#!/bin/bash
# OCR pipeline for the PDFs under ./originals:
#   1. split each PDF into 50-page chunks with qpdf        (-> split/)
#   2. rasterize each chunk with gs and OCR it with tesseract (-> tiff/, merge/)
#   3. print the qpdf command that merges the OCRed chunks  (-> output/)

# Trace every command as it is executed.
set -x

# Create all working directories up front; -p makes this a no-op when they
# already exist.
mkdir -p output merge split tiff
# Split every PDF found under ./originals into chunks of 50 pages.
# find hands the matches to a single inline sh script as positional
# parameters; the output name passed to qpdf is split/<original file name>.
find ./originals -type f -name '*.pdf' -exec sh -c '
  for pdf in "$@"; do
    qpdf --split-pages=50 "$pdf" "split/$(basename -- "$pdf")"
  done
' sh {} +
# OCR every chunk found under ./split:
#   - gs renders the chunk to a 300x300 dpi 24-bit colour TIFF in tiff/
#   - tesseract turns that TIFF into a searchable PDF in merge/
# Each intermediate file is removed only after the step that consumes it has
# succeeded. If gs or tesseract fails, the failure is reported on stderr, the
# remaining steps for that chunk are skipped, and the inline script exits
# non-zero so the failure is visible in the status of find.
find ./split -type f -name '*.pdf' -exec sh -c '
status=0
for chunk do
  file_name=$(basename -- "$chunk")
  base_name="${file_name%.*}"
  tiff="tiff/${base_name}.tiff"

  # Rasterize; do not delete the chunk or run OCR if this step failed.
  if ! gs -q -dNOPAUSE -r300x300 -sDEVICE=tiff24nc -sOutputFile="$tiff" "$chunk" -c quit; then
    echo "gs failed for ${chunk}; skipping OCR for this chunk" >&2
    status=1
    continue
  fi
  rm -f "$chunk"

  # OCR; tesseract appends the .pdf extension to the given output base.
  if ! tesseract "$tiff" "merge/${base_name}" pdf; then
    echo "tesseract failed for ${tiff}; no OCR output for this chunk" >&2
    status=1
    continue
  fi
  rm -f "$tiff"
done
exit "$status"
' sh {} +
# For every original PDF, print the qpdf command that merges its OCRed chunks
# from merge/ into output/<name>.pdf. The merge itself stays a manual step.
#
# Only chunks belonging to the current original are listed, selected by the
# "<name>-<digit>..." prefix.
# NOTE(review): this relies on qpdf --split-pages naming chunks
# "<name>-<zero-padded page range>.pdf"; confirm against the installed qpdf.
# Pathname expansion returns the matches sorted, so with zero-padded ranges
# the chunks appear in page order. printf %q shell-quotes every path so the
# printed command can be pasted as-is even when names contain spaces.
while IFS= read -r -d '' original; do
  file_name=${original##*/}
  base_name=${file_name%.*}

  # The quoted part is matched literally; only the trailing pattern globs.
  chunks=("merge/${base_name}"-[0-9]*.pdf)
  if [[ ! -e "${chunks[0]}" ]]; then
    # An unmatched glob stays literal, so the first element does not exist.
    printf 'No OCRed chunks found for %s; nothing to merge\n' "$original" >&2
    continue
  fi

  echo "Manual Step:"
  printf 'qpdf --empty %q --pages' "output/${base_name}.pdf"
  printf ' %q' "${chunks[@]}"
  printf ' --\n'
  # qpdf --empty "output/${base_name}.pdf" --pages "${chunks[@]}" --
done < <(find ./originals -type f -name '*.pdf' -print0)
# Remove the scratch directories. merge/ is left in place because the final
# qpdf merge is only printed as a manual step and still needs its contents.
# To drop merge/ as well: rm -rf merge split tiff
for scratch_dir in split tiff; do
  rm -rf "$scratch_dir"
done
# EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment