Last active
May 30, 2018 14:37
-
-
Save vfrico/0ed96a5447b0a95a23b8c9a920bbbbd8 to your computer and use it in GitHub Desktop.
Wrapper for unpaper. The input is a scanned, double-sided PDF book.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Usage: ./script.sh file.pdf
#
# Generate a .png for each page of the PDF, assuming that the PDF is a
# scanned book. unpaper crops each page, corrects its inclination and
# deletes scanning artifacts.
#
# Output: <pdf name>-<sheet number>-<page number>.png files, written to the
# current directory. All intermediate files live in a private temporary
# directory, so unrelated *.png files in the current directory are untouched.
#
# Requirements:
#   Ghostscript
#   Unpaper (in the Arch repositories and at https://github.com/Flameeyes/unpaper)
#   basename (probably in the coreutils package)
#   pnm2png/png2pnm (probably in the libpng package)

# Abort on the first failing command so a source image is never removed
# after a conversion step that did not succeed.
set -euo pipefail

if (( $# < 1 )); then
  echo "unpaper_pdf - Generates a .png page for each input page of PDF" >&2
  echo "Usage: unpaper_pdf.sh <pdf_file>" >&2
  exit 1
fi

pdf_file=$1
if [[ ! -f "$pdf_file" ]]; then
  printf 'unpaper_pdf: no such file: %s\n' "$pdf_file" >&2
  exit 1
fi

out_name=$(basename -- "$pdf_file" .pdf)

# Scratch space for the rasterized sheets and the unpaper output; removed on
# every exit path.
work_dir=$(mktemp -d)
trap 'rm -rf -- "$work_dir"' EXIT

# An unmatched glob must expand to nothing rather than to the literal pattern.
shopt -s nullglob

# Rasterize the PDF: one PNG per sheet (01.png, 02.png, ...) at 400 dpi.
gs -sDEVICE=pngalpha -o "$work_dir/%02d.png" -r400 "$pdf_file"

sheets=("$work_dir"/*.png)
if (( ${#sheets[@]} == 0 )); then
  printf 'unpaper_pdf: Ghostscript produced no pages for %s\n' "$pdf_file" >&2
  exit 1
fi

for sheet_png in "${sheets[@]}"; do
  sheet_base=${sheet_png##*/}
  sheet_base=${sheet_base%.png}
  sheet_pnm="$work_dir/${sheet_base}.pnm"

  # unpaper is fed PNM here, so convert the sheet first and drop the PNG.
  png2pnm -r "$sheet_png" "$sheet_pnm"
  rm -- "$sheet_png"

  # unpaper expands %02d itself, numbering each page it extracts.
  unpaper --layout single "$sheet_pnm" \
    "$work_dir/${out_name}-${sheet_base}-%02d.pnm"
  rm -- "$sheet_pnm"

  # Convert every generated PNM to PNG (PNG files are smaller) and place the
  # result in the current directory.
  for page_pnm in "$work_dir/${out_name}-${sheet_base}-"*.pnm; do
    page_base=${page_pnm##*/}
    page_base=${page_base%.pnm}
    page_png="${page_base}.png"
    pnm2png "$page_pnm" "$page_png"
    rm -- "$page_pnm"
    printf '%s\n' "$page_png"
  done
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment