jnerin/autoscan.sh

## autoscan.sh
#!/bin/bash
#
# Script to automatically scan consecutive pages and OCR them
#
# Dependencies:
# scanimage from SANE
# convert from imagemagick
# tesseract for OCR
#
# But they are easy to replace
#

# Output directory: the script changes into it, so every scanned page and its
# OCR result are written here.
DIRECTORY="${HOME}/Pictures/autoscan"

# SANE device string handed to scanimage via --device-name.
SCANNER='smfp:net;192.168.x.x'

# Scan resolution handed to scanimage via --resolution.
DPI='300'

# Language code handed to tesseract via -l.
TESSERACT_OCR_LANG='spa'

# Shutdown handler (installed as the INT trap): blocks until every background
# job started by this shell has finished, reports progress on stdout, then
# terminates the script.
fin() {
  printf '%s' "Waiting for all the children... "
  # With no arguments, wait covers all background jobs (the "tesseract &" runs).
  wait
  printf '%s\n' "done"
  exit
}
# On Ctrl-C (SIGINT) run fin, which waits for the background OCR jobs and exits.
trap 'fin' INT

# Output files are created relative to the working directory, so a failed cd
# must abort: otherwise the scans would land in whatever directory the script
# was started from.
cd -- "$DIRECTORY" || {
  printf 'autoscan: cannot change to directory: %s\n' "$DIRECTORY" >&2
  exit 1
}

# Main loop: scan one page, start OCR on it in the background, then wait for
# the operator to load the next sheet. Runs until Ctrl-C or end of input.
while true; do
	# Timestamped name with one-second granularity.
	FILENAME="scan-$(date +%Y%m%d-%H%M%S).jpg"

	# scanimage writes PNM to stdout; convert reads it from stdin (pnm:-) and
	# writes it to $FILENAME.
	scanimage --device-name="$SCANNER" -x 216mm -y 297mm --resolution "$DPI" --page-format A4 --format=pnm --progress | convert pnm:- "$FILENAME"
	# Snapshot immediately: PIPESTATUS is overwritten by the next command.
	pipeline_status=("${PIPESTATUS[@]}")

	if (( pipeline_status[0] == 0 && pipeline_status[1] == 0 )); then
		echo "Scan done, change sheet. Doing OCR now... "
		# OCR runs niced and in the background so the next page can be scanned
		# meanwhile; the output base name is the image name itself.
		# NOTE(review): Tesseract 4 and later spell this option "--psm" - confirm
		# against the installed version before changing it.
		nice tesseract "$FILENAME" "$FILENAME" -l "$TESSERACT_OCR_LANG" -psm 1 &
	else
		# Do not OCR a missing or truncated image.
		printf 'Scan failed (scanimage: %s, convert: %s); skipping OCR for %s\n' \
			"${pipeline_status[0]}" "${pipeline_status[1]}" "$FILENAME" >&2
	fi

	echo "Press Intro to scan another page or Ctrl-C to finish."
	# read fails at end of input (stdin closed or exhausted); without this check
	# the loop would keep scanning with no pause. Finish the same way Ctrl-C does.
	read -r || fin
done
	#!/bin/bash
	#
	# Script to automatically scan consecutive pages and OCR them
	#
	# Dependencies:
	# scanimage from SANE
	# convert from imagemagick
	# tesseract for OCR
	#
	# But they are easy to replace
	#

	# Directory that receives the scanned images.
	DIRECTORY="${HOME}/Pictures/autoscan"

	# Device string given to scanimage (--device-name) to reach the scanner.
	SCANNER='smfp:net;192.168.x.x'

	# Value given to scanimage as --resolution.
	DPI='300'

	# Language given to tesseract with -l.
	TESSERACT_OCR_LANG='spa'

	# INT trap handler: prints a progress message, waits for all background
	# jobs of this shell (the spawned tesseract runs), prints "done" and exits.
	fin() {
		printf '%s' "Waiting for all the children... "
		wait
		printf '%s\n' "done"
		exit
	}
	# Run fin on SIGINT (Ctrl-C) so background OCR jobs are awaited before exit.
	trap 'fin' INT

	# Abort if the output directory cannot be entered; continuing would write
	# the scans into the directory the script was launched from.
	cd -- "$DIRECTORY" || {
		printf 'autoscan: cannot change to directory: %s\n' "$DIRECTORY" >&2
		exit 1
	}

	# Main loop: scan a page, OCR it in the background, then wait for the
	# operator before scanning the next one. Ends on Ctrl-C or end of input.
	while true; do
		# Timestamped name with one-second granularity.
		FILENAME="scan-$(date +%Y%m%d-%H%M%S).jpg"

		# The pipe must be a real shell pipe: scanimage writes PNM to stdout and
		# convert reads it from stdin (pnm:-) and writes it to $FILENAME.
		scanimage --device-name="$SCANNER" -x 216mm -y 297mm --resolution "$DPI" --page-format A4 --format=pnm --progress | convert pnm:- "$FILENAME"
		# Snapshot immediately: PIPESTATUS is overwritten by the next command.
		pipeline_status=("${PIPESTATUS[@]}")

		if (( pipeline_status[0] == 0 && pipeline_status[1] == 0 )); then
			echo "Scan done, change sheet. Doing OCR now... "
			# Niced background OCR; the output base name is the image name itself.
			# NOTE(review): Tesseract 4 and later spell this option "--psm" -
			# confirm against the installed version before changing it.
			nice tesseract "$FILENAME" "$FILENAME" -l "$TESSERACT_OCR_LANG" -psm 1 &
		else
			# Do not OCR a missing or truncated image.
			printf 'Scan failed (scanimage: %s, convert: %s); skipping OCR for %s\n' \
				"${pipeline_status[0]}" "${pipeline_status[1]}" "$FILENAME" >&2
		fi

		echo "Press Intro to scan another page or Ctrl-C to finish."
		# read fails at end of input; finish like Ctrl-C instead of rescanning
		# endlessly without a pause.
		read -r || fin
	done