RidaAyed/scnDuplex.script

## scnDuplex.script
#!/bin/bash
##########################################
## SCRIPT VERSION  1.0.5       	        ##
## AUTHOR: MARKUS (www.och-group.de)    ##
## Requires apt-get install:        	##
##      libtiff-tools       		    ##
##      tesseract\*     				##
##      libtiff-dev     				##
##      pdftk           				##
##      imagemagick     				##
##########################################
# Timestamp used for the output file name (and, further down, the PDF title).
DATETIME=$(date +%Y-%m-%d"_"%H-%M-%S)
#startdir=$(pwd)
# Directory that receives the final, OCR'd PDF.
startdir=/home/ra/pi
# 16 random alphanumeric characters that make the working directory unique.
# LC_ALL=C keeps tr byte-oriented so raw random bytes cannot trigger
# multibyte decoding errors.
RANDOMNUMBER=$(LC_ALL=C tr -dc A-Za-z0-9 </dev/urandom | head -c 16)
outname=$DATETIME.pdf
tmpdir=/tmp/scan-$RANDOMNUMBER
echo "####### $DATETIME #########"
# Either scan into a fresh working directory, or use prepared *.tif files
# in the folder named by the first parameter.
if [ -z "$1" ]; then
	echo "####### TMPDIR $tmpdir ##########"
	echo "####### OUTNAME $outname ##########"
	# Stop right away when the working directory cannot be created or
	# entered; otherwise the scan would land in whatever directory the
	# script was started from.
	mkdir "$tmpdir" || exit 1
	cd "$tmpdir" || exit 1
	echo "################## Scanning ###################"
	# -b (batch mode) makes scanimage write the pages as files into the
	# current directory; only its standard output is captured here.
	scanResult=$(scanimage --page-width 221.121 --page-height 876.695 \
		-l 0 -t 0 -x 221.121 -y 876.695 --ald=yes --overscan On \
		--prepick=On -b --format=tiff --mode Color --resolution 300 \
		--source 'ADF Duplex' --swcrop=yes --buffermode On --swdespeck 2 \
		--swdeskew=yes --swskip 5% -d 'fujitsu:ScanSnap iX500:10443')
	# POSIX character class instead of the GNU-only \s escape.
	regexScan="[[:space:]]+scanimage: no SANE devices found[[:space:]]+"
	if [[ " $scanResult " =~ $regexScan ]]; then
		echo "!!!!!!! No scanner found !!!!!!!"
		echo "SCANRESULT: $scanResult"
		exit 1
	fi
	echo "################## Scanned ####################"
else
	# use existing folder (absolute path)
	tmpdir="$1"
	echo "####### TMPDIR $tmpdir ##########"
	echo "####### OUTNAME $outname ##########"
	# Validate the folder BEFORE changing into it.
	if [ ! -d "$tmpdir" ]; then
		echo "tmpdir $tmpdir does not exist"
		exit 1
	fi
	cd "$tmpdir" || exit 1
fi

optimize_color() {
	############################################################
	# Optimize color of image
	############################################################
	# call:                                                    #
	#       optimize_color <filename>                          #
	# result:                                                  #
	#       <filename> is level-adjusted in place. When the    #
	#       page is judged "not colorful", a two-color copy    #
	#       <basename>_dither.png is written next to it.       #
	############################################################
	# A page counts as colorful when the maximum of the g channel of the
	# HCL-converted, 2%-scaled image plus this value exceeds 1.
	local thresholdc=0.91
	local page=${1%.*}
	local testc maxima

	# optimize Colors --> test for colors
	convert "$1" -level 20%,80%,2.0 "$1"

	######## Other Color check variants - best is scale option, then breakup option
	# testing average colorfulness of an image in HSL (green channel is colorfulness) http://www.imagemagick.org/discourse-server/viewtopic.php?t=19580
	#testc1=`convert $1 -colorspace HSV -channel g -separate +channel -format "%[fx:mean]" info:`
	#testc2=`convert $1 -colorspace HSL -channel g -separate +channel -format "%[fx:mean]" info:`
	#echo "   PAGE: ${1%.*} this pic is grey if close to 0:" $testc1 "and" $testc2
	# Two methods from here http://www.imagemagick.org/discourse-server/viewtopic.php?f=1&t=29781
	#testc3=`convert $1 -crop 50x50 -colorspace HCL -scale 1x1! -channel G -separate +channel -evaluate-sequence Max -format %[fx:mean] info: 2>/dev/null`
	#echo "   PAGE: ${1%.*} breakup option says color value is" $testc3

	testc=$(convert "$1" -colorspace HCL -scale 2% -format "%[fx:maxima.g+$thresholdc>1?1:0]" info:)
	maxima=$(convert "$1" -colorspace HCL -scale 2% -format "%[fx:maxima.g]" info:)
	echo "   PAGE: $page scale option says color exists for %: $maxima"
	# String comparison: an empty or non-numeric result (convert failed)
	# simply selects the "not colorful" branch instead of raising a test error.
	if [ "$testc" = "1" ]; then
		echo "   PAGE: $page is colorful"
		## OPTIMIZE COLORS http://www.imagemagick.org/Usage/color_mods/
		#convert $1 -level 20%,80%,2.5 ${1%.*}"_color.tif"
		## Alternative Color optimization (for me it does not look as good as the first)
		#convert $infile -sigmoidal-contrast 10,50% ${inname}_color_sigmoidal.tif
	else
		echo "   PAGE: $page is not colorful"
		######### DITHER IS BEST FOR COLORED IMAGES! - tx is fine for text
		#convert $1 -compress Group4 -adaptive-resize 75% -density 200 -type bilevel TIFF:- | convert - ${1%.*}"_compressed.pdf"
		##Text Optimization: Convert to lineart
		#convert $1 -negate -separate -lat 20x20+25% -negate -evaluate-sequence add ${1%.*}"_la.png"
		##Text Optimization: dither to black / white picture
		convert "$1" +dither -colors 2 -colorspace gray -contrast-stretch 0 "${page}_dither.png"
		#Text Optimization: lots of Magic
		#convert -respect-parenthesis \
			#\( $1 -colorspace gray -type grayscale -contrast-stretch 0 \) \
			#\( -clone 0 -colorspace gray -negate -lat 15x15+10% -contrast-stretch 0 \) \
			#-compose copy_opacity -composite -fill white -opaque none +matte -deskew 40% +repage -sharpen 0x1 \
			#$1
	fi
}

optimize_crop() {
	############################################################
	# Crop Image
	############################################################
	# call:                                                    #
	#       optimize_crop <filename>                           #
	# result:                                                  #
	#       <filename> trimmed in place                        #
	############################################################

	##################################### CROPPED 2 IS BETTER!
	# crop Borders if black 1
	#infile=$1
	#inname=${1%.*}
	#convert $infile +repage -scale x1! -bordercolor black -border 1 -fuzz 30% -trim ${inname}_tmp1.png
	#width=`convert ${inname}_tmp1.png -format "%w" info:`
	#offsets=`convert ${inname}_tmp1.png -format "%O" info:`
	#xoff=`echo $offsets | cut -d+ -f2`
	#convert $infile +repage -scale 1x! -bordercolor black -border 1 -fuzz 60% -trim ${inname}_tmp2.png
	#height=`convert ${inname}_tmp2.png -format "%h" info:`
	#offsets=`convert ${inname}_tmp2.png -format "%O" info:`
	#yoff=`echo $offsets | cut -d+ -f3`
	#convert $infile -crop ${width}x${height}+${xoff}+${yoff} +repage ${inname}_cropped_1.jpg
	# Crop Borders variant 2: trim the border with a 15% color tolerance and
	# overwrite the input file. The file name is quoted so that names
	# containing spaces or glob characters reach convert as one argument.
	convert -fuzz 15% -trim "$1" "$1"
}

correct_orientation() {
	########################################################
	# Orientation correction (rotate if 90,180,270 degree) #
	########################################################
	# call:                                                #
	# 	correct_orientation <filename>                 #
	# result:                                              #
	#	<filename> rotated by 180 degrees in place for #
	#	detected orientations 180 and 270; the global  #
	#	variable "orientation" is set to the detected  #
	#	value (0 when nothing was detected) so the     #
	#	caller can finish 90/270 on the PDF.           #
	#	<basename>_tesseract.info holds the raw        #
	#	tesseract output.                              #
	########################################################
	local base=${1%.*}
	local file="${base}_tesseract.info"
	local file_content
	# {1,3}: tesseract reports 0, 90, 180 or 270. The former {3} could only
	# ever match 180 and 270, which made the '90' branch unreachable and
	# reported upright pages as "Cannot find any orientation".
	# [[:space:]] replaces the GNU-only \s escape.
	local regexOrientation="[[:space:]]+Orientation in degrees: ([0-9]{1,3})[[:space:]]+"

	# Get info from tesseract without creating a pdf file
	# NOTE(review): newer tesseract releases appear to spell this option
	# --psm; confirm against the installed version.
	#tesseract -psm 0 -l eng+deu $1 result_${1%.*} 1>${1%.*}"_tesseract.info" 2>&1
	tesseract -psm 0 -l deu "$1" "result_${base}" 1>"$file" 2>&1
	file_content=$(cat "$file")
	# Deliberately global: read by the caller after this function returns.
	orientation=0
	if [[ " $file_content " =~ $regexOrientation ]]; then
		case "${BASH_REMATCH[1]}" in
			'90')
				# 90 is readable from the right side
				#echo "   PAGE: ${1%.*} Detected wrong orientation:" ${BASH_REMATCH[1]}
				## Rotate picture
				#convert $1 -rotate 90 +repage $1;
				# The image is left untouched here; only the
				# orientation is recorded.
				# NOTE(review): the rotation direction applied later
				# for 90/270 follows the original mapping - verify it
				# against tesseract's "Rotate:" line.
				orientation=90
			;;
			'180')
				# 180 is upside down
				echo "   PAGE: $base Detected wrong orientation: ${BASH_REMATCH[1]}"
				# Rotate picture
				convert "$1" -rotate 180 "$1"
				orientation=180
			;;
			'270')
				# 270 is readable from left side
				echo "   PAGE: $base Detected wrong orientation: ${BASH_REMATCH[1]}"
				# Rotate picture by 180 here; the remaining quarter
				# turn is left to the caller.
				convert "$1" -rotate 180 +repage "$1"
				orientation=270
			;;
			*)
				#echo "   PAGE: ${1%.*} Detected correct orientation:" ${BASH_REMATCH[1]}
			;;
		esac
	else
		echo "   PAGE: $base Cannot find any orientation"
	fi
	############################################################
	# END Orientation correction				   #
	############################################################
}

correct_blank_page() {
	############################################################
	# Test if it is a blank page
	############################################################
	# call:                                                    #
	#       correct_blank_page <filename>        		   #
	# result:                                                  #
	#       <filename> is deleted when BOTH checks agree that  #
	#       the page is blank, otherwise it is left untouched. #
	#       The global BLANKPROSPECT is set to true/false      #
	#       according to the first (brightness) check.         #
	############################################################
	# Threshold for deleting blank pages (mean of the two-color image)
	local threshold=0.99
	local base=${1%.*}
	local info="${base}_tesseract.info"
	local is_blank mean file_content
	# [[:space:]] replaces the GNU-only \s escape.
	local regexCharacters="[[:space:]]+Too few characters. Skipping this page[[:space:]]+"

	# Check 1/2: reduce the page to two gray levels and compare its mean
	# against the threshold.
	is_blank=$(convert "$1" +dither -colors 2 -colorspace gray -contrast-stretch 0 -format "%[fx:mean>$threshold?1:0]" info:)
	mean=$(convert "$1" +dither -colors 2 -colorspace gray -contrast-stretch 0 -format "%[fx:mean]" info:)
	echo "   PAGE: $base is blank for %: $mean"
	# String comparison: empty output from a failed convert means "not blank"
	# instead of a test error.
	if [ "$is_blank" != "1" ]; then
		BLANKPROSPECT=false
		return
	fi
	echo "   PAGE: $base Blank Page (1/2 - GREY-CHECK): SEEMS TO BE A BLANK PAGE------"
	BLANKPROSPECT=true

	# Check 2/2: reuse an existing tesseract report, create it otherwise.
	if [ ! -f "$info" ]; then
		#tesseract -psm 0 -l eng+deu $1 result_${1%.*} 1>${1%.*}"_tesseract.info" 2>&1
		tesseract -psm 0 -l deu "$1" "result_${base}" 1>"$info" 2>&1
	fi
	file_content=$(cat "$info")
	if [[ " $file_content " =~ $regexCharacters ]]; then
		echo "   PAGE: $base Blank Page (2/2 - CHARACTER-CHECK): SEEMS TO BE A BLANK PAGE------"
		# Reaching this point already implies the brightness check
		# passed, so both checks agree.
		echo "   PAGE: $base is a blank page - deleting $1------"
		rm -- "$1"
	fi
	############################################################
}

# PIDs of the background page workers; filled by addProcess.
declare -a pids
waitProcessing() {
	############################################################
	# Block until every process recorded in "pids" has ended
	############################################################
	# usage:
	#       waitProcessing
	############################################################
	# return:
	#       when all pids are processed, this one will end too;
	#       "pids" is empty afterwards
	############################################################
	local pid rc
	for pid in "${pids[@]}"; do
		# Children of this shell are reaped directly, without polling.
		wait "$pid" 2>/dev/null
		rc=$?
		# 127 is what wait reports for a PID that is not a child of this
		# shell; fall back to polling so such PIDs are still waited for.
		if [ "$rc" -eq 127 ]; then
			while kill -0 "$pid" 2>/dev/null; do
				sleep 1
			done
		fi
	done
	pids=()
	echo "---All Pages Done!"
}

addProcess() {
	############################################################
	# Remembers the PID of a background worker
	############################################################
	# usage:
	#       addProcess <filename> $!
	############################################################
	# result:
	#       the PID is appended to the global array "pids" and
	#       a log line naming the page and the PID is printed
	############################################################
	# Locals keep the caller's variables (e.g. its own "x") untouched.
	local x=${1%.*}
	local pid=$2
	# Append in place instead of re-splitting the whole array.
	pids+=("$pid")
	echo "   PAGE: $x (PID $pid)"
	############################################################
}

process() {
	############################################################
	# parallelizable Process for working on each page
	############################################################
	# usage:
	#	process <filename>
	############################################################
	# result:
	#	<basename>.pdf for a kept page, or nothing when the
	#	page file disappeared (e.g. deleted as blank).
	#	A snapshot <basename>_<STEP>.tif is kept after each
	#	step. Reads the globals DATETIME, orientation and i.
	############################################################
	local x=${1%.*}
	############################################################
	echo "   PAGE: $x - BEGIN"
	cp "$x.tif" "${x}_BEGIN.tif"
	echo "   PAGE: $x - CROP"
	optimize_crop "$x.tif"
	# "return", not "continue": there is no loop inside this function, so
	# "continue" only printed an error and processing went on.
	if [ ! -f "$x.tif" ]; then
		return
	fi
	cp "$x.tif" "${x}_CROPPED.tif"
	echo "   PAGE: $x - COLOR CHECK"
	optimize_color "$x.tif"
	cp "$x.tif" "${x}_COLOR_OPTIMIZED.tif"
	echo "   PAGE: $x - BLANK CHECK"
	correct_blank_page "$x.tif"
	# The blank check deletes the page file when it considers it blank.
	if [ ! -f "$x.tif" ]; then
		return
	fi
	cp "$x.tif" "${x}_NOT_BLANK.tif"
	echo "   PAGE: $x - ROTATION CHECK"
	correct_orientation "$x.tif"
	cp "$x.tif" "${x}_CORRECT_ORIENTATION.tif"
	echo "   PAGE: $x - CREATE PDF PAGE and reorientate $orientation"
	if [ -f "$x.tif" ]; then
		#tiff2pdf -o "final_$x.pdf" -z -u m -p "A4" -F $x".tif"
		## Create PDF with fit to A4 - even in landscape mode - does not work
		#convert -compress Group4 -density 300 -define pdf:fit-page=A4 $x".tif" $x"_single.pdf"
		#convert $x".tif" -resize 595x823^> -gravity center -background white $x"_singles.pdf"

		# convert file to A4 PDF
		tiff2pdf -p a4 -z -u m -t "Scan-$DATETIME" -f -o "$x.pdf" "$x.tif"
		case "$orientation" in
			90 | 270)
				# 90: rotate pdf +90.
				# 270: the image was already rotated 180 - now add 90.
				# The original PDF is replaced only when pdftk
				# succeeded, so a failed rotation no longer loses
				# the page.
				if pdftk "$x.pdf" cat 1east output "${x}_o.pdf"; then
					mv -f -- "${x}_o.pdf" "$x.pdf"
				fi
			;;
		esac
	fi


#echo "---PAGE: $i -PDFTK-----"
#echo "---PAGE: $i -FLATTEN---"
#pdftk tiff2pdf_$x.pdf cat output pdftk_$x.pdf flatten
#pdftk pdftk_$x.pdf dump_data > pdftk_$x.info
#echo "---PAGE: $i -NORMALIZE-"
#convert -normalize -density 300 -depth 8 pdftk_$x.pdf $x.png
#    echo "---PAGE: $i -TESSERACT get info-"
#    #FOR: correct_orientation(): tesseract -psm 0 -l deu+eng $x.png result_$x 1>tesseract_$x.info 2>&1
#    tesseract -psm 1 -l deu+eng $x.png result_$x pdf quiet 1>/dev/null 2>&1
#    echo "---PAGE: $i -METADATA--"
#    pdftk result_$x.pdf dump_data > pdftk_$x.info2
#    pdftk result_$x.pdf update_info pdftk_$x.info output final_$x.pdf
	# $i is the caller's page counter, not a variable of this function.
	echo "---PAGE: $i -END------"
}


############################################################
#			     MAIN
############################################################
echo "################### Preprocessing ####################"
i=1
# Collect the scanned pages in natural (version) order, so that a page
# numbered 10 sorts after page 9; a plain glob would sort lexicographically.
# Reading line by line keeps file names with spaces intact.
mapfile -t scanned_pages < <(printf '%s\n' *.tif | sort -V)
for page in "${scanned_pages[@]}"; do
	# An unmatched glob stays the literal string "*.tif"; skip it.
	[ -f "$page" ] || continue
	# Create x as number with 4 digits counting up
	x=$(printf "%04d" "$i")
	cp -- "$page" "$x.tif"
	# Execute parallel worker for each scanned page
	process "$x.tif" &
	addProcess "$x.tif" $!
	# Next page
	i=$((i + 1))
done
# Wait until all pages are done
waitProcessing


echo "############ Combine all pdf to one ###########"
# Clean up only after the final PDF was really produced, so a failing
# pdftk/ocrmypdf run no longer throws the scanned pages away.
if pdftk *.pdf cat output "$outname"; then
	echo "############ OCR complete pdf #################"
	## Run ocrmypdf with -l eng+deu so that umlauts are recognized as well
	if ocrmypdf "$outname" "$startdir/$outname" -l eng+deu; then
		cp -- "$startdir/$outname" /home/ra/temp
		#paperwork-shell import $startdir/$outname
		#ranger --selectfile=$startdir/$outname
		ranger --selectfile="/home/ra/temp/$outname"
		echo "################ Cleaning Up ##################"
		cd ..
		# NOTE(review): when a folder was passed as first parameter,
		# tmpdir is that folder and it is removed here as well - confirm
		# this is intended.
		# ${tmpdir:?} aborts instead of running "rm -rf" on an empty path.
		rm -rf -- "${tmpdir:?}"
	else
		echo "ocrmypdf failed - keeping $tmpdir" >&2
	fi
else
	echo "pdftk failed - keeping $tmpdir" >&2
fi
cd "$startdir"


# REMINDER for BARCODE FUNCTIONALITY
	#convert -density 150 "$i[0]" -quality 100 -sharpen 0x1.0 "$i.jpg" # create a JPG in which to look for a possible barcode
	#barcode=`zbarimg -q --raw "$i.jpg"` # search for a barcode and store it in a variable
	#rm "$i.jpg"									# delete the image again
	#!/bin/bash
	##########################################
	## SCRIPT VERSION 1.0.5 ##
	## AUTHOR: MARKUS (www.och-group.de) ##
	## Requires apt-get install: ##
	## libtiff-tools ##
	## tesseract\* ##
	## libtiff-dev ##
	## pdftk ##
	## imagemagick ##
	##########################################
	# Timestamp used for the output file name (and, further down, the PDF title).
	DATETIME=$(date +%Y-%m-%d"_"%H-%M-%S)
	#startdir=$(pwd)
	# Directory that receives the final, OCR'd PDF.
	startdir=/home/ra/pi
	# 16 random alphanumeric characters that make the working directory unique.
	# The pipes must be real pipes: written as "\|" they were handed to cat as
	# file names and the command substitution never finished.
	# LC_ALL=C keeps tr byte-oriented so raw random bytes cannot trigger
	# multibyte decoding errors.
	RANDOMNUMBER=$(LC_ALL=C tr -dc A-Za-z0-9 </dev/urandom | head -c 16)
	outname=$DATETIME.pdf
	tmpdir=/tmp/scan-$RANDOMNUMBER
	echo "####### $DATETIME #########"
	# Either scan into a fresh working directory, or use prepared *.tif files
	# in the folder named by the first parameter.
	if [ -z "$1" ]; then
		echo "####### TMPDIR $tmpdir ##########"
		echo "####### OUTNAME $outname ##########"
		# Stop right away when the working directory cannot be created or
		# entered; otherwise the scan would land in whatever directory the
		# script was started from.
		mkdir "$tmpdir" || exit 1
		cd "$tmpdir" || exit 1
		echo "################## Scanning ###################"
		# -b (batch mode) makes scanimage write the pages as files into the
		# current directory; only its standard output is captured here.
		scanResult=$(scanimage --page-width 221.121 --page-height 876.695 \
			-l 0 -t 0 -x 221.121 -y 876.695 --ald=yes --overscan On \
			--prepick=On -b --format=tiff --mode Color --resolution 300 \
			--source 'ADF Duplex' --swcrop=yes --buffermode On --swdespeck 2 \
			--swdeskew=yes --swskip 5% -d 'fujitsu:ScanSnap iX500:10443')
		# POSIX character class instead of the GNU-only \s escape.
		regexScan="[[:space:]]+scanimage: no SANE devices found[[:space:]]+"
		if [[ " $scanResult " =~ $regexScan ]]; then
			echo "!!!!!!! No scanner found !!!!!!!"
			echo "SCANRESULT: $scanResult"
			exit 1
		fi
		echo "################## Scanned ####################"
	else
		# use existing folder (absolute path)
		tmpdir="$1"
		echo "####### TMPDIR $tmpdir ##########"
		echo "####### OUTNAME $outname ##########"
		# Validate the folder BEFORE changing into it.
		if [ ! -d "$tmpdir" ]; then
			echo "tmpdir $tmpdir does not exist"
			exit 1
		fi
		cd "$tmpdir" || exit 1
	fi

	optimize_color() {
		############################################################
		# Optimize color of image
		############################################################
		# call:
		#       optimize_color <filename>
		# result:
		#       <filename> is level-adjusted in place. When the
		#       page is judged "not colorful", a two-color copy
		#       <basename>_dither.png is written next to it.
		############################################################
		# A page counts as colorful when the maximum of the g channel of the
		# HCL-converted, 2%-scaled image plus this value exceeds 1.
		local thresholdc=0.91
		local page=${1%.*}
		local testc maxima

		# optimize Colors --> test for colors
		convert "$1" -level 20%,80%,2.0 "$1"

		######## Other Color check variants - best is scale option, then breakup option
		# testing average colorfulness of an image in HSL (green channel is colorfulness) http://www.imagemagick.org/discourse-server/viewtopic.php?t=19580
		#testc1=`convert $1 -colorspace HSV -channel g -separate +channel -format "%[fx:mean]" info:`
		#testc2=`convert $1 -colorspace HSL -channel g -separate +channel -format "%[fx:mean]" info:`
		#echo " PAGE: ${1%.*} this pic is grey if close to 0:" $testc1 "and" $testc2
		# Two methods from here http://www.imagemagick.org/discourse-server/viewtopic.php?f=1&t=29781
		#testc3=`convert $1 -crop 50x50 -colorspace HCL -scale 1x1! -channel G -separate +channel -evaluate-sequence Max -format %[fx:mean] info: 2>/dev/null`
		#echo " PAGE: ${1%.*} breakup option says color value is" $testc3

		testc=$(convert "$1" -colorspace HCL -scale 2% -format "%[fx:maxima.g+$thresholdc>1?1:0]" info:)
		maxima=$(convert "$1" -colorspace HCL -scale 2% -format "%[fx:maxima.g]" info:)
		echo " PAGE: $page scale option says color exists for %: $maxima"
		# String comparison: an empty or non-numeric result (convert failed)
		# simply selects the "not colorful" branch instead of raising a test error.
		if [ "$testc" = "1" ]; then
			echo " PAGE: $page is colorful"
			## OPTIMIZE COLORS http://www.imagemagick.org/Usage/color_mods/
			#convert $1 -level 20%,80%,2.5 ${1%.*}"_color.tif"
			## Alternative Color optimization (for me it does not look as good as the first)
			#convert $infile -sigmoidal-contrast 10,50% ${inname}_color_sigmoidal.tif
		else
			echo " PAGE: $page is not colorful"
			######### DITHER IS BEST FOR COLORED IMAGES! - tx is fine for text
			#convert $1 -compress Group4 -adaptive-resize 75% -density 200 -type bilevel TIFF:- | convert - ${1%.*}"_compressed.pdf"
			##Text Optimization: Convert to lineart
			#convert $1 -negate -separate -lat 20x20+25% -negate -evaluate-sequence add ${1%.*}"_la.png"
			##Text Optimization: dither to black / white picture
			convert "$1" +dither -colors 2 -colorspace gray -contrast-stretch 0 "${page}_dither.png"
			#Text Optimization: lots of Magic
			#convert -respect-parenthesis \
			#\( $1 -colorspace gray -type grayscale -contrast-stretch 0 \) \
			#\( -clone 0 -colorspace gray -negate -lat 15x15+10% -contrast-stretch 0 \) \
			#-compose copy_opacity -composite -fill white -opaque none +matte -deskew 40% +repage -sharpen 0x1 \
			#$1
		fi
	}

	optimize_crop() {
		############################################################
		# Crop Image
		############################################################
		# call:
		#       optimize_crop <filename>
		# result:
		#       <filename> trimmed in place
		############################################################

		##################################### CROPPED 2 IS BETTER!
		# crop Borders if black 1
		#infile=$1
		#inname=${1%.*}
		#convert $infile +repage -scale x1! -bordercolor black -border 1 -fuzz 30% -trim ${inname}_tmp1.png
		#width=`convert ${inname}_tmp1.png -format "%w" info:`
		#offsets=`convert ${inname}_tmp1.png -format "%O" info:`
		#xoff=`echo $offsets | cut -d+ -f2`
		#convert $infile +repage -scale 1x! -bordercolor black -border 1 -fuzz 60% -trim ${inname}_tmp2.png
		#height=`convert ${inname}_tmp2.png -format "%h" info:`
		#offsets=`convert ${inname}_tmp2.png -format "%O" info:`
		#yoff=`echo $offsets | cut -d+ -f3`
		#convert $infile -crop ${width}x${height}+${xoff}+${yoff} +repage ${inname}_cropped_1.jpg
		# Crop Borders variant 2: trim the border with a 15% color tolerance
		# and overwrite the input file. The file name is quoted so that names
		# containing spaces or glob characters reach convert as one argument.
		convert -fuzz 15% -trim "$1" "$1"
	}

	correct_orientation() {
		########################################################
		# Orientation correction (rotate if 90,180,270 degree) #
		########################################################
		# call:
		#       correct_orientation <filename>
		# result:
		#       <filename> rotated by 180 degrees in place for
		#       detected orientations 180 and 270; the global
		#       variable "orientation" is set to the detected
		#       value (0 when nothing was detected) so the caller
		#       can finish 90/270 on the PDF.
		#       <basename>_tesseract.info holds the raw tesseract
		#       output.
		########################################################
		# ${1%.*} strips the extension. The former ${1%.} only removed a
		# trailing dot, so the report was written under a different name than
		# the one read back below.
		local base=${1%.*}
		local file="${base}_tesseract.info"
		local file_content
		# {1,3}: tesseract reports 0, 90, 180 or 270. The former {3} could
		# only ever match 180 and 270, which made the '90' branch unreachable
		# and reported upright pages as "Cannot find any orientation".
		# [[:space:]] replaces the GNU-only \s escape.
		local regexOrientation="[[:space:]]+Orientation in degrees: ([0-9]{1,3})[[:space:]]+"

		# Get info from tesseract without creating a pdf file
		# NOTE(review): newer tesseract releases appear to spell this option
		# --psm; confirm against the installed version.
		#tesseract -psm 0 -l eng+deu $1 result_${1%.*} 1>${1%.*}"_tesseract.info" 2>&1
		tesseract -psm 0 -l deu "$1" "result_${base}" 1>"$file" 2>&1
		file_content=$(cat "$file")
		# Deliberately global: read by the caller after this function returns.
		orientation=0
		if [[ " $file_content " =~ $regexOrientation ]]; then
			case "${BASH_REMATCH[1]}" in
				'90')
					# 90 is readable from the right side
					#echo " PAGE: ${1%.*} Detected wrong orientation:" ${BASH_REMATCH[1]}
					## Rotate picture
					#convert $1 -rotate 90 +repage $1;
					# The image is left untouched here; only the
					# orientation is recorded.
					# NOTE(review): the rotation direction applied later
					# for 90/270 follows the original mapping - verify it
					# against tesseract's "Rotate:" line.
					orientation=90
				;;
				'180')
					# 180 is upside down
					echo " PAGE: $base Detected wrong orientation: ${BASH_REMATCH[1]}"
					# Rotate picture
					convert "$1" -rotate 180 "$1"
					orientation=180
				;;
				'270')
					# 270 is readable from left side
					echo " PAGE: $base Detected wrong orientation: ${BASH_REMATCH[1]}"
					# Rotate picture by 180 here; the remaining quarter
					# turn is left to the caller.
					convert "$1" -rotate 180 +repage "$1"
					orientation=270
				;;
				*)
					#echo " PAGE: ${1%.*} Detected correct orientation:" ${BASH_REMATCH[1]}
				;;
			esac
		else
			echo " PAGE: $base Cannot find any orientation"
		fi
		############################################################
		# END Orientation correction #
		############################################################
	}

	correct_blank_page() {
		############################################################
		# Test if it is a blank page
		############################################################
		# call:
		#       correct_blank_page <filename>
		# result:
		#       <filename> is deleted when BOTH checks agree that
		#       the page is blank, otherwise it is left untouched.
		#       The global BLANKPROSPECT is set to true/false
		#       according to the first (brightness) check.
		############################################################
		# Threshold for deleting blank pages (mean of the two-color image)
		local threshold=0.99
		# ${1%.*} strips the extension. The former ${1%.} only removed a
		# trailing dot, so the tesseract report was written under a different
		# name than the one checked and read below.
		local base=${1%.*}
		local info="${base}_tesseract.info"
		local is_blank mean file_content
		# [[:space:]] replaces the GNU-only \s escape.
		local regexCharacters="[[:space:]]+Too few characters. Skipping this page[[:space:]]+"

		# Check 1/2: reduce the page to two gray levels and compare its mean
		# against the threshold.
		is_blank=$(convert "$1" +dither -colors 2 -colorspace gray -contrast-stretch 0 -format "%[fx:mean>$threshold?1:0]" info:)
		mean=$(convert "$1" +dither -colors 2 -colorspace gray -contrast-stretch 0 -format "%[fx:mean]" info:)
		echo " PAGE: $base is blank for %: $mean"
		# String comparison: empty output from a failed convert means
		# "not blank" instead of a test error.
		if [ "$is_blank" != "1" ]; then
			BLANKPROSPECT=false
			return
		fi
		echo " PAGE: $base Blank Page (1/2 - GREY-CHECK): SEEMS TO BE A BLANK PAGE------"
		BLANKPROSPECT=true

		# Check 2/2: reuse an existing tesseract report, create it otherwise.
		if [ ! -f "$info" ]; then
			#tesseract -psm 0 -l eng+deu $1 result_${1%.*} 1>${1%.*}"_tesseract.info" 2>&1
			tesseract -psm 0 -l deu "$1" "result_${base}" 1>"$info" 2>&1
		fi
		file_content=$(cat "$info")
		if [[ " $file_content " =~ $regexCharacters ]]; then
			echo " PAGE: $base Blank Page (2/2 - CHARACTER-CHECK): SEEMS TO BE A BLANK PAGE------"
			# Reaching this point already implies the brightness check
			# passed, so both checks agree.
			echo " PAGE: $base is a blank page - deleting $1------"
			rm -- "$1"
		fi
		############################################################
	}

	# PIDs of the background page workers; filled by addProcess.
	declare -a pids
	waitProcessing() {
		############################################################
		# Block until every process recorded in "pids" has ended
		############################################################
		# usage:
		#       waitProcessing
		############################################################
		# return:
		#       when all pids are processed, this one will end too;
		#       "pids" is empty afterwards
		############################################################
		local pid rc
		for pid in "${pids[@]}"; do
			# Children of this shell are reaped directly, without polling.
			wait "$pid" 2>/dev/null
			rc=$?
			# 127 is what wait reports for a PID that is not a child of
			# this shell; fall back to polling so such PIDs are still
			# waited for.
			if [ "$rc" -eq 127 ]; then
				while kill -0 "$pid" 2>/dev/null; do
					sleep 1
				done
			fi
		done
		pids=()
		echo "---All Pages Done!"
	}

	addProcess() {
		############################################################
		# Remembers the PID of a background worker
		############################################################
		# usage:
		#       addProcess <filename> $!
		############################################################
		# result:
		#       the PID is appended to the global array "pids" and
		#       a log line naming the page and the PID is printed
		############################################################
		# Locals keep the caller's variables (e.g. its own "x") untouched.
		local x=${1%.*}
		local pid=$2
		# Append in place instead of re-splitting the whole array.
		pids+=("$pid")
		echo " PAGE: $x (PID $pid)"
		############################################################
	}

	process() {
		############################################################
		# parallelizable Process for working on each page
		############################################################
		# usage:
		#       process <filename>
		############################################################
		# result:
		#       <basename>.pdf for a kept page, or nothing when the
		#       page file disappeared (e.g. deleted as blank).
		#       A snapshot <basename>_<STEP>.tif is kept after each
		#       step. Reads the globals DATETIME, orientation and i.
		############################################################
		local x=${1%.*}
		############################################################
		echo " PAGE: $x - BEGIN"
		cp "$x.tif" "${x}_BEGIN.tif"
		echo " PAGE: $x - CROP"
		optimize_crop "$x.tif"
		# "return", not "continue": there is no loop inside this function,
		# so "continue" only printed an error and processing went on.
		if [ ! -f "$x.tif" ]; then
			return
		fi
		cp "$x.tif" "${x}_CROPPED.tif"
		echo " PAGE: $x - COLOR CHECK"
		optimize_color "$x.tif"
		cp "$x.tif" "${x}_COLOR_OPTIMIZED.tif"
		echo " PAGE: $x - BLANK CHECK"
		correct_blank_page "$x.tif"
		# The blank check deletes the page file when it considers it blank.
		if [ ! -f "$x.tif" ]; then
			return
		fi
		cp "$x.tif" "${x}_NOT_BLANK.tif"
		echo " PAGE: $x - ROTATION CHECK"
		correct_orientation "$x.tif"
		cp "$x.tif" "${x}_CORRECT_ORIENTATION.tif"
		echo " PAGE: $x - CREATE PDF PAGE and reorientate $orientation"
		if [ -f "$x.tif" ]; then
			#tiff2pdf -o "final_$x.pdf" -z -u m -p "A4" -F $x".tif"
			## Create PDF with fit to A4 - even in landscape mode - does not work
			#convert -compress Group4 -density 300 -define pdf:fit-page=A4 $x".tif" $x"_single.pdf"
			#convert $x".tif" -resize 595x823^> -gravity center -background white $x"_singles.pdf"

			# convert file to A4 PDF
			tiff2pdf -p a4 -z -u m -t "Scan-$DATETIME" -f -o "$x.pdf" "$x.tif"
			case "$orientation" in
				90 | 270)
					# 90: rotate pdf +90.
					# 270: the image was already rotated 180 - now add 90.
					# The original PDF is replaced only when pdftk
					# succeeded, so a failed rotation no longer loses
					# the page.
					if pdftk "$x.pdf" cat 1east output "${x}_o.pdf"; then
						mv -f -- "${x}_o.pdf" "$x.pdf"
					fi
				;;
			esac
		fi



		#echo "---PAGE: $i -PDFTK-----"
		#echo "---PAGE: $i -FLATTEN---"
		#pdftk tiff2pdf_$x.pdf cat output pdftk_$x.pdf flatten
		#pdftk pdftk_$x.pdf dump_data > pdftk_$x.info
		#echo "---PAGE: $i -NORMALIZE-"
		#convert -normalize -density 300 -depth 8 pdftk_$x.pdf $x.png
		# echo "---PAGE: $i -TESSERACT get info-"
		# #FOR: correct_orientation(): tesseract -psm 0 -l deu+eng $x.png result_$x 1>tesseract_$x.info 2>&1
		# tesseract -psm 1 -l deu+eng $x.png result_$x pdf quiet 1>/dev/null 2>&1
		# echo "---PAGE: $i -METADATA--"
		# pdftk result_$x.pdf dump_data > pdftk_$x.info2
		# pdftk result_$x.pdf update_info pdftk_$x.info output final_$x.pdf
		# $i is the caller's page counter, not a variable of this function.
		echo "---PAGE: $i -END------"
	}











	############################################################
	# MAIN
	############################################################
	echo "################### Preprocessing ####################"
	i=1
	# Collect the scanned pages in natural (version) order, so that a page
	# numbered 10 sorts after page 9; a plain glob would sort lexicographically.
	# Reading line by line keeps file names with spaces intact.
	mapfile -t scanned_pages < <(printf '%s\n' *.tif | sort -V)
	for page in "${scanned_pages[@]}"; do
		# An unmatched glob stays the literal string "*.tif"; skip it.
		[ -f "$page" ] || continue
		# Create x as number with 4 digits counting up
		x=$(printf "%04d" "$i")
		cp -- "$page" "$x.tif"
		# Execute parallel worker for each scanned page
		process "$x.tif" &
		addProcess "$x.tif" $!
		# Next page
		i=$((i + 1))
	done
	# Wait until all pages are done
	waitProcessing


	echo "############ Combine all pdf to one ###########"
	# Clean up only after the final PDF was really produced, so a failing
	# pdftk/ocrmypdf run no longer throws the scanned pages away.
	if pdftk *.pdf cat output "$outname"; then
		echo "############ OCR complete pdf #################"
		## Run ocrmypdf with -l eng+deu so that umlauts are recognized as well
		if ocrmypdf "$outname" "$startdir/$outname" -l eng+deu; then
			cp -- "$startdir/$outname" /home/ra/temp
			#paperwork-shell import $startdir/$outname
			#ranger --selectfile=$startdir/$outname
			ranger --selectfile="/home/ra/temp/$outname"
			echo "################ Cleaning Up ##################"
			cd ..
			# NOTE(review): when a folder was passed as first parameter,
			# tmpdir is that folder and it is removed here as well -
			# confirm this is intended.
			# ${tmpdir:?} aborts instead of running "rm -rf" on an empty
			# path.
			rm -rf -- "${tmpdir:?}"
		else
			echo "ocrmypdf failed - keeping $tmpdir" >&2
		fi
	else
		echo "pdftk failed - keeping $tmpdir" >&2
	fi
	cd "$startdir"






	# REMINDER for BARCODE FUNCTIONALITY
	#convert -density 150 "$i[0]" -quality 100 -sharpen 0x1.0 "$i.jpg" # create a JPG in which to look for a possible barcode
	#barcode=`zbarimg -q --raw "$i.jpg"` # search for a barcode and store it in a variable
	#rm "$i.jpg" # delete the image again