tjluoma/bulkocrpdfs.sh

## bulkocrpdfs.sh
#!/usr/bin/env zsh -f
# Purpose: OCR all of the PDFs in a given directory
#
# From:	Timothy J. Luoma
# Mail:	luomat at gmail dot com
# Date:	2020-03-27

	# ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️
	# !!! ⚠️⚠️⚠️ you *MUST* change this to the directory where all of your PDFs are that you want to OCR !!!
DIR="${HOME}/AllMyPDFs"

	# Destination folder for the OCR'd copies of the PDFs.
	# Point it somewhere else if you like; the script creates it when it is missing.
OUT="${HOME}/OCRedPDFs"

	# Leave this alone: it is derived from the name this script was invoked as (zsh ':t:r' modifiers)
NAME="$0:t:r"

	# Error output from failed OCR runs is collected in this file.
	# Rename or relocate it if you prefer.
ERROR_LOG="${HOME}/Desktop/${NAME}.errors.log"


#########################################################################################################
###
### You should not need to change anything below this line
###
#########################################################################################################

# Use the user's own PATH definition when ~/.path exists; otherwise fall back to a fixed search path.
if [[ ! -e "${HOME}/.path" ]]; then
	PATH="${HOME}/scripts:/usr/local/bin:/usr/bin:/usr/sbin:/sbin:/bin"
else
	source "${HOME}/.path"
fi

# Stop right away (status 2) when zsh's 'commands' table has no entry for 'ocrmypdf'.
if ! (( $+commands[ocrmypdf] )); then
	{
		echo "$NAME: 'ocrmypdf' is required but not found in $PATH"
		echo "$NAME: The easiest way to install it is with 'brew'."
	} >>/dev/stderr
	exit 2
fi

	# Make sure the source directory (the one holding the PDFs to OCR) exists.
	# Nothing is created here: a missing directory is a configuration error, so stop with status 2.
[[ -d "$DIR" ]] || {
	echo "$NAME: Ummmm. '$DIR' does not exist or is not a directory." >>/dev/stderr
	echo "$NAME: You should change the line 'DIR=' in '$0' to point to the directory you want to use." >>/dev/stderr
	exit 2
}

	# Ensure the output directory is there, creating it (and any missing parents) when it is not.
if [[ ! -d "$OUT" ]]; then
	echo "$NAME: Creating '$OUT'..."
	mkdir -p "$OUT"

		# mkdir's exit status is not consulted; re-testing the directory decides success
	[[ -d "$OUT" ]] || { echo "$NAME: failed to create '$OUT' for some reason." >>/dev/stderr; exit 2; }
fi

# Work from inside the source directory so each PDF can be addressed by its bare file name.
# If the directory cannot be entered, stop: carrying on would process whatever directory
# the script was started from. ('cd' prints its own diagnostic.)
cd "$DIR" || exit 2

	# number of PDFs whose OCR failed
COUNT='0'

	# Let the shell enumerate the PDFs rather than parsing 'ls' output through 'read':
	# names containing backslashes, leading/trailing blanks or newlines then arrive intact.
	# The bracket classes keep the match on the extension case-insensitive, and
	# 'null_glob' makes a directory without PDFs yield zero iterations instead of a
	# "no matches found" error. The option is switched off again right after the loop.
setopt null_glob

for FILE in *.[Pp][Dd][Ff]
do

		# if we have already OCR'd this PDF then there's no need to do it again
	if [[ -e "$OUT/$FILE" ]]
	then
		echo "$NAME: '$OUT/$FILE' already exists. Skipping..."
		continue
	fi

	echo "\n${NAME}: Starting OCR on '$FILE'..."

		# whatever ocrmypdf writes to stderr for this file is captured here
	FILE_LOG="$OUT/$NAME.$FILE.errors.log"

	ocrmypdf --output-type pdf --skip-text "$FILE" "$OUT/$FILE" 2> "$FILE_LOG"

	EXIT="$?"

	if [[ "$EXIT" == "0" ]]
	then
			# NOTE(review): on success the per-file log is left behind in '$OUT' — confirm that is intended
		echo "$NAME: Successfully completed OCR on '$FILE'...\n"

	else
			# put the filename in the error log so we know which file it is about
		echo "\n$NAME: errors from '$FILE':" >> "$ERROR_LOG"

			# add the error message to the Error Log
		cat "$FILE_LOG" >> "$ERROR_LOG"

			# get rid of the individual error log (no longer needed)
		mv -vf "$FILE_LOG" "$HOME/.Trash/"

			# tell the user we failed (on screen and in the Error Log)
		echo "$NAME: OCR on '$FILE' FAILED (\$EXIT = $EXIT)" | tee -a "$ERROR_LOG"

			# remove any file that was created by the failed process, if it exists
		[[ -e "$OUT/$FILE" ]] && rm -f "$OUT/$FILE"

			# increment error counter
		((COUNT++))
	fi

done

unsetopt null_glob

# Report the outcome and exit with the number of PDFs that failed.
# An exit status only has 8 bits, so the count is capped at 255; without the cap
# exactly 256 failures would wrap around to 0 and look like success.
if [[ "$COUNT" = "0" ]]
then
	echo "$NAME: Finished processing all files in '$DIR' with no errors."
	exit 0
fi

	# singular/plural wording for the summary line
if [[ "$COUNT" = "1" ]]
then
	SUMMARY="1 error"
else
	SUMMARY="$COUNT errors"
fi

echo "$NAME: Finished processing all files in '$DIR' with ${SUMMARY}. See '$ERROR_LOG'"

	# reveal error log in Finder
open -R "$ERROR_LOG"

if (( COUNT > 255 ))
then
	exit 255
fi

exit $COUNT
#EOF
	#!/usr/bin/env zsh -f
	# Purpose: OCR all of the PDFs in a given directory
	#
	# From: Timothy J. Luoma
	# Mail: luomat at gmail dot com
	# Date: 2020-03-27

	# ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️ !!! ⚠️⚠️⚠️
	# !!! ⚠️⚠️⚠️ you MUST change this to the directory where all of your PDFs are that you want to OCR !!!
	DIR="${HOME}/AllMyPDFs"

	# Destination folder for the OCR'd copies of the PDFs.
	# Point it somewhere else if you like; the script creates it when it is missing.
	OUT="${HOME}/OCRedPDFs"

	# Leave this alone: it is derived from the name this script was invoked as (zsh ':t:r' modifiers)
	NAME="$0:t:r"

	# Error output from failed OCR runs is collected in this file.
	# Rename or relocate it if you prefer.
	ERROR_LOG="${HOME}/Desktop/${NAME}.errors.log"



	#########################################################################################################
	###
	### You should not need to change anything below this line
	###
	#########################################################################################################

	# Use the user's own PATH definition when ~/.path exists; otherwise fall back to a fixed search path.
	if [[ ! -e "${HOME}/.path" ]]; then
		PATH="${HOME}/scripts:/usr/local/bin:/usr/bin:/usr/sbin:/sbin:/bin"
	else
		source "${HOME}/.path"
	fi

	# Stop right away (status 2) when zsh's 'commands' table has no entry for 'ocrmypdf'.
	if ! (( $+commands[ocrmypdf] )); then
		{
			echo "$NAME: 'ocrmypdf' is required but not found in $PATH"
			echo "$NAME: The easiest way to install it is with 'brew'."
		} >>/dev/stderr
		exit 2
	fi

	# Make sure the source directory (the one holding the PDFs to OCR) exists.
	# Nothing is created here: a missing directory is a configuration error, so stop with status 2.
	[[ -d "$DIR" ]] || {
		echo "$NAME: Ummmm. '$DIR' does not exist or is not a directory." >>/dev/stderr
		echo "$NAME: You should change the line 'DIR=' in '$0' to point to the directory you want to use." >>/dev/stderr
		exit 2
	}

	# Ensure the output directory is there, creating it (and any missing parents) when it is not.
	if [[ ! -d "$OUT" ]]; then
		echo "$NAME: Creating '$OUT'..."
		mkdir -p "$OUT"

		# mkdir's exit status is not consulted; re-testing the directory decides success
		[[ -d "$OUT" ]] || { echo "$NAME: failed to create '$OUT' for some reason." >>/dev/stderr; exit 2; }
	fi

	# Work from inside the source directory so each PDF can be addressed by its bare file name.
	# If the directory cannot be entered, stop: carrying on would process whatever directory
	# the script was started from. ('cd' prints its own diagnostic.)
	cd "$DIR" || exit 2

	# number of PDFs whose OCR failed
	COUNT='0'

	# Let the shell enumerate the PDFs rather than parsing 'ls' output through 'read':
	# names containing backslashes, leading/trailing blanks or newlines then arrive intact.
	# The bracket classes keep the match on the extension case-insensitive, and
	# 'null_glob' makes a directory without PDFs yield zero iterations instead of a
	# "no matches found" error. The option is switched off again right after the loop.
	setopt null_glob

	for FILE in *.[Pp][Dd][Ff]
	do

		# if we have already OCR'd this PDF then there's no need to do it again
		if [[ -e "$OUT/$FILE" ]]
		then
			echo "$NAME: '$OUT/$FILE' already exists. Skipping..."
			continue
		fi

		echo "\n${NAME}: Starting OCR on '$FILE'..."

		# whatever ocrmypdf writes to stderr for this file is captured here
		FILE_LOG="$OUT/$NAME.$FILE.errors.log"

		ocrmypdf --output-type pdf --skip-text "$FILE" "$OUT/$FILE" 2> "$FILE_LOG"

		EXIT="$?"

		if [[ "$EXIT" == "0" ]]
		then
			# NOTE(review): on success the per-file log is left behind in '$OUT' — confirm that is intended
			echo "$NAME: Successfully completed OCR on '$FILE'...\n"

		else
			# put the filename in the error log so we know which file it is about
			echo "\n$NAME: errors from '$FILE':" >> "$ERROR_LOG"

			# add the error message to the Error Log
			cat "$FILE_LOG" >> "$ERROR_LOG"

			# get rid of the individual error log (no longer needed)
			mv -vf "$FILE_LOG" "$HOME/.Trash/"

			# tell the user we failed (on screen and in the Error Log);
			# this must be a real pipe into 'tee', not an escaped '\|' argument to echo
			echo "$NAME: OCR on '$FILE' FAILED (\$EXIT = $EXIT)" | tee -a "$ERROR_LOG"

			# remove any file that was created by the failed process, if it exists
			[[ -e "$OUT/$FILE" ]] && rm -f "$OUT/$FILE"

			# increment error counter
			((COUNT++))
		fi

	done

	unsetopt null_glob

	# Report the outcome and exit with the number of PDFs that failed.
	# An exit status only has 8 bits, so the count is capped at 255; without the cap
	# exactly 256 failures would wrap around to 0 and look like success.
	if [[ "$COUNT" = "0" ]]
	then
		echo "$NAME: Finished processing all files in '$DIR' with no errors."
		exit 0
	fi

	# singular/plural wording for the summary line
	if [[ "$COUNT" = "1" ]]
	then
		SUMMARY="1 error"
	else
		SUMMARY="$COUNT errors"
	fi

	echo "$NAME: Finished processing all files in '$DIR' with ${SUMMARY}. See '$ERROR_LOG'"

	# reveal error log in Finder
	open -R "$ERROR_LOG"

	if (( COUNT > 255 ))
	then
		exit 255
	fi

	exit $COUNT
	#EOF