rhoit/part2.sh

## part2.sh
#!/bin/bash

# Work inside the run/ directory. Abort if it cannot be entered, so that the
# commands that follow never operate on files in some other directory.
cd run || exit 1

#tesseract npl.preeti.exp0.tif

# NOTE: for 3.0.4 version and above
export TESSDATA_PREFIX=/usr/share/tessdata

# NOTE: make sure tesseract is in your $PATH
# open the terminal at the folder where you have the tif image

# Language code and font name; together they form the base name shared by
# the files used in this script.
lang="urd"
font="nas"
file_name="${lang}2.${font}.exp0"
printf '%s\n' "$file_name"

# Remove any previous box file, .tr file and unicharset, so the box file
# below is regenerated on every run.
rm -f -- "$file_name".{box,tr} unicharset

if [[ ! -e "$file_name.box" ]]; then
    echo "# CREATE BOXFILE"
    tesseract "$file_name.tif" "$file_name" batch.nochop makebox
else
    # Report the file that was actually tested for (the .box file).
    echo "'$file_name.box' already exists"
fi

# NOTE(review): the script stops here, so nothing below this line runs;
# presumably this leaves time to correct the box file by hand first.
exit

# Replace the generated box file with the hand-edited copy (.box_edited).
# The edited copy is checked first so the current box file is never
# discarded without a replacement being available.
echo -e "\n# USE COWBOXER"
if [[ ! -e "$file_name.box_edited" ]]; then
    echo "'$file_name.box_edited' not found" >&2
    exit 1
fi
cp -f -- "$file_name.box_edited" "$file_name.box"

# Run tesseract in box.train mode on the image, unless "$file_name.tr"
# is already present.
if [[ -e "${file_name}.tr" ]]; then
    echo "'$file_name.tr' already exist"
else
    echo -e "\n# TRAIN BOX FILE"
    tesseract "${file_name}.tif" "${file_name}" nobatch box.train #.stderr
fi


# NOTE: pulls the unique character set out of the box file; skipped when a
# 'unicharset' file already exists.
if [[ -e unicharset ]]; then
    echo "'unicharset' already exist"
else
    echo -e "\nUNICHARSET"
    unicharset_extractor "${file_name}.box"
fi

# <font-name> <italic> <bold> <fixed> <serif> <fraktur>
# Write the single font_properties.txt entry. The font name comes from $font
# (falling back to "nas" when it is unset) instead of being hard-coded, so it
# follows the font used elsewhere in the script.
echo -e "\n# MAKING font_properties.txt"
# printf writes the entry without a trailing newline; the bare echo after
# cat ends the line on screen.
printf '%s 0 0 0 0 0' "${font:-nas}" > font_properties.txt
cat font_properties.txt
echo
file font_properties.txt

# clustering: mftraining is given the font properties, the extracted
# unicharset, an output unicharset name and the .tr file
echo -e "\n# CLUSTERING"
mftraining \
    -F font_properties.txt \
    -U unicharset \
    -O "${lang}.unicharset" \
    "${file_name}.tr"

# cn train: cntraining is run on the same .tr file
echo -e "\n# CN TRAIN"
cntraining "${file_name}.tr"

## NOTE: combine the files
# combine_tessdata is given the "$lang." prefix, so the training outputs
# that were written without that prefix are renamed first (per the
# Tesseract 3.x training procedure). It will output the $lang.traineddata
echo -e "\n# COMBINE FILE"
for part in shapetable normproto inttemp pffmtable; do
    # Only rename what is actually present; already-prefixed files are kept.
    if [[ -e "$part" ]]; then
        mv -f -- "$part" "$lang.$part"
    fi
done
combine_tessdata "$lang."

# Run recognition on the training image; the result goes to the output base
# name "tessaract_output".
# NOTE(review): this uses the "eng" language data rather than $lang -
# confirm that this is intended.
echo -e "\n# Recognition"
tesseract "${file_name}.tif" tessaract_output -l eng

#- Move eng1.traineddata to the Tesseract-OCR\tessdata\ directory
#- change directory to .. Tesseract-OCR\ and run the recognition command
# $ tesseract image.tif output -l lang
# e.g.
# $ tesseract eurotext.tif output -l eng1
	#!/bin/bash

	# Work inside the run/ directory. Abort if it cannot be entered, so that
	# the commands that follow never operate on files in some other directory.
	cd run || exit 1

	#tesseract npl.preeti.exp0.tif

	# NOTE: for 3.0.4 version and above
	export TESSDATA_PREFIX=/usr/share/tessdata

	# NOTE: make sure tesseract is in your $PATH
	# open the terminal at the folder where you have the tif image

	# Language code and font name; together they form the base name shared by
	# the files used in this script.
	lang="urd"
	font="nas"
	file_name="${lang}2.${font}.exp0"
	printf '%s\n' "$file_name"

	# Remove any previous box file, .tr file and unicharset, so the box file
	# below is regenerated on every run.
	rm -f -- "$file_name".{box,tr} unicharset

	if [[ ! -e "$file_name.box" ]]; then
	    echo "# CREATE BOXFILE"
	    tesseract "$file_name.tif" "$file_name" batch.nochop makebox
	else
	    # Report the file that was actually tested for (the .box file).
	    echo "'$file_name.box' already exists"
	fi

	# NOTE(review): the script stops here, so nothing below this line runs;
	# presumably this leaves time to correct the box file by hand first.
	exit

	# Replace the generated box file with the hand-edited copy (.box_edited).
	# The edited copy is checked first so the current box file is never
	# discarded without a replacement being available.
	echo -e "\n# USE COWBOXER"
	if [[ ! -e "$file_name.box_edited" ]]; then
	    echo "'$file_name.box_edited' not found" >&2
	    exit 1
	fi
	cp -f -- "$file_name.box_edited" "$file_name.box"

	# Run tesseract in box.train mode on the image, unless "$file_name.tr"
	# is already present.
	if [[ -e "${file_name}.tr" ]]; then
	    echo "'$file_name.tr' already exist"
	else
	    echo -e "\n# TRAIN BOX FILE"
	    tesseract "${file_name}.tif" "${file_name}" nobatch box.train #.stderr
	fi


	# NOTE: pulls the unique character set out of the box file; skipped when a
	# 'unicharset' file already exists.
	if [[ -e unicharset ]]; then
	    echo "'unicharset' already exist"
	else
	    echo -e "\nUNICHARSET"
	    unicharset_extractor "${file_name}.box"
	fi

	# <font-name> <italic> <bold> <fixed> <serif> <fraktur>
	# Write the single font_properties.txt entry. The font name comes from
	# $font (falling back to "nas" when it is unset) instead of being
	# hard-coded, so it follows the font used elsewhere in the script.
	echo -e "\n# MAKING font_properties.txt"
	# printf writes the entry without a trailing newline; the bare echo after
	# cat ends the line on screen.
	printf '%s 0 0 0 0 0' "${font:-nas}" > font_properties.txt
	cat font_properties.txt
	echo
	file font_properties.txt

	# clustering: mftraining is given the font properties, the extracted
	# unicharset, an output unicharset name and the .tr file
	echo -e "\n# CLUSTERING"
	mftraining \
	    -F font_properties.txt \
	    -U unicharset \
	    -O "${lang}.unicharset" \
	    "${file_name}.tr"

	# cn train: cntraining is run on the same .tr file
	echo -e "\n# CN TRAIN"
	cntraining "${file_name}.tr"

	## NOTE: combine the files
	# combine_tessdata is given the "$lang." prefix, so the training outputs
	# that were written without that prefix are renamed first (per the
	# Tesseract 3.x training procedure). It will output the $lang.traineddata
	echo -e "\n# COMBINE FILE"
	for part in shapetable normproto inttemp pffmtable; do
	    # Only rename what is actually present; already-prefixed files are kept.
	    if [[ -e "$part" ]]; then
	        mv -f -- "$part" "$lang.$part"
	    fi
	done
	combine_tessdata "$lang."

	# Run recognition on the training image; the result goes to the output
	# base name "tessaract_output".
	# NOTE(review): this uses the "eng" language data rather than $lang -
	# confirm that this is intended.
	echo -e "\n# Recognition"
	tesseract "${file_name}.tif" tessaract_output -l eng

	#- Move eng1.traineddata to the Tesseract-OCR\tessdata\ directory
	#- change directory to .. Tesseract-OCR\ and run the recognition command
	# $ tesseract image.tif output -l lang
	# e.g.
	# $ tesseract eurotext.tif output -l eng1