Created
March 14, 2018 17:14
-
-
Save Benehiko/734e4ceb24bf62f0fa2a24865afe0b1d to your computer and use it in GitHub Desktop.
Tesseract auto build tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Tesseract auto build tool.
#
# Builds Leptonica and Tesseract from source on an apt-get based system,
# downloads the English trained data and clones Tess4J, the Java wrapper
# for Tesseract.
#
# Usage: run from the directory that should receive the downloads.
# Layout produced, relative to that directory:
#   leptonica-1.75.3.tar.gz, leptonica-1.75.3/   Leptonica sources
#   tesseract-ocr/                               Tesseract git checkout
#   tesseract-ocr/lang/eng.traineddata           English language data
#   tesseract-ocr/tess4j/                        Tess4J git checkout

# Abort on the first failing command or unset variable, so that a failed
# download or `cd` is never followed by a build in the wrong directory.
set -eu

leptonica_dir="leptonica-1.75.3"

# First install all prerequisites in one apt-get call. wget and git are
# included here because both are needed further down (wget already for the
# Leptonica download).
#   g++ may presumably be replaced by clang++.
sudo apt-get install \
  g++ \
  autoconf automake libtool \
  autoconf-archive \
  pkg-config \
  libpng-dev \
  libjpeg8-dev \
  libtiff5-dev \
  zlib1g-dev \
  wget \
  git

# Install Leptonica. The build runs in a subshell so the working directory
# of the script itself is left untouched.
wget "http://www.leptonica.org/source/${leptonica_dir}.tar.gz"
tar -xvzf "${leptonica_dir}.tar.gz"
(
  cd "${leptonica_dir}"
  ./configure
  make
  sudo make install
)

# Clone Tesseract from Git; an existing checkout from an earlier run is reused.
if [ ! -d tesseract-ocr ]; then
  git clone https://github.com/tesseract-ocr/tesseract.git tesseract-ocr
fi

# Compile Tesseract. The script stays inside tesseract-ocr from here on.
cd tesseract-ocr
./autogen.sh
./configure
make
sudo make install
# Refresh the dynamic linker cache so the newly installed libraries are found.
sudo ldconfig

# Get the English trained data into tesseract-ocr/lang.
mkdir -p lang
(
  cd lang
  wget https://github.com/tesseract-ocr/tessdata/raw/4.00/eng.traineddata
)

# Get the Tesseract Java wrapper (cloned into tesseract-ocr/tess4j).
if [ ! -d tess4j ]; then
  git clone https://github.com/nguyenq/tess4j.git
fi

echo "You're good to go!"
echo "Netbeans -> Find and add all the required JAR files, including jai_imageio.jar, jna.jar, commons-io-2.4.jar, lept4j.jar, and tess4j.jar."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment