Skip to content

Instantly share code, notes, and snippets.

@yatt
Created April 12, 2011 12:49
Show Gist options
  • Save yatt/915443 to your computer and use it in GitHub Desktop.
Save yatt/915443 to your computer and use it in GitHub Desktop.
Simple C# class for Optical Character Recognition (OCR) using Tesseract (http://code.google.com/p/tesseract-ocr/). Usage: pass the path to tesseract.exe to the constructor.
// usage:
//
// TesseractOCR ocr = new TesseractOCR(@"C:\bin\tesseract.exe");
// string result = ocr.OCRFromBitmap(bmp);
// textBox1.Text = result;
//
using System;
using System.IO;
using System.Diagnostics;
using System.Drawing;
/// <summary>
/// Thin wrapper that runs the tesseract command-line executable on an image
/// and returns the recognized text.
/// </summary>
/// <remarks>
/// The intermediate image ("out.tif") and the result ("out.txt") are written to
/// fixed file names in the user's ApplicationData folder, so a single folder is
/// shared by every instance of this class.
/// </remarks>
public class TesseractOCR
{
    private string commandpath;
    private string outpath;
    private string tmppath;

    /// <summary>
    /// Creates a wrapper around the given tesseract executable.
    /// </summary>
    /// <param name="commandpath">Path to the tesseract executable.</param>
    public TesseractOCR(string commandpath)
    {
        this.commandpath = commandpath;
        string appData = Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData);
        tmppath = Path.Combine(appData, "out.tif");
        outpath = Path.Combine(appData, "out.txt");
    }

    /// <summary>
    /// Runs tesseract on <paramref name="filename"/> and returns the text it wrote.
    /// </summary>
    /// <param name="filename">Path of the image file to recognize.</param>
    /// <returns>The full content of the text file produced by tesseract.</returns>
    /// <exception cref="ArgumentException"><paramref name="filename"/> is null or empty.</exception>
    /// <exception cref="FileNotFoundException">tesseract did not produce the expected output file.</exception>
    public string analyze(string filename)
    {
        if (string.IsNullOrEmpty(filename))
        {
            throw new ArgumentException("An input image path is required.", "filename");
        }

        // tesseract is handed the output base name without extension and is
        // expected to write "<base>.txt". Strip only the real extension rather
        // than every ".txt" occurrence in the path.
        string outbase = Path.ChangeExtension(outpath, null);

        // Both paths are quoted: the ApplicationData folder and user-supplied
        // file names frequently contain spaces, which would otherwise split
        // the arguments and leave no output file behind.
        string args = Quote(filename) + " " + Quote(outbase);

        ProcessStartInfo startinfo = new ProcessStartInfo(commandpath, args);
        startinfo.CreateNoWindow = true;
        startinfo.UseShellExecute = false;

        int exitCode;
        using (Process process = Process.Start(startinfo))
        {
            process.WaitForExit();
            exitCode = process.ExitCode;
        }

        if (!File.Exists(outpath))
        {
            throw new FileNotFoundException(
                "tesseract (exit code " + exitCode + ") did not produce the expected output file.",
                outpath);
        }

        try
        {
            return File.ReadAllText(outpath);
        }
        finally
        {
            // Always remove the result file so a later failed run cannot
            // hand back this run's text.
            File.Delete(outpath);
        }
    }

    /// <summary>
    /// Saves <paramref name="bmp"/> as a temporary TIFF file, runs OCR on it and
    /// removes the temporary file afterwards.
    /// </summary>
    /// <param name="bmp">The bitmap to recognize.</param>
    /// <returns>The recognized text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bmp"/> is null.</exception>
    public string OCRFromBitmap(Bitmap bmp)
    {
        if (bmp == null)
        {
            throw new ArgumentNullException("bmp");
        }

        try
        {
            bmp.Save(tmppath, System.Drawing.Imaging.ImageFormat.Tiff);
            return analyze(tmppath);
        }
        finally
        {
            // Clean up the temporary image even when saving or analysis fails.
            File.Delete(tmppath);
        }
    }

    /// <summary>
    /// Runs OCR on an existing image file.
    /// </summary>
    /// <param name="filename">Path of the image file to recognize.</param>
    /// <returns>The recognized text.</returns>
    public string OCRFromFile(string filename)
    {
        return analyze(filename);
    }

    /// <summary>
    /// Wraps a path in double quotes for the command line, leaving it untouched
    /// when the caller already quoted it.
    /// </summary>
    private static string Quote(string path)
    {
        if (path.Length >= 2 && path[0] == '"' && path[path.Length - 1] == '"')
        {
            return path;
        }
        return "\"" + path + "\"";
    }
}
@natank1
Copy link

natank1 commented Aug 5, 2014

Hello

It throws an exception because the outpath file does not exist; in particular, this code does not work (I have tried different kinds of outpath).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment