Skip to content

Instantly share code, notes, and snippets.

@yatt
Created April 12, 2011 12:49
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save yatt/915443 to your computer and use it in GitHub Desktop.
Save yatt/915443 to your computer and use it in GitHub Desktop.
Simple C# class for Optical Character Recognition (OCR) using Tesseract (http://code.google.com/p/tesseract-ocr/). Usage: pass the path to tesseract.exe to the constructor.
// usage:
//
// TesseractOCR ocr = new TesseractOCR(@"C:\bin\tesseract.exe");
// string result = ocr.OCRFromBitmap(bmp);
// textBox1.Text = result;
//
using System;
using System.IO;
using System.Diagnostics;
using System.Drawing;
/// <summary>
/// Thin wrapper around the tesseract command-line executable. An image is
/// handed to the executable, which writes the recognized text to a file that
/// is then read back and returned.
/// </summary>
public class TesseractOCR
{
    private string commandpath;
    private string outpath;
    private string tmppath;

    /// <summary>
    /// Creates a wrapper that runs the executable at <paramref name="commandpath"/>.
    /// </summary>
    /// <param name="commandpath">Path to the tesseract executable.</param>
    public TesseractOCR(string commandpath)
    {
        this.commandpath = commandpath;
        string workdir = Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData);
        tmppath = Path.Combine(workdir, "out.tif");
        outpath = Path.Combine(workdir, "out.txt");
    }

    /// <summary>
    /// Runs the executable on <paramref name="filename"/> and returns the text
    /// it wrote to the output file.
    /// </summary>
    /// <param name="filename">Path of the image file to recognize.</param>
    /// <returns>The content of the output file, unmodified.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filename"/> is null.</exception>
    /// <exception cref="FileNotFoundException">The executable produced no output file.</exception>
    public string analyze(string filename)
    {
        if (filename == null)
        {
            throw new ArgumentNullException("filename");
        }

        // The executable is given the output name without its extension.
        // ChangeExtension strips only the trailing ".txt", unlike
        // Replace(".txt", ""), which would also mangle a directory name
        // that happens to contain ".txt".
        string outbase = Path.ChangeExtension(outpath, null);

        // Quote both paths: the application-data folder lives under the user
        // profile, which frequently contains spaces.
        string args = Quote(filename) + " " + Quote(outbase);

        // Remove any leftover from an aborted run so that a failed
        // recognition cannot return stale text.
        File.Delete(outpath);

        ProcessStartInfo startinfo = new ProcessStartInfo(commandpath, args);
        startinfo.CreateNoWindow = true;
        startinfo.UseShellExecute = false;
        using (Process process = Process.Start(startinfo))
        {
            process.WaitForExit();
        }

        try
        {
            return File.ReadAllText(outpath);
        }
        finally
        {
            // File.Delete is a no-op when the file does not exist.
            File.Delete(outpath);
        }
    }

    /// <summary>
    /// Saves <paramref name="bmp"/> to a temporary TIFF file, recognizes it and
    /// removes the temporary file again, even when recognition fails.
    /// </summary>
    /// <param name="bmp">The bitmap to recognize.</param>
    /// <returns>The recognized text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bmp"/> is null.</exception>
    public string OCRFromBitmap(Bitmap bmp)
    {
        if (bmp == null)
        {
            throw new ArgumentNullException("bmp");
        }

        try
        {
            bmp.Save(tmppath, System.Drawing.Imaging.ImageFormat.Tiff);
            return analyze(tmppath);
        }
        finally
        {
            File.Delete(tmppath);
        }
    }

    /// <summary>
    /// Recognizes the image stored in the file <paramref name="filename"/>.
    /// </summary>
    /// <param name="filename">Path of the image file to recognize.</param>
    /// <returns>The recognized text.</returns>
    public string OCRFromFile(string filename)
    {
        return analyze(filename);
    }

    // Wraps a path in double quotes for the command line. Quotes the caller
    // may already have added are dropped first so they are not doubled.
    private static string Quote(string path)
    {
        return "\"" + path.Trim('"') + "\"";
    }
}
@saakshid
Copy link

saakshid commented Apr 7, 2014

Shouldn't there be a "new" keyword in the usage before TesseractOCR("path")?

@neperz
Copy link

neperz commented May 4, 2014

/// <summary>
/// Wrapper around the tesseract command-line executable that keeps its
/// temporary files in a caller-supplied directory under a per-instance
/// unique name, and gives up if the executable does not finish in time.
/// </summary>
public class TesseractOCR
{
    private string commandpath;
    private string outpath;
    private string tmppath;

    /// <summary>
    /// Creates a wrapper that runs the executable at <paramref name="commandpath"/>
    /// and stores its temporary files in <paramref name="tempDir"/>.
    /// </summary>
    /// <param name="commandpath">Path to the tesseract executable.</param>
    /// <param name="tempDir">Directory for the temporary image and text files.</param>
    /// <exception cref="ArgumentNullException"><paramref name="tempDir"/> is null.</exception>
    public TesseractOCR(string commandpath, string tempDir)
    {
        this.commandpath = commandpath;
        string guidImage = Guid.NewGuid().ToString();

        // Path.Combine copes with a tempDir that already ends in a separator.
        tmppath = Path.Combine(tempDir, guidImage + ".tif");
        outpath = Path.Combine(tempDir, guidImage + ".txt");
    }

    /// <summary>
    /// Runs the executable on <paramref name="filename"/> and returns the
    /// trimmed text it wrote to the output file.
    /// </summary>
    /// <param name="filename">Path of the image file to recognize.</param>
    /// <returns>The content of the output file with surrounding whitespace removed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filename"/> is null.</exception>
    /// <exception cref="TimeoutException">The executable did not exit within 60 seconds; it is killed before this is thrown.</exception>
    /// <exception cref="FileNotFoundException">The executable exited without producing an output file.</exception>
    public string analyze(string filename)
    {
        if (filename == null)
        {
            throw new ArgumentNullException("filename");
        }

        const int timeout = 1000 * 60;

        // The executable is given the output name without its extension.
        // ChangeExtension strips only the trailing ".txt"; Replace(".txt", "")
        // would also damage a tempDir whose name contains ".txt".
        string outbase = Path.ChangeExtension(outpath, null);

        // Fully qualified so the class does not depend on a using directive
        // for System.Text being present in the file.
        System.Text.StringBuilder error = new System.Text.StringBuilder();

        // The same output path is reused by every call on this instance, so
        // clear anything left behind by an earlier timed-out run.
        File.Delete(outpath);

        using (Process process = new Process())
        {
            process.StartInfo.FileName = commandpath;
            // Quote both paths so directories containing spaces work.
            process.StartInfo.Arguments = Quote(filename) + " " + Quote(outbase);
            process.StartInfo.UseShellExecute = false;
            process.StartInfo.CreateNoWindow = true;
            process.StartInfo.RedirectStandardOutput = true;
            process.StartInfo.RedirectStandardError = true;

            // Standard output is not needed, but it must still be drained so
            // the child cannot block on a full pipe.
            process.OutputDataReceived += (sender, e) => { };
            process.ErrorDataReceived += (sender, e) =>
            {
                if (e.Data != null)
                {
                    // Handlers run on thread-pool threads while the caller
                    // may read the builder, so guard every access.
                    lock (error)
                    {
                        error.AppendLine(e.Data);
                    }
                }
            };

            process.Start();
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();

            if (!process.WaitForExit(timeout))
            {
                try
                {
                    // Do not leave a runaway child behind.
                    process.Kill();
                }
                catch (InvalidOperationException)
                {
                    // The process exited between the timeout and the kill.
                }

                throw new TimeoutException("Time out" + Snapshot(error));
            }

            // WaitForExit(int) may return before the asynchronous stream
            // handlers have finished; the parameterless overload waits for
            // them as well.
            process.WaitForExit();
        }

        try
        {
            if (!File.Exists(outpath))
            {
                // Surface what the executable reported instead of a bare
                // "file not found".
                throw new FileNotFoundException(
                    "Tesseract did not produce an output file. " + Snapshot(error),
                    outpath);
            }

            return File.ReadAllText(outpath).Trim();
        }
        finally
        {
            // File.Delete is a no-op when the file does not exist.
            File.Delete(outpath);
        }
    }

    /// <summary>
    /// Saves <paramref name="bmp"/> to a temporary TIFF file, recognizes it and
    /// removes the temporary file again, even when recognition fails.
    /// </summary>
    /// <param name="bmp">The bitmap to recognize.</param>
    /// <returns>The recognized text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bmp"/> is null.</exception>
    public string OCRFromBitmap(Bitmap bmp)
    {
        if (bmp == null)
        {
            throw new ArgumentNullException("bmp");
        }

        try
        {
            bmp.Save(tmppath, System.Drawing.Imaging.ImageFormat.Tiff);
            return analyze(tmppath);
        }
        finally
        {
            File.Delete(tmppath);
        }
    }

    /// <summary>
    /// Recognizes the image stored in the file <paramref name="filename"/>.
    /// </summary>
    /// <param name="filename">Path of the image file to recognize.</param>
    /// <returns>The recognized text.</returns>
    public string OCRFromFile(string filename)
    {
        return analyze(filename);
    }

    // Wraps a path in double quotes for the command line. Quotes the caller
    // may already have added are dropped first so they are not doubled.
    private static string Quote(string path)
    {
        return "\"" + path.Trim('"') + "\"";
    }

    // Returns the text collected so far, under the same lock the stream
    // handler uses when appending.
    private static string Snapshot(System.Text.StringBuilder builder)
    {
        lock (builder)
        {
            return builder.ToString();
        }
    }
}

@natank1
Copy link

natank1 commented Aug 5, 2014

Hello

It throws an exception because the outpath file does not exist; in particular, this code does not work (I have tried different kinds of outpath).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment