Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
ocr by opencv
using Emgu.CV;
using Emgu.CV.CvEnum;
using Emgu.CV.OCR;
using Emgu.CV.Structure;
using Emgu.CV.Text;
using Emgu.CV.Util;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Options;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
namespace OpenCVOcrApi.Apis
{
[Route("api/[controller]")]
[ApiController]
public class OcrController : ControllerBase
{
// Path of the image to OCR; taken from AppSettings.NumberPlateFilePath in the constructor (may be null).
private string _numberPlateFilePath;
// HTTP client created from the injected IHttpClientFactory; used to download Tesseract language files.
private HttpClient _httpClient;
// "<app base directory>/classifier"; passed to the OCR routine as the location of the ER-filter classifier XML files.
private string _classifierDataPath;
// Raw mode value from the request; the Mode property maps 0 to FullPage and anything else to TextDetection.
private int _ocrMode;
// Tesseract engine created (and re-created) by InitOcr; null when initialization failed.
private Tesseract _ocr;
/// <summary>
/// Builds the controller: resolves the classifier data directory under the application
/// base directory, reads the image path from configuration and creates the HTTP client
/// used for language-file downloads.
/// </summary>
/// <param name="options">Application settings; a null snapshot or value yields a null image path.</param>
/// <param name="httpClientFactory">Factory used to create the <see cref="HttpClient"/>.</param>
public OcrController(IOptionsSnapshot<AppSettings> options, IHttpClientFactory httpClientFactory)
{
    var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
    _classifierDataPath = Path.Combine(baseDirectory, "classifier");

    var settings = options?.Value;
    _numberPlateFilePath = settings?.NumberPlateFilePath;

    _httpClient = httpClientFactory.CreateClient();
}
/// <summary>
/// Runs OCR on the configured image file and returns the recognized text.
/// </summary>
/// <param name="ocrMode">0 selects full-page OCR; any other value selects text-region detection (default 1).</param>
/// <returns>
/// 200 with the OCR result; 400 when the configured image file does not exist;
/// 500 when the Tesseract engine could not be initialized.
/// </returns>
[HttpGet, Route("{ocrMode?}")]
public IActionResult Get(int ocrMode = 1)
{
    _ocrMode = ocrMode;
    try
    {
        if (!InitOcr(Tesseract.DefaultTesseractDirectory, "eng", OcrEngineMode.TesseractLstmCombined))
        {
            // Previously an empty 200 OK was returned here, which hid the failure from the client.
            return StatusCode(500, "Failed to initialize tesseract OCR engine");
        }

        // File.Exists returns false for a null/empty path instead of throwing like new FileInfo(null).
        if (!System.IO.File.Exists(_numberPlateFilePath))
        {
            return BadRequest($"{_numberPlateFilePath} did NOT exist.");
        }

        // Mat wraps native memory; dispose it deterministically once OCR is done.
        using var img = new Mat(_numberPlateFilePath);
        var ret = OcrImage(img, _classifierDataPath);
        return Ok(ret);
    }
    finally
    {
        // The engine is created per call by InitOcr and nothing else in this class releases it.
        _ocr?.Dispose();
        _ocr = null;
    }
}
/// <summary>
/// Ensures "<paramref name="lang"/>.traineddata" exists in <paramref name="folder"/>,
/// downloading it from the URL reported by <c>Tesseract.GetLangFileUrl</c> when it is missing.
/// </summary>
/// <param name="folder">Directory holding the language files; created when absent.</param>
/// <param name="lang">Language code, e.g. "eng" or "osd".</param>
private void TesseractDownloadLangFile(string folder, string lang)
{
    // CreateDirectory is a no-op when the directory already exists.
    Directory.CreateDirectory(folder);

    string dest = Path.Combine(folder, $"{lang}.traineddata");
    if (System.IO.File.Exists(dest))
    {
        return;
    }

    var source = Tesseract.GetLangFileUrl(lang);
    Console.WriteLine($"Downloading file from '{source}' to '{dest}'");

    // Download into a temporary file first: a failed or interrupted transfer must not leave
    // a truncated file at dest, because the existence check above would then accept it forever.
    string temp = $"{dest}.{Guid.NewGuid():N}.tmp";
    try
    {
        // Still synchronous (the caller is), but GetAwaiter().GetResult() surfaces the real
        // exception instead of wrapping it in an AggregateException like .Result does.
        using (var httpStream = _httpClient.GetStreamAsync(source).GetAwaiter().GetResult())
        using (var fileStream = System.IO.File.Create(temp))
        {
            httpStream.CopyTo(fileStream);
            fileStream.Flush();
        }

        System.IO.File.Move(temp, dest, true);
    }
    catch
    {
        try
        {
            System.IO.File.Delete(temp);
        }
        catch (IOException)
        {
            // Best-effort cleanup only; the original download error is the one worth reporting.
        }

        throw;
    }

    Console.WriteLine("Download completed");
}
/// <summary>
/// (Re)creates the Tesseract engine stored in <c>_ocr</c>, first making sure the
/// requested language file and the "osd" file are available in the data directory.
/// </summary>
/// <param name="path">Tesseract data directory; null or empty falls back to <c>Tesseract.DefaultTesseractDirectory</c>.</param>
/// <param name="lang">Language code to load.</param>
/// <param name="mode">OCR engine mode.</param>
/// <returns>true when the engine was created; false when any step threw (the error is written to the console).</returns>
private bool InitOcr(string path, string lang, OcrEngineMode mode)
{
    try
    {
        // Release any engine left over from a previous initialization.
        _ocr?.Dispose();
        _ocr = null;

        var dataDirectory = string.IsNullOrEmpty(path) ? Tesseract.DefaultTesseractDirectory : path;

        foreach (var languageFile in new[] { lang, "osd" })
        {
            TesseractDownloadLangFile(dataDirectory, languageFile);
        }

        _ocr = new Tesseract(dataDirectory, lang, mode);
        Console.WriteLine($"{lang} : {mode} (tesseract version {Tesseract.VersionString})");
        return true;
    }
    catch (Exception e)
    {
        _ocr = null;
        Console.WriteLine(e.Message);
        Console.WriteLine("Failed to initialize tesseract OCR engine");
        return false;
    }
}
/// <summary>
/// Selects how the image is handed to the OCR engine.
/// </summary>
private enum OCRMode
{
    /// <summary>
    /// OCR the whole image in one pass.
    /// </summary>
    FullPage = 0,

    /// <summary>
    /// Locate text regions first, then OCR each region separately.
    /// </summary>
    TextDetection = 1
}
/// <summary>
/// OCR mode derived from the raw request value: 0 means full page, anything else text detection.
/// </summary>
private OCRMode Mode => _ocrMode switch
{
    0 => OCRMode.FullPage,
    _ => OCRMode.TextDetection,
};
/// <summary>
/// Returns a rectangle with the same center as <paramref name="r"/> whose width and
/// height are the originals multiplied by <paramref name="scale"/> and rounded.
/// </summary>
/// <param name="r">Rectangle to scale.</param>
/// <param name="scale">Scale factor applied to both dimensions.</param>
/// <returns>The scaled rectangle, with all coordinates rounded to integers.</returns>
private static Rectangle ScaleRectangle(Rectangle r, double scale)
{
    double scaledWidth = Math.Round(r.Width * scale);
    double scaledHeight = Math.Round(r.Height * scale);

    double midX = r.X + r.Width / 2.0;
    double midY = r.Y + r.Height / 2.0;

    int left = (int)Math.Round(midX - scaledWidth / 2.0);
    int top = (int)Math.Round(midY - scaledHeight / 2.0);

    return new Rectangle(left, top, (int)scaledWidth, (int)scaledHeight);
}
/// <summary>
/// Runs OCR on <paramref name="image"/> and returns the recognized text.
/// <paramref name="imageColor"/> receives a copy of the image with the detected
/// regions/characters outlined in red.
/// </summary>
/// <param name="ocr">Initialized Tesseract engine.</param>
/// <param name="image">Image to recognize.</param>
/// <param name="mode">Full-page OCR, or text-region detection followed by per-region OCR.</param>
/// <param name="imageColor">Output canvas; overwritten by this method.</param>
/// <param name="classifierDataPath">Directory containing the ER-filter classifier XML files (text-detection mode only).</param>
/// <returns>The recognized text; in text-detection mode one line is appended per region.</returns>
/// <exception cref="InvalidOperationException">Tesseract reported a recognition failure.</exception>
private static string OcrImage(Tesseract ocr, Mat image, OCRMode mode, Mat imageColor, string classifierDataPath)
{
    var drawCharColor = new Bgr(Color.Red);

    // Prepare the annotation canvas: expand grayscale input to BGR, otherwise copy as-is.
    if (image.NumberOfChannels == 1)
    {
        CvInvoke.CvtColor(image, imageColor, ColorConversion.Gray2Bgr);
    }
    else
    {
        image.CopyTo(imageColor);
    }

    return mode == OCRMode.FullPage
        ? OcrFullPage(ocr, image, imageColor, drawCharColor)
        : OcrDetectedRegions(ocr, image, imageColor, classifierDataPath, drawCharColor);
}

/// <summary>
/// Sets <paramref name="input"/> on the engine, runs recognition and returns the detected
/// characters (never null).
/// </summary>
private static Tesseract.Character[] RecognizeCharacters(Tesseract ocr, Mat input)
{
    ocr.SetImage(input);
    if (ocr.Recognize() != 0)
    {
        // InvalidOperationException derives from Exception, so existing catch blocks still match.
        throw new InvalidOperationException("Failed to recognize image");
    }

    return ocr.GetCharacters() ?? Array.Empty<Tesseract.Character>();
}

/// <summary>
/// Full-page OCR with two thresholded fallback passes when no characters are found.
/// </summary>
private static string OcrFullPage(Tesseract ocr, Mat image, Mat imageColor, Bgr drawCharColor)
{
    var characters = RecognizeCharacters(ocr, imageColor);

    // Fall back to thresholded images only when the first pass found NOTHING. The previous
    // condition was inverted (it re-ran OCR when characters had been found), which threw
    // away a successful result and never retried a failed one.
    if (characters.Length == 0)
    {
        using var imgThresholded = new Mat();
        using (var imgGrey = new Mat())
        {
            // Convert from imageColor rather than image: imageColor is multi-channel at this
            // point, whereas Bgr2Gray on a single-channel image would throw.
            CvInvoke.CvtColor(imageColor, imgGrey, ColorConversion.Bgr2Gray);
            CvInvoke.Threshold(imgGrey, imgThresholded, 65, 255, ThresholdType.Binary);
        }

        characters = RecognizeCharacters(ocr, imgThresholded);
        if (characters.Length == 0)
        {
            CvInvoke.Threshold(image, imgThresholded, 190, 255, ThresholdType.Binary);
            characters = RecognizeCharacters(ocr, imgThresholded);
        }

        // Copy into the caller's canvas. Reassigning the imageColor parameter (as before) only
        // changed the local reference, so the caller never saw the image that was actually OCR'd.
        imgThresholded.CopyTo(imageColor);
    }

    foreach (Tesseract.Character c in characters)
    {
        CvInvoke.Rectangle(imageColor, c.Region, drawCharColor.MCvScalar);
    }

    // Recognition has already run for the last image set, so the text does not depend on
    // the (now disposed) temporary Mats.
    return ocr.GetUTF8Text();
}

/// <summary>
/// Detects text regions with the ER filters, OCRs each region and outlines regions and
/// characters on <paramref name="imageColor"/>.
/// </summary>
private static string OcrDetectedRegions(Tesseract ocr, Mat image, Mat imageColor, string classifierDataPath, Bgr drawCharColor)
{
    var regions = DetectTextRegions(image, classifierDataPath);

    // Grow each region by 10% around its center, clipped to the image bounds.
    var imageRegion = new Rectangle(Point.Empty, imageColor.Size);
    for (var i = 0; i < regions.Length; i++)
    {
        var r = ScaleRectangle(regions[i], 1.1);
        r.Intersect(imageRegion);
        regions[i] = r;
    }

    var allChars = new List<Tesseract.Character>();
    var allText = new StringBuilder();
    foreach (var rect in regions)
    {
        // An empty ROI cannot be turned into a Mat; skip it instead of failing the whole request.
        if (rect.Width <= 0 || rect.Height <= 0)
        {
            continue;
        }

        using var region = new Mat(image, rect);
        var characters = RecognizeCharacters(ocr, region);

        // Convert the coordinates from the local region to global.
        for (var i = 0; i < characters.Length; i++)
        {
            var charRegion = characters[i].Region;
            charRegion.Offset(rect.Location);
            characters[i].Region = charRegion;
        }

        allChars.AddRange(characters);
        allText.AppendLine(ocr.GetUTF8Text());
    }

    var drawRegionColor = new Bgr(Color.Red);
    foreach (var rect in regions)
    {
        CvInvoke.Rectangle(imageColor, rect, drawRegionColor.MCvScalar);
    }

    foreach (Tesseract.Character c in allChars)
    {
        CvInvoke.Rectangle(imageColor, c.Region, drawCharColor.MCvScalar);
    }

    return allText.ToString();
}

/// <summary>
/// Runs the NM1/NM2 extremal-region filters over every channel of <paramref name="image"/>
/// (and its inverse) and groups the results into horizontally oriented text rectangles.
/// </summary>
private static Rectangle[] DetectTextRegions(Mat image, string classifierDataPath)
{
    var checkInvert = true;
    var erf1 = Path.Combine(classifierDataPath, "trained_classifierNM1.xml");
    var erf2 = Path.Combine(classifierDataPath, "trained_classifierNM2.xml");
    using var er1 = new ERFilterNM1(erf1, 8, 0.00025f, 0.13f, 0.4f, true, 0.1f);
    using var er2 = new ERFilterNM2(erf2, 0.3f);

    var channelCount = image.NumberOfChannels;
    var channels = new UMat[checkInvert ? channelCount * 2 : channelCount];
    var regionVecs = new VectorOfERStat[channels.Length];
    try
    {
        // Allocation happens inside the try so a failure part-way through still releases
        // the native buffers that were already created.
        for (var i = 0; i < channelCount; i++)
        {
            var c = new UMat();
            channels[i] = c;
            CvInvoke.ExtractChannel(image, c, i);
        }

        if (checkInvert)
        {
            for (var i = 0; i < channelCount; i++)
            {
                var c = new UMat();
                channels[i + channelCount] = c;
                CvInvoke.BitwiseNot(channels[i], c);
            }
        }

        for (var i = 0; i < regionVecs.Length; i++)
        {
            regionVecs[i] = new VectorOfERStat();
        }

        for (var i = 0; i < channels.Length; i++)
        {
            er1.Run(channels[i], regionVecs[i]);
            er2.Run(channels[i], regionVecs[i]);
        }

        using var vm = new VectorOfUMat(channels);
        var classifier = Path.Combine(classifierDataPath, "trained_classifier_erGrouping.xml");
        var regions = ERFilter.ERGrouping(image, vm, regionVecs, ERFilter.GroupingMethod.OrientationHoriz, classifier, 0.5f);
        return regions ?? Array.Empty<Rectangle>();
    }
    finally
    {
        foreach (var tmp in channels)
        {
            tmp?.Dispose();
        }

        foreach (var tmp in regionVecs)
        {
            tmp?.Dispose();
        }
    }
}
/// <summary>
/// Runs OCR on <paramref name="source"/> with the controller's engine and current mode.
/// </summary>
/// <param name="source">Image to recognize.</param>
/// <param name="classifierDataPath">Directory containing the ER-filter classifier XML files.</param>
/// <returns>hOCR markup in full-page mode; otherwise the plain recognized text.</returns>
private string OcrImage(Mat source, string classifierDataPath)
{
    // The annotated image is not used here, so release its native memory as soon as OCR is done.
    using var result = new Mat();
    var ocredText = OcrImage(_ocr, source, Mode, result, classifierDataPath);

    // The former try { ... } catch { throw; } wrapper was a no-op and has been dropped;
    // exceptions propagate to the caller exactly as before.
    return Mode == OCRMode.FullPage ? _ocr.GetHOCRText() : ocredText;
}
/// <summary>
/// Older, self-contained OCR path: loads the configured image, recognizes it with a
/// default-mode English engine and returns the box text with trailing whitespace removed.
/// </summary>
private string OcrImg()
{
    // Old reference:
    // https://leonwoo-tech.blogspot.com/2018/06/tip-how-to-properly-setup-opencv-ocr-to.html
    using (var plateImage = new Image<Bgr, byte>(_numberPlateFilePath))
    using (var engine = new Tesseract(Tesseract.DefaultTesseractDirectory, "eng", OcrEngineMode.Default))
    {
        engine.SetImage(plateImage);
        engine.Recognize();
        return engine.GetBoxText().TrimEnd();
    }
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment