Created
April 26, 2021 02:52
-
-
Save mattyyzac/c49d55ca1a44ecc1161c3230b06db54e to your computer and use it in GitHub Desktop.
OCR with OpenCV (Emgu CV) and Tesseract
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Emgu.CV; | |
using Emgu.CV.CvEnum; | |
using Emgu.CV.OCR; | |
using Emgu.CV.Structure; | |
using Emgu.CV.Text; | |
using Emgu.CV.Util; | |
using Microsoft.AspNetCore.Mvc; | |
using Microsoft.Extensions.Options; | |
using System; | |
using System.Collections.Generic; | |
using System.Drawing; | |
using System.IO; | |
using System.Linq; | |
using System.Net.Http; | |
using System.Text; | |
namespace OpenCVOcrApi.Apis | |
{ | |
[Route("api/[controller]")] | |
[ApiController] | |
public class OcrController : ControllerBase | |
{ | |
// Path of the image that Get() runs OCR on; taken from AppSettings.NumberPlateFilePath
// and therefore null when the options (or that setting) are missing.
private readonly string _numberPlateFilePath;

// Client used to download Tesseract language data files.
private readonly HttpClient _httpClient;

// Folder holding the ER-filter classifier XML files ("classifier" under the app base directory).
private readonly string _classifierDataPath;

// Raw mode value of the current request: 0 selects full-page OCR, anything else text detection.
private int _ocrMode;

// OCR engine created by InitOcr; null before that call and after a failed initialization.
private Tesseract _ocr;

/// <summary>
/// Creates the controller, resolving the classifier folder, the configured image path
/// and an <see cref="HttpClient"/> for language-data downloads.
/// </summary>
/// <param name="options">Application settings; a null snapshot or value leaves the image path null.</param>
/// <param name="httpClientFactory">Factory used to create the download client.</param>
/// <exception cref="ArgumentNullException"><paramref name="httpClientFactory"/> is null.</exception>
public OcrController(IOptionsSnapshot<AppSettings> options, IHttpClientFactory httpClientFactory)
{
    // Fail with a descriptive exception instead of a bare NullReferenceException.
    if (httpClientFactory is null)
    {
        throw new ArgumentNullException(nameof(httpClientFactory));
    }

    _classifierDataPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "classifier");
    _numberPlateFilePath = options?.Value?.NumberPlateFilePath;
    _httpClient = httpClientFactory.CreateClient();
}
/// <summary>
/// Runs OCR on the configured number plate image.
/// </summary>
/// <param name="ocrMode">
/// 0 performs full-page OCR; any other value detects text regions first. Defaults to 1.
/// </param>
/// <returns>
/// 200 with the OCR output, 400 when the configured image file does not exist,
/// or 500 when the Tesseract engine could not be initialized.
/// </returns>
[HttpGet, Route("{ocrMode?}")]
public IActionResult Get(int ocrMode = 1)
{
    _ocrMode = ocrMode;

    // Report an initialization failure explicitly rather than answering with an empty 200.
    if (!InitOcr(Tesseract.DefaultTesseractDirectory, "eng", OcrEngineMode.TesseractLstmCombined))
    {
        return StatusCode(500, "Failed to initialize tesseract OCR engine");
    }

    try
    {
        // File.Exists returns false for a null or empty path, so an unconfigured
        // path is reported as a bad request instead of throwing.
        // (System.IO.File is spelled out because ControllerBase has its own File members.)
        if (!System.IO.File.Exists(_numberPlateFilePath))
        {
            return BadRequest($"{_numberPlateFilePath} did NOT exist.");
        }

        using var img = new Mat(_numberPlateFilePath);
        var ret = OcrImage(img, _classifierDataPath);
        return Ok(ret);
    }
    finally
    {
        // The engine is created per request by InitOcr; release it once the text has been extracted.
        _ocr?.Dispose();
        _ocr = null;
    }
}
/// <summary>
/// Makes sure the Tesseract language data file for <paramref name="lang"/> is present in
/// <paramref name="folder"/>, downloading it from the URL reported by
/// <c>Tesseract.GetLangFileUrl</c> when it is missing.
/// </summary>
/// <param name="folder">Directory that holds the <c>*.traineddata</c> files; created when absent.</param>
/// <param name="lang">Language code, e.g. "eng" or "osd".</param>
private void TesseractDownloadLangFile(string folder, string lang)
{
    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(folder);

    string dest = Path.Combine(folder, $"{lang}.traineddata");
    if (System.IO.File.Exists(dest))
    {
        return;
    }

    var source = Tesseract.GetLangFileUrl(lang);
    Console.WriteLine($"Downloading file from '{source}' to '{dest}'");

    // Download into a uniquely named temporary file and move it into place only when the
    // copy has finished, so an interrupted download never leaves a truncated
    // .traineddata file that later calls would accept as valid.
    string partial = $"{dest}.{Guid.NewGuid():N}.tmp";
    try
    {
        // GetAwaiter().GetResult() surfaces the real exception instead of an AggregateException,
        // which keeps the message logged by the caller meaningful.
        using (var download = _httpClient.GetStreamAsync(source).GetAwaiter().GetResult())
        using (var target = System.IO.File.Create(partial))
        {
            download.CopyTo(target);
        }

        System.IO.File.Move(partial, dest, true);
    }
    catch
    {
        try
        {
            System.IO.File.Delete(partial);
        }
        catch (IOException)
        {
            // Best-effort cleanup: the original failure below is the one worth reporting.
        }

        throw;
    }

    Console.WriteLine("Download completed");
}
/// <summary>
/// (Re)creates the Tesseract engine stored in <c>_ocr</c>, fetching the requested language
/// data and the "osd" data first when they are not on disk.
/// </summary>
/// <param name="path">Tesseract data directory; null or empty falls back to <c>Tesseract.DefaultTesseractDirectory</c>.</param>
/// <param name="lang">Language code passed to the engine.</param>
/// <param name="mode">Engine mode passed to the engine.</param>
/// <returns>True when the engine was created; false when any step threw (the error is written to the console).</returns>
private bool InitOcr(string path, string lang, OcrEngineMode mode)
{
    try
    {
        // Release an engine left over from an earlier call before building a new one.
        _ocr?.Dispose();
        _ocr = null;

        var dataFolder = string.IsNullOrEmpty(path) ? Tesseract.DefaultTesseractDirectory : path;

        foreach (var requiredLang in new[] { lang, "osd" })
        {
            TesseractDownloadLangFile(dataFolder, requiredLang);
        }

        _ocr = new Tesseract(dataFolder, lang, mode);
        Console.WriteLine($"{lang} : {mode} (tesseract version {Tesseract.VersionString})");
        return true;
    }
    catch (Exception initError)
    {
        _ocr = null;
        Console.WriteLine(initError.Message);
        Console.WriteLine("Failed to initialize tesseract OCR engine");
        return false;
    }
}
/// <summary>
/// Strategies the controller can use to run OCR over an image.
/// </summary>
private enum OCRMode
{
    /// <summary>
    /// Hand the whole image to the OCR engine in one pass.
    /// </summary>
    FullPage = 0,

    /// <summary>
    /// Locate text regions first, then run OCR on each region.
    /// </summary>
    TextDetection = 1
}
/// <summary>
/// OCR strategy for the current request: 0 maps to <see cref="OCRMode.FullPage"/>,
/// every other value to <see cref="OCRMode.TextDetection"/>.
/// </summary>
private OCRMode Mode => _ocrMode switch
{
    0 => OCRMode.FullPage,
    _ => OCRMode.TextDetection,
};
/// <summary>
/// Grows or shrinks a rectangle about its center.
/// </summary>
/// <param name="r">Rectangle to resize.</param>
/// <param name="scale">Factor applied to both width and height.</param>
/// <returns>A rectangle with the rounded, scaled size, positioned so the center stays where it was.</returns>
private static Rectangle ScaleRectangle(Rectangle r, double scale)
{
    // Size is rounded first; the origin is then derived from the rounded size.
    double scaledWidth = Math.Round(r.Width * scale);
    double scaledHeight = Math.Round(r.Height * scale);

    double midX = r.X + r.Width / 2.0;
    double midY = r.Y + r.Height / 2.0;

    var origin = new Point(
        (int)Math.Round(midX - scaledWidth / 2.0),
        (int)Math.Round(midY - scaledHeight / 2.0));
    var size = new Size((int)scaledWidth, (int)scaledHeight);

    return new Rectangle(origin, size);
}
/// <summary>
/// Runs OCR over <paramref name="image"/> with the given engine and draws the boxes of
/// what was found.
/// </summary>
/// <param name="ocr">Initialized Tesseract engine.</param>
/// <param name="image">Input image (single- or multi-channel).</param>
/// <param name="mode">Full-page OCR, or text-region detection followed by per-region OCR.</param>
/// <param name="imageColor">Receives a BGR copy of <paramref name="image"/>; boxes are drawn onto it
/// (in full-page mode they go onto a thresholded working image instead when one is produced).</param>
/// <param name="classifierDataPath">Folder containing the ER-filter classifier XML files; only used in text-detection mode.</param>
/// <returns>The recognized text; in text-detection mode one entry per region, each followed by a newline.</returns>
/// <exception cref="InvalidOperationException">The engine reported a recognition failure.</exception>
private static string OcrImage(Tesseract ocr, Mat image, OCRMode mode, Mat imageColor, string classifierDataPath)
{
    // imageColor always ends up as a 3-channel rendering of the input.
    if (image.NumberOfChannels == 1)
    {
        CvInvoke.CvtColor(image, imageColor, ColorConversion.Gray2Bgr);
    }
    else
    {
        image.CopyTo(imageColor);
    }

    return mode == OCRMode.FullPage
        ? RecognizeFullPage(ocr, image, imageColor)
        : RecognizeTextRegions(ocr, image, imageColor, classifierDataPath);
}

// Full-page OCR: recognizes the whole image, optionally re-reads thresholded versions, draws character boxes.
private static string RecognizeFullPage(Tesseract ocr, Mat image, Mat imageColor)
{
    var drawCharColor = new Bgr(Color.Red);
    var canvas = imageColor;

    ocr.SetImage(imageColor);
    if (ocr.Recognize() != 0)
    {
        throw new InvalidOperationException("Failed to recognize image");
    }

    var characters = ocr.GetCharacters();
    if (characters?.Any() == true)
    {
        // NOTE(review): the thresholded passes run when characters WERE found and do not
        // call Recognize() again. This mirrors the previous behavior; confirm whether the
        // intent was to retry only when nothing was found.

        // Not disposed here: the engine is given this Mat as its current image and
        // GetUTF8Text() below still runs against it; boxes are drawn onto it as well.
        var imgThresholded = new Mat();

        using (var imgGrey = new Mat())
        {
            // Convert from imageColor (always BGR) rather than the raw input, which may be
            // single-channel and would make the Bgr2Gray conversion fail.
            CvInvoke.CvtColor(imageColor, imgGrey, ColorConversion.Bgr2Gray);
            CvInvoke.Threshold(imgGrey, imgThresholded, 65, 255, ThresholdType.Binary);
        }

        ocr.SetImage(imgThresholded);
        characters = ocr.GetCharacters();
        canvas = imgThresholded;

        if (characters?.Any() == true)
        {
            CvInvoke.Threshold(image, imgThresholded, 190, 255, ThresholdType.Binary);
            ocr.SetImage(imgThresholded);
            characters = ocr.GetCharacters();
        }
    }

    // GetCharacters() is treated as nullable above, so do not enumerate it blindly.
    foreach (var c in characters ?? Array.Empty<Tesseract.Character>())
    {
        CvInvoke.Rectangle(canvas, c.Region, drawCharColor.MCvScalar);
    }

    return ocr.GetUTF8Text();
}

// Runs the NM1/NM2 extremal-region filters over every channel (and its inverse) and groups the hits into text boxes.
private static Rectangle[] DetectTextRegions(Mat image, string classifierDataPath)
{
    var checkInvert = true;
    var erf1 = Path.Combine(classifierDataPath, "trained_classifierNM1.xml");
    var erf2 = Path.Combine(classifierDataPath, "trained_classifierNM2.xml");
    using var er1 = new ERFilterNM1(erf1, 8, 0.00025f, 0.13f, 0.4f, true, 0.1f);
    using var er2 = new ERFilterNM2(erf2, 0.3f);

    var channelCount = image.NumberOfChannels;
    var channels = new UMat[checkInvert ? channelCount * 2 : channelCount];
    var regionVecs = new VectorOfERStat[channels.Length];

    // Everything that allocates native buffers happens inside the try so the finally
    // block releases whatever was created, even when a later step throws.
    try
    {
        for (var i = 0; i < channelCount; i++)
        {
            var channel = new UMat();
            channels[i] = channel;
            CvInvoke.ExtractChannel(image, channel, i);
        }

        if (checkInvert)
        {
            for (var i = 0; i < channelCount; i++)
            {
                var inverted = new UMat();
                channels[i + channelCount] = inverted;
                CvInvoke.BitwiseNot(channels[i], inverted);
            }
        }

        for (var i = 0; i < regionVecs.Length; i++)
        {
            regionVecs[i] = new VectorOfERStat();
        }

        for (var i = 0; i < channels.Length; i++)
        {
            er1.Run(channels[i], regionVecs[i]);
            er2.Run(channels[i], regionVecs[i]);
        }

        using var vm = new VectorOfUMat(channels);
        var classifier = Path.Combine(classifierDataPath, "trained_classifier_erGrouping.xml");
        return ERFilter.ERGrouping(image, vm, regionVecs, ERFilter.GroupingMethod.OrientationHoriz, classifier, 0.5f);
    }
    finally
    {
        foreach (var channel in channels)
        {
            channel?.Dispose();
        }

        foreach (var vec in regionVecs)
        {
            vec?.Dispose();
        }
    }
}

// Text-detection OCR: finds text regions, recognizes each one separately, draws region and character boxes.
private static string RecognizeTextRegions(Tesseract ocr, Mat image, Mat imageColor, string classifierDataPath)
{
    var drawCharColor = new Bgr(Color.Red);
    var drawRegionColor = new Bgr(Color.Red);
    var imageRegion = new Rectangle(Point.Empty, imageColor.Size);

    // Pad every detected box by 10%, clip it to the image, and drop boxes that end up
    // with no area: there is nothing to recognize in them.
    var regions = DetectTextRegions(image, classifierDataPath)
        .Select(detected =>
        {
            var padded = ScaleRectangle(detected, 1.1);
            padded.Intersect(imageRegion);
            return padded;
        })
        .Where(clipped => clipped.Width > 0 && clipped.Height > 0)
        .ToArray();

    var allChars = new List<Tesseract.Character>();
    var allText = new StringBuilder();
    foreach (var rect in regions)
    {
        using var region = new Mat(image, rect);
        ocr.SetImage(region);
        if (ocr.Recognize() != 0)
        {
            throw new InvalidOperationException("Failed to recognize image");
        }

        var characters = ocr.GetCharacters() ?? Array.Empty<Tesseract.Character>();

        // Convert the coordinates from the local region to the whole image.
        for (var i = 0; i < characters.Length; i++)
        {
            var charRegion = characters[i].Region;
            charRegion.Offset(rect.Location);
            characters[i].Region = charRegion;
        }

        allChars.AddRange(characters);
        allText.AppendLine(ocr.GetUTF8Text());
    }

    foreach (var rect in regions)
    {
        CvInvoke.Rectangle(imageColor, rect, drawRegionColor.MCvScalar);
    }

    foreach (var c in allChars)
    {
        CvInvoke.Rectangle(imageColor, c.Region, drawCharColor.MCvScalar);
    }

    return allText.ToString();
}
/// <summary>
/// Runs OCR on <paramref name="source"/> with the controller's engine, using the mode
/// selected for the current request.
/// </summary>
/// <param name="source">Image to recognize.</param>
/// <param name="classifierDataPath">Folder containing the ER-filter classifier XML files.</param>
/// <returns>hOCR markup in full-page mode; the plain recognized text in text-detection mode.</returns>
private string OcrImage(Mat source, string classifierDataPath)
{
    // Scratch image the static overload draws its boxes on. It is not returned to the
    // caller, so release it when this method exits (after the hOCR text has been read).
    using var result = new Mat();

    var ocredText = OcrImage(_ocr, source, Mode, result, classifierDataPath);
    if (Mode == OCRMode.FullPage)
    {
        return _ocr.GetHOCRText();
    }

    return ocredText;
}
/// <summary>
/// Earlier, simpler OCR path: loads the configured image, runs Tesseract with the default
/// engine mode and returns the box text.
/// </summary>
/// <returns>The box text with trailing whitespace removed; empty when the engine yields none.</returns>
/// <exception cref="InvalidOperationException">The engine reported a recognition failure.</exception>
private string OcrImg()
{
    // Old reference:
    // https://leonwoo-tech.blogspot.com/2018/06/tip-how-to-properly-setup-opencv-ocr-to.html
    using var image = new Image<Bgr, byte>(_numberPlateFilePath);
    using var tesseractOcrProvider = new Tesseract(Tesseract.DefaultTesseractDirectory, "eng", OcrEngineMode.Default);

    tesseractOcrProvider.SetImage(image);

    // Check the status code the same way the main OCR path does instead of ignoring it.
    if (tesseractOcrProvider.Recognize() != 0)
    {
        throw new InvalidOperationException("Failed to recognize image");
    }

    return tesseractOcrProvider.GetBoxText()?.TrimEnd() ?? string.Empty;
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment