Last active
October 11, 2021 12:56
-
-
Save ludwo/c091ed6261d26654c8b71949d89f8142 to your computer and use it in GitHub Desktop.
EAST Text Detector. OpenCvSharp version of https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Detects text regions in an image with the EAST text detector and saves an annotated copy of the image.
/// </summary>
/// <remarks>
/// OpenCvSharp port of the OpenCV sample
/// <see href="https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp">text_detection.cpp</see>.
/// The detected boxes are drawn onto the image, which is written next to the input file as
/// <c>&lt;name&gt;_east.jpg</c>. Text recognition itself is not implemented yet.
/// </remarks>
/// <param name="fileName">Name of the image file.</param>
/// <param name="loaderFactory">The loader factory. Not used by the current implementation.</param>
/// <returns>Currently always <see cref="string.Empty"/>; recognition of the detected regions is still a TODO.</returns>
public string ReadAllText(string fileName, ITextDocumentLoaderFactory loaderFactory)
{
    // Load network.
    using (Net net = CvDnn.ReadNet(Path.GetFullPath(EastModelFile)))
    using (Mat img = new Mat(fileName))
    // Prepare input image: resize to the network input size, subtract the per-channel mean, swap R and B.
    using (var blob = CvDnn.BlobFromImage(img, 1.0, new Size(InputWidth, InputHeight), new Scalar(123.68, 116.78, 103.94), true, false))
    {
        // Forward pass.
        // The network has two outputs:
        //   feature_fusion/Conv_7/Sigmoid - confidence score of each detected box
        //   feature_fusion/concat_3       - geometry of each text box
        var outputBlobNames = new string[] { "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3" };
        var outputBlobs = outputBlobNames.Select(_ => new Mat()).ToArray();
        try
        {
            net.SetInput(blob);
            net.Forward(outputBlobs, outputBlobNames);
            Mat scores = outputBlobs[0];
            Mat geometry = outputBlobs[1];

            // Decode predicted bounding boxes (positions of the text boxes along with their orientation).
            this.Decode(scores, geometry, ConfThreshold, out var boxes, out var confidences);

            // Apply non-maximum suppression to filter out overlapping detections and get the final predictions.
            CvDnn.NMSBoxes(boxes, confidences, ConfThreshold, NmsThreshold, out var indices);

            // Render detections. Boxes are expressed in network-input coordinates, so scale them
            // back to the size of the original image first.
            Point2f ratio = new Point2f((float)img.Cols / InputWidth, (float)img.Rows / InputHeight);
            foreach (int index in indices)
            {
                Point2f[] vertices = boxes[index].Points();
                for (int j = 0; j < 4; ++j)
                {
                    vertices[j].X *= ratio.X;
                    vertices[j].Y *= ratio.Y;
                }

                // Connect each vertex with the next one; the modulo closes the quadrilateral.
                for (int j = 0; j < 4; ++j)
                {
                    Point2f from = vertices[j];
                    Point2f to = vertices[(j + 1) % 4];
                    Cv2.Line(img, (int)from.X, (int)from.Y, (int)to.X, (int)to.Y, new Scalar(0, 255, 0), 3);
                }
            }

            // Optional - save detections next to the input file.
            // GetDirectoryName returns null for a root path; fall back to an empty directory
            // so Path.Combine does not throw.
            string directory = Path.GetDirectoryName(fileName) ?? string.Empty;
            img.SaveImage(Path.Combine(directory, $"{Path.GetFileNameWithoutExtension(fileName)}_east.jpg"));

            // return GetText(img, ...)
            return string.Empty;
        }
        finally
        {
            // The output Mats wrap native memory and are not covered by the using statements above.
            foreach (Mat outputBlob in outputBlobs)
            {
                outputBlob.Dispose();
            }
        }
    }
}
/// <summary>
/// Decodes the raw EAST network outputs into rotated bounding boxes and their confidences.
/// </summary>
/// <param name="scores">Score blob; must be 4-dimensional with shape 1 x 1 x H x W.</param>
/// <param name="geometry">Geometry blob; must be 4-dimensional with shape 1 x 5 x H x W (four distances and an angle per cell).</param>
/// <param name="confThreshold">Minimum score a cell needs in order to produce a box.</param>
/// <param name="boxes">Receives one rotated rectangle per cell whose score reaches <paramref name="confThreshold"/>.</param>
/// <param name="confidences">Receives the score of each box, in the same order as <paramref name="boxes"/>.</param>
/// <remarks>
/// When either blob is null or the shapes do not match the expected layout, both output lists are left empty.
/// </remarks>
private unsafe void Decode(Mat scores, Mat geometry, float confThreshold, out IList<RotatedRect> boxes, out IList<float> confidences)
{
    boxes = new List<RotatedRect>();
    confidences = new List<float>();
    if ((scores == null || scores.Dims() != 4 || scores.Size(0) != 1 || scores.Size(1) != 1) ||
        (geometry == null || geometry.Dims() != 4 || geometry.Size(0) != 1 || geometry.Size(1) != 5) ||
        (scores.Size(2) != geometry.Size(2) || scores.Size(3) != geometry.Size(3)))
    {
        return;
    }

    int height = scores.Size(2);
    int width = scores.Size(3);
    for (int y = 0; y < height; ++y)
    {
        // Each span views one row of the feature map, so its length is the row width.
        // (Using the height here breaks on non-square inputs.)
        var scoresData = new ReadOnlySpan<float>((void*)scores.Ptr(0, 0, y), width);
        var x0Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 0, y), width);
        var x1Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 1, y), width);
        var x2Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 2, y), width);
        var x3Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 3, y), width);
        var anglesData = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 4, y), width);
        for (int x = 0; x < width; ++x)
        {
            var score = scoresData[x];
            if (score < confThreshold)
            {
                continue;
            }

            // Scale cell indices by 4 to get input-image coordinates
            // (per the referenced OpenCV sample, the feature maps are 4x smaller than the input).
            float offsetX = x * 4.0f;
            float offsetY = y * 4.0f;
            float angle = anglesData[x];
            float cosA = (float)Math.Cos(angle);
            float sinA = (float)Math.Sin(angle);
            float x0 = x0Data[x];
            float x1 = x1Data[x];
            float x2 = x2Data[x];
            float x3 = x3Data[x];
            float h = x0 + x2;
            float w = x1 + x3;

            // Reconstruct two opposite corners of the rotated box and take their midpoint as the center.
            Point2f offset = new Point2f(offsetX + (cosA * x1) + (sinA * x2), offsetY - (sinA * x1) + (cosA * x2));
            Point2f p1 = new Point2f((-sinA * h) + offset.X, (-cosA * h) + offset.Y);
            Point2f p3 = new Point2f((-cosA * w) + offset.X, (sinA * w) + offset.Y);
            Point2f center = new Point2f(0.5f * (p1.X + p3.X), 0.5f * (p1.Y + p3.Y));

            // The angle is negated and converted from radians to degrees for RotatedRect.
            RotatedRect r = new RotatedRect(center, new Size2f(w, h), (float)(-angle * 180.0f / Math.PI));
            boxes.Add(r);
            confidences.Add(score);
        }
    }
}
Where do I find this function? ReadOnlySpan
That would be System.ReadOnlySpan. Alternatively, I think you can use At, but it is not as fast. Example:
var scoresData = Enumerable.Range(0, height).Select(row => scores.At<float>(0,0, y, row)).ToArray();
and I can't get this to work: scores.Dims()
Dims is available as a property scores.Dims
: https://github.com/shimat/opencvsharp/blob/master/src/OpenCvSharp/Modules/core/Mat/Mat.cs#L3445
I want to process images of several sizes (e.g. 320x320, 640x640, 960x960) using the same net,
because loading the .pb file takes a long time.
But when I change InputHeight and InputWidth per file, I get an exception at
net.Forward(outputBlobs, outputBlobNames);
like
Inconsistent shape for ConcatLayer
Please help me
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi
Thank you for the conversion. I had some issues which I hope you can help me with.
Where do I find this function? ReadOnlySpan
and I can't get this to work: scores.Dims()
I am using opencvsharp4.
Best
Christian