Skip to content

Instantly share code, notes, and snippets.

@ludwo
Last active October 11, 2021 12:56
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ludwo/c091ed6261d26654c8b71949d89f8142 to your computer and use it in GitHub Desktop.
Save ludwo/c091ed6261d26654c8b71949d89f8142 to your computer and use it in GitHub Desktop.
/// <summary>
/// Detects text regions in an image with the EAST network, draws the detected boxes onto the image
/// and saves the annotated copy next to the original file with an "_east.jpg" suffix.
/// </summary>
/// <remarks>
/// Port of the OpenCV sample
/// <see href="https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp"/>.
/// Text recognition itself is not implemented here yet, so the method always returns an empty string.
/// </remarks>
/// <param name="fileName">Name of the image file.</param>
/// <param name="loaderFactory">The loader factory. Not used by the current implementation.</param>
/// <returns>Scanned text; currently always <see cref="string.Empty"/>.</returns>
public string ReadAllText(string fileName, ITextDocumentLoaderFactory loaderFactory)
{
    // Load network.
    using (Net net = CvDnn.ReadNet(Path.GetFullPath(EastModelFile)))
    using (Mat img = new Mat(fileName))
    // Prepare input image: resized to InputWidth x InputHeight with the given per-channel mean.
    // The trailing "true, false" are the swapRB and crop flags (per the OpenCvSharp BlobFromImage signature).
    using (var blob = CvDnn.BlobFromImage(img, 1.0, new Size(InputWidth, InputHeight), new Scalar(123.68, 116.78, 103.94), true, false))
    {
        // Forward Pass
        // Now that we have prepared the input, we will pass it through the network. There are two outputs of the network.
        // One specifies the geometry of the Text-box and the other specifies the confidence score of the detected box.
        // These are given by the layers :
        // feature_fusion/concat_3
        // feature_fusion/Conv_7/Sigmoid
        var outputBlobNames = new string[] { "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3" };
        var outputBlobs = outputBlobNames.Select(_ => new Mat()).ToArray();
        try
        {
            net.SetInput(blob);
            net.Forward(outputBlobs, outputBlobNames);

            // Order matches outputBlobNames: index 0 is the score map, index 1 is the geometry map.
            Mat scores = outputBlobs[0];
            Mat geometry = outputBlobs[1];

            // Decode predicted bounding boxes (decode the positions of the text boxes along with their orientation)
            this.Decode(scores, geometry, ConfThreshold, out var boxes, out var confidences);

            // Apply non-maximum suppression procedure for filtering out the false positives and get the final predictions
            CvDnn.NMSBoxes(boxes, confidences, ConfThreshold, NmsThreshold, out var indices);

            // Render detections.
            // Boxes are expressed in network-input coordinates, so scale them back to the original image size.
            Point2f ratio = new Point2f((float)img.Cols / InputWidth, (float)img.Rows / InputHeight);
            for (var i = 0; i < indices.Length; ++i)
            {
                RotatedRect box = boxes[indices[i]];
                Point2f[] vertices = box.Points();
                for (int j = 0; j < 4; ++j)
                {
                    vertices[j].X *= ratio.X;
                    vertices[j].Y *= ratio.Y;
                }

                // Connect each vertex to the next one, wrapping around to close the quadrilateral.
                for (int j = 0; j < 4; ++j)
                {
                    Cv2.Line(img, (int)vertices[j].X, (int)vertices[j].Y, (int)vertices[(j + 1) % 4].X, (int)vertices[(j + 1) % 4].Y, new Scalar(0, 255, 0), 3);
                }
            }

            // Optional - Save detections
            img.SaveImage(Path.Combine(Path.GetDirectoryName(fileName), $"{Path.GetFileNameWithoutExtension(fileName)}_east.jpg"));

            // return GetText(img, ...)
            return string.Empty;
        }
        finally
        {
            // The output Mats wrap native memory; release them even when the forward pass or decoding throws.
            foreach (Mat outputBlob in outputBlobs)
            {
                outputBlob.Dispose();
            }
        }
    }
}
/// <summary>
/// Decodes the raw EAST network outputs into rotated boxes and their confidence scores.
/// </summary>
/// <param name="scores">Score map; must be 4-dimensional with sizes 1 x 1 x H x W.</param>
/// <param name="geometry">
/// Geometry map; must be 4-dimensional with sizes 1 x 5 x H x W. Channels 0-3 are read as box extents
/// and channel 4 as the rotation angle in radians.
/// </param>
/// <param name="confThreshold">Minimum score a cell must reach to produce a box.</param>
/// <param name="boxes">Receives the decoded boxes; left empty when the inputs do not have the expected shape.</param>
/// <param name="confidences">Receives the score of each decoded box, index-aligned with <paramref name="boxes"/>.</param>
private unsafe void Decode(Mat scores, Mat geometry, float confThreshold, out IList<RotatedRect> boxes, out IList<float> confidences)
{
    boxes = new List<RotatedRect>();
    confidences = new List<float>();

    // Silently return empty results when the network outputs do not have the expected layout.
    if ((scores == null || scores.Dims() != 4 || scores.Size(0) != 1 || scores.Size(1) != 1) ||
        (geometry == null || geometry.Dims() != 4 || geometry.Size(0) != 1 || geometry.Size(1) != 5) ||
        (scores.Size(2) != geometry.Size(2) || scores.Size(3) != geometry.Size(3)))
    {
        return;
    }

    int height = scores.Size(2);
    int width = scores.Size(3);
    for (int y = 0; y < height; ++y)
    {
        // Each span views one row of the map, so its length is the row length (width), not the row count.
        // NOTE(review): assumes both Mats hold contiguous 32-bit floats - confirm against the model output.
        var scoresData = new ReadOnlySpan<float>((void*)scores.Ptr(0, 0, y), width);
        var x0Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 0, y), width);
        var x1Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 1, y), width);
        var x2Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 2, y), width);
        var x3Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 3, y), width);
        var anglesData = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 4, y), width);
        for (int x = 0; x < width; ++x)
        {
            var score = scoresData[x];
            if (score < confThreshold)
            {
                continue;
            }

            // Map the cell back to network-input coordinates; the referenced OpenCV sample uses the same
            // factor because the feature maps are 4 times smaller than the input image.
            float offsetX = x * 4.0f;
            float offsetY = y * 4.0f;
            float angle = anglesData[x];
            float cosA = (float)Math.Cos(angle);
            float sinA = (float)Math.Sin(angle);
            float x0 = x0Data[x];
            float x1 = x1Data[x];
            float x2 = x2Data[x];
            float x3 = x3Data[x];

            // Opposite extents add up to the full box height and width.
            float h = x0 + x2;
            float w = x1 + x3;

            // Rotate the extents by the predicted angle to find a reference corner, derive two further
            // corners (p1, p3) from it and take their midpoint as the box centre.
            Point2f offset = new Point2f(offsetX + (cosA * x1) + (sinA * x2), offsetY - (sinA * x1) + (cosA * x2));
            Point2f p1 = new Point2f((-sinA * h) + offset.X, (-cosA * h) + offset.Y);
            Point2f p3 = new Point2f((-cosA * w) + offset.X, (sinA * w) + offset.Y);

            // The angle is negated and converted from radians to degrees before being passed to RotatedRect.
            RotatedRect r = new RotatedRect(new Point2f(0.5f * (p1.X + p3.X), 0.5f * (p1.Y + p3.Y)), new Size2f(w, h), (float)(-angle * 180.0f / Math.PI));
            boxes.Add(r);
            confidences.Add(score);
        }
    }
}
@BergChristian
Copy link

Hi

Thank you for the conversion. I had some issues which I hope you can help me with.

Where do I find this type? ReadOnlySpan

and I can't get this to work: scores.Dims()

I am using opencvsharp4.

Best
Christian

@marquesmps
Copy link

marquesmps commented Feb 12, 2020

Where to I find this function? ReadOnlySpan

That would be System.ReadOnlySpan. Alternatively, I think you can use At, but it is not as fast. Example:
var scoresData = Enumerable.Range(0, height).Select(row => scores.At<float>(0,0, y, row)).ToArray();

and I can't get this to work: scores.Dims()

Dims is available as a property scores.Dims: https://github.com/shimat/opencvsharp/blob/master/src/OpenCvSharp/Modules/core/Mat/Mat.cs#L3445

@kurikabocya
Copy link

kurikabocya commented Dec 9, 2020

I want to read images of several sizes (e.g. 320x320, 640x640, 960x960) using the same net,
because loading the pb file takes a long time.

But when I change the InputSize and InputWidth per file, I get an exception at
net.Forward(outputBlobs, outputBlobNames);
like
Inconsistent shape for ConcatLayer
Please help me

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment