ludwo/EastTextDetection.cs

## EastTextDetection.cs
        /// <summary>
        /// Runs the EAST text detector on an image, draws the detected text boxes onto the image and
        /// saves the annotated copy next to the input file with an <c>_east.jpg</c> suffix.
        /// </summary>
        /// <remarks>
        /// Based on the OpenCV sample
        /// <see href="https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp"/>.
        /// Text recognition is not implemented yet, so the method currently always returns an empty string.
        /// </remarks>
        /// <param name="fileName">Name of the image file.</param>
        /// <param name="loaderFactory">The loader factory. Not used by the current implementation.</param>
        /// <returns>Scanned text; currently always <see cref="string.Empty"/>.</returns>
        public string ReadAllText(string fileName, ITextDocumentLoaderFactory loaderFactory)
        {
            // Load network.
            using (Net net = CvDnn.ReadNet(Path.GetFullPath(EastModelFile)))
            using (Mat img = new Mat(fileName))

            // Prepare input image (resized to InputWidth x InputHeight, mean subtracted, swapRB = true, crop = false).
            using (var blob = CvDnn.BlobFromImage(img, 1.0, new Size(InputWidth, InputHeight), new Scalar(123.68, 116.78, 103.94), true, false))
            {
                // Forward Pass
                // There are two outputs of the network. One specifies the geometry of the text box and the
                // other specifies the confidence score of the detected box. These are given by the layers:
                //   feature_fusion/concat_3          (geometry)
                //   feature_fusion/Conv_7/Sigmoid    (scores)
                var outputBlobNames = new string[] { "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3" };
                var outputBlobs = outputBlobNames.Select(_ => new Mat()).ToArray();

                try
                {
                    net.SetInput(blob);
                    net.Forward(outputBlobs, outputBlobNames);

                    // Order matches outputBlobNames: scores first, geometry second.
                    Mat scores = outputBlobs[0];
                    Mat geometry = outputBlobs[1];

                    // Decode predicted bounding boxes (positions of the text boxes along with their orientation).
                    this.Decode(scores, geometry, ConfThreshold, out var boxes, out var confidences);

                    // Apply non-maximum suppression to filter out overlapping boxes and get the final predictions.
                    CvDnn.NMSBoxes(boxes, confidences, ConfThreshold, NmsThreshold, out var indices);

                    // Render detections. Boxes are in network-input coordinates, so scale them back to the image size.
                    Point2f ratio = new Point2f((float)img.Cols / InputWidth, (float)img.Rows / InputHeight);
                    foreach (int index in indices)
                    {
                        Point2f[] vertices = boxes[index].Points();

                        for (int j = 0; j < 4; ++j)
                        {
                            vertices[j].X *= ratio.X;
                            vertices[j].Y *= ratio.Y;
                        }

                        // Connect each vertex with the next one; the modulo closes the quadrilateral.
                        for (int j = 0; j < 4; ++j)
                        {
                            Point2f from = vertices[j];
                            Point2f to = vertices[(j + 1) % 4];
                            Cv2.Line(img, (int)from.X, (int)from.Y, (int)to.X, (int)to.Y, new Scalar(0, 255, 0), 3);
                        }
                    }

                    // Optional - Save detections
                    img.SaveImage(Path.Combine(Path.GetDirectoryName(fileName), $"{Path.GetFileNameWithoutExtension(fileName)}_east.jpg"));

                    // return GetText(img, ...)
                    return string.Empty;
                }
                finally
                {
                    // The output Mats wrap native memory; release them even when inference or rendering throws.
                    foreach (Mat outputBlob in outputBlobs)
                    {
                        outputBlob.Dispose();
                    }
                }
            }
        }

        /// <summary>
        /// Decodes the raw detector outputs into rotated text boxes and their confidence scores.
        /// </summary>
        /// <param name="scores">Score map; must be 4-dimensional with sizes 1 x 1 x H x W.</param>
        /// <param name="geometry">Geometry map; must be 4-dimensional with sizes 1 x 5 x H x W (four distances plus an angle per cell).</param>
        /// <param name="confThreshold">Minimum score a cell must reach to produce a box.</param>
        /// <param name="boxes">Receives the decoded boxes; empty when the inputs are null or have an unexpected shape.</param>
        /// <param name="confidences">Receives the score of each decoded box, in the same order as <paramref name="boxes"/>.</param>
        private unsafe void Decode(Mat scores, Mat geometry, float confThreshold, out IList<RotatedRect> boxes, out IList<float> confidences)
        {
            boxes = new List<RotatedRect>();
            confidences = new List<float>();

            // Silently return empty results when the blobs do not have the expected layout.
            if ((scores == null || scores.Dims() != 4 || scores.Size(0) != 1 || scores.Size(1) != 1) ||
                (geometry == null || geometry.Dims() != 4 || geometry.Size(0) != 1 || geometry.Size(1) != 5) ||
                (scores.Size(2) != geometry.Size(2) || scores.Size(3) != geometry.Size(3)))
            {
                return;
            }

            int height = scores.Size(2);
            int width = scores.Size(3);

            // NOTE(review): the row pointers are reinterpreted as float data; this assumes both Mats hold
            // 32-bit floats - confirm against the network output type.
            for (int y = 0; y < height; ++y)
            {
                // Each row is indexed by x in [0, width), so the spans must be `width` elements long.
                // Sizing them by `height` throws IndexOutOfRangeException for maps wider than they are tall.
                var scoresData = new ReadOnlySpan<float>((void*)scores.Ptr(0, 0, y), width);
                var x0Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 0, y), width);
                var x1Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 1, y), width);
                var x2Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 2, y), width);
                var x3Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 3, y), width);
                var anglesData = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 4, y), width);

                for (int x = 0; x < width; ++x)
                {
                    var score = scoresData[x];
                    if (score < confThreshold)
                    {
                        continue;
                    }

                    // Map the cell back to input coordinates (factor 4 - presumably the network's output stride).
                    float offsetX = x * 4.0f;
                    float offsetY = y * 4.0f;
                    float angle = anglesData[x];
                    float cosA = (float)Math.Cos(angle);
                    float sinA = (float)Math.Sin(angle);
                    float x0 = x0Data[x];
                    float x1 = x1Data[x];
                    float x2 = x2Data[x];
                    float x3 = x3Data[x];

                    // Box extents are the sums of the opposing distances.
                    float h = x0 + x2;
                    float w = x1 + x3;

                    // The box centre is the midpoint of p1 and p3, both derived from the rotated offset point.
                    Point2f offset = new Point2f(offsetX + (cosA * x1) + (sinA * x2), offsetY - (sinA * x1) + (cosA * x2));
                    Point2f p1 = new Point2f((-sinA * h) + offset.X, (-cosA * h) + offset.Y);
                    Point2f p3 = new Point2f((-cosA * w) + offset.X, (sinA * w) + offset.Y);
                    Point2f center = new Point2f(0.5f * (p1.X + p3.X), 0.5f * (p1.Y + p3.Y));

                    // The angle is negated and converted from radians to degrees for RotatedRect.
                    RotatedRect r = new RotatedRect(center, new Size2f(w, h), (float)(-angle * 180.0f / Math.PI));
                    boxes.Add(r);
                    confidences.Add(score);
                }
            }
        }
	/// <summary>
	/// Runs the EAST text detector on an image, draws the detected text boxes onto the image and
	/// saves the annotated copy next to the input file with an <c>_east.jpg</c> suffix.
	/// </summary>
	/// <remarks>
	/// Based on the OpenCV sample
	/// <see href="https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp"/>.
	/// Text recognition is not implemented yet, so the method currently always returns an empty string.
	/// </remarks>
	/// <param name="fileName">Name of the image file.</param>
	/// <param name="loaderFactory">The loader factory. Not used by the current implementation.</param>
	/// <returns>Scanned text; currently always <see cref="string.Empty"/>.</returns>
	public string ReadAllText(string fileName, ITextDocumentLoaderFactory loaderFactory)
	{
		// Load network.
		using (Net net = CvDnn.ReadNet(Path.GetFullPath(EastModelFile)))
		using (Mat img = new Mat(fileName))

		// Prepare input image (resized to InputWidth x InputHeight, mean subtracted, swapRB = true, crop = false).
		using (var blob = CvDnn.BlobFromImage(img, 1.0, new Size(InputWidth, InputHeight), new Scalar(123.68, 116.78, 103.94), true, false))
		{
			// Forward Pass
			// There are two outputs of the network. One specifies the geometry of the text box and the
			// other specifies the confidence score of the detected box. These are given by the layers:
			//   feature_fusion/concat_3          (geometry)
			//   feature_fusion/Conv_7/Sigmoid    (scores)
			var outputBlobNames = new string[] { "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3" };
			var outputBlobs = outputBlobNames.Select(_ => new Mat()).ToArray();

			try
			{
				net.SetInput(blob);
				net.Forward(outputBlobs, outputBlobNames);

				// Order matches outputBlobNames: scores first, geometry second.
				Mat scores = outputBlobs[0];
				Mat geometry = outputBlobs[1];

				// Decode predicted bounding boxes (positions of the text boxes along with their orientation).
				this.Decode(scores, geometry, ConfThreshold, out var boxes, out var confidences);

				// Apply non-maximum suppression to filter out overlapping boxes and get the final predictions.
				CvDnn.NMSBoxes(boxes, confidences, ConfThreshold, NmsThreshold, out var indices);

				// Render detections. Boxes are in network-input coordinates, so scale them back to the image size.
				Point2f ratio = new Point2f((float)img.Cols / InputWidth, (float)img.Rows / InputHeight);
				foreach (int index in indices)
				{
					Point2f[] vertices = boxes[index].Points();

					for (int j = 0; j < 4; ++j)
					{
						vertices[j].X *= ratio.X;
						vertices[j].Y *= ratio.Y;
					}

					// Connect each vertex with the next one; the modulo closes the quadrilateral.
					for (int j = 0; j < 4; ++j)
					{
						Point2f from = vertices[j];
						Point2f to = vertices[(j + 1) % 4];
						Cv2.Line(img, (int)from.X, (int)from.Y, (int)to.X, (int)to.Y, new Scalar(0, 255, 0), 3);
					}
				}

				// Optional - Save detections
				img.SaveImage(Path.Combine(Path.GetDirectoryName(fileName), $"{Path.GetFileNameWithoutExtension(fileName)}_east.jpg"));

				// return GetText(img, ...)
				return string.Empty;
			}
			finally
			{
				// The output Mats wrap native memory; release them even when inference or rendering throws.
				foreach (Mat outputBlob in outputBlobs)
				{
					outputBlob.Dispose();
				}
			}
		}
	}

	/// <summary>
	/// Decodes the raw detector outputs into rotated text boxes and their confidence scores.
	/// </summary>
	/// <param name="scores">Score map; must be 4-dimensional with sizes 1 x 1 x H x W.</param>
	/// <param name="geometry">Geometry map; must be 4-dimensional with sizes 1 x 5 x H x W (four distances plus an angle per cell).</param>
	/// <param name="confThreshold">Minimum score a cell must reach to produce a box.</param>
	/// <param name="boxes">Receives the decoded boxes; empty when the inputs are null or have an unexpected shape.</param>
	/// <param name="confidences">Receives the score of each decoded box, in the same order as <paramref name="boxes"/>.</param>
	private unsafe void Decode(Mat scores, Mat geometry, float confThreshold, out IList<RotatedRect> boxes, out IList<float> confidences)
	{
		boxes = new List<RotatedRect>();
		confidences = new List<float>();

		// Silently return empty results when the blobs do not have the expected layout.
		if ((scores == null || scores.Dims() != 4 || scores.Size(0) != 1 || scores.Size(1) != 1) ||
			(geometry == null || geometry.Dims() != 4 || geometry.Size(0) != 1 || geometry.Size(1) != 5) ||
			(scores.Size(2) != geometry.Size(2) || scores.Size(3) != geometry.Size(3)))
		{
			return;
		}

		int height = scores.Size(2);
		int width = scores.Size(3);

		// NOTE(review): the row pointers are reinterpreted as float data; this assumes both Mats hold
		// 32-bit floats - confirm against the network output type.
		for (int y = 0; y < height; ++y)
		{
			// Each row is indexed by x in [0, width), so the spans must be `width` elements long.
			// Sizing them by `height` throws IndexOutOfRangeException for maps wider than they are tall.
			var scoresData = new ReadOnlySpan<float>((void*)scores.Ptr(0, 0, y), width);
			var x0Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 0, y), width);
			var x1Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 1, y), width);
			var x2Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 2, y), width);
			var x3Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 3, y), width);
			var anglesData = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 4, y), width);

			for (int x = 0; x < width; ++x)
			{
				var score = scoresData[x];
				if (score < confThreshold)
				{
					continue;
				}

				// Map the cell back to input coordinates (factor 4 - presumably the network's output stride).
				float offsetX = x * 4.0f;
				float offsetY = y * 4.0f;
				float angle = anglesData[x];
				float cosA = (float)Math.Cos(angle);
				float sinA = (float)Math.Sin(angle);
				float x0 = x0Data[x];
				float x1 = x1Data[x];
				float x2 = x2Data[x];
				float x3 = x3Data[x];

				// Box extents are the sums of the opposing distances.
				float h = x0 + x2;
				float w = x1 + x3;

				// The box centre is the midpoint of p1 and p3, both derived from the rotated offset point.
				Point2f offset = new Point2f(offsetX + (cosA * x1) + (sinA * x2), offsetY - (sinA * x1) + (cosA * x2));
				Point2f p1 = new Point2f((-sinA * h) + offset.X, (-cosA * h) + offset.Y);
				Point2f p3 = new Point2f((-cosA * w) + offset.X, (sinA * w) + offset.Y);
				Point2f center = new Point2f(0.5f * (p1.X + p3.X), 0.5f * (p1.Y + p3.Y));

				// The angle is negated and converted from radians to degrees for RotatedRect.
				RotatedRect r = new RotatedRect(center, new Size2f(w, h), (float)(-angle * 180.0f / Math.PI));
				boxes.Add(r);
				confidences.Add(score);
			}
		}
	}