Skip to content

Instantly share code, notes, and snippets.

@GenchoBG
Created April 15, 2019 07:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GenchoBG/75b8296a32348c81ee388f8507f337c9 to your computer and use it in GitHub Desktop.
Save GenchoBG/75b8296a32348c81ee388f8507f337c9 to your computer and use it in GitHub Desktop.
Process all of the XML annotation files into the desired format and generate the labels.txt file needed to train our model
using System;
using System.IO;
using System.Text.RegularExpressions;
namespace XMLProcessing
{
    /// <summary>
    /// Rewrites every annotation file found in <c>./ground_truth_xml</c> in place:
    /// numeric size/box values are divided by ten, label and item names are
    /// normalized, and unwanted object elements are removed.
    /// </summary>
    class Program
    {
        // Every matched numeric value is integer-divided by this factor.
        private const int ScaleDivisor = 10;

        // Size and bounding-box elements whose numeric content is scaled.
        // [0-9] is used instead of \d because \d also matches non-ASCII digits,
        // which int.Parse would reject with a FormatException.
        private static readonly Regex NumberRegex = new Regex(
            @"<(width|height|xmin|ymin|xmax|ymax)>([0-9]+)<\/\1>");

        // label_N, label_N_w and label_N_m all collapse to the single name "label".
        private static readonly Regex LabelNameRegex = new Regex(
            @"<name>label_\d+(?:_[wm])?<\/name>");

        // Item names of the form N_w are reduced to N.
        private static readonly Regex ItemNameRegex = new Regex(
            @"<name>(\d+)_w<\/name>");

        // One object element, from its opening tag to the nearest closing tag.
        private static readonly Regex ObjectRegex = new Regex(
            @"<object>[\s\S]*?<\/object>");

        // Item names of the form N_m mark objects that must be dropped.
        private static readonly Regex MissingItemNameRegex = new Regex(
            @"<name>\d+_m<\/name>");

        private static readonly Regex NameRegex = new Regex(
            @"<name>(.*?)<\/name>");

        private static readonly Regex DifficultRegex = new Regex(
            @"<difficult>(1|0)<\/difficult>");

        /// <summary>
        /// Reads each file in <c>./ground_truth_xml</c>, transforms its text and
        /// writes the result back to the same path.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            foreach (var path in Directory.GetFiles("./ground_truth_xml"))
            {
                var text = File.ReadAllText(path);
                File.WriteAllText(path, ProcessAnnotation(text));
            }
        }

        /// <summary>
        /// Applies all transformations to the text of one annotation file.
        /// </summary>
        /// <param name="text">Original file content.</param>
        /// <returns>The transformed content.</returns>
        private static string ProcessAnnotation(string text)
        {
            // Names are normalized before filtering so that the filter sees
            // "label" for every label variant, as the name check relies on it.
            text = ScaleNumbers(text);
            text = NormalizeNames(text);
            return RemoveUnwantedObjects(text);
        }

        /// <summary>
        /// Divides every width/height/xmin/ymin/xmax/ymax value by
        /// <see cref="ScaleDivisor"/> using integer division.
        /// </summary>
        /// <remarks>
        /// Each occurrence is rewritten exactly once, at its own position.
        /// A global string replacement per match would rescale values that an
        /// earlier replacement had already produced (1000 -> 100 -> 10).
        /// </remarks>
        private static string ScaleNumbers(string text)
        {
            var culture = System.Globalization.CultureInfo.InvariantCulture;

            return NumberRegex.Replace(text, match =>
            {
                var tag = match.Groups[1].Value;
                var scaled = int.Parse(match.Groups[2].Value, culture) / ScaleDivisor;
                return "<" + tag + ">" + scaled.ToString(culture) + "</" + tag + ">";
            });
        }

        /// <summary>
        /// Collapses label names to "label" and strips the "_w" suffix from
        /// numeric item names.
        /// </summary>
        private static string NormalizeNames(string text)
        {
            text = LabelNameRegex.Replace(text, "<name>label</name>");
            return ItemNameRegex.Replace(text, "<name>$1</name>");
        }

        /// <summary>
        /// Removes every object element for which
        /// <see cref="ShouldRemoveObject"/> returns true; the surrounding
        /// whitespace is left untouched.
        /// </summary>
        private static string RemoveUnwantedObjects(string text)
        {
            // The decision is made per object element so that a match can never
            // start in one object and end in a later one.
            return ObjectRegex.Replace(
                text,
                match => ShouldRemoveObject(match.Value) ? string.Empty : match.Value);
        }

        /// <summary>
        /// Decides whether a single object element must be dropped.
        /// </summary>
        /// <param name="objectXml">Text of one object element.</param>
        /// <returns>
        /// True when the object contains an N_m item name, or when the
        /// difficult flag following its first name is 1 and that name does not
        /// contain "label".
        /// </returns>
        private static bool ShouldRemoveObject(string objectXml)
        {
            if (MissingItemNameRegex.IsMatch(objectXml))
            {
                return true;
            }

            var name = NameRegex.Match(objectXml);
            if (!name.Success)
            {
                return false;
            }

            // Only a difficult flag that appears after the name is considered.
            var difficult = DifficultRegex.Match(objectXml, name.Index + name.Length);
            if (!difficult.Success || difficult.Groups[1].Value != "1")
            {
                return false;
            }

            return !name.Groups[1].Value.Contains("label");
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment