Created
April 15, 2019 07:30
-
-
Save GenchoBG/75b8296a32348c81ee388f8507f337c9 to your computer and use it in GitHub Desktop.
Process all of the XML annotation files into the desired format and generate the labels.txt file needed to train our model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Globalization;
using System.IO;
using System.Text.RegularExpressions;
namespace XMLProcessing
{
    /// <summary>
    /// Rewrites every file in <c>./ground_truth_xml</c> in place:
    /// scales the size and bounding-box numbers down by a factor of ten,
    /// collapses all <c>label_N</c>, <c>label_N_w</c> and <c>label_N_m</c>
    /// names to <c>label</c>, strips the <c>_w</c> suffix from numeric item
    /// names, and removes objects that are either flagged difficult (unless
    /// they are labels) or named <c>N_m</c>.
    /// </summary>
    class Program
    {
        /// <summary>Directory whose files are processed and overwritten.</summary>
        private const string AnnotationDirectory = "./ground_truth_xml";

        /// <summary>Every matched numeric value is integer-divided by this.</summary>
        private const int ScaleDivisor = 10;

        // One combined pattern for all scaled tags; the back-reference makes
        // sure the closing tag matches the opening one.
        private static readonly Regex NumberRegex = new Regex(
            @"<(width|height|xmin|ymin|xmax|ymax)>(\d+)</\1>",
            RegexOptions.Compiled);

        // label_N, label_N_w and label_N_m all become plain "label".
        private static readonly Regex LabelNameRegex = new Regex(
            @"<name>label_\d+(?:_[wm])?</name>",
            RegexOptions.Compiled);

        // N_w becomes N.
        private static readonly Regex ItemNameRegex = new Regex(
            @"<name>(\d+)_w</name>",
            RegexOptions.Compiled);

        // A single <object> element. The lazy quantifier stops at the first
        // closing tag, so a match never spans more than one object.
        private static readonly Regex ObjectRegex = new Regex(
            @"<object>[\s\S]*?</object>",
            RegexOptions.Compiled);

        // Name of the form N_m anywhere inside an object.
        private static readonly Regex MissingItemNameRegex = new Regex(
            @"<name>\d+_m</name>",
            RegexOptions.Compiled);

        // First <name> of an object followed by its <difficult> flag.
        private static readonly Regex DifficultRegex = new Regex(
            @"<name>(.*?)</name>[\s\S]*?<difficult>(1|0)</difficult>",
            RegexOptions.Compiled);

        /// <summary>
        /// Processes every file found directly in <c>./ground_truth_xml</c>
        /// and writes the result back to the same path.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <exception cref="DirectoryNotFoundException">
        /// Thrown by <see cref="Directory.GetFiles(string)"/> when the
        /// annotation directory does not exist.
        /// </exception>
        public static void Main(string[] args)
        {
            foreach (var path in Directory.GetFiles(AnnotationDirectory))
            {
                var text = File.ReadAllText(path);
                File.WriteAllText(path, ProcessAnnotation(text));
            }
        }

        /// <summary>
        /// Applies all rewrites to the content of one annotation file.
        /// </summary>
        /// <param name="text">Original file content.</param>
        /// <returns>The rewritten content.</returns>
        private static string ProcessAnnotation(string text)
        {
            // Each step is a single Regex.Replace pass, so every occurrence is
            // rewritten exactly once. Repeated global string replacement could
            // rescale a value that had already been scaled.
            text = NumberRegex.Replace(text, ScaleNumber);
            text = LabelNameRegex.Replace(text, "<name>label</name>");
            text = ItemNameRegex.Replace(text, "<name>$1</name>");

            // Objects are examined one at a time so that dropping one object
            // can never take a neighbouring object with it.
            text = ObjectRegex.Replace(
                text,
                match => ShouldDropObject(match.Value) ? string.Empty : match.Value);

            return text;
        }

        /// <summary>Rebuilds a numeric element with its value divided by <see cref="ScaleDivisor"/>.</summary>
        private static string ScaleNumber(Match match)
        {
            var tag = match.Groups[1].Value;
            var scaled = int.Parse(match.Groups[2].Value, CultureInfo.InvariantCulture) / ScaleDivisor;

            return "<" + tag + ">" + scaled.ToString(CultureInfo.InvariantCulture) + "</" + tag + ">";
        }

        /// <summary>
        /// Decides whether a single <c>&lt;object&gt;</c> element is removed:
        /// either its name is <c>N_m</c>, or it is flagged difficult and its
        /// name does not contain "label".
        /// </summary>
        private static bool ShouldDropObject(string objectXml)
        {
            if (MissingItemNameRegex.IsMatch(objectXml))
            {
                return true;
            }

            var difficult = DifficultRegex.Match(objectXml);
            if (!difficult.Success)
            {
                // No name/difficult pair in this object: leave it untouched.
                return false;
            }

            var name = difficult.Groups[1].Value;
            var isDifficult = difficult.Groups[2].Value == "1";

            return isDifficult && !name.Contains("label");
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment