Created
June 24, 2021 08:34
-
-
Save pczajkowski/f69f0329ae3af2c30cf2d67c782513a6 to your computer and use it in GitHub Desktop.
Remove translation units with a duplicated source segment from a TMX file while streaming it
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml;
using System.Xml.Linq;
namespace DuplicatesInTMX
{
    /// <summary>
    /// Console tool that copies a TMX file to a new file while dropping every
    /// translation unit (tu) whose source segment has already been seen.
    /// The input is read with a forward-only XmlReader and the output is written
    /// through XStreamingElement, so the translation units are never all held in
    /// memory as a single tree.
    /// </summary>
    class Program
    {
        /// <summary>Output file used when no second command-line argument is given.</summary>
        const string DefaultOutputPath = "output.tmx";

        /// <summary>
        /// Advances the reader to the first element named "header" and materializes it.
        /// </summary>
        /// <param name="reader">Reader over the TMX document; it is moved forward by this call.</param>
        /// <returns>
        /// The header element, or null when the reader reaches the end of the document
        /// without finding one (in which case the whole document has been consumed).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        static XElement ReadHeader(XmlReader reader)
        {
            if (reader == null)
                throw new ArgumentNullException("reader");

            reader.MoveToContent();
            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element && reader.Name == "header")
                {
                    // The current node is an element, so ReadFrom always yields an XElement.
                    return (XElement)XNode.ReadFrom(reader);
                }
            }

            return null;
        }

        /// <summary>
        /// Lazily yields the "tu" elements found from the reader's current position onward,
        /// keeping only the first unit for each distinct source segment.
        /// </summary>
        /// <remarks>
        /// The source of a unit is the "seg" child of its first "tuv" child. Two sources are
        /// considered equal when their serialized XML (XElement.ToString()) is identical, so
        /// inline markup inside the segment takes part in the comparison. Units that have no
        /// "tuv" child, or whose first "tuv" has no "seg" child, are dropped.
        /// </remarks>
        /// <param name="reader">Reader over the TMX document; it is advanced as the result is enumerated.</param>
        /// <returns>A deferred sequence of translation units with unique source segments.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        static IEnumerable<XElement> NoDuplicatedSource(XmlReader reader)
        {
            // Validate here rather than inside the iterator so that a null argument fails
            // at the call site instead of on the first MoveNext().
            if (reader == null)
                throw new ArgumentNullException("reader");

            return EnumerateUniqueUnits(reader);
        }

        /// <summary>Iterator behind <see cref="NoDuplicatedSource"/>; expects a non-null reader.</summary>
        private static IEnumerable<XElement> EnumerateUniqueUnits(XmlReader reader)
        {
            // Holds the serialized form of every distinct source seen so far.
            var seenSources = new HashSet<string>();

            while (!reader.EOF)
            {
                if (reader.NodeType != XmlNodeType.Element || reader.Name != "tu")
                {
                    if (!reader.Read())
                        break;
                    continue;
                }

                // ReadFrom leaves the reader on the node that follows the closing tag.
                // That node must be examined on the next pass, not skipped with Read(),
                // otherwise a unit that directly follows another one would be lost.
                var tu = (XElement)XNode.ReadFrom(reader);

                var tuv = tu.Element("tuv");
                if (tuv == null)
                    continue;

                var source = tuv.Element("seg");
                if (source == null)
                    continue;

                // Add returns false when the text is already present.
                if (seenSources.Add(source.ToString()))
                    yield return tu;
            }
        }

        /// <summary>
        /// Entry point. Reads the TMX file named by the first argument and writes the
        /// de-duplicated document to the path given as the second argument, or to
        /// "output.tmx" in the current directory when no second argument is supplied.
        /// </summary>
        /// <param name="args">Command-line arguments: input path, then an optional output path.</param>
        /// <exception cref="InvalidDataException">The input contains no "header" element.</exception>
        static void Main(string[] args)
        {
            if (!args.Any())
            {
                Console.WriteLine("You need to specify a path to TMX file!");
                return;
            }

            string outputPath = args.Length > 1 ? args[1] : DefaultOutputPath;

            var settings = new XmlReaderSettings
            {
                DtdProcessing = DtdProcessing.Ignore
            };

            using (XmlReader reader = XmlReader.Create(args[0], settings))
            {
                var header = ReadHeader(reader);
                if (header == null)
                    throw new InvalidDataException("There's no header in the file!");

                // XStreamingElement defers enumeration until Save, so the units are pulled
                // from the reader one at a time while the output is being written. The
                // reader therefore has to stay open until Save has returned.
                var body = new XStreamingElement("body", NoDuplicatedSource(reader));
                var root = new XStreamingElement("tmx",
                    new XAttribute("version", "1.4"),
                    header,
                    body);

                root.Save(outputPath);
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment