Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Remove translation units whose source segment is duplicated from a TMX file while streaming it
using System;
using System.Collections.Generic;
using System.Linq;
using System.Xml;
using System.Xml.Linq;
namespace DuplicatesInTMX
{
/// <summary>
/// Console tool that copies a TMX file to <c>output.tmx</c>, keeping only the first
/// translation unit (<c>tu</c>) seen for each distinct source segment.
/// The input is read forward-only through an <see cref="XmlReader"/> and the output is
/// produced through <see cref="XStreamingElement"/>, so translation units are not all
/// held as one in-memory tree.
/// </summary>
class Program
{
    /// <summary>
    /// Moves <paramref name="reader"/> to the document content and then reads forward until
    /// an element named <c>header</c> is found, loading that element.
    /// </summary>
    /// <param name="reader">Reader over the TMX document.</param>
    /// <returns>
    /// The loaded <c>header</c> element, or <c>null</c> when the reader reaches the end of
    /// the input without encountering one.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    static XElement ReadHeader(XmlReader reader)
    {
        if (reader == null)
            throw new ArgumentNullException("reader");

        reader.MoveToContent();
        while (reader.Read())
        {
            if (reader.NodeType == XmlNodeType.Element && reader.Name == "header")
            {
                // ReadFrom consumes the whole element and leaves the reader positioned
                // on the node that follows its end tag.
                return (XElement)XNode.ReadFrom(reader);
            }
        }
        return null;
    }

    /// <summary>
    /// Lazily yields the <c>tu</c> elements found from the reader's current position
    /// onwards, skipping every unit whose source segment has already been yielded.
    /// </summary>
    /// <remarks>
    /// The source of a unit is the <c>seg</c> child of its first <c>tuv</c> child, compared
    /// by its serialized markup. Units without a <c>tuv</c>, or whose first <c>tuv</c> has
    /// no <c>seg</c>, are dropped.
    /// </remarks>
    /// <param name="reader">Reader over the TMX document.</param>
    /// <returns>A deferred sequence of translation units with unique source segments.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    static IEnumerable<XElement> NoDuplicatedSource(XmlReader reader)
    {
        // Validate here rather than in the iterator so the exception is raised at the
        // call site and not on the first MoveNext().
        if (reader == null)
            throw new ArgumentNullException("reader");

        return EnumerateUniqueSources(reader);
    }

    /// <summary>
    /// Iterator behind <see cref="NoDuplicatedSource"/>; walks the reader and yields each
    /// translation unit whose source segment has not been seen before.
    /// </summary>
    static IEnumerable<XElement> EnumerateUniqueSources(XmlReader reader)
    {
        var seenSources = new HashSet<string>();

        // A reader that has not been started has no current node yet.
        if (reader.ReadState == ReadState.Initial)
            reader.Read();

        while (reader.ReadState == ReadState.Interactive)
        {
            if (reader.NodeType != XmlNodeType.Element || reader.Name != "tu")
            {
                reader.Read();
                continue;
            }

            // ReadFrom already advances the reader past </tu>. Calling Read() again
            // here would step over the next node, which is the following <tu> when
            // units are written without whitespace between them.
            var tu = (XElement)XNode.ReadFrom(reader);

            var tuv = tu.Element("tuv");
            if (tuv == null)
                continue;

            var source = tuv.Element("seg");
            if (source == null)
                continue;

            // Add returns false when the key is already present.
            if (seenSources.Add(source.ToString()))
                yield return tu;
        }
    }

    /// <summary>
    /// Entry point. Reads the TMX file named by the first argument and writes the
    /// de-duplicated result to <c>output.tmx</c>.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the input TMX path.</param>
    /// <exception cref="System.IO.InvalidDataException">The input has no <c>header</c> element.</exception>
    static void Main(string[] args)
    {
        if (!args.Any())
        {
            Console.WriteLine("You need to specify a path to TMX file!");
            return;
        }

        var settings = new XmlReaderSettings
        {
            DtdProcessing = DtdProcessing.Ignore
        };

        using (XmlReader reader = XmlReader.Create(args[0], settings))
        {
            var header = ReadHeader(reader);
            if (header == null)
                throw new System.IO.InvalidDataException("There's no header in the file!");

            // The body content is a deferred sequence: the remaining input is only read
            // while Save() writes the output, so this must stay inside the using block.
            var body = new XStreamingElement("body", NoDuplicatedSource(reader));
            var root = new XStreamingElement("tmx",
                new XAttribute("version", "1.4"),
                header,
                body);
            root.Save("output.tmx");
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment