Skip to content

Instantly share code, notes, and snippets.

@duncansmart
Created October 11, 2016 14:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save duncansmart/d2c77ac8eaeda5cd929492b196c44f89 to your computer and use it in GitHub Desktop.
Save duncansmart/d2c77ac8eaeda5cd929492b196c44f89 to your computer and use it in GitHub Desktop.
Streaming XML parser
using System;
using System.Linq;
using System.Collections.Generic;
using System.IO;
using System.Xml;
using System.Diagnostics;
/// <summary>
/// Forward-only XML walker that reports each element (name, ancestor names,
/// attributes and leading text) as it is read, without building a DOM.
/// </summary>
class StreamingXmlParser
{
    /// <summary>
    /// Snapshot of one XML element produced by <see cref="StreamElements"/>.
    /// </summary>
    public class ElementInfo
    {
        /// <summary>Qualified element name as reported by the reader.</summary>
        public string Name { get; set; }

        /// <summary>
        /// Names of the enclosing elements, innermost first (the document root is last).
        /// Empty for the root element.
        /// </summary>
        public List<string> ParentNames { get; set; }

        /// <summary>Attribute values keyed by attribute name.</summary>
        public Dictionary<string, string> Attributes { get; set; }

        /// <summary>
        /// First text node found directly inside the element before any child element,
        /// or <c>null</c> when there is none.
        /// </summary>
        public string Text { get; set; }
    }

    /// <summary>
    /// Lazily reads <paramref name="file"/> and yields one <see cref="ElementInfo"/>
    /// per element, in document order.
    /// </summary>
    /// <param name="file">Stream positioned at the start of an XML document.</param>
    /// <returns>
    /// A deferred sequence; the stream is read while the sequence is enumerated, so it
    /// must stay open until enumeration finishes.
    /// </returns>
    public static IEnumerable<ElementInfo> StreamElements(Stream file)
    {
        using (var rdr = XmlReader.Create(file))
        {
            // Names of the elements on the path from the root to the most recently
            // read element (inclusive). Trimmed to the reader depth on every new element.
            var openElements = new Stack<string>();

            // Element that has been read but not yet yielded; it is held back so that
            // a directly following text node can still be attached to it.
            ElementInfo pending = null;
            int pendingDepth = 0;

            while (rdr.Read())
            {
                if (rdr.NodeType == XmlNodeType.Element)
                {
                    // Capture name and depth first: moving onto attributes changes both.
                    var elementName = rdr.Name;
                    var elementDepth = rdr.Depth;

                    var attributes = new Dictionary<string, string>();
                    while (rdr.MoveToNextAttribute())
                    {
                        attributes[rdr.Name] = rdr.Value;
                    }

                    if (pending != null)
                    {
                        yield return pending;
                    }

                    // An element at depth d has exactly d ancestors. Dropping everything
                    // above that handles siblings, empty elements and any number of
                    // closed levels (the previous code popped at most one level).
                    while (openElements.Count > elementDepth)
                    {
                        openElements.Pop();
                    }

                    pending = new ElementInfo
                    {
                        Name = elementName,
                        // Stack enumerates top-first, i.e. innermost ancestor first.
                        ParentNames = openElements.ToList(),
                        Attributes = attributes,
                        Text = null,
                    };
                    pendingDepth = elementDepth;
                    openElements.Push(elementName);
                }
                else if (rdr.NodeType == XmlNodeType.Text && pending != null)
                {
                    // Only text that is a direct child of the pending element belongs
                    // to it; text at a shallower depth is trailing content of an
                    // ancestor and must not be attributed to this element.
                    if (rdr.Depth == pendingDepth + 1)
                    {
                        pending.Text = rdr.Value;
                        yield return pending;
                        pending = null;
                    }
                }
            }

            // The final element has no following node to trigger its emission.
            if (pending != null)
            {
                yield return pending;
            }
        }
    }

    /// <summary>
    /// Manual smoke test: dumps every element of C:\Temp\test.xml to the debug output
    /// as "parent / path", then the element name, optional trimmed text and attributes.
    /// </summary>
    static void streamElements_TEST()
    {
        using (var file = System.IO.File.OpenRead(@"C:\Temp\test.xml"))
        {
            var elements = StreamElements(file);
            var stuff = from e in elements
                        //where e.ParentNames.Count == 2
                        select e;

            foreach (var item in stuff)
            {
                // ParentNames is innermost-first; reverse it to print root-first.
                Debug.WriteLine(string.Join(" / ", ((IEnumerable<string>)item.ParentNames).Reverse()));
                Debug.Write(" " + item.Name);
                if (item.Text != null)
                {
                    Debug.Write(" = '" + item.Text.Trim() + "'");
                }
                Debug.WriteLine("");

                foreach (var attr in item.Attributes)
                {
                    Debug.WriteLine($" * {attr}");
                }
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment