Skip to content

Instantly share code, notes, and snippets.

@mikeminutillo
Last active May 11, 2021 05:21
Show Gist options
  • Save mikeminutillo/b767695599789ccc49551a82e0e99eaf to your computer and use it in GitHub Desktop.
Save mikeminutillo/b767695599789ccc49551a82e0e99eaf to your computer and use it in GitHub Desktop.
Using a very basic algorithm to extract ids from serialized messages. Running this on my machine reports 6.662 ms per message (117,870 bytes) for JSON messages and 11.328 ms per message (165,034 bytes) for XML.
<Query Kind="Program">
<NuGetReference>Newtonsoft.Json</NuGetReference>
<NuGetReference>System.Text.Json</NuGetReference>
<Namespace>Newtonsoft.Json</Namespace>
<Namespace>System.Text.Json</Namespace>
<Namespace>System.Xml.Serialization</Namespace>
</Query>
// Entry point of the query: builds one sample message with 1000 line items,
// configures a property finder with the JSON and XML extractors and the
// "id$" name rule, then runs each timing test for 1000 iterations.
void Main()
{
    var rng = new Random();

    var message = new MyMessage
    {
        CustomerId = Guid.NewGuid(),
        OrderId = Guid.NewGuid(),
        LineItems = Enumerable.Range(1, 1000)
            .Select(_ => new OrderLineItem
            {
                ProductId = Guid.NewGuid(),
                Qty = rng.Next(1, 100),
                // Integer number of cents divided by 100m gives a decimal cost.
                UnitCost = rng.Next(50, 10000) / 100m
            })
            .ToArray()
    };

    var propertyFinder = new IndexablePropertyFinder();
    propertyFinder.AddExtractor(new JsonIndexablePropertyExtractor());
    propertyFinder.AddExtractor(new XmlIndexablePropertyExtractor());
    propertyFinder.AddPropertyIndexRule("id$");

    TestJson(message, propertyFinder, 1000);
    TestXml(message, propertyFinder, 1000);
}
// Serializes the message to indented JSON (UTF-8 bytes), runs property
// extraction over those bytes `iterations` times, and dumps the payload size,
// total elapsed milliseconds and the average milliseconds per message.
void TestJson(MyMessage message, IndexablePropertyFinder propertyFinder, int iterations)
{
    var json = JsonConvert.SerializeObject(message, Newtonsoft.Json.Formatting.Indented);
    var payload = Encoding.UTF8.GetBytes(json);

    var timer = new Stopwatch();
    timer.Start();
    for (var pass = 0; pass < iterations; pass++)
    {
        // The result is discarded on purpose; only the elapsed time matters here.
        propertyFinder.FindProperties(payload, "application/json");
    }
    timer.Stop();

    var elapsedMs = timer.ElapsedMilliseconds;
    var report = new
    {
        TotalBytes = payload.Length,
        TotalMilliseconds = elapsedMs,
        MillisecondsPerMessage = elapsedMs / (decimal)iterations
    };
    report.Dump("Json");
}
// Serializes the message to XML (UTF-8 bytes) via the Serialize() extension,
// runs property extraction over those bytes `iterations` times, and dumps the
// payload size, total elapsed milliseconds and the average per message.
void TestXml(MyMessage message, IndexablePropertyFinder propertyFinder, int iterations)
{
    // message.Serialize() creates its own XmlSerializer, so no serializer is
    // constructed here (the original built one and never used it).
    var serialized = message.Serialize();
    var xmlBytes = Encoding.UTF8.GetBytes(serialized);

    var stopwatch = Stopwatch.StartNew();
    for (var i = 0; i < iterations; i++)
    {
        // The result is discarded on purpose; only the elapsed time matters here.
        propertyFinder.FindProperties(xmlBytes, "text/xml");
    }
    stopwatch.Stop();

    new
    {
        TotalBytes = xmlBytes.Length,
        TotalMilliseconds = stopwatch.ElapsedMilliseconds,
        MillisecondsPerMessage = stopwatch.ElapsedMilliseconds / (decimal)iterations
    }.Dump("Xml");
}
/// <summary>
/// Helper extension methods used by the benchmark.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Serializes <paramref name="obj"/> to an indented XML string using an
    /// <see cref="XmlSerializer"/> created for the static type <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">The type the serializer is created for.</typeparam>
    /// <param name="obj">The object to serialize.</param>
    /// <returns>The XML document as a string.</returns>
    public static string Serialize<T>(this T obj)
    {
        var serializer = new XmlSerializer(typeof(T));
        var settings = new XmlWriterSettings
        {
            Indent = true,
            NewLineOnAttributes = true,
            Encoding = Encoding.UTF8
        };

        using (var stringWriter = new Utf8StringWriter())
        {
            // Dispose the XmlWriter before reading the string: the XmlWriter is
            // the component that buffers output, so it (not the StringWriter)
            // must be flushed for the result to be complete. The original only
            // flushed the StringWriter, which has nothing to flush.
            using (var xmlWriter = XmlWriter.Create(stringWriter, settings))
            {
                serializer.Serialize(xmlWriter, obj);
            }

            return stringWriter.ToString();
        }
    }

    /// <summary>
    /// A <see cref="StringWriter"/> that reports UTF-8 as its encoding, so an
    /// XML writer created over it declares utf-8 rather than the
    /// StringWriter default.
    /// </summary>
    class Utf8StringWriter : StringWriter
    {
        public override Encoding Encoding => Encoding.UTF8;
    }
}
#region Data Model
/// <summary>
/// Sample message type used as the benchmark payload; it is serialized to
/// JSON and XML before property extraction is timed.
/// </summary>
// Kept as a public class with public get/set properties so that
// XmlSerializer can serialize it.
public class MyMessage
{
public Guid CustomerId { get; set; }
public Guid OrderId { get; set; }
// Array of line items carried by the message.
public OrderLineItem[] LineItems { get; set; }
}
/// <summary>
/// One element of <c>MyMessage.LineItems</c> in the sample payload.
/// </summary>
// Kept as a public class with public get/set properties so that
// XmlSerializer can serialize it.
public class OrderLineItem
{
public Guid ProductId { get; set; }
public int Qty { get; set; }
public decimal UnitCost { get; set; }
}
#endregion
/// <summary>
/// Immutable name/value pair describing one property extracted from a
/// serialized message.
/// </summary>
class IndexableProperty
{
    private readonly string name;
    private readonly string value;

    /// <summary>Creates a pair from the given property name and value.</summary>
    /// <param name="propertyName">Name of the extracted property.</param>
    /// <param name="propertyValue">Value of the extracted property, as text.</param>
    public IndexableProperty(string propertyName, string propertyValue)
    {
        name = propertyName;
        value = propertyValue;
    }

    /// <summary>Name of the extracted property.</summary>
    public string PropertyName => name;

    /// <summary>Value of the extracted property, as text.</summary>
    public string PropertyValue => value;
}
/// <summary>
/// Extracts indexable properties from a serialized message of one content type.
/// </summary>
interface IIndexablePropertyExtractor
{
/// <summary>
/// The content type this extractor handles; IndexablePropertyFinder uses it
/// as the lookup key when an extractor is registered.
/// </summary>
string ContentType { get; }
/// <summary>
/// Scans the serialized message and returns the properties selected by the predicate.
/// </summary>
/// <param name="bytes">The serialized message.</param>
/// <param name="shouldIndex">Predicate that receives a property name and returns true when that property should be extracted.</param>
/// <returns>The extracted name/value pairs.</returns>
IEnumerable<IndexableProperty> ExtractProperties(byte[] bytes, Func<string, bool> shouldIndex);
}
/// <summary>
/// Routes a serialized message to the extractor registered for its content
/// type and tells that extractor which property names to pick up, based on
/// the registered rules.
/// </summary>
class IndexablePropertyFinder
{
    // Content types are protocol identifiers, not linguistic text, so they are
    // compared ordinally (case-insensitively) rather than with culture rules.
    private readonly IDictionary<string, IIndexablePropertyExtractor> extractors =
        new Dictionary<string, IIndexablePropertyExtractor>(StringComparer.OrdinalIgnoreCase);

    private readonly IList<Func<string, bool>> rules = new List<Func<string, bool>>();

    /// <summary>Registers an extractor under its <c>ContentType</c>.</summary>
    /// <param name="extractor">The extractor to register.</param>
    /// <exception cref="ArgumentNullException"><paramref name="extractor"/> is null.</exception>
    /// <exception cref="ArgumentException">An extractor is already registered for the same content type.</exception>
    public void AddExtractor(IIndexablePropertyExtractor extractor)
    {
        if (extractor == null)
        {
            throw new ArgumentNullException(nameof(extractor));
        }

        extractors.Add(extractor.ContentType, extractor);
    }

    /// <summary>Adds a predicate rule; a property is indexed when any rule returns true for its name.</summary>
    /// <param name="rule">Predicate over the property name.</param>
    /// <exception cref="ArgumentNullException"><paramref name="rule"/> is null.</exception>
    public void AddPropertyIndexRule(Func<string, bool> rule)
    {
        // Reject null here; otherwise the failure would surface much later as
        // a NullReferenceException in the middle of an extraction.
        if (rule == null)
        {
            throw new ArgumentNullException(nameof(rule));
        }

        rules.Add(rule);
    }

    /// <summary>Adds a rule that matches property names against a case-insensitive regular expression.</summary>
    /// <param name="rule">The regular expression pattern.</param>
    public void AddPropertyIndexRule(string rule)
    {
        var regex = new Regex(rule, RegexOptions.IgnoreCase | RegexOptions.Compiled);
        rules.Add(regex.IsMatch);
    }

    /// <summary>
    /// Extracts the indexable properties from a serialized message.
    /// </summary>
    /// <param name="bytes">The serialized message.</param>
    /// <param name="contentType">Content type of the message; selects the extractor.</param>
    /// <returns>
    /// The properties found by the matching extractor, or an empty sequence
    /// when no extractor is registered for <paramref name="contentType"/>
    /// (including when it is null).
    /// </returns>
    public IEnumerable<IndexableProperty> FindProperties(byte[] bytes, string contentType)
    {
        // A missing content type is treated like any other unknown one
        // instead of letting the dictionary lookup throw.
        if (contentType == null)
        {
            return Enumerable.Empty<IndexableProperty>();
        }

        return extractors.TryGetValue(contentType, out var extractor)
            ? extractor.ExtractProperties(bytes, MatchesAnyRule)
            : Enumerable.Empty<IndexableProperty>();
    }

    // True when at least one registered rule accepts the property name.
    // Rules are tried in registration order and evaluation stops at the first match.
    private bool MatchesAnyRule(string property)
    {
        foreach (var rule in rules)
        {
            if (rule(property))
            {
                return true;
            }
        }

        return false;
    }
}
/// <summary>
/// Extracts indexable properties from a UTF-8 JSON document by streaming over
/// its tokens with <see cref="Utf8JsonReader"/>.
/// </summary>
class JsonIndexablePropertyExtractor : IIndexablePropertyExtractor
{
    /// <summary>The content type handled by this extractor.</summary>
    public string ContentType { get; } = "application/json";

    /// <summary>
    /// Returns every property whose name satisfies <paramref name="shouldIndex"/>
    /// and whose value is a JSON number or string. Matching properties with
    /// any other kind of value are not reported.
    /// </summary>
    /// <param name="bytes">The UTF-8 encoded JSON document.</param>
    /// <param name="shouldIndex">Predicate over the property name.</param>
    /// <returns>The extracted name/value pairs, in document order.</returns>
    public IEnumerable<IndexableProperty> ExtractProperties(byte[] bytes, Func<string, bool> shouldIndex)
    {
        var found = new List<IndexableProperty>();
        var reader = new Utf8JsonReader(bytes);

        while (reader.Read())
        {
            if (reader.TokenType != JsonTokenType.PropertyName)
            {
                continue;
            }

            var property = reader.GetString();

            // Advance to the value token only for properties we care about.
            if (!shouldIndex(property) || !reader.Read())
            {
                continue;
            }

            switch (reader.TokenType)
            {
                case JsonTokenType.Number:
                    found.Add(new IndexableProperty(property, FormatNumber(ref reader)));
                    break;
                case JsonTokenType.String:
                    found.Add(new IndexableProperty(property, reader.GetString()));
                    break;
            }
        }

        return found;
    }

    // Renders the current Number token as text. The invariant culture keeps
    // the result independent of the machine's regional settings (the original
    // used the current culture). Numbers that do not fit in a decimal fall
    // back to their raw JSON text instead of throwing FormatException.
    private static string FormatNumber(ref Utf8JsonReader reader)
    {
        if (reader.TryGetDecimal(out var number))
        {
            return number.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }

        // The reader was created over a single contiguous byte[], so the
        // token bytes are available through ValueSpan.
        return Encoding.UTF8.GetString(reader.ValueSpan.ToArray());
    }
}
/// <summary>
/// Extracts indexable properties from an XML document by streaming over it
/// with <see cref="XmlReader"/>.
/// </summary>
class XmlIndexablePropertyExtractor : IIndexablePropertyExtractor
{
    /// <summary>The content type handled by this extractor.</summary>
    public string ContentType { get; } = "text/xml";

    /// <summary>
    /// For every element whose name satisfies <paramref name="shouldIndex"/>,
    /// records each text node found inside that element's subtree under the
    /// element's name.
    /// </summary>
    /// <param name="bytes">The serialized XML document.</param>
    /// <param name="shouldIndex">Predicate over the element name.</param>
    /// <returns>The extracted name/value pairs, in document order.</returns>
    public IEnumerable<IndexableProperty> ExtractProperties(byte[] bytes, Func<string, bool> shouldIndex)
    {
        var matches = new List<IndexableProperty>();

        using (var input = new MemoryStream(bytes))
        using (var xml = XmlReader.Create(input))
        {
            while (xml.Read())
            {
                // Skip anything that is not an element with a name we index.
                if (xml.NodeType != XmlNodeType.Element || !shouldIndex(xml.Name))
                {
                    continue;
                }

                var elementName = xml.Name;
                using (var subtree = xml.ReadSubtree())
                {
                    CollectText(subtree, elementName, matches);
                }
            }
        }

        return matches;
    }

    // Reads the subtree to its end, adding one entry per text node under the given name.
    private static void CollectText(XmlReader subtree, string elementName, List<IndexableProperty> matches)
    {
        while (subtree.Read())
        {
            if (subtree.NodeType != XmlNodeType.Text)
            {
                continue;
            }

            matches.Add(new IndexableProperty(elementName, subtree.Value));
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment