Skip to content

Instantly share code, notes, and snippets.

@rivantsov
Forked from yallie/chardict2.cs
Last active December 14, 2015 03:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save rivantsov/5022903 to your computer and use it in GitHub Desktop.
Save rivantsov/5022903 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
/* Results
Dictionary. Time elapsed: 00:00:12.0486110, single iteration: 0.00012048611 ms.
CharDictionary. Time elapsed: 00:00:00.5085145, single iteration: 5.085145E-06 ms
CompactCharDictionary. Time elapsed: 00:00:02.6166454, single iteration: 2.6166454E-05 ms
Env: 2.6GHz, Windows 7, .NET 4.5, Release mode, running executable directly
*/
public static class CharDictTests {
public static void RunTest() {
// source data
var keywords = new[] { "%", "&", "!", "*", "$"};//, "for", "foreach", "return", "начало", "конец", "—" };
var stringLookup = keywords.ToLookup(s => s[0]);
// create dictionaries
var plainDictionary = stringLookup.ToDictionary(g => g.Key, g => g.OrderByDescending(s => s.Length).ToArray());
var compactCharDictionary = new CompactCharDictionary<string[]>(plainDictionary);
var charDictionary = new CharDictionary<string[]>(plainDictionary);
// warm up
Benchmark(charDictionary, 1, false);
Benchmark(compactCharDictionary, 1, false);
// run benchmarks
const int iterations = 100 * 1000 * 1000;
Benchmark(plainDictionary, iterations);
Benchmark(charDictionary, iterations);
Benchmark(compactCharDictionary, iterations);
}
static void Benchmark(Dictionary<char, string[]> chars, int iterations) {
string[] tmp;
var sw = Stopwatch.StartNew();
for (var i = 0; i < iterations; i++) {
chars.TryGetValue('<', out tmp); chars.TryGetValue('>', out tmp); chars.TryGetValue('/', out tmp);
chars.TryGetValue('*', out tmp); chars.TryGetValue('f', out tmp); chars.TryGetValue('r', out tmp);
chars.TryGetValue('н', out tmp); chars.TryGetValue('к', out tmp); chars.TryGetValue('—', out tmp);
chars.TryGetValue('X', out tmp);
}
sw.Stop();
Console.WriteLine("Dictionary. Time elapsed: {0}, single iteration: {1} ms.", sw.Elapsed, sw.Elapsed.TotalMilliseconds / iterations);
}
static void Benchmark(CharDictionary<string[]> chars, int iterations, bool report = true) {
string[] tmp;
var sw = Stopwatch.StartNew();
for (var i = 0; i < iterations; i++) {
tmp = chars['<']; tmp = chars['>']; tmp = chars['/'];
tmp = chars['*']; tmp = chars['f']; tmp = chars['r'];
tmp = chars['н']; tmp = chars['к']; tmp = chars['_'];
tmp = chars['X'];
}
sw.Stop();
if (report)
Console.WriteLine("CharDictionary. Time elapsed: {0}, single iteration: {1} ms", sw.Elapsed, sw.Elapsed.TotalMilliseconds / iterations);
}
static void Benchmark(CompactCharDictionary<string[]> chars, int iterations, bool report = true) {
string[] tmp;
var sw = Stopwatch.StartNew();
for (var i = 0; i < iterations; i++) {
tmp = chars['<']; tmp = chars['>']; tmp = chars['/'];
tmp = chars['*']; tmp = chars['f']; tmp = chars['r'];
tmp = chars['н']; tmp = chars['к']; tmp = chars['_'];
tmp = chars['X'];
}
sw.Stop();
if (report)
Console.WriteLine("CompactCharDictionary. Time elapsed: {0}, single iteration: {1} ms", sw.Elapsed, sw.Elapsed.TotalMilliseconds / iterations);
}
}
public class Tuplex<T1, T2> {
public T1 Item1;
public T2 Item2;
public Tuplex(T1 item1, T2 item2) {
Item1 = item1;
Item2 = item2;
}
}
public class CompactCharDictionary<T> {
private struct CharLookupData {
public Tuplex<char, T>[] Data;
}
public CompactCharDictionary() {
Table = new CharLookupData[256];
for (int i = 0; i < Table.Length; i++)
Table[i] = new CharLookupData();
}
public CompactCharDictionary(IEnumerable<KeyValuePair<char, T>> source)
: this() {
foreach (var pair in source) {
this[pair.Key] = pair.Value;
}
}
// slim table to look up single-byte chars
private CharLookupData[] Table;
public T this[char c] {
get {
var index = c & 0xFF;
var lookupArray = Table[index];
var lookupData = lookupArray.Data;
if (lookupData != null) {
/*if (lookupData.Length == 1) {
var d = lookupData[0];
if (d.Item1 == c)
return d.Item2;
}*/
for (var i = 0; i < lookupData.Length; i++) {
var tuple = lookupData[i];
if (tuple.Item1 == c) {
return tuple.Item2;
}
if (tuple.Item1 > c) {
break;
}
}
}
return default(T);
}
set {
var index = c & 0xFF;
var lookupArray = Table[index];
var lookupData = lookupArray.Data ?? new Tuplex<char, T>[0];
var tuple = new Tuplex<char, T>(c, value);
for (var i = 0; i < lookupData.Length; i++) {
if (lookupData[i].Item1 == c) {
lookupData[i] = tuple;
return;
}
}
Table[index].Data = lookupData.Concat(new[] { tuple }).OrderBy(t => t.Item1).ToArray();
}
}
}
public class CharDictionary<T> {
public CharDictionary() {
AsciiTable = new T[256];
UnicodeTable = new T[256][];
}
public CharDictionary(IEnumerable<KeyValuePair<char, T>> source)
: this() {
foreach (var pair in source) {
this[pair.Key] = pair.Value;
}
}
// slim table to look up single-byte chars
private T[] AsciiTable;
// sparse jagged table to look up two-byte chars
private T[][] UnicodeTable;
public T this[char c] {
get {
var low = c & 0xFF;
var high = c >> 8;
if (high == 0)
return AsciiTable[low];
var tmp = UnicodeTable[high - 1];
if (tmp == null)
return default(T);
return tmp[low];
//return high == 0 ? AsciiTable[low] : UnicodeTable[high - 1][low];
}
set {
var low = c & 0xFF;
var high = c >> 8;
if (high == 0) {
AsciiTable[low] = value;
return;
}
var table = UnicodeTable[high - 1];
if (table == null) {
table = new T[255];
UnicodeTable[high - 1] = table;
}
table[low] = value;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment