Last active
December 14, 2015 03:39
-
-
Save yallie/5022202 to your computer and use it in GitHub Desktop.
Faster replacement classes for Dictionary<char, T>.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.Linq; | |
struct Program | |
{ | |
static void Main() | |
{ | |
// source data | |
var keywords = new[] { "<<", ">>", "<", ">", "/*", "*/", "for", "foreach", "return", "начало", "конец", "—" }; | |
var stringLookup = keywords.ToLookup(s => s[0]); | |
// create dictionaries | |
var plainDictionary = stringLookup.ToDictionary(g => g.Key, g => g.OrderByDescending(s => s.Length).ToArray()); | |
var charDictionary = new CharDictionary<string[]>(plainDictionary); | |
var compactCharDictionary = new CompactCharDictionary<string[]>(plainDictionary); | |
// run benchmarks | |
const int iterations = 10000000; | |
Benchmark(plainDictionary, iterations); | |
Benchmark(charDictionary, iterations); | |
Benchmark(compactCharDictionary, iterations); | |
} | |
static void Benchmark(Dictionary<char, string[]> chars, int iterations) | |
{ | |
string[] tmp; | |
var sw = Stopwatch.StartNew(); | |
for (var i = 0; i < iterations; i++) | |
{ | |
tmp = chars['<']; tmp = chars['>']; tmp = chars['/']; | |
tmp = chars['*']; tmp = chars['f']; tmp = chars['r']; | |
tmp = chars['н']; tmp = chars['к']; tmp = chars['—']; | |
} | |
sw.Stop(); | |
Console.WriteLine("Dictionary. Time elapsed: {0}, single iteration: {1} ms", sw.Elapsed, sw.Elapsed.TotalMilliseconds / iterations); | |
} | |
static void Benchmark(CharDictionary<string[]> chars, int iterations) | |
{ | |
string[] tmp; | |
var sw = Stopwatch.StartNew(); | |
for (var i = 0; i < iterations; i++) | |
{ | |
tmp = chars['<']; tmp = chars['>']; tmp = chars['/']; | |
tmp = chars['*']; tmp = chars['f']; tmp = chars['r']; | |
tmp = chars['н']; tmp = chars['к']; tmp = chars['—']; | |
} | |
sw.Stop(); | |
Console.WriteLine("CharDictionary. Time elapsed: {0}, single iteration: {1} ms", sw.Elapsed, sw.Elapsed.TotalMilliseconds / iterations); | |
} | |
static void Benchmark(CompactCharDictionary<string[]> chars, int iterations) | |
{ | |
string[] tmp; | |
var sw = Stopwatch.StartNew(); | |
for (var i = 0; i < iterations; i++) | |
{ | |
tmp = chars['<']; tmp = chars['>']; tmp = chars['/']; | |
tmp = chars['*']; tmp = chars['f']; tmp = chars['r']; | |
tmp = chars['н']; tmp = chars['к']; tmp = chars['—']; | |
} | |
sw.Stop(); | |
Console.WriteLine("CompactCharDictionary. Time elapsed: {0}, single iteration: {1} ms", sw.Elapsed, sw.Elapsed.TotalMilliseconds / iterations); | |
} | |
} | |
public class CompactCharDictionary<T> | |
{ | |
private struct CharLookupData | |
{ | |
public Tuple<char, T>[] Data; | |
} | |
public CompactCharDictionary() | |
{ | |
Table = new CharLookupData[256]; | |
} | |
public CompactCharDictionary(IEnumerable<KeyValuePair<char, T>> source) : this() | |
{ | |
foreach (var pair in source) | |
{ | |
this[pair.Key] = pair.Value; | |
} | |
} | |
// slim table to look up single-byte chars | |
private CharLookupData[] Table; | |
public T this[char c] | |
{ | |
get | |
{ | |
var index = c & 0xFF; | |
var lookupArray = Table[index]; | |
var lookupData = lookupArray.Data; | |
if (lookupData != null) | |
{ | |
for (var i = 0; i < lookupData.Length; i++) | |
{ | |
var tuple = lookupData[i]; | |
if (tuple.Item1 == c) | |
{ | |
return tuple.Item2; | |
} | |
if (tuple.Item1 > c) | |
{ | |
break; | |
} | |
} | |
} | |
throw new IndexOutOfRangeException(); | |
} | |
set | |
{ | |
var index = c & 0xFF; | |
var lookupArray = Table[index]; | |
var lookupData = lookupArray.Data ?? new Tuple<char, T>[0]; | |
var tuple = Tuple.Create(c, value); | |
for (var i = 0; i < lookupData.Length; i++) | |
{ | |
if (lookupData[i].Item1 == c) | |
{ | |
lookupData[i] = tuple; | |
return; | |
} | |
} | |
Table[index].Data = lookupData.Concat(new[] { tuple }).OrderBy(t => t.Item1).ToArray(); | |
} | |
} | |
} | |
public class CharDictionary<T> | |
{ | |
public CharDictionary() | |
{ | |
AsciiTable = new T[256]; | |
UnicodeTable = new T[256][]; | |
} | |
public CharDictionary(IEnumerable<KeyValuePair<char, T>> source) : this() | |
{ | |
foreach (var pair in source) | |
{ | |
this[pair.Key] = pair.Value; | |
} | |
} | |
// slim table to look up single-byte chars | |
private T[] AsciiTable; | |
// sparse jagged table to look up two-byte chars | |
private T[][] UnicodeTable; | |
public T this[char c] | |
{ | |
get | |
{ | |
// warning: looking up an unknown char may cause IndexOutOfRangeException | |
var low = c & 0xFF; | |
var high = c >> 8; | |
return high == 0 ? AsciiTable[low] : UnicodeTable[high - 1][low]; | |
} | |
set | |
{ | |
var low = c & 0xFF; | |
var high = c >> 8; | |
if (high == 0) | |
{ | |
AsciiTable[low] = value; | |
return; | |
} | |
var table = UnicodeTable[high - 1]; | |
if (table == null) | |
{ | |
table = new T[255]; | |
UnicodeTable[high - 1] = table; | |
} | |
table[low] = value; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment