Instantly share code, notes, and snippets.

Embed
What would you like to do?
C# BigDictionary Class Over 2GB
public class BigDictionary<TKey, TValue> : IEnumerable<KeyValuePair<TKey, TValue>>
{
private const ulong MinimumSize = 524288;
private readonly IEqualityComparer<TKey> _comparer;
private BigArray<Entry> _entries;
private BigArray<ulong> _hashBuckets;
private BigHashSet<TKey> _keys;
private ulong _nextFreePosition;
private BigHashSet<TValue> _values;
private bool ResetKeysValues;
public BigDictionary() : this(EqualityComparer<TKey>.Default, MinimumSize)
{
}
public BigDictionary(IEqualityComparer<TKey> comparer) : this(comparer, MinimumSize)
{
}
public BigDictionary(IEqualityComparer<TKey> comparer, ulong size)
{
if (comparer == null)
comparer = EqualityComparer<TKey>.Default;
_comparer = comparer;
Initialize(size);
ResetKeysValues = false;
}
public BigDictionary(ulong size) : this(EqualityComparer<TKey>.Default, size)
{
}
public BigDictionary(IEnumerable<KeyValuePair<TKey, TValue>> collection) : this(collection, EqualityComparer<TKey>.Default)
{
}
public BigDictionary(IEnumerable<KeyValuePair<TKey, TValue>> collection, IEqualityComparer<TKey> comparer)
{
if (comparer == null)
comparer = EqualityComparer<TKey>.Default;
_comparer = comparer;
if (collection == null) throw new Exception("Error: Collection in constructor is null.");
if (collection is ICollection<KeyValuePair<TKey, TValue>> coll)
foreach (var item in coll)
InsertInt(item.Key, item.Value, (ulong)coll.Count);
ResetKeysValues = false;
}
public ulong GrowthDelta { get; set; } = 2;
public bool IsDirty => Count != _hashBuckets.Length;
public int LastFindItemComparisons { get; private set; }
public ulong Count { get; private set; }
public int Resizes { get; private set; }
public BigHashSet<TKey> Keys
{
get
{
if (_keys == null || _keys.Count != Count || ResetKeysValues)
{
_keys = new BigHashSet<TKey>(Count);
foreach (var v in _entries)
_keys.Add(v.key);
}
return _keys;
}
}
public BigHashSet<TValue> Values
{
get
{
if (_values == null || _values.Count != Count || ResetKeysValues)
{
_values = new BigHashSet<TValue>(Count);
foreach (var v in _entries)
_values.Add(v.value);
}
return _values;
}
}
public TValue this[TKey key]
{
get
{
var pos = FindEntry(key);
return pos == ulong.MaxValue ? default(TValue) : _entries[pos].value;
}
set
{
Insert(key, value, true);
ResetKeysValues = true;
}
}
IEnumerator<KeyValuePair<TKey, TValue>> IEnumerable<KeyValuePair<TKey, TValue>>.GetEnumerator()
{
return new Enumerator(this, Enumerator.KeyValuePair);
}
IEnumerator IEnumerable.GetEnumerator()
{
return new Enumerator(this, Enumerator.KeyValuePair);
}
public Enumerator GetEnumerator()
{
return new Enumerator(this, 2);
}
public bool Add(TKey key, TValue value)
{
return Insert(key, value, false);
}
public bool AddRange(IEnumerable<KeyValuePair<TKey, TValue>> collection)
{
if (collection == null)
return false;
var coll = collection as ICollection<KeyValuePair<TKey, TValue>>;
if (Count == 0)
{
if (coll != null)
foreach (var item in coll)
InsertInt(item.Key, item.Value, (ulong)coll.Count);
return true;
}
if (coll != null)
{
Initialize((ulong)coll.Count);
foreach (var item in coll)
Insert(item.Key, item.Value, false);
}
return true;
}
public bool ContainsKey(TKey key)
{
return FindEntry(key) != ulong.MaxValue;
}
public bool ContainsValue(TValue value)
{
return Values.Contains(value);
}
public bool TryGetValue(TKey key, out TValue value)
{
var pos = FindEntry(key);
if (pos == ulong.MaxValue)
{
value = default(TValue);
return false;
}
value = _entries[pos].value;
return true;
}
public void Clear()
{
if (Count == 0) return;
Initialize(Count);
ResetKeysValues = true;
}
public void Release()
{
if (Count == 0) return;
Clear();
Compact();
}
public void Compact()
{
if (Count == 0) return;
var tset = new BigDictionary<TKey, TValue>();
for (ulong i = 0; i < Count; i++)
if (_entries[i].hashCode != ulong.MaxValue)
tset.Add(_entries[i].key, _entries[i].value);
Count = tset.Count;
_entries = tset._entries;
_hashBuckets = tset._hashBuckets;
}
private void InsertInt(TKey key, TValue value, ulong size)
{
if (key == null)
throw new Exception("Error: key value is null in function 'Insert'.");
if (value == null)
throw new Exception("Error: value value is null in function 'Insert'.");
if (size > _entries.Length)
Initialize(size);
var hashCode = (uint)InternalGetHashCode(key);
var hashPos = hashCode % (uint)_hashBuckets.Length;
var entryLocation = _hashBuckets[hashPos];
_hashBuckets[hashPos] = Count;
var ent = new Entry();
ent.next = entryLocation;
ent.key = key;
ent.value = value;
ent.hashCode = hashCode;
_entries[Count] = ent;
Count++;
}
private bool Insert(TKey key, TValue value, bool update)
{
if (key == null)
throw new Exception("Error: key value is null in function 'Insert'.");
if (value == null)
throw new Exception("Error: value value is null in function 'Insert'.");
if (Count >= _entries.Length)
Resize();
var hashCode = (uint)InternalGetHashCode(key);
var hashPos = hashCode % (uint)_hashBuckets.Length;
var entryLocation = _hashBuckets[hashPos];
if (update)
for (var position = entryLocation; position != ulong.MaxValue; position = _entries[position].next)
{
if (_entries[position].hashCode != hashCode || !_comparer.Equals(_entries[position].key, key))
continue;
var newe = new Entry();
Entry.SetNewValue(_entries[position], ref newe, value);
_entries[position] = newe;
return entryLocation != ulong.MaxValue;
}
_hashBuckets[hashPos] = Count;
var ent = new Entry();
ent.next = entryLocation;
ent.key = key;
ent.value = value;
ent.hashCode = hashCode;
_entries[Count] = ent;
Count++;
return entryLocation != ulong.MaxValue;
}
private int InternalGetHashCode(TKey key)
{
if (key == null)
return 0;
return _comparer.GetHashCode(key) & int.MaxValue;
}
private void Resize()
{
Resizes++;
var newsize = _hashBuckets.Length * GrowthDelta;
var newhashes = new BigArray<ulong>(newsize);
var newentries = new BigArray<Entry>(newsize);
for (ulong i = 0; i < _entries.Length; i++)
newentries[i] = _entries[i];
for (ulong i = 0; i < _hashBuckets.Length; i++)
newhashes[i] = _hashBuckets[i];
for (var i = _hashBuckets.Length; i < newhashes.Length; i++)
newhashes[i] = ulong.MaxValue;
_hashBuckets = newhashes;
_entries = newentries;
}
public bool Remove(TKey key)
{
if (key == null)
throw new Exception("Error: key value is null in function Remove.");
if (_hashBuckets != null)
{
var hashcode = (uint)InternalGetHashCode(key);
var hashpos = hashcode % (uint)_hashBuckets.Length;
var entryLocation = ulong.MaxValue;
for (var position = _hashBuckets[hashpos]; position != ulong.MaxValue; position = _entries[position].next)
{
if (_entries[position].hashCode == hashcode && _comparer.Equals(_entries[position].key, key))
{
if (entryLocation == ulong.MaxValue)
{
_hashBuckets[hashpos] = _entries[position].next;
}
else
{
var newe = new Entry();
Entry.SetNewNext(_entries[entryLocation], ref newe, _entries[position].next);
_entries[entryLocation] = newe;
}
var ent = new Entry();
ent.hashCode = 0;
ent.next = _nextFreePosition;
ent.key = default(TKey);
ent.value = default(TValue);
_entries[position] = ent;
_nextFreePosition = position;
ResetKeysValues = true;
return true;
}
entryLocation = position;
}
}
return false;
}
public ulong FindEntry(TKey key)
{
LastFindItemComparisons = 0;
var hashValue = (uint)InternalGetHashCode(key);
var HashBucketPosition = hashValue % _hashBuckets.Length;
var BucketentryLocation = _hashBuckets[HashBucketPosition];
return BucketentryLocation == ulong.MaxValue ? ulong.MaxValue : GetNextBucketPosition(key, BucketentryLocation);
}
private ulong GetNextBucketPosition(TKey key, ulong bucketentryLocation)
{
if (bucketentryLocation > _entries.Count)
return ulong.MaxValue;
var nextpos = bucketentryLocation;
do
{
var entry = _entries[nextpos];
LastFindItemComparisons++;
if (key.Equals(entry.key))
return nextpos;
nextpos = entry.next;
} while (nextpos != ulong.MaxValue);
return ulong.MaxValue;
}
private void Initialize(ulong size)
{
_hashBuckets = new BigArray<ulong>(size);
_entries = new BigArray<Entry>(size);
Count = 0;
for (ulong i = 0; i < _hashBuckets.Length; i++)
_hashBuckets[i] = ulong.MaxValue;
}
public void CopyTo(KeyValuePair<TKey, TValue>[] array)
{
CopyTo(array, 0, Count);
}
public void CopyTo(KeyValuePair<TKey, TValue>[] array, ulong arrayIndex, ulong count)
{
if (count > int.MaxValue)
throw new Exception("Error: CopyTo count cannot exceed int.MaxValue.");
ulong Copied = 0;
for (ulong i = 0; i < Count && Copied < count; i++)
if (_entries[i].hashCode != ulong.MaxValue)
array[arrayIndex + Copied++] = new KeyValuePair<TKey, TValue>(_entries[i].key, _entries[i].value);
}
public void CopyKeysTo(TKey[] array)
{
CopyKeysTo(array, 0, Count);
}
public void CopyKeysTo(TKey[] array, ulong arrayIndex, ulong count)
{
if (count > int.MaxValue)
throw new Exception("Error: CopyKeysTo count cannot exceed int.MaxValue.");
ulong Copied = 0;
for (ulong i = 0; i < Count && Copied < count; i++)
if (_entries[i].hashCode > 0)
array[arrayIndex + Copied++] = _entries[i].key;
}
public void CopyValuesTo(TValue[] array)
{
CopyValuesTo(array, 0, Count);
}
public void CopyValuesTo(TValue[] array, ulong arrayIndex, ulong count)
{
if (count > int.MaxValue)
throw new Exception("Error: CopyValuesTo count cannot exceed int.MaxValue.");
ulong Copied = 0;
for (ulong i = 0; i < Count && Copied < count; i++)
if (_entries[i].hashCode > 0)
array[arrayIndex + Copied++] = _entries[i].value;
}
public KeyValuePair<TKey, TValue>[] ToArray()
{
var newArray = new KeyValuePair<TKey, TValue>[Count];
CopyTo(newArray);
return newArray;
}
public BigArray<TValue> ToBigArray()
{
var ba = new BigArray<TValue>(Count);
for (ulong i = 0, p = 0; i < Count; i++)
if (_entries[i].hashCode != ulong.MaxValue)
ba[p++] = _entries[i].value;
return ba;
}
public BigHashSet<TValue> ToBigHashSet()
{
var bhs = new BigHashSet<TValue>();
bhs.AddRange(Values);
return bhs;
}
[StructLayout(LayoutKind.Sequential, Pack = 1)]
private struct Entry
{
public ulong hashCode;
public ulong next;
public TKey key;
public TValue value;
public static void SetNewValue(Entry olde, ref Entry newe, TValue newvalue)
{
newe.hashCode = olde.hashCode;
newe.next = olde.next;
newe.key = olde.key;
newe.value = newvalue;
}
public static void SetNewNext(Entry olde, ref Entry newe, ulong newnext)
{
newe.hashCode = olde.hashCode;
newe.next = newnext;
newe.key = olde.key;
newe.value = olde.value;
}
}
[Serializable]
public struct Enumerator : IEnumerator<KeyValuePair<TKey, TValue>>, IDictionaryEnumerator
{
private readonly BigDictionary<TKey, TValue> _dictionary;
private ulong _index;
private readonly int _getEnumeratorRetType;
internal const int DictEntry = 1;
internal const int KeyValuePair = 2;
internal Enumerator(BigDictionary<TKey, TValue> _dictionary, int _getEnumeratorRetType)
{
this._dictionary = _dictionary;
_index = 0;
this._getEnumeratorRetType = _getEnumeratorRetType;
Current = new KeyValuePair<TKey, TValue>();
}
public bool MoveNext()
{
if (_index < _dictionary.Count)
{
Current = new KeyValuePair<TKey, TValue>(_dictionary._entries[_index].key, _dictionary._entries[_index].value);
_index++;
return true;
}
_index = _dictionary.Count + 1;
Current = new KeyValuePair<TKey, TValue>();
return false;
}
public KeyValuePair<TKey, TValue> Current { get; private set; }
public void Dispose()
{
}
object IEnumerator.Current
{
get
{
if (_index == 0 || _index == _dictionary.Count + 1)
throw new Exception("Invalid Index.");
if (_getEnumeratorRetType == DictEntry)
return new DictionaryEntry(Current.Key, Current.Value);
return new KeyValuePair<TKey, TValue>(Current.Key, Current.Value);
}
}
void IEnumerator.Reset()
{
_index = 0;
Current = new KeyValuePair<TKey, TValue>();
}
DictionaryEntry IDictionaryEnumerator.Entry
{
get
{
if (_index == 0 || _index == _dictionary.Count + 1)
throw new Exception("Invalid Index.");
return new DictionaryEntry(Current.Key, Current.Value);
}
}
object IDictionaryEnumerator.Key
{
get
{
if (_index == 0 || _index == _dictionary.Count + 1)
throw new Exception("Invalid Index.");
return Current.Key;
}
}
object IDictionaryEnumerator.Value
{
get
{
if (_index == 0 || _index == _dictionary.Count + 1)
throw new Exception("Invalid Index.");
return Current.Value;
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment