Instantly share code, notes, and snippets.

Embed
What would you like to do?
C# BigHashSet Class HashSet Over 2GB
public class BigHashSet<T> : IEnumerable<T>
{
private const ulong MinimumSize = 524288;
private readonly IEqualityComparer<T> m_comparer;
private BigArray<ulong> _hashBuckets;
private ulong _hashCode;
private BigArray<Slot> _slots;
public BigHashSet() : this(EqualityComparer<T>.Default)
{
}
public BigHashSet(ulong size) : this(EqualityComparer<T>.Default, size)
{
}
public BigHashSet(IEqualityComparer<T> comparer)
{
if (comparer == null)
comparer = EqualityComparer<T>.Default;
m_comparer = comparer;
Initialize(MinimumSize);
}
public BigHashSet(IEqualityComparer<T> comparer, ulong size)
{
if (comparer == null)
comparer = EqualityComparer<T>.Default;
m_comparer = comparer;
Initialize(size);
}
public BigHashSet(IEnumerable<T> collection)
{
if (collection == null)
{
Initialize(MinimumSize);
}
else
{
if (!(collection is ICollection<T> coll))
throw new Exception("Error: The constructor was called using a null collection.");
InternalInsert(coll);
}
}
public ulong Count { get; private set; }
public ulong Length => _hashBuckets.Length;
public ulong DuplicateAddAttempts { get; private set; }
public ulong Resizes { get; private set; }
public ulong LastFindItemComparisons { get; private set; }
public float GrowthDelta { get; set; } = 2.0f;
public bool IsDirty => Count != _hashBuckets.Length;
public T this[ulong index]
{
get
{
if (index > Count)
throw new Exception($"Getter: Index out of bounds {index} must be less than {Count}");
return _slots[index].value;
}
}
IEnumerator<T> IEnumerable<T>.GetEnumerator()
{
return new Enumerator(this);
}
IEnumerator IEnumerable.GetEnumerator()
{
return new Enumerator(this);
}
public Enumerator GetEnumerator()
{
return new Enumerator(this);
}
public bool Add(T item)
{
return Insert(item);
}
public bool AddRange(IEnumerable<T> collection)
{
if (!(collection is ICollection<T> coll))
return false;
if (Count > 0)
foreach (var item in coll)
Add(item);
else InternalInsert(coll);
return true;
}
public bool Contains(T item)
{
return FindItem(item) != ulong.MaxValue;
}
public bool ContainsAllElements(IEnumerable<T> elemcol)
{
return elemcol.All(Contains);
}
public void Clear()
{
if (Count == 0) return;
Initialize();
}
public void Release()
{
if (Count == 0) return;
Clear();
Compact();
}
public void Compact()
{
if (Count == 0) return;
var tset = new BigHashSet<T>(Count);
for (ulong i = 0; i < Count; i++)
if (_slots[i].hashCode != ulong.MaxValue)
tset.Add(_slots[i].value);
_slots = tset._slots;
_hashBuckets = tset._hashBuckets;
}
private bool Insert(T item)
{
if (FindItem(item) == ulong.MaxValue)
{
try
{
if (Count >= _slots.Length)
Resize();
}
catch (Exception e)
{
throw new Exception($"Error resizing hash bucket list {e.Message}");
}
var hashPos = _hashCode % _hashBuckets.Length;
var nextPos = _hashBuckets[hashPos];
_hashBuckets[hashPos] = Count;
var news = new Slot();
news.next = nextPos;
news.value = item;
news.hashCode = _hashCode;
_slots[Count] = news;
Count++;
return true;
}
DuplicateAddAttempts++;
return false;
}
private void InternalInsert(ICollection<T> coll)
{
Initialize((ulong) coll.Count);
foreach (var item in coll)
{
var hashCode = (uint) InternalGetHashCode(item);
var hashPos = hashCode % _hashBuckets.Length;
var nextPos = _hashBuckets[hashPos];
_hashBuckets[hashPos] = Count;
var news = new Slot
{
next = nextPos,
value = item,
hashCode = hashCode
};
_slots[Count] = news;
Count++;
}
}
private void Resize(ulong newsize = 0)
{
Resizes++;
if (newsize == 0)
newsize = (ulong) (Count * GrowthDelta);
var newhashebuckets = new BigArray<ulong>(newsize);
var newslots = new BigArray<Slot>(newsize);
for (ulong i = 0; i < _slots.Length; i++)
newslots[i] = _slots[i];
for (ulong i = 0; i < _hashBuckets.Length; i++)
newhashebuckets[i] = _hashBuckets[i];
for (var i = _hashBuckets.Length; i < newhashebuckets.Length; i++)
newhashebuckets[i] = ulong.MaxValue;
_hashBuckets = newhashebuckets;
_slots = newslots;
}
public ulong FindItem(T item)
{
_hashCode = (uint) InternalGetHashCode(item);
LastFindItemComparisons = 0;
for (var position = _hashBuckets[_hashCode % _hashBuckets.Length]; position != ulong.MaxValue; position = _slots[position].next)
{
LastFindItemComparisons++;
if (_slots[position].hashCode == _hashCode && m_comparer.Equals(_slots[position].value,item))
return position;
}
return ulong.MaxValue;
}
private int InternalGetHashCode(T item)
{
if (item == null)
return 0;
return m_comparer.GetHashCode(item) & int.MaxValue;
}
private void Initialize(ulong size = 0)
{
if (size == 0)
size = MinimumSize;
_hashBuckets = new BigArray<ulong>(size);
_slots = new BigArray<Slot>(size);
Count = 0;
for (ulong i = 0; i < _hashBuckets.Length; i++)
_hashBuckets[i] = ulong.MaxValue;
}
public void CopyTo(T[] array)
{
CopyTo(array, 0, Count);
}
public void CopyTo(T[] array, ulong arrayIndex, ulong count)
{
if (count > int.MaxValue)
throw new Exception("Error: CopyTo count cannot exceed int.MaxValue.");
ulong Copied = 0;
for (ulong i = 0; i < Count && Copied < count; i++)
if (_slots[i].hashCode != ulong.MaxValue)
array[arrayIndex + Copied++] = _slots[i].value;
}
public T[] ToArray()
{
var newArray = new T[Count];
CopyTo(newArray);
return newArray;
}
public BigArray<T> ToBigArray()
{
var ba = new BigArray<T>(Count);
for (ulong i = 0, p = 0; i < Count; i++)
if (_slots[i].hashCode != ulong.MaxValue)
ba[p++] = _slots[i].value;
return ba;
}
private struct Slot
{
public ulong hashCode;
public ulong next;
public T value;
}
[Serializable]
public struct Enumerator : IEnumerator<T>
{
private readonly BigHashSet<T> set;
private ulong index;
public T Current { get; private set; }
object IEnumerator.Current
{
get
{
if (index == 0 || index == set.Count + 1)
throw new InvalidOperationException($"Enumerator out of range: {index}");
return Current;
}
}
internal Enumerator(BigHashSet<T> set)
{
this.set = set;
index = 0;
Current = default(T);
}
public void Dispose()
{
}
public bool MoveNext()
{
if (index < set.Count)
{
Current = set._slots[index].value;
index++;
return true;
}
index = set.Count + 1;
Current = default(T);
return false;
}
void IEnumerator.Reset()
{
index = 0;
Current = default(T);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment