Created
May 30, 2019 14:57
-
-
Save geniuszxy/75eace5a2a4f188127b38429caa43b4e to your computer and use it in GitHub Desktop.
BetterHashSet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections; | |
using System.Diagnostics; | |
using System.Diagnostics.Contracts; | |
using System.Security.Permissions; | |
using System.Text; | |
using System.Diagnostics.CodeAnalysis; | |
using System.Security; | |
namespace System.Collections.Generic | |
{ | |
public class BetterHashSet<T> : ICollection<T>, ISet<T>, IReadOnlyCollection<T> | |
{ | |
// store lower 31 bits of hash code | |
private const int Lower31BitMask = 0x7FFFFFFF; | |
// cutoff point, above which we won't do stackallocs. This corresponds to 100 integers. | |
private const int StackAllocThreshold = 100; | |
// when constructing a hashset from an existing collection, it may contain duplicates, | |
// so this is used as the max acceptable excess ratio of capacity to count. Note that | |
// this is only used on the ctor and not to automatically shrink if the hashset has, e.g, | |
// a lot of adds followed by removes. Users must explicitly shrink by calling TrimExcess. | |
// This is set to 3 because capacity is acceptable as 2x rounded up to nearest prime. | |
private const int ShrinkThreshold = 3; | |
private int[] m_buckets; | |
private Slot[] m_slots; | |
private int m_count; | |
private int m_lastIndex; | |
private int m_freeList; | |
private int m_version; | |
#region Constructors | |
public BetterHashSet() | |
: this(EqualityComparer<T>.Default) { } | |
public BetterHashSet(int capacity) | |
: this(capacity, EqualityComparer<T>.Default) { } | |
public BetterHashSet(IEqualityComparer<T> comparer) | |
{ | |
if (comparer == null) | |
{ | |
comparer = EqualityComparer<T>.Default; | |
} | |
this.m_comparer = comparer; | |
m_lastIndex = 0; | |
m_count = 0; | |
m_freeList = -1; | |
m_version = 0; | |
} | |
public BetterHashSet(IEnumerable<T> collection) | |
: this(collection, EqualityComparer<T>.Default) { } | |
/// <summary> | |
/// Implementation Notes: | |
/// Since resizes are relatively expensive (require rehashing), this attempts to minimize | |
/// the need to resize by setting the initial capacity based on size of collection. | |
/// </summary> | |
/// <param name="collection"></param> | |
/// <param name="comparer"></param> | |
public BetterHashSet(IEnumerable<T> collection, IEqualityComparer<T> comparer) | |
: this(comparer) | |
{ | |
if (collection == null) | |
{ | |
throw new ArgumentNullException("collection"); | |
} | |
Contract.EndContractBlock(); | |
var otherAsHashSet = collection as BetterHashSet<T>; | |
if (otherAsHashSet != null && AreEqualityComparersEqual(this, otherAsHashSet)) | |
{ | |
CopyFrom(otherAsHashSet); | |
} | |
else | |
{ | |
// to avoid excess resizes, first set size based on collection's count. Collection | |
// may contain duplicates, so call TrimExcess if resulting hashset is larger than | |
// threshold | |
ICollection<T> coll = collection as ICollection<T>; | |
int suggestedCapacity = coll == null ? 0 : coll.Count; | |
Initialize(suggestedCapacity); | |
this.UnionWith(collection); | |
if (m_count > 0 && m_slots.Length / m_count > ShrinkThreshold) | |
{ | |
TrimExcess(); | |
} | |
} | |
} | |
// Initializes the HashSet from another HashSet with the same element type and | |
// equality comparer. | |
private void CopyFrom(BetterHashSet<T> source) | |
{ | |
int count = source.m_count; | |
if (count == 0) | |
{ | |
// As well as short-circuiting on the rest of the work done, | |
// this avoids errors from trying to access otherAsHashSet.m_buckets | |
// or otherAsHashSet.m_slots when they aren't initialized. | |
return; | |
} | |
int capacity = source.m_buckets.Length; | |
int threshold = HashHelpers.ExpandPrime(count + 1); | |
if (threshold >= capacity) | |
{ | |
m_buckets = (int[])source.m_buckets.Clone(); | |
m_slots = (Slot[])source.m_slots.Clone(); | |
m_lastIndex = source.m_lastIndex; | |
m_freeList = source.m_freeList; | |
} | |
else | |
{ | |
int lastIndex = source.m_lastIndex; | |
Slot[] slots = source.m_slots; | |
Initialize(count); | |
int index = 0; | |
for (int i = 0; i < lastIndex; ++i) | |
{ | |
int hashCode = slots[i].hashCode; | |
if (hashCode >= 0) | |
{ | |
AddValue(index, hashCode, slots[i].value); | |
++index; | |
} | |
} | |
Debug.Assert(index == count); | |
m_lastIndex = index; | |
} | |
m_count = count; | |
} | |
public BetterHashSet(int capacity, IEqualityComparer<T> comparer) | |
: this(comparer) | |
{ | |
if (capacity < 0) | |
{ | |
throw new ArgumentOutOfRangeException("capacity"); | |
} | |
Contract.EndContractBlock(); | |
if (capacity > 0) | |
{ | |
Initialize(capacity); | |
} | |
} | |
#endregion | |
#region ICollection<T> methods | |
/// <summary> | |
/// Add item to this hashset. This is the explicit implementation of the ICollection<T> | |
/// interface. The other Add method returns bool indicating whether item was added. | |
/// </summary> | |
/// <param name="item">item to add</param> | |
void ICollection<T>.Add(T item) | |
{ | |
AddIfNotPresent(item); | |
} | |
/// <summary> | |
/// Remove all items from this set. This clears the elements but not the underlying | |
/// buckets and slots array. Follow this call by TrimExcess to release these. | |
/// </summary> | |
public void Clear() | |
{ | |
if (m_lastIndex > 0) | |
{ | |
Debug.Assert(m_buckets != null, "m_buckets was null but m_lastIndex > 0"); | |
// clear the elements so that the gc can reclaim the references. | |
// clear only up to m_lastIndex for m_slots | |
Array.Clear(m_slots, 0, m_lastIndex); | |
Array.Clear(m_buckets, 0, m_buckets.Length); | |
m_lastIndex = 0; | |
m_count = 0; | |
m_freeList = -1; | |
} | |
m_version++; | |
} | |
/// <summary> | |
/// Checks if this hashset contains the item | |
/// </summary> | |
/// <param name="item">item to check for containment</param> | |
/// <returns>true if item contained; false if not</returns> | |
public bool Contains(T item) | |
{ | |
if (m_buckets != null) | |
{ | |
int hashCode = InternalGetHashCode(item); | |
// see note at "HashSet" level describing why "- 1" appears in for loop | |
for (int i = m_buckets[hashCode % m_buckets.Length] - 1; i >= 0; i = m_slots[i].next) | |
{ | |
if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, item)) | |
{ | |
return true; | |
} | |
} | |
} | |
// either m_buckets is null or wasn't found | |
return false; | |
} | |
/// <summary> | |
/// Copy items in this hashset to array, starting at arrayIndex | |
/// </summary> | |
/// <param name="array">array to add items to</param> | |
/// <param name="arrayIndex">index to start at</param> | |
public void CopyTo(T[] array, int arrayIndex) | |
{ | |
CopyTo(array, arrayIndex, m_count); | |
} | |
/// <summary> | |
/// Remove item from this hashset | |
/// </summary> | |
/// <param name="item">item to remove</param> | |
/// <returns>true if removed; false if not (i.e. if the item wasn't in the HashSet)</returns> | |
public bool Remove(T item) | |
{ | |
if (m_buckets != null) | |
{ | |
int hashCode = InternalGetHashCode(item); | |
int bucket = hashCode % m_buckets.Length; | |
int last = -1; | |
for (int i = m_buckets[bucket] - 1; i >= 0; last = i, i = m_slots[i].next) | |
{ | |
if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, item)) | |
{ | |
if (last < 0) | |
{ | |
// first iteration; update buckets | |
m_buckets[bucket] = m_slots[i].next + 1; | |
} | |
else | |
{ | |
// subsequent iterations; update 'next' pointers | |
m_slots[last].next = m_slots[i].next; | |
} | |
m_slots[i].hashCode = -1; | |
m_slots[i].value = default(T); | |
m_slots[i].next = m_freeList; | |
m_count--; | |
m_version++; | |
if (m_count == 0) | |
{ | |
m_lastIndex = 0; | |
m_freeList = -1; | |
} | |
else | |
{ | |
m_freeList = i; | |
} | |
return true; | |
} | |
} | |
} | |
// either m_buckets is null or wasn't found | |
return false; | |
} | |
/// <summary> | |
/// Number of elements in this hashset | |
/// </summary> | |
public int Count | |
{ | |
get { return m_count; } | |
} | |
/// <summary> | |
/// Whether this is readonly | |
/// </summary> | |
bool ICollection<T>.IsReadOnly | |
{ | |
get { return false; } | |
} | |
#endregion | |
#region IEnumerable methods | |
public Enumerator GetEnumerator() | |
{ | |
return new Enumerator(this); | |
} | |
IEnumerator<T> IEnumerable<T>.GetEnumerator() | |
{ | |
return new Enumerator(this); | |
} | |
IEnumerator IEnumerable.GetEnumerator() | |
{ | |
return new Enumerator(this); | |
} | |
#endregion | |
#region HashSet methods | |
/// <summary> | |
/// Add item to this HashSet. Returns bool indicating whether item was added (won't be | |
/// added if already present) | |
/// </summary> | |
/// <param name="item"></param> | |
/// <returns>true if added, false if already present</returns> | |
public bool Add(T item) | |
{ | |
return AddIfNotPresent(item); | |
} | |
/// <summary> | |
/// Searches the set for a given value and returns the equal value it finds, if any. | |
/// </summary> | |
/// <param name="equalValue">The value to search for.</param> | |
/// <param name="actualValue">The value from the set that the search found, or the default value of <typeparamref name="T"/> when the search yielded no match.</param> | |
/// <returns>A value indicating whether the search was successful.</returns> | |
/// <remarks> | |
/// This can be useful when you want to reuse a previously stored reference instead of | |
/// a newly constructed one (so that more sharing of references can occur) or to look up | |
/// a value that has more complete data than the value you currently have, although their | |
/// comparer functions indicate they are equal. | |
/// </remarks> | |
public bool TryGetValue(T equalValue, out T actualValue) | |
{ | |
if (m_buckets != null) | |
{ | |
int i = InternalIndexOf(equalValue); | |
if (i >= 0) | |
{ | |
actualValue = m_slots[i].value; | |
return true; | |
} | |
} | |
actualValue = default(T); | |
return false; | |
} | |
/// <summary> | |
/// Take the union of this HashSet with other. Modifies this set. | |
/// | |
/// Implementation note: GetSuggestedCapacity (to increase capacity in advance avoiding | |
/// multiple resizes ended up not being useful in practice; quickly gets to the | |
/// point where it's a wasteful check. | |
/// </summary> | |
/// <param name="other">enumerable with items to add</param> | |
public void UnionWith(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
foreach (T item in other) | |
{ | |
AddIfNotPresent(item); | |
} | |
} | |
/// <summary> | |
/// Takes the intersection of this set with other. Modifies this set. | |
/// | |
/// Implementation Notes: | |
/// We get better perf if other is a hashset using same equality comparer, because we | |
/// get constant contains check in other. Resulting cost is O(n1) to iterate over this. | |
/// | |
/// If we can't go above route, iterate over the other and mark intersection by checking | |
/// contains in this. Then loop over and delete any unmarked elements. Total cost is n2+n1. | |
/// | |
/// Attempts to return early based on counts alone, using the property that the | |
/// intersection of anything with the empty set is the empty set. | |
/// </summary> | |
/// <param name="other">enumerable with items to add </param> | |
public void IntersectWith(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
// intersection of anything with empty set is empty set, so return if count is 0 | |
if (m_count == 0) | |
{ | |
return; | |
} | |
// if other is empty, intersection is empty set; remove all elements and we're done | |
// can only figure this out if implements ICollection<T>. (IEnumerable<T> has no count) | |
ICollection<T> otherAsCollection = other as ICollection<T>; | |
if (otherAsCollection != null) | |
{ | |
if (otherAsCollection.Count == 0) | |
{ | |
Clear(); | |
return; | |
} | |
BetterHashSet<T> otherAsSet = other as BetterHashSet<T>; | |
// faster if other is a hashset using same equality comparer; so check | |
// that other is a hashset using the same equality comparer. | |
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) | |
{ | |
IntersectWithHashSetWithSameEC(otherAsSet); | |
return; | |
} | |
} | |
IntersectWithEnumerable(other); | |
} | |
/// <summary> | |
/// Remove items in other from this set. Modifies this set. | |
/// </summary> | |
/// <param name="other">enumerable with items to remove</param> | |
public void ExceptWith(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
// this is already the enpty set; return | |
if (m_count == 0) | |
{ | |
return; | |
} | |
// special case if other is this; a set minus itself is the empty set | |
if (other == this) | |
{ | |
Clear(); | |
return; | |
} | |
// remove every element in other from this | |
foreach (T element in other) | |
{ | |
Remove(element); | |
} | |
} | |
/// <summary> | |
/// Takes symmetric difference (XOR) with other and this set. Modifies this set. | |
/// </summary> | |
/// <param name="other">enumerable with items to XOR</param> | |
public void SymmetricExceptWith(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
// if set is empty, then symmetric difference is other | |
if (m_count == 0) | |
{ | |
UnionWith(other); | |
return; | |
} | |
// special case this; the symmetric difference of a set with itself is the empty set | |
if (other == this) | |
{ | |
Clear(); | |
return; | |
} | |
BetterHashSet<T> otherAsSet = other as BetterHashSet<T>; | |
// If other is a HashSet, it has unique elements according to its equality comparer, | |
// but if they're using different equality comparers, then assumption of uniqueness | |
// will fail. So first check if other is a hashset using the same equality comparer; | |
// symmetric except is a lot faster and avoids bit array allocations if we can assume | |
// uniqueness | |
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) | |
{ | |
SymmetricExceptWithUniqueHashSet(otherAsSet); | |
} | |
else | |
{ | |
SymmetricExceptWithEnumerable(other); | |
} | |
} | |
/// <summary> | |
/// Checks if this is a subset of other. | |
/// | |
/// Implementation Notes: | |
/// The following properties are used up-front to avoid element-wise checks: | |
/// 1. If this is the empty set, then it's a subset of anything, including the empty set | |
/// 2. If other has unique elements according to this equality comparer, and this has more | |
/// elements than other, then it can't be a subset. | |
/// | |
/// Furthermore, if other is a hashset using the same equality comparer, we can use a | |
/// faster element-wise check. | |
/// </summary> | |
/// <param name="other"></param> | |
/// <returns>true if this is a subset of other; false if not</returns> | |
public bool IsSubsetOf(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
// The empty set is a subset of any set | |
if (m_count == 0) | |
{ | |
return true; | |
} | |
BetterHashSet<T> otherAsSet = other as BetterHashSet<T>; | |
// faster if other has unique elements according to this equality comparer; so check | |
// that other is a hashset using the same equality comparer. | |
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) | |
{ | |
// if this has more elements then it can't be a subset | |
if (m_count > otherAsSet.Count) | |
{ | |
return false; | |
} | |
// already checked that we're using same equality comparer. simply check that | |
// each element in this is contained in other. | |
return IsSubsetOfHashSetWithSameEC(otherAsSet); | |
} | |
else | |
{ | |
ElementCount result = CheckUniqueAndUnfoundElements(other, false); | |
return (result.uniqueCount == m_count && result.unfoundCount >= 0); | |
} | |
} | |
/// <summary> | |
/// Checks if this is a proper subset of other (i.e. strictly contained in) | |
/// | |
/// Implementation Notes: | |
/// The following properties are used up-front to avoid element-wise checks: | |
/// 1. If this is the empty set, then it's a proper subset of a set that contains at least | |
/// one element, but it's not a proper subset of the empty set. | |
/// 2. If other has unique elements according to this equality comparer, and this has >= | |
/// the number of elements in other, then this can't be a proper subset. | |
/// | |
/// Furthermore, if other is a hashset using the same equality comparer, we can use a | |
/// faster element-wise check. | |
/// </summary> | |
/// <param name="other"></param> | |
/// <returns>true if this is a proper subset of other; false if not</returns> | |
public bool IsProperSubsetOf(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
ICollection<T> otherAsCollection = other as ICollection<T>; | |
if (otherAsCollection != null) | |
{ | |
// the empty set is a proper subset of anything but the empty set | |
if (m_count == 0) | |
{ | |
return otherAsCollection.Count > 0; | |
} | |
BetterHashSet<T> otherAsSet = other as BetterHashSet<T>; | |
// faster if other is a hashset (and we're using same equality comparer) | |
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) | |
{ | |
if (m_count >= otherAsSet.Count) | |
{ | |
return false; | |
} | |
// this has strictly less than number of items in other, so the following | |
// check suffices for proper subset. | |
return IsSubsetOfHashSetWithSameEC(otherAsSet); | |
} | |
} | |
ElementCount result = CheckUniqueAndUnfoundElements(other, false); | |
return (result.uniqueCount == m_count && result.unfoundCount > 0); | |
} | |
/// <summary> | |
/// Checks if this is a superset of other | |
/// | |
/// Implementation Notes: | |
/// The following properties are used up-front to avoid element-wise checks: | |
/// 1. If other has no elements (it's the empty set), then this is a superset, even if this | |
/// is also the empty set. | |
/// 2. If other has unique elements according to this equality comparer, and this has less | |
/// than the number of elements in other, then this can't be a superset | |
/// | |
/// </summary> | |
/// <param name="other"></param> | |
/// <returns>true if this is a superset of other; false if not</returns> | |
public bool IsSupersetOf(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
// try to fall out early based on counts | |
ICollection<T> otherAsCollection = other as ICollection<T>; | |
if (otherAsCollection != null) | |
{ | |
// if other is the empty set then this is a superset | |
if (otherAsCollection.Count == 0) | |
{ | |
return true; | |
} | |
BetterHashSet<T> otherAsSet = other as BetterHashSet<T>; | |
// try to compare based on counts alone if other is a hashset with | |
// same equality comparer | |
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) | |
{ | |
if (otherAsSet.Count > m_count) | |
{ | |
return false; | |
} | |
} | |
} | |
return ContainsAllElements(other); | |
} | |
/// <summary> | |
/// Checks if this is a proper superset of other (i.e. other strictly contained in this) | |
/// | |
/// Implementation Notes: | |
/// This is slightly more complicated than above because we have to keep track if there | |
/// was at least one element not contained in other. | |
/// | |
/// The following properties are used up-front to avoid element-wise checks: | |
/// 1. If this is the empty set, then it can't be a proper superset of any set, even if | |
/// other is the empty set. | |
/// 2. If other is an empty set and this contains at least 1 element, then this is a proper | |
/// superset. | |
/// 3. If other has unique elements according to this equality comparer, and other's count | |
/// is greater than or equal to this count, then this can't be a proper superset | |
/// | |
/// Furthermore, if other has unique elements according to this equality comparer, we can | |
/// use a faster element-wise check. | |
/// </summary> | |
/// <param name="other"></param> | |
/// <returns>true if this is a proper superset of other; false if not</returns> | |
public bool IsProperSupersetOf(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
// the empty set isn't a proper superset of any set. | |
if (m_count == 0) | |
{ | |
return false; | |
} | |
ICollection<T> otherAsCollection = other as ICollection<T>; | |
if (otherAsCollection != null) | |
{ | |
// if other is the empty set then this is a superset | |
if (otherAsCollection.Count == 0) | |
{ | |
// note that this has at least one element, based on above check | |
return true; | |
} | |
BetterHashSet<T> otherAsSet = other as BetterHashSet<T>; | |
// faster if other is a hashset with the same equality comparer | |
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) | |
{ | |
if (otherAsSet.Count >= m_count) | |
{ | |
return false; | |
} | |
// now perform element check | |
return ContainsAllElements(otherAsSet); | |
} | |
} | |
// couldn't fall out in the above cases; do it the long way | |
ElementCount result = CheckUniqueAndUnfoundElements(other, true); | |
return (result.uniqueCount < m_count && result.unfoundCount == 0); | |
} | |
/// <summary> | |
/// Checks if this set overlaps other (i.e. they share at least one item) | |
/// </summary> | |
/// <param name="other"></param> | |
/// <returns>true if these have at least one common element; false if disjoint</returns> | |
public bool Overlaps(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
if (m_count == 0) | |
{ | |
return false; | |
} | |
foreach (T element in other) | |
{ | |
if (Contains(element)) | |
{ | |
return true; | |
} | |
} | |
return false; | |
} | |
/// <summary> | |
/// Checks if this and other contain the same elements. This is set equality: | |
/// duplicates and order are ignored | |
/// </summary> | |
/// <param name="other"></param> | |
/// <returns></returns> | |
public bool SetEquals(IEnumerable<T> other) | |
{ | |
if (other == null) | |
{ | |
throw new ArgumentNullException("other"); | |
} | |
Contract.EndContractBlock(); | |
BetterHashSet<T> otherAsSet = other as BetterHashSet<T>; | |
// faster if other is a hashset and we're using same equality comparer | |
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet)) | |
{ | |
// attempt to return early: since both contain unique elements, if they have | |
// different counts, then they can't be equal | |
if (m_count != otherAsSet.Count) | |
{ | |
return false; | |
} | |
// already confirmed that the sets have the same number of distinct elements, so if | |
// one is a superset of the other then they must be equal | |
return ContainsAllElements(otherAsSet); | |
} | |
else | |
{ | |
ICollection<T> otherAsCollection = other as ICollection<T>; | |
if (otherAsCollection != null) | |
{ | |
// if this count is 0 but other contains at least one element, they can't be equal | |
if (m_count == 0 && otherAsCollection.Count > 0) | |
{ | |
return false; | |
} | |
} | |
ElementCount result = CheckUniqueAndUnfoundElements(other, true); | |
return (result.uniqueCount == m_count && result.unfoundCount == 0); | |
} | |
} | |
public void CopyTo(T[] array) { CopyTo(array, 0, m_count); } | |
public void CopyTo(T[] array, int arrayIndex, int count) | |
{ | |
if (array == null) | |
{ | |
throw new ArgumentNullException("array"); | |
} | |
Contract.EndContractBlock(); | |
// check array index valid index into array | |
if (arrayIndex < 0) | |
{ | |
throw new ArgumentOutOfRangeException("arrayIndex"); | |
} | |
// also throw if count less than 0 | |
if (count < 0) | |
{ | |
throw new ArgumentOutOfRangeException("count"); | |
} | |
// will array, starting at arrayIndex, be able to hold elements? Note: not | |
// checking arrayIndex >= array.Length (consistency with list of allowing | |
// count of 0; subsequent check takes care of the rest) | |
if (arrayIndex > array.Length || count > array.Length - arrayIndex) | |
{ | |
throw new ArgumentException(); | |
} | |
int numCopied = 0; | |
for (int i = 0; i < m_lastIndex && numCopied < count; i++) | |
{ | |
if (m_slots[i].hashCode >= 0) | |
{ | |
array[arrayIndex + numCopied] = m_slots[i].value; | |
numCopied++; | |
} | |
} | |
} | |
/// <summary> | |
/// Remove elements that match specified predicate. Returns the number of elements removed | |
/// </summary> | |
/// <param name="match"></param> | |
/// <returns></returns> | |
public int RemoveWhere(Predicate<T> match) | |
{ | |
if (match == null) | |
{ | |
throw new ArgumentNullException("match"); | |
} | |
Contract.EndContractBlock(); | |
int numRemoved = 0; | |
for (int i = 0; i < m_lastIndex; i++) | |
{ | |
if (m_slots[i].hashCode >= 0) | |
{ | |
// cache value in case delegate removes it | |
T value = m_slots[i].value; | |
if (match(value)) | |
{ | |
// check again that remove actually removed it | |
if (Remove(value)) | |
{ | |
numRemoved++; | |
} | |
} | |
} | |
} | |
return numRemoved; | |
} | |
/// <summary> | |
/// Gets the IEqualityComparer that is used to determine equality of keys for | |
/// the HashSet. | |
/// </summary> | |
public IEqualityComparer<T> Comparer | |
{ | |
get | |
{ | |
return m_comparer; | |
} | |
} | |
/// <summary> | |
/// Sets the capacity of this list to the size of the list (rounded up to nearest prime), | |
/// unless count is 0, in which case we release references. | |
/// | |
/// This method can be used to minimize a list's memory overhead once it is known that no | |
/// new elements will be added to the list. To completely clear a list and release all | |
/// memory referenced by the list, execute the following statements: | |
/// | |
/// list.Clear(); | |
/// list.TrimExcess(); | |
/// </summary> | |
public void TrimExcess() | |
{ | |
Debug.Assert(m_count >= 0, "m_count is negative"); | |
if (m_count == 0) | |
{ | |
// if count is zero, clear references | |
m_buckets = null; | |
m_slots = null; | |
m_version++; | |
} | |
else | |
{ | |
Debug.Assert(m_buckets != null, "m_buckets was null but m_count > 0"); | |
// similar to IncreaseCapacity but moves down elements in case add/remove/etc | |
// caused fragmentation | |
int newSize = HashHelpers.GetPrime(m_count); | |
Slot[] newSlots = new Slot[newSize]; | |
int[] newBuckets = new int[newSize]; | |
// move down slots and rehash at the same time. newIndex keeps track of current | |
// position in newSlots array | |
int newIndex = 0; | |
for (int i = 0; i < m_lastIndex; i++) | |
{ | |
if (m_slots[i].hashCode >= 0) | |
{ | |
newSlots[newIndex] = m_slots[i]; | |
// rehash | |
int bucket = newSlots[newIndex].hashCode % newSize; | |
newSlots[newIndex].next = newBuckets[bucket] - 1; | |
newBuckets[bucket] = newIndex + 1; | |
newIndex++; | |
} | |
} | |
Debug.Assert(newSlots.Length <= m_slots.Length, "capacity increased after TrimExcess"); | |
m_lastIndex = newIndex; | |
m_slots = newSlots; | |
m_buckets = newBuckets; | |
m_freeList = -1; | |
} | |
} | |
#endregion | |
#region Helper methods | |
/// <summary> | |
/// Initializes buckets and slots arrays. Uses suggested capacity by finding next prime | |
/// greater than or equal to capacity. | |
/// </summary> | |
/// <param name="capacity"></param> | |
private void Initialize(int capacity) | |
{ | |
Debug.Assert(m_buckets == null, "Initialize was called but m_buckets was non-null"); | |
int size = HashHelpers.GetPrime(capacity); | |
m_buckets = new int[size]; | |
m_slots = new Slot[size]; | |
} | |
/// <summary> | |
/// Expand to new capacity. New capacity is next prime greater than or equal to suggested | |
/// size. This is called when the underlying array is filled. This performs no | |
/// defragmentation, allowing faster execution; note that this is reasonable since | |
/// AddIfNotPresent attempts to insert new elements in re-opened spots. | |
/// </summary> | |
/// <param name="sizeSuggestion"></param> | |
private void IncreaseCapacity() | |
{ | |
Debug.Assert(m_buckets != null, "IncreaseCapacity called on a set with no elements"); | |
int newSize = HashHelpers.ExpandPrime(m_count); | |
if (newSize <= m_count) | |
{ | |
throw new ArgumentException(); | |
} | |
// Able to increase capacity; copy elements to larger array and rehash | |
SetCapacity(newSize, false); | |
} | |
/// <summary> | |
/// Set the underlying buckets array to size newSize and rehash. Note that newSize | |
/// *must* be a prime. It is very likely that you want to call IncreaseCapacity() | |
/// instead of this method. | |
/// </summary> | |
private void SetCapacity(int newSize, bool forceNewHashCodes) | |
{ | |
Contract.Assert(HashHelpers.IsPrime(newSize), "New size is not prime!"); | |
Contract.Assert(m_buckets != null, "SetCapacity called on a set with no elements"); | |
Slot[] newSlots = new Slot[newSize]; | |
if (m_slots != null) | |
{ | |
Array.Copy(m_slots, 0, newSlots, 0, m_lastIndex); | |
} | |
if (forceNewHashCodes) | |
{ | |
for (int i = 0; i < m_lastIndex; i++) | |
{ | |
if (newSlots[i].hashCode != -1) | |
{ | |
newSlots[i].hashCode = InternalGetHashCode(newSlots[i].value); | |
} | |
} | |
} | |
int[] newBuckets = new int[newSize]; | |
for (int i = 0; i < m_lastIndex; i++) | |
{ | |
int bucket = newSlots[i].hashCode % newSize; | |
newSlots[i].next = newBuckets[bucket] - 1; | |
newBuckets[bucket] = i + 1; | |
} | |
m_slots = newSlots; | |
m_buckets = newBuckets; | |
} | |
/// <summary> | |
/// Adds value to HashSet if not contained already | |
/// Returns true if added and false if already present | |
/// </summary> | |
/// <param name="value">value to find</param> | |
/// <returns></returns> | |
private bool AddIfNotPresent(T value) | |
{ | |
if (m_buckets == null) | |
{ | |
Initialize(0); | |
} | |
int hashCode = InternalGetHashCode(value); | |
int bucket = hashCode % m_buckets.Length; | |
#if FEATURE_RANDOMIZED_STRING_HASHING && !FEATURE_NETCORE | |
int collisionCount = 0; | |
#endif | |
for (int i = m_buckets[hashCode % m_buckets.Length] - 1; i >= 0; i = m_slots[i].next) | |
{ | |
if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, value)) | |
{ | |
return false; | |
} | |
#if FEATURE_RANDOMIZED_STRING_HASHING && !FEATURE_NETCORE | |
collisionCount++; | |
#endif | |
} | |
int index; | |
if (m_freeList >= 0) | |
{ | |
index = m_freeList; | |
m_freeList = m_slots[index].next; | |
} | |
else | |
{ | |
if (m_lastIndex == m_slots.Length) | |
{ | |
IncreaseCapacity(); | |
// this will change during resize | |
bucket = hashCode % m_buckets.Length; | |
} | |
index = m_lastIndex; | |
m_lastIndex++; | |
} | |
m_slots[index].hashCode = hashCode; | |
m_slots[index].value = value; | |
m_slots[index].next = m_buckets[bucket] - 1; | |
m_buckets[bucket] = index + 1; | |
m_count++; | |
m_version++; | |
#if FEATURE_RANDOMIZED_STRING_HASHING && !FEATURE_NETCORE | |
if(collisionCount > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(m_comparer)) { | |
m_comparer = (IEqualityComparer<T>) HashHelpers.GetRandomizedEqualityComparer(m_comparer); | |
SetCapacity(m_buckets.Length, true); | |
} | |
#endif // FEATURE_RANDOMIZED_STRING_HASHING | |
return true; | |
} | |
// Add value at known index with known hash code. Used only | |
// when constructing from another HashSet. | |
private void AddValue(int index, int hashCode, T value) | |
{ | |
int bucket = hashCode % m_buckets.Length; | |
#if DEBUG | |
Debug.Assert(InternalGetHashCode(value) == hashCode); | |
for (int i = m_buckets[bucket] - 1; i >= 0; i = m_slots[i].next) | |
{ | |
Debug.Assert(!m_comparer.Equals(m_slots[i].value, value)); | |
} | |
#endif | |
Debug.Assert(m_freeList == -1); | |
m_slots[index].hashCode = hashCode; | |
m_slots[index].value = value; | |
m_slots[index].next = m_buckets[bucket] - 1; | |
m_buckets[bucket] = index + 1; | |
} | |
/// <summary> | |
/// Checks if this contains of other's elements. Iterates over other's elements and | |
/// returns false as soon as it finds an element in other that's not in this. | |
/// Used by SupersetOf, ProperSupersetOf, and SetEquals. | |
/// </summary> | |
/// <param name="other"></param> | |
/// <returns></returns> | |
private bool ContainsAllElements(IEnumerable<T> other) | |
{ | |
foreach (T element in other) | |
{ | |
if (!Contains(element)) | |
{ | |
return false; | |
} | |
} | |
return true; | |
} | |
/// <summary> | |
/// Used internally by set operations which have to rely on bit array marking. This is like | |
/// Contains but returns index in slots array. | |
/// </summary> | |
/// <param name="item"></param> | |
/// <returns></returns> | |
private int InternalIndexOf(T item) | |
{ | |
Debug.Assert(m_buckets != null, "m_buckets was null; callers should check first"); | |
int hashCode = InternalGetHashCode(item); | |
for (int i = m_buckets[hashCode % m_buckets.Length] - 1; i >= 0; i = m_slots[i].next) | |
{ | |
if ((m_slots[i].hashCode) == hashCode && m_comparer.Equals(m_slots[i].value, item)) | |
{ | |
return i; | |
} | |
} | |
// wasn't found | |
return -1; | |
} | |
/// <summary> | |
/// Add if not already in hashset. Returns an out param indicating index where added. This | |
/// is used by SymmetricExcept because it needs to know the following things: | |
/// - whether the item was already present in the collection or added from other | |
/// - where it's located (if already present, it will get marked for removal, otherwise | |
/// marked for keeping) | |
/// </summary> | |
/// <param name="value"></param> | |
/// <param name="location"></param> | |
/// <returns></returns> | |
private bool AddOrGetLocation(T value, out int location) | |
{ | |
Debug.Assert(m_buckets != null, "m_buckets is null, callers should have checked"); | |
int hashCode = InternalGetHashCode(value); | |
int bucket = hashCode % m_buckets.Length; | |
for (int i = m_buckets[hashCode % m_buckets.Length] - 1; i >= 0; i = m_slots[i].next) | |
{ | |
if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, value)) | |
{ | |
location = i; | |
return false; //already present | |
} | |
} | |
int index; | |
if (m_freeList >= 0) | |
{ | |
index = m_freeList; | |
m_freeList = m_slots[index].next; | |
} | |
else | |
{ | |
if (m_lastIndex == m_slots.Length) | |
{ | |
IncreaseCapacity(); | |
// this will change during resize | |
bucket = hashCode % m_buckets.Length; | |
} | |
index = m_lastIndex; | |
m_lastIndex++; | |
} | |
m_slots[index].hashCode = hashCode; | |
m_slots[index].value = value; | |
m_slots[index].next = m_buckets[bucket] - 1; | |
m_buckets[bucket] = index + 1; | |
m_count++; | |
m_version++; | |
location = index; | |
return true; | |
} | |
/// <summary> | |
/// Determines counts that can be used to determine equality, subset, and superset. This | |
/// is only used when other is an IEnumerable and not a HashSet. If other is a HashSet | |
/// these properties can be checked faster without use of marking because we can assume | |
/// other has no duplicates. | |
/// | |
/// The following count checks are performed by callers: | |
/// 1. Equals: checks if unfoundCount = 0 and uniqueFoundCount = m_count; i.e. everything | |
/// in other is in this and everything in this is in other | |
/// 2. Subset: checks if unfoundCount >= 0 and uniqueFoundCount = m_count; i.e. other may | |
/// have elements not in this and everything in this is in other | |
/// 3. Proper subset: checks if unfoundCount > 0 and uniqueFoundCount = m_count; i.e | |
/// other must have at least one element not in this and everything in this is in other | |
/// 4. Proper superset: checks if unfound count = 0 and uniqueFoundCount strictly less | |
/// than m_count; i.e. everything in other was in this and this had at least one element | |
/// not contained in other. | |
/// | |
/// An earlier implementation used delegates to perform these checks rather than returning | |
/// an ElementCount struct; however this was changed due to the perf overhead of delegates. | |
/// </summary> | |
/// <param name="other"></param> | |
/// <param name="returnIfUnfound">Allows us to finish faster for equals and proper superset | |
/// because unfoundCount must be 0.</param> | |
/// <returns></returns> | |
[System.Security.SecuritySafeCritical] | |
private unsafe ElementCount CheckUniqueAndUnfoundElements(IEnumerable<T> other, bool returnIfUnfound) | |
{ | |
ElementCount result; | |
// need special case in case this has no elements. | |
if (m_count == 0) | |
{ | |
int numElementsInOther = 0; | |
foreach (T item in other) | |
{ | |
numElementsInOther++; | |
// break right away, all we want to know is whether other has 0 or 1 elements | |
break; | |
} | |
result.uniqueCount = 0; | |
result.unfoundCount = numElementsInOther; | |
return result; | |
} | |
Debug.Assert((m_buckets != null) && (m_count > 0), "m_buckets was null but count greater than 0"); | |
int originalLastIndex = m_lastIndex; | |
int intArrayLength = BitHelper.ToIntArrayLength(originalLastIndex); | |
BitHelper bitHelper; | |
if (intArrayLength <= StackAllocThreshold) | |
{ | |
int* bitArrayPtr = stackalloc int[intArrayLength]; | |
bitHelper = new BitHelper(bitArrayPtr, intArrayLength); | |
} | |
else | |
{ | |
int[] bitArray = new int[intArrayLength]; | |
bitHelper = new BitHelper(bitArray, intArrayLength); | |
} | |
// count of items in other not found in this | |
int unfoundCount = 0; | |
// count of unique items in other found in this | |
int uniqueFoundCount = 0; | |
foreach (T item in other) | |
{ | |
int index = InternalIndexOf(item); | |
if (index >= 0) | |
{ | |
if (!bitHelper.IsMarked(index)) | |
{ | |
// item hasn't been seen yet | |
bitHelper.MarkBit(index); | |
uniqueFoundCount++; | |
} | |
} | |
else | |
{ | |
unfoundCount++; | |
if (returnIfUnfound) | |
{ | |
break; | |
} | |
} | |
} | |
result.uniqueCount = uniqueFoundCount; | |
result.unfoundCount = unfoundCount; | |
return result; | |
} | |
/// <summary> | |
/// Copies this to an array. Used for DebugView | |
/// </summary> | |
/// <returns></returns> | |
internal T[] ToArray() | |
{ | |
T[] newArray = new T[Count]; | |
CopyTo(newArray); | |
return newArray; | |
} | |
/// <summary> | |
/// Internal method used for HashSetEqualityComparer. Compares set1 and set2 according | |
/// to specified comparer. | |
/// | |
/// Because items are hashed according to a specific equality comparer, we have to resort | |
/// to n^2 search if they're using different equality comparers. | |
/// </summary> | |
/// <param name="set1"></param> | |
/// <param name="set2"></param> | |
/// <param name="comparer"></param> | |
/// <returns></returns> | |
internal static bool HashSetEquals(BetterHashSet<T> set1, BetterHashSet<T> set2, IEqualityComparer<T> comparer) | |
{ | |
// handle null cases first | |
if (set1 == null) | |
{ | |
return (set2 == null); | |
} | |
else if (set2 == null) | |
{ | |
// set1 != null | |
return false; | |
} | |
// all comparers are the same; this is faster | |
if (AreEqualityComparersEqual(set1, set2)) | |
{ | |
if (set1.Count != set2.Count) | |
{ | |
return false; | |
} | |
// suffices to check subset | |
foreach (T item in set2) | |
{ | |
if (!set1.Contains(item)) | |
{ | |
return false; | |
} | |
} | |
return true; | |
} | |
else | |
{ // n^2 search because items are hashed according to their respective ECs | |
foreach (T set2Item in set2) | |
{ | |
bool found = false; | |
foreach (T set1Item in set1) | |
{ | |
if (comparer.Equals(set2Item, set1Item)) | |
{ | |
found = true; | |
break; | |
} | |
} | |
if (!found) | |
{ | |
return false; | |
} | |
} | |
return true; | |
} | |
} | |
/// <summary> | |
/// Checks if equality comparers are equal. This is used for algorithms that can | |
/// speed up if it knows the other item has unique elements. I.e. if they're using | |
/// different equality comparers, then uniqueness assumption between sets break. | |
/// </summary> | |
/// <param name="set1"></param> | |
/// <param name="set2"></param> | |
/// <returns></returns> | |
private static bool AreEqualityComparersEqual(BetterHashSet<T> set1, BetterHashSet<T> set2) | |
{ | |
return set1.Comparer.Equals(set2.Comparer); | |
} | |
/// <summary> | |
/// Workaround Comparers that throw ArgumentNullException for GetHashCode(null). | |
/// </summary> | |
/// <param name="item"></param> | |
/// <returns>hash code</returns> | |
private int InternalGetHashCode(T item) | |
{ | |
if (item == null) | |
{ | |
return 0; | |
} | |
return m_comparer.GetHashCode(item) & Lower31BitMask; | |
} | |
#endregion | |
// used for set checking operations (using enumerables) that rely on counting | |
internal struct ElementCount | |
{ | |
internal int uniqueCount; | |
internal int unfoundCount; | |
} | |
internal struct Slot | |
{ | |
internal int hashCode; // Lower 31 bits of hash code, -1 if unused | |
internal int next; // Index of next entry, -1 if last | |
internal T value; | |
} | |
public struct Enumerator : IEnumerator<T>, System.Collections.IEnumerator | |
{ | |
private BetterHashSet<T> set; | |
private int index; | |
private int version; | |
private T current; | |
internal Enumerator(BetterHashSet<T> set) | |
{ | |
this.set = set; | |
index = 0; | |
version = set.m_version; | |
current = default(T); | |
} | |
public void Dispose() | |
{ | |
} | |
public bool MoveNext() | |
{ | |
if (version != set.m_version) | |
{ | |
throw new InvalidOperationException(SR.GetString(SR.InvalidOperation_EnumFailedVersion)); | |
} | |
while (index < set.m_lastIndex) | |
{ | |
if (set.m_slots[index].hashCode >= 0) | |
{ | |
current = set.m_slots[index].value; | |
index++; | |
return true; | |
} | |
index++; | |
} | |
index = set.m_lastIndex + 1; | |
current = default(T); | |
return false; | |
} | |
public T Current | |
{ | |
get | |
{ | |
return current; | |
} | |
} | |
Object System.Collections.IEnumerator.Current | |
{ | |
get | |
{ | |
if (index == 0 || index == set.m_lastIndex + 1) | |
{ | |
throw new InvalidOperationException(SR.GetString(SR.InvalidOperation_EnumOpCantHappen)); | |
} | |
return Current; | |
} | |
} | |
void System.Collections.IEnumerator.Reset() | |
{ | |
if (version != set.m_version) | |
{ | |
throw new InvalidOperationException(SR.GetString(SR.InvalidOperation_EnumFailedVersion)); | |
} | |
index = 0; | |
current = default(T); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment