Skip to content

Instantly share code, notes, and snippets.

@AdamWhiteHat
Last active October 10, 2022 00:14
Show Gist options
  • Save AdamWhiteHat/019c79ae0039708e11a3b4dd0f4d02c5 to your computer and use it in GitHub Desktop.
Save AdamWhiteHat/019c79ae0039708e11a3b4dd0f4d02c5 to your computer and use it in GitHub Desktop.
Enables adding hundreds of millions of string keys to a Dictionary without slow-down. Provides a GetHashCode implementation that provides the mathematically best, even distribution of hash values accross the entire 32 bit integer range of possible values. Allows a Dictionary, HashSet, SortedSet, or anything that needs a hash, to hash hundreds of…
/// <summary>
/// A IEqualityComparer<string> that provides the mathematically best GetHashCode
/// possible for adding millions of strings to Dictionaries, HashTables, HashMaps and more.
/// It works by interpreting an array of bytes (your string) as a number in base 256,
/// and then returning the unit of that number in the multiplicative group of integers modulo a prime p,
/// where p is the largest prime less than 2^32.
/// </summary>
public class BestStringEqualityComparer : IEqualityComparer<string>
{
public int GetHashCode(string obj)
{
BigInteger value = CalculateNumericValue(obj);
return CalculateMultiplicativeGroup(value);
}
public bool Equals(string x, string y)
{
return string.Equals(x, y, StringComparison.InvariantCulture);
}
private static readonly byte _minOctet = 32;
private static readonly BigInteger _generator = 2;
private static readonly BigInteger _prime = new BigInteger(4294967291);
private static readonly BigInteger _byteRadix = new BigInteger(256);
private static int CalculateMultiplicativeGroup(BigInteger input)
{
uint result = (uint)BigInteger.ModPow(_generator, input % (_prime - 1), _prime);
return (int)result;
}
private static BigInteger CalculateNumericValue(string input)
{
if (string.IsNullOrWhiteSpace(input))
{
throw new ArgumentNullException(nameof(input));
}
byte[] bytes = Encoding.ASCII.GetBytes(input);
Array.Reverse(bytes);
int index = 0;
BigInteger result = new BigInteger(0);
foreach (byte octet in bytes)
{
result += (BigInteger.Pow(_byteRadix, index) * octet);
index++;
}
return result;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment