Last active
October 10, 2022 00:14
-
-
Save AdamWhiteHat/019c79ae0039708e11a3b4dd0f4d02c5 to your computer and use it in GitHub Desktop.
Enables adding hundreds of millions of string keys to a Dictionary without slow-down. Provides a GetHashCode implementation that provides the mathematically best, even distribution of hash values accross the entire 32 bit integer range of possible values. Allows a Dictionary, HashSet, SortedSet, or anything that needs a hash, to hash hundreds of…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary> | |
/// A IEqualityComparer<string> that provides the mathematically best GetHashCode | |
/// possible for adding millions of strings to Dictionaries, HashTables, HashMaps and more. | |
/// It works by interpreting an array of bytes (your string) as a number in base 256, | |
/// and then returning the unit of that number in the multiplicative group of integers modulo a prime p, | |
/// where p is the largest prime less than 2^32. | |
/// </summary> | |
public class BestStringEqualityComparer : IEqualityComparer<string> | |
{ | |
public int GetHashCode(string obj) | |
{ | |
BigInteger value = CalculateNumericValue(obj); | |
return CalculateMultiplicativeGroup(value); | |
} | |
public bool Equals(string x, string y) | |
{ | |
return string.Equals(x, y, StringComparison.InvariantCulture); | |
} | |
private static readonly byte _minOctet = 32; | |
private static readonly BigInteger _generator = 2; | |
private static readonly BigInteger _prime = new BigInteger(4294967291); | |
private static readonly BigInteger _byteRadix = new BigInteger(256); | |
private static int CalculateMultiplicativeGroup(BigInteger input) | |
{ | |
uint result = (uint)BigInteger.ModPow(_generator, input % (_prime - 1), _prime); | |
return (int)result; | |
} | |
private static BigInteger CalculateNumericValue(string input) | |
{ | |
if (string.IsNullOrWhiteSpace(input)) | |
{ | |
throw new ArgumentNullException(nameof(input)); | |
} | |
byte[] bytes = Encoding.ASCII.GetBytes(input); | |
Array.Reverse(bytes); | |
int index = 0; | |
BigInteger result = new BigInteger(0); | |
foreach (byte octet in bytes) | |
{ | |
result += (BigInteger.Pow(_byteRadix, index) * octet); | |
index++; | |
} | |
return result; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment