Skip to content

Instantly share code, notes, and snippets.

@jnm2
Last active August 29, 2015 14:24
Show Gist options
  • Save jnm2/4a240dbf2f8e385b0059 to your computer and use it in GitHub Desktop.
Save jnm2/4a240dbf2f8e385b0059 to your computer and use it in GitHub Desktop.
Calculates the String.GetHashCode compatible hash code for a substring without allocating the substring
/// <summary>
/// Calculates the hash code for a substring without allocating the substring.
/// </summary>
// Identical to String.GetHashCode except for custom start and stop
// I have unit tests on this
[SecuritySafeCritical]
[ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)]
public static int GetHashCode(this string str, int startIndex, int length)
{
if (str == null) throw new ArgumentNullException("str");
if (startIndex < 0 || startIndex > str.Length - length) throw new ArgumentOutOfRangeException("startIndex", startIndex, "Start index must be greater than or equal to zero and less than or equal to the length of the string minus the substring length.");
if (length < 0) throw new ArgumentOutOfRangeException("length", length, "Length must be greater than or equal to zero.");
unsafe
{
fixed (char* src = str)
{
Contract.Assert(src[str.Length] == '\0', "src[this.Length] == '\\0'");
Contract.Assert(((int)src) % 4 == 0, "Managed string should start at 4 bytes boundary");
// Must use Environment.Is64BitProcess instead of conditional compilation for AnyCPU builds
int hash1 = Environment.Is64BitProcess ? 5381 : (5381 << 16) + 5381;
int hash2 = hash1;
if (!Environment.Is64BitProcess)
{
// 32 bit machines.
int* pint = (int*)(src + startIndex); // ORIGINAL: int* pint = (int *)src;
int len = length; // ORIGINAL: int len = src.Length;
while (len > 3) // ORIGINAL: while (len > 2) // this implementation hashes the zero terminator if the length mod 4 is 3. Unfortunately, the substring is probably not zero-terminated.
{
hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ pint[0];
hash2 = ((hash2 << 5) + hash2 + (hash2 >> 27)) ^ pint[1];
pint += 2;
len -= 4;
}
// BEGIN NEW
switch (len)
{
case 3:
// Avoid hashing the next character, since it's not guaranteed to be a zero terminator anymore
hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ pint[0];
hash2 = ((hash2 << 5) + hash2 + (hash2 >> 27)) ^ (pint[1] & 0xFF);
break;
case 2:
hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ pint[0]; // ORIGINAL when (len > 0)
break;
case 1:
// Avoid hashing the next character, since it's not guaranteed to be a zero terminator anymore
hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ (pint[0] & 0xFF);
break;
}
//END NEW
}
else
{
// INLINED: int c;
char* s = (src + startIndex); // ORIGINAL: char *s = src;
int len = length; // NEW
while (len != 0) // ORIGINAL: while ((c = s[0]) != 0) {
{
// INLINED: c = s[0];
hash1 = ((hash1 << 5) + hash1) ^ s[0]; // ORIGINAL: hash1 = ((hash1 << 5) + hash1) ^ c;
// INLINED: c = s[1];
if (len == 1) // ORIGINAL: if (c == 0)
break;
len -= 2; // NEW
hash2 = ((hash2 << 5) + hash2) ^ s[1];
s += 2;
}
}
return hash1 + (hash2 * 1566083941);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment