Created
November 17, 2023 06:34
-
-
Save kino6052/a0fc4716212249976c33ccf6d88389f2 to your computer and use it in GitHub Desktop.
Hash Collision Calculator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// NOTE: This utility is for illustration purposes | |
// Allows to calculate minimal hash length based on the number of files to consider | |
// MD5 uses 16 symbols | |
enum EConstant { | |
NumberOfSymbols = 16, | |
HashStringLength = 32, // MD5 hash length | |
DefaultProbabilityThreshold = 0.01, | |
BinarySearchFactor = 1.5, | |
} | |
/* | |
* Calculates the probability of at least one collision using the birthday problem approach. | |
* | |
* https://kevingal.com/blog/collisions.html#:~:text=,the%20math%20behind%20hash%20collisions | |
* | |
* @param numFiles - The number of files to consider for hash collisions. | |
* @param hashLength - The length of the hash used. | |
* @returns The probability of at least one collision occurring. | |
*/ | |
export function calculateCollisionProbability({ | |
numberOfFiles, | |
hashLength, | |
}: { | |
numberOfFiles: number, | |
hashLength: number, | |
}): number { | |
const totalCombinations: number = EConstant.NumberOfSymbols ** hashLength; | |
let probNoCollision = 1; | |
for (let i = 0; i < numberOfFiles; i += 1) { | |
probNoCollision *= (totalCombinations - i) / totalCombinations; | |
} | |
const probAtLeastOneCollision: number = 1 - probNoCollision; | |
return probAtLeastOneCollision; | |
} | |
/* | |
* Uses binary search to find the minimal hash length for which the collision probability | |
* is below a specified threshold. | |
* @param numFiles - The number of files to consider for hash collisions. | |
* @param currentHashLength - The current length of the hash being tested. | |
* @param threshold - The upper limit for an acceptable collision probability. | |
* @returns The minimal hash length that meets the criteria. | |
*/ | |
export function findMinimalHashLengthWithCollisionBellowThreshold( | |
{ | |
numberOfFiles, | |
currentHashLength, | |
threshold = Number(EConstant.DefaultProbabilityThreshold), | |
}: { | |
numberOfFiles: number, | |
currentHashLength: number, | |
threshold: number | |
}, | |
): number { | |
const probabilityOfCollision: number = calculateCollisionProbability({ | |
hashLength: currentHashLength, | |
numberOfFiles, | |
}); | |
if (currentHashLength > EConstant.HashStringLength) { | |
return EConstant.HashStringLength; | |
} | |
if (probabilityOfCollision > threshold) { | |
currentHashLength = Math.ceil(EConstant.BinarySearchFactor * currentHashLength); | |
} else { | |
return currentHashLength; | |
} | |
return findMinimalHashLengthWithCollisionBellowThreshold({ | |
numberOfFiles, | |
currentHashLength, | |
threshold, | |
}); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment