Created
July 6, 2013 02:34
-
-
Save edahlgren/5938401 to your computer and use it in GitHub Desktop.
Empirically test the non-uniformness of a hash ring for different server, replication, and bit-space values. Uses Jenkin's hash.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Bits | |
import System.IO | |
import qualified Data.List as L | |
import qualified Data.Vector as V | |
import qualified Data.HashMap.Strict as HM | |
import Statistics.Test.ChiSquared | |
import Data.BloomFilter.Hash | |
best :: Int -> Int -> Int | |
best servers bits = 2^bits `div` servers | |
distribution :: Int -> Int -> Int -> IO () | |
distribution servers replicas bits = if servers*replicas > 2^bits then return () else distribution' servers replicas bits | |
distribution' :: Int -> Int -> Int -> IO () | |
distribution' servers replicas bits = do | |
hPutStrLn stdout $ "servers=" ++ (show servers) ++ " replicas=" ++ (show replicas) | |
let hashes = L.foldl' replicate HM.empty [1..servers] | |
let sorted = L.sortBy (\(h1,_) (h2,_) -> compare h1 h2) $ HM.toList hashes | |
let dist = reduce . group . cycle $ sorted | |
hPutStrLn stdout $ "percent non-uniform=" ++ (show $ nonUniform dist) | |
where | |
serverKey n = "server" ++ (show n) | |
replicaKey n replica = (serverKey n) ++ "-" ++ (show replica) | |
bitMask h = if bits > 64 then h else h `shiftR` (64-bits) | |
hash = fromIntegral . bitMask . hash64 | |
hashReplica server replica postFix hm = case HM.lookup h hm of | |
Just _ -> hashReplica server replica (postFix+1) hm | |
Nothing -> HM.insert h skey hm | |
where | |
skey = serverKey server | |
rkey = skey ++ (replicaKey server replica) | |
key = if postFix > 0 then rkey ++ (show postFix) else rkey | |
h :: Int | |
h = fromIntegral . hash $ key | |
replicate hm n = L.foldl' (\hm' replica -> hashReplica n replica 0 hm') hm [1..replicas] | |
cycle xs = cycle' xs [] | |
where | |
cycle' [] accum = accum | |
cycle' (x:[]) accum = (cyclicDiff x (head xs)):accum | |
cycle' ((h1,_):y@(h2,s):xs') accum = cycle' (y:xs') ((h2-h1,s):accum) | |
cyclicDiff (h1,_) (h2,s) = (2^bits - h1 + h2, s) | |
group = L.groupBy (\(_,s1) (_,s2) -> s1 == s2) . L.sortBy (\(_,s1) (_,s2) -> compare s1 s2) | |
reduce = map (L.foldl' (\b (h,_) -> b+h) 0) | |
nonUniform dist = 100*(fromIntegral wiggle)/(fromIntegral $ 2^bits) | |
where | |
wiggle = L.foldl' (\p d -> p + (abs $ d - uniform)) 0 dist | |
uniform = best servers bits | |
runTest bits = do | |
hPutStrLn stdout $ (show bits) ++ "-bit space" | |
varyReplicas 3 | |
varyReplicas 10 | |
varyReplicas 100 | |
varyReplicas 500 | |
varyReplicas 1000 | |
where | |
varyReplicas servers = do | |
hPutStrLn stdout "---------" | |
distribution servers 1 bits | |
distribution servers 10 bits | |
distribution servers 100 bits | |
distribution servers 500 bits | |
distribution servers 1000 bits | |
distribution servers 5000 bits | |
distribution servers 10000 bits | |
main = do | |
runTest 16 | |
runTest 32 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment