Skip to content

Instantly share code, notes, and snippets.

@edahlgren
Created July 6, 2013 02:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edahlgren/5938401 to your computer and use it in GitHub Desktop.
Save edahlgren/5938401 to your computer and use it in GitHub Desktop.
Empirically test the non-uniformness of a hash ring for different server, replication, and bit-space values. Uses Jenkin's hash.
import Data.Bits
import System.IO
import qualified Data.List as L
import qualified Data.Vector as V
import qualified Data.HashMap.Strict as HM
import Statistics.Test.ChiSquared
import Data.BloomFilter.Hash
best :: Int -> Int -> Int
best servers bits = 2^bits `div` servers
distribution :: Int -> Int -> Int -> IO ()
distribution servers replicas bits = if servers*replicas > 2^bits then return () else distribution' servers replicas bits
distribution' :: Int -> Int -> Int -> IO ()
distribution' servers replicas bits = do
hPutStrLn stdout $ "servers=" ++ (show servers) ++ " replicas=" ++ (show replicas)
let hashes = L.foldl' replicate HM.empty [1..servers]
let sorted = L.sortBy (\(h1,_) (h2,_) -> compare h1 h2) $ HM.toList hashes
let dist = reduce . group . cycle $ sorted
hPutStrLn stdout $ "percent non-uniform=" ++ (show $ nonUniform dist)
where
serverKey n = "server" ++ (show n)
replicaKey n replica = (serverKey n) ++ "-" ++ (show replica)
bitMask h = if bits > 64 then h else h `shiftR` (64-bits)
hash = fromIntegral . bitMask . hash64
hashReplica server replica postFix hm = case HM.lookup h hm of
Just _ -> hashReplica server replica (postFix+1) hm
Nothing -> HM.insert h skey hm
where
skey = serverKey server
rkey = skey ++ (replicaKey server replica)
key = if postFix > 0 then rkey ++ (show postFix) else rkey
h :: Int
h = fromIntegral . hash $ key
replicate hm n = L.foldl' (\hm' replica -> hashReplica n replica 0 hm') hm [1..replicas]
cycle xs = cycle' xs []
where
cycle' [] accum = accum
cycle' (x:[]) accum = (cyclicDiff x (head xs)):accum
cycle' ((h1,_):y@(h2,s):xs') accum = cycle' (y:xs') ((h2-h1,s):accum)
cyclicDiff (h1,_) (h2,s) = (2^bits - h1 + h2, s)
group = L.groupBy (\(_,s1) (_,s2) -> s1 == s2) . L.sortBy (\(_,s1) (_,s2) -> compare s1 s2)
reduce = map (L.foldl' (\b (h,_) -> b+h) 0)
nonUniform dist = 100*(fromIntegral wiggle)/(fromIntegral $ 2^bits)
where
wiggle = L.foldl' (\p d -> p + (abs $ d - uniform)) 0 dist
uniform = best servers bits
runTest bits = do
hPutStrLn stdout $ (show bits) ++ "-bit space"
varyReplicas 3
varyReplicas 10
varyReplicas 100
varyReplicas 500
varyReplicas 1000
where
varyReplicas servers = do
hPutStrLn stdout "---------"
distribution servers 1 bits
distribution servers 10 bits
distribution servers 100 bits
distribution servers 500 bits
distribution servers 1000 bits
distribution servers 5000 bits
distribution servers 10000 bits
main = do
runTest 16
runTest 32
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment