Created
May 26, 2017 21:46
-
-
Save tjennings/5a5cfee29571d1a362b8da985751b81c to your computer and use it in GitHub Desktop.
Treatment bucketing using consistent hashing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.jes.experiment; | |
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; | |
import org.apache.commons.math3.stat.descriptive.moment.Variance; | |
import java.nio.ByteBuffer; | |
import java.security.MessageDigest; | |
import java.security.NoSuchAlgorithmException; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.Map; | |
import java.util.Random; | |
import java.util.stream.IntStream; | |
/** | |
* @author Tyler J on 5/26/17. | |
*/ | |
public class BucketUtil { | |
static Object bucket(Object key, String salt, Object ... buckets) throws NoSuchAlgorithmException { | |
MessageDigest digest = MessageDigest.getInstance("MD5"); | |
byte[] hash = digest.digest((key.toString() + salt).getBytes()); | |
return buckets[Math.abs(hash.hashCode()) % buckets.length]; | |
} | |
public static void main(String[] argv) throws Exception { | |
Map<Object, Integer> counts = new HashMap<>(); | |
String salt = "12345lslkjdsa;jsd;ja"; | |
int buckets = 3; | |
int samples = 5000; | |
//Same key and salt is always the same bucket | |
ArrayList<String> treatments = new ArrayList<>(); | |
for(int i = 0; i < buckets; i++) { | |
treatments.add("t" + i); | |
} | |
for(int i = 0; i < samples; i++) { | |
Object bucket = bucket(i, salt, treatments.toArray()); | |
Integer count = counts.get(bucket); | |
if(count != null) { | |
counts.put(bucket, count + 1); | |
} else { | |
counts.put(bucket, 1 ); | |
} | |
} | |
StandardDeviation std = new StandardDeviation(); | |
Variance variance = new Variance(); | |
double[] vals = counts.values().stream().mapToDouble(o -> o.doubleValue()).toArray(); | |
double stdDev = std.evaluate(vals); | |
double var = variance.evaluate(vals); | |
double diffFromExpected = (stdDev / (samples / buckets)) * 100; | |
for(Map.Entry<Object, Integer> entry : counts.entrySet()) { | |
System.out.println(entry.getKey() + " " + entry.getValue().toString()); | |
} | |
System.out.println("StdDev " + stdDev); | |
System.out.println("% diff from expected " + diffFromExpected); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment