Skip to content

Instantly share code, notes, and snippets.

@davidclarance
Last active July 29, 2024 11:00
Show Gist options
  • Save davidclarance/daeafad5fc3e28e019950a12b0da01f5 to your computer and use it in GitHub Desktop.
Save davidclarance/daeafad5fc3e28e019950a12b0da01f5 to your computer and use it in GitHub Desktop.
import hashlib
# Share of the 100 buckets given to each experiment group, in declaration
# order; the values add up to 100.
probability_assignments = {
    "Control": 50,
    "Variant 1": 30,
    "Variant 2": 20,
}

# Sample customer IDs used by the demonstration further down.
random_customer_ids = [
    "Customer1",
    "Customer2",
    "Customer3",
    "Customer4",
]
def get_hash(customer_id: str) -> str:
    """Return the first 10 hex characters of the MD5 digest of *customer_id*.

    The ID is encoded with ``str.encode()`` (UTF-8 by default) before being
    hashed, so the same ID always yields the same 10-character string.
    """
    digest = hashlib.md5(customer_id.encode()).hexdigest()
    # Keep only a 10-character (40-bit) prefix of the 32-character digest.
    return digest[:10]
def get_integer_representation_of_hash(customer_id: str) -> int:
    """Return the truncated hash of *customer_id* as an integer.

    The hex string produced by ``get_hash`` is parsed as a base-16 number.
    """
    return int(get_hash(customer_id), 16)
def get_assigned_bucket(customer_id: str, total_buckets: int) -> int:
    """Return the bucket number for *customer_id* out of *total_buckets*.

    The integer form of the customer's hash is reduced modulo
    *total_buckets*, so for a positive *total_buckets* the result lies in
    ``range(total_buckets)``.

    Raises:
        ZeroDivisionError: If *total_buckets* is 0.
    """
    return get_integer_representation_of_hash(customer_id) % total_buckets
def divide_space_into_partitions(prob_distribution):
    """Turn a sequence of partition sizes into consecutive ``(start, end)`` ranges.

    The first range starts at 0 and each following range starts where the
    previous one ended, e.g. ``[50, 30, 20]`` becomes
    ``[(0, 50), (50, 80), (80, 100)]``.

    Args:
        prob_distribution: Iterable of partition sizes, in the order the
            partitions should be laid out.

    Returns:
        A list of ``(start, end)`` tuples, one per input size; empty when
        the input is empty.
    """
    partition_ranges = []
    start = 0
    for partition in prob_distribution:
        end = start + partition
        partition_ranges.append((start, end))
        # The next partition begins exactly where this one ends.
        start = end
    return partition_ranges
def assign_groups(customer_id, partitions, total_buckets=100):
    """Return the index of the partition that contains the customer's bucket.

    Args:
        customer_id: Customer identifier string to be bucketed.
        partitions: Sequence of ``(start, end)`` ranges; a bucket ``b``
            belongs to a range when ``start <= b < end``.
        total_buckets: Number of buckets the hash is reduced to. Defaults
            to 100, which matches partition sizes expressed as percentages.

    Returns:
        The zero-based index of the first matching range, or ``None`` when
        no range contains the bucket.
    """
    bucket = get_assigned_bucket(customer_id, total_buckets)
    for idx, (start, end) in enumerate(partitions):
        if start <= bucket < end:
            return idx
    # No range covers this bucket, e.g. the ranges stop short of total_buckets.
    return None
# Lay the configured group sizes out as consecutive (start, end) ranges.
partitions = divide_space_into_partitions(
    prob_distribution=probability_assignments.values()
)

# For every sample customer, record each intermediate step of the assignment:
# [truncated hash, hash as integer, bucket out of 100, group index].
groups = dict(
    (
        customer,
        [
            get_hash(customer),
            get_integer_representation_of_hash(customer),
            get_assigned_bucket(customer, 100),
            assign_groups(customer, partitions),
        ],
    )
    for customer in random_customer_ids
)

# Bare expression: displays the mapping when run as a notebook cell.
groups
@davidclarance
Copy link
Author

You can run a simulation to check whether the observed group shares match the configured probabilities:

import uuid
from collections import Counter

# Number of synthetic customers to generate for the simulation.
n_customers = 1000000

# Random hex UUIDs stand in for customer IDs.
random_customer_ids = [uuid.uuid4().hex for _ in range(n_customers)]

partitions = divide_space_into_partitions(
    prob_distribution=probability_assignments.values()
)

# Build the index -> group-name lookup once instead of once per customer.
group_names = list(probability_assignments)

groups = {
    customer: group_names[assign_groups(customer, partitions)]
    for customer in random_customer_ids
}

# Observed share of customers per group, for comparison with the configured
# percentages. Bare expression: displays the result when run as a notebook cell.
{group: count / n_customers for group, count in Counter(groups.values()).items()}

This gives the following output:

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment