Last active
July 29, 2024 11:00
-
-
Save davidclarance/daeafad5fc3e28e019950a12b0da01f5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib

# Share of traffic (in percent) each experiment arm should receive. The values
# are turned into bucket ranges over 100 buckets, so they should sum to 100.
probability_assignments = {"Control": 50, "Variant 1": 30, "Variant 2": 20}

# Sample customer identifiers used to demonstrate the assignment.
random_customer_ids = ["Customer1", "Customer2", "Customer3", "Customer4"]
def get_hash(customer_id):
    """Return the first 10 hex characters of the MD5 digest of *customer_id*.

    Args:
        customer_id: Customer identifier as a ``str``; it is UTF-8 encoded
            before hashing.

    Returns:
        A 10-character lowercase hexadecimal string (the leading 40 bits of
        the digest). The same identifier always yields the same string.
    """
    # MD5 serves here only as a deterministic way to spread identifiers.
    digest = hashlib.md5(customer_id.encode("utf-8")).hexdigest()
    return digest[:10]
def get_integer_representation_of_hash(customer_id):
    """Return the truncated hash of *customer_id* as an integer.

    Args:
        customer_id: Customer identifier, passed unchanged to ``get_hash``.

    Returns:
        The hexadecimal string produced by ``get_hash`` parsed as a base-16
        integer, i.e. a value in ``0 <= value < 16 ** 10``.
    """
    hex_prefix = get_hash(customer_id)
    return int(hex_prefix, 16)
def get_assigned_bucket(customer_id, total_buckets):
    """Map *customer_id* to a bucket number by hashing.

    Args:
        customer_id: Customer identifier to hash.
        total_buckets: Number of buckets to spread customers over.

    Returns:
        The integer hash of the customer modulo ``total_buckets``; for a
        positive ``total_buckets`` this lies in ``0 <= bucket < total_buckets``.
    """
    hash_as_int = get_integer_representation_of_hash(customer_id)
    return hash_as_int % total_buckets
def divide_space_into_partitions(prob_distribution):
    """Turn a series of weights into consecutive ``(start, end)`` ranges.

    Args:
        prob_distribution: Iterable of numeric weights, one per group, in the
            order the groups should occupy the bucket space.

    Returns:
        A list with one ``(start, end)`` tuple per weight. The first range
        starts at 0 and every later range starts where the previous one ends,
        so ``end - start`` equals the corresponding weight. ``assign_groups``
        treats each range as half-open: ``start <= bucket < end``.
    """
    partition_ranges = []
    start = 0
    for weight in prob_distribution:
        end = start + weight
        partition_ranges.append((start, end))
        # The next range begins exactly where this one stops.
        start = end
    return partition_ranges
def assign_groups(customer_id, partitions):
    """Return the index of the partition that contains the customer's bucket.

    Args:
        customer_id: Customer identifier to assign.
        partitions: Iterable of ``(start, end)`` ranges, as produced by
            ``divide_space_into_partitions``.

    Returns:
        The zero-based position of the first range satisfying
        ``start <= bucket < end``, where ``bucket`` is the customer's bucket
        out of 100. Returns ``None`` when no range contains the bucket (for
        example when the ranges do not cover all of 0-99).
    """
    bucket = get_assigned_bucket(customer_id, 100)
    for idx, (start, end) in enumerate(partitions):
        if start <= bucket < end:
            return idx
    return None
# Bucket ranges for the experiment arms, in the order they appear in
# probability_assignments.
partitions = divide_space_into_partitions(
    prob_distribution=probability_assignments.values()
)

# For every sample customer, record each intermediate step of the assignment:
# [truncated hash, hash as integer, bucket out of 100, index of assigned group].
groups = {
    customer: [
        get_hash(customer),
        get_integer_representation_of_hash(customer),
        get_assigned_bucket(customer, 100),
        assign_groups(customer, partitions),
    ]
    for customer in random_customer_ids
}

# Bare expression: shows the mapping when evaluated as the last line of a
# notebook cell or in a REPL; it has no effect when run as a plain script.
groups
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You can run a simulation to test if the distribution is correct:
This gives the following output: