Skip to content

Instantly share code, notes, and snippets.

@kvoli
Last active April 1, 2022 18:23
Show Gist options
  • Save kvoli/be27efd4662e89e8918430a9c7117858 to your computer and use it in GitHub Desktop.
Save kvoli/be27efd4662e89e8918430a9c7117858 to your computer and use it in GitHub Desktop.
import random
def check(candidates, watermark, min_value=20):
    """Split candidates into targets and excluded values using a mean watermark.

    A candidate is excluded when it is strictly greater than ``min_value``
    and strictly greater than ``mean(candidates) * watermark``. Every other
    candidate is kept as a target. Input order is preserved in both lists.

    Args:
        candidates: Sized iterable of numbers to classify.
        watermark: Multiplier applied to the mean to form the exclusion
            threshold (e.g. ``1.1`` means "10% above the mean").
        min_value: Absolute floor; values at or below it are never excluded.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        A ``(targets, excluded)`` tuple of lists.
    """
    if not candidates:
        # No values means no mean; return empty results instead of
        # raising ZeroDivisionError.
        return [], []

    # Evaluated as (sum / len) * watermark, computed once for the whole pass.
    threshold = sum(candidates) / len(candidates) * watermark

    targets, excluded = [], []
    for value in candidates:
        if value > min_value and value > threshold:
            excluded.append(value)
        else:
            targets.append(value)
    return targets, excluded
def percentile_check(candidates, percentile, min_value=20):
    """Split candidates into targets and excluded values using a rank percentile.

    Candidates are ranked in ascending order; the value at 1-based rank ``r``
    out of ``n`` has percentile ``r / n``. A candidate is excluded when it is
    strictly greater than ``min_value`` and its percentile is strictly greater
    than ``percentile``. Every other candidate is kept as a target.

    Args:
        candidates: Iterable of numbers to classify. It does not need to be
            sorted; it is ranked on a sorted copy and is not modified.
        percentile: Cut-off in the range 0..1 (e.g. ``0.75``).
        min_value: Absolute floor; values at or below it are never excluded.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        A ``(targets, excluded)`` tuple of lists, each in ascending order.
    """
    # Rank by value, not by position: with unsorted input the positional
    # rank would say nothing about where a value sits in the distribution.
    ordered = sorted(candidates)
    total = len(ordered)

    targets, excluded = [], []
    for rank, value in enumerate(ordered, start=1):
        if value > min_value and rank / total > percentile:
            excluded.append(value)
        else:
            targets.append(value)
    return targets, excluded
def test_distribution(store_list):
    """Print how each exclusion strategy splits ``store_list``.

    Runs the mean-based check with a 1.1 watermark and the percentile-based
    check at 0.5, 0.75, 0.90 and 0.95, then prints one line per strategy in
    the form ``<label>: +<targets> -<excluded>``.

    Args:
        store_list: List of numeric values passed unchanged to ``check`` and
            ``percentile_check``.
    """
    percentile_cutoffs = (
        ("percentile_50", 0.5),
        ("percentile_75", 0.75),
        ("percentile_90", 0.90),
        ("percentile_95", 0.95),
    )

    # Evaluate every strategy before printing so a failure in any of them
    # surfaces before partial output is written.
    report = [("mean", check(store_list, 1.1))]
    for label, cutoff in percentile_cutoffs:
        report.append((label, percentile_check(store_list, cutoff)))

    for label, (kept, dropped) in report:
        print(f"{label}: +{kept} -{dropped}")
def run(cluster_size):
    """Sample four distributions of ``cluster_size`` values and report on each.

    Draws ``cluster_size`` integers from a uniform, a Gaussian, a shifted
    exponential and a scaled Pareto distribution (in that order), sorts each
    sample ascending, and passes each one to ``test_distribution`` under a
    banner naming the distribution.

    Args:
        cluster_size: Number of values to draw per distribution.
    """

    def sample(draw):
        # One draw per node, returned in ascending order.
        return sorted(draw() for _ in range(cluster_size))

    # All samples are drawn up front, in this order, before any output;
    # changing the order would change which random numbers each one gets.
    uniform = sample(lambda: int(random.uniform(15, 45)))
    gaussian = sample(lambda: int(random.gauss(30, 10)))
    exponential = sample(lambda: 10 + int(random.expovariate(1 / 15)))
    pareto = sample(lambda: int(random.paretovariate(1.25) * 15))

    print(f"cluster_size = {cluster_size} nodes, targets = +[], removed = -[]")

    sections = (
        ("\n=========[ UNIFORM(15,45) ]=========", uniform),
        ("\n=========[ GAUSSIAN(30,10) ]=========", gaussian),
        ("\n=========[ EXPONENTIAL(15)+10 ]=========", exponential),
        ("\n=========[ PARETO(1.25, 15) ]=========", pareto),
    )
    for banner, stores in sections:
        print(banner)
        test_distribution(stores)

    print("\n")
# Fixed seed so every invocation draws the same samples.
random.seed(0x666)

# Report on progressively larger clusters.
for cluster_size in (3, 5, 9, 15, 27, 41):
    run(cluster_size)
"""
❯ python avg_threshold.py
cluster_size = 3 nodes, targets = +[], removed = -[]
=========[ UNIFORM(15,45) ]=========
mean: +[34, 37, 39] -[]
percentile_50: +[34] -[37, 39]
percentile_75: +[34, 37] -[39]
percentile_90: +[34, 37] -[39]
percentile_95: +[34, 37] -[39]
=========[ GAUSSIAN(30,10) ]=========
mean: +[16, 25] -[42]
percentile_50: +[16] -[25, 42]
percentile_75: +[16, 25] -[42]
percentile_90: +[16, 25] -[42]
percentile_95: +[16, 25] -[42]
=========[ EXPONENTIAL(15)+10 ]=========
mean: +[10] -[31, 33]
percentile_50: +[10] -[31, 33]
percentile_75: +[10, 31] -[33]
percentile_90: +[10, 31] -[33]
percentile_95: +[10, 31] -[33]
=========[ PARETO(1.25, 15) ]=========
mean: +[15, 18] -[23]
percentile_50: +[15, 18] -[23]
percentile_75: +[15, 18] -[23]
percentile_90: +[15, 18] -[23]
percentile_95: +[15, 18] -[23]
cluster_size = 5 nodes, targets = +[], removed = -[]
=========[ UNIFORM(15,45) ]=========
mean: +[19, 20, 21, 25] -[34]
percentile_50: +[19, 20] -[21, 25, 34]
percentile_75: +[19, 20, 21] -[25, 34]
percentile_90: +[19, 20, 21, 25] -[34]
percentile_95: +[19, 20, 21, 25] -[34]
=========[ GAUSSIAN(30,10) ]=========
mean: +[15, 31] -[34, 34, 35]
percentile_50: +[15, 31] -[34, 34, 35]
percentile_75: +[15, 31, 34] -[34, 35]
percentile_90: +[15, 31, 34, 34] -[35]
percentile_95: +[15, 31, 34, 34] -[35]
=========[ EXPONENTIAL(15)+10 ]=========
mean: +[10, 12, 13, 17] -[23]
percentile_50: +[10, 12, 13, 17] -[23]
percentile_75: +[10, 12, 13, 17] -[23]
percentile_90: +[10, 12, 13, 17] -[23]
percentile_95: +[10, 12, 13, 17] -[23]
=========[ PARETO(1.25, 15) ]=========
mean: +[18, 22, 31, 35] -[76]
percentile_50: +[18, 22] -[31, 35, 76]
percentile_75: +[18, 22, 31] -[35, 76]
percentile_90: +[18, 22, 31, 35] -[76]
percentile_95: +[18, 22, 31, 35] -[76]
cluster_size = 9 nodes, targets = +[], removed = -[]
=========[ UNIFORM(15,45) ]=========
mean: +[16, 16, 16, 28, 28] -[33, 34, 35, 42]
percentile_50: +[16, 16, 16, 28] -[28, 33, 34, 35, 42]
percentile_75: +[16, 16, 16, 28, 28, 33] -[34, 35, 42]
percentile_90: +[16, 16, 16, 28, 28, 33, 34, 35] -[42]
percentile_95: +[16, 16, 16, 28, 28, 33, 34, 35] -[42]
=========[ GAUSSIAN(30,10) ]=========
mean: +[17, 25, 30, 31, 33, 35] -[37, 41, 42]
percentile_50: +[17, 25, 30, 31] -[33, 35, 37, 41, 42]
percentile_75: +[17, 25, 30, 31, 33, 35] -[37, 41, 42]
percentile_90: +[17, 25, 30, 31, 33, 35, 37, 41] -[42]
percentile_95: +[17, 25, 30, 31, 33, 35, 37, 41] -[42]
=========[ EXPONENTIAL(15)+10 ]=========
mean: +[12, 13, 16, 18, 19, 20, 21] -[24, 49]
percentile_50: +[12, 13, 16, 18, 19, 20] -[21, 24, 49]
percentile_75: +[12, 13, 16, 18, 19, 20] -[21, 24, 49]
percentile_90: +[12, 13, 16, 18, 19, 20, 21, 24] -[49]
percentile_95: +[12, 13, 16, 18, 19, 20, 21, 24] -[49]
=========[ PARETO(1.25, 15) ]=========
mean: +[15, 15, 16, 16, 16, 18, 20] -[35, 36]
percentile_50: +[15, 15, 16, 16, 16, 18, 20] -[35, 36]
percentile_75: +[15, 15, 16, 16, 16, 18, 20] -[35, 36]
percentile_90: +[15, 15, 16, 16, 16, 18, 20, 35] -[36]
percentile_95: +[15, 15, 16, 16, 16, 18, 20, 35] -[36]
cluster_size = 15 nodes, targets = +[], removed = -[]
=========[ UNIFORM(15,45) ]=========
mean: +[17, 18, 18, 27, 28, 29, 29, 31, 32, 33] -[36, 38, 38, 40, 41]
percentile_50: +[17, 18, 18, 27, 28, 29, 29] -[31, 32, 33, 36, 38, 38, 40, 41]
percentile_75: +[17, 18, 18, 27, 28, 29, 29, 31, 32, 33, 36] -[38, 38, 40, 41]
percentile_90: +[17, 18, 18, 27, 28, 29, 29, 31, 32, 33, 36, 38, 38] -[40, 41]
percentile_95: +[17, 18, 18, 27, 28, 29, 29, 31, 32, 33, 36, 38, 38, 40] -[41]
=========[ GAUSSIAN(30,10) ]=========
mean: +[4, 14, 21, 22, 23, 25, 27, 28, 30, 31] -[32, 33, 35, 49, 51]
percentile_50: +[4, 14, 21, 22, 23, 25, 27] -[28, 30, 31, 32, 33, 35, 49, 51]
percentile_75: +[4, 14, 21, 22, 23, 25, 27, 28, 30, 31, 32] -[33, 35, 49, 51]
percentile_90: +[4, 14, 21, 22, 23, 25, 27, 28, 30, 31, 32, 33, 35] -[49, 51]
percentile_95: +[4, 14, 21, 22, 23, 25, 27, 28, 30, 31, 32, 33, 35, 49] -[51]
=========[ EXPONENTIAL(15)+10 ]=========
mean: +[10, 10, 11, 13, 14, 18, 19, 23, 24, 25] -[33, 35, 38, 50, 81]
percentile_50: +[10, 10, 11, 13, 14, 18, 19] -[23, 24, 25, 33, 35, 38, 50, 81]
percentile_75: +[10, 10, 11, 13, 14, 18, 19, 23, 24, 25, 33] -[35, 38, 50, 81]
percentile_90: +[10, 10, 11, 13, 14, 18, 19, 23, 24, 25, 33, 35, 38] -[50, 81]
percentile_95: +[10, 10, 11, 13, 14, 18, 19, 23, 24, 25, 33, 35, 38, 50] -[81]
=========[ PARETO(1.25, 15) ]=========
mean: +[15, 16, 16, 18, 23, 24, 29, 35, 36, 38, 44, 47, 69] -[399, 461]
percentile_50: +[15, 16, 16, 18, 23, 24, 29] -[35, 36, 38, 44, 47, 69, 399, 461]
percentile_75: +[15, 16, 16, 18, 23, 24, 29, 35, 36, 38, 44] -[47, 69, 399, 461]
percentile_90: +[15, 16, 16, 18, 23, 24, 29, 35, 36, 38, 44, 47, 69] -[399, 461]
percentile_95: +[15, 16, 16, 18, 23, 24, 29, 35, 36, 38, 44, 47, 69, 399] -[461]
cluster_size = 27 nodes, targets = +[], removed = -[]
=========[ UNIFORM(15,45) ]=========
mean: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31, 33, 33, 33] -[34, 34, 35, 37, 38, 39, 39, 39, 40, 41, 44]
percentile_50: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31] -[33, 33, 33, 34, 34, 35, 37, 38, 39, 39, 39, 40, 41, 44]
percentile_75: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31, 33, 33, 33, 34, 34, 35, 37] -[38, 39, 39, 39, 40, 41, 44]
percentile_90: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31, 33, 33, 33, 34, 34, 35, 37, 38, 39, 39, 39] -[40, 41, 44]
percentile_95: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31, 33, 33, 33, 34, 34, 35, 37, 38, 39, 39, 39, 40] -[41, 44]
=========[ GAUSSIAN(30,10) ]=========
mean: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21, 22, 24, 24] -[30, 30, 30, 33, 33, 35, 36, 41, 44, 50, 50]
percentile_50: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21] -[22, 24, 24, 30, 30, 30, 33, 33, 35, 36, 41, 44, 50, 50]
percentile_75: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21, 22, 24, 24, 30, 30, 30, 33] -[33, 35, 36, 41, 44, 50, 50]
percentile_90: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21, 22, 24, 24, 30, 30, 30, 33, 33, 35, 36, 41] -[44, 50, 50]
percentile_95: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21, 22, 24, 24, 30, 30, 30, 33, 33, 35, 36, 41, 44] -[50, 50]
=========[ EXPONENTIAL(15)+10 ]=========
mean: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20] -[22, 24, 32, 33, 43, 51]
percentile_50: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20] -[22, 24, 32, 33, 43, 51]
percentile_75: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20] -[22, 24, 32, 33, 43, 51]
percentile_90: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20, 22, 24, 32] -[33, 43, 51]
percentile_95: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20, 22, 24, 32, 33] -[43, 51]
=========[ PARETO(1.25, 15) ]=========
mean: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22, 23, 24, 24, 26, 26, 26, 27, 27] -[36, 36, 38, 43, 47, 70]
percentile_50: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22] -[23, 24, 24, 26, 26, 26, 27, 27, 36, 36, 38, 43, 47, 70]
percentile_75: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22, 23, 24, 24, 26, 26, 26, 27] -[27, 36, 36, 38, 43, 47, 70]
percentile_90: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22, 23, 24, 24, 26, 26, 26, 27, 27, 36, 36, 38] -[43, 47, 70]
percentile_95: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22, 23, 24, 24, 26, 26, 26, 27, 27, 36, 36, 38, 43] -[47, 70]
cluster_size = 41 nodes, targets = +[], removed = -[]
=========[ UNIFORM(15,45) ]=========
mean: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 29, 30, 30, 31, 32, 32, 33] -[34, 35, 36, 37, 38, 40, 41, 41, 43, 43, 43, 44, 44, 44]
percentile_50: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28] -[29, 30, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 40, 41, 41, 43, 43, 43, 44, 44, 44]
percentile_75: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 29, 30, 30, 31, 32, 32, 33, 34, 35, 36] -[37, 38, 40, 41, 41, 43, 43, 43, 44, 44, 44]
percentile_90: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 29, 30, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 40, 41, 41, 43] -[43, 43, 44, 44, 44]
percentile_95: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 29, 30, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 40, 41, 41, 43, 43, 43] -[44, 44, 44]
=========[ GAUSSIAN(30,10) ]=========
mean: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 31, 31, 31, 32, 32] -[33, 33, 33, 33, 33, 33, 36, 36, 36, 39, 39, 40, 40, 41, 41, 43]
percentile_50: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30] -[31, 31, 31, 32, 32, 33, 33, 33, 33, 33, 33, 36, 36, 36, 39, 39, 40, 40, 41, 41, 43]
percentile_75: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 31, 31, 31, 32, 32, 33, 33, 33, 33, 33] -[33, 36, 36, 36, 39, 39, 40, 40, 41, 41, 43]
percentile_90: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 31, 31, 31, 32, 32, 33, 33, 33, 33, 33, 33, 36, 36, 36, 39, 39] -[40, 40, 41, 41, 43]
percentile_95: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 31, 31, 31, 32, 32, 33, 33, 33, 33, 33, 33, 36, 36, 36, 39, 39, 40, 40] -[41, 41, 43]
=========[ EXPONENTIAL(15)+10 ]=========
mean: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19, 21, 22, 23, 26, 28, 30] -[32, 32, 36, 43, 43, 43, 51, 57, 64, 71, 71, 73, 77]
percentile_50: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19] -[21, 22, 23, 26, 28, 30, 32, 32, 36, 43, 43, 43, 51, 57, 64, 71, 71, 73, 77]
percentile_75: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19, 21, 22, 23, 26, 28, 30, 32, 32] -[36, 43, 43, 43, 51, 57, 64, 71, 71, 73, 77]
percentile_90: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19, 21, 22, 23, 26, 28, 30, 32, 32, 36, 43, 43, 43, 51, 57] -[64, 71, 71, 73, 77]
percentile_95: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19, 21, 22, 23, 26, 28, 30, 32, 32, 36, 43, 43, 43, 51, 57, 64, 71] -[71, 73, 77]
=========[ PARETO(1.25, 15) ]=========
mean: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25, 25, 26, 26, 28, 30, 30, 36, 40, 43, 45, 63, 65, 71, 73, 75] -[82, 122, 142, 203, 335, 987]
percentile_50: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25] -[25, 26, 26, 28, 30, 30, 36, 40, 43, 45, 63, 65, 71, 73, 75, 82, 122, 142, 203, 335, 987]
percentile_75: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25, 25, 26, 26, 28, 30, 30, 36, 40, 43, 45] -[63, 65, 71, 73, 75, 82, 122, 142, 203, 335, 987]
percentile_90: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25, 25, 26, 26, 28, 30, 30, 36, 40, 43, 45, 63, 65, 71, 73, 75, 82] -[122, 142, 203, 335, 987]
percentile_95: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25, 25, 26, 26, 28, 30, 30, 36, 40, 43, 45, 63, 65, 71, 73, 75, 82, 122, 142] -[203, 335, 987]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment