Last active
April 1, 2022 18:23
-
-
Save kvoli/be27efd4662e89e8918430a9c7117858 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
def check(candidates, watermark, floor=20):
    """Split ``candidates`` into targets and excluded values around the mean.

    A value is excluded only when it is strictly greater than ``floor`` AND
    strictly greater than ``mean(candidates) * watermark``. Everything else
    is kept as a target. Input order is preserved in both output lists.

    Args:
        candidates: Sized iterable of numbers.
        watermark: Multiplier applied to the mean to obtain the cutoff.
        floor: Absolute lower bound; values at or below it are never
            excluded, whatever the mean is. Defaults to 20.

    Returns:
        A ``(targets, excluded)`` tuple of lists.
    """
    count = len(candidates)
    if count == 0:
        # No values means no mean; avoid dividing by zero.
        return [], []

    mean = sum(candidates) / count
    cutoff = mean * watermark

    targets, excluded = [], []
    for value in candidates:
        over_limit = value > floor and value > cutoff
        (excluded if over_limit else targets).append(value)
    return targets, excluded
def percentile_check(candidates, percentile):
    """Split ``candidates`` into targets and excluded values by rank.

    The element at 0-based position ``i`` is given the rank
    ``(i + 1) / len(candidates)``. A value is excluded when it is strictly
    greater than 20 and its rank is strictly greater than ``percentile``;
    otherwise it is kept as a target.

    The rank depends only on position, not on the value, so the result is
    a true percentile cut only when ``candidates`` is sorted ascending.

    Args:
        candidates: Sized iterable of numbers.
        percentile: Rank threshold in the range 0..1.

    Returns:
        A ``(targets, excluded)`` tuple of lists, each in input order.
    """
    total = len(candidates)
    kept, dropped = [], []
    for position, value in enumerate(candidates, start=1):
        if value > 20 and position / total > percentile:
            dropped.append(value)
        else:
            kept.append(value)
    return kept, dropped
def test_distribution(store_list):
    """Print how each exclusion strategy splits ``store_list``.

    Runs ``check`` with a 1.1 watermark and ``percentile_check`` at the
    0.5, 0.75, 0.90 and 0.95 thresholds, then prints one line per strategy
    in the form ``<name>: +<targets> -<excluded>``.

    Args:
        store_list: Values to classify; passed unchanged to each strategy.
    """
    # Evaluate every strategy before printing anything, so a failure in
    # any of them produces no partial report.
    report = [("mean", check(store_list, 1.1))]
    for label, threshold in (("50", 0.5), ("75", 0.75), ("90", 0.90), ("95", 0.95)):
        report.append(
            (f"percentile_{label}", percentile_check(store_list, threshold))
        )

    for name, (kept, dropped) in report:
        print(f"{name}: +{kept} -{dropped}")
def run(cluster_size):
    """Sample four distributions of ``cluster_size`` values and report on each.

    For each of the uniform, gaussian, exponential and pareto distributions,
    ``cluster_size`` values are drawn from the module-level ``random``
    generator, truncated to integers and sorted ascending. All samples are
    drawn before any output is printed. Each sample is then passed to
    ``test_distribution`` under its own banner.

    Args:
        cluster_size: Number of values to draw per distribution.
    """

    def sample(draw):
        # Truncate each draw to int, then sort the whole sample ascending.
        return sorted(int(draw()) for _ in range(cluster_size))

    # Order matters: the draws consume the shared RNG stream sequentially.
    scenarios = [
        ("UNIFORM(15,45)", sample(lambda: random.uniform(15, 45))),
        ("GAUSSIAN(30,10)", sample(lambda: random.gauss(30, 10))),
        (
            "EXPONENTIAL(15)+10",
            sample(lambda: 10 + int(random.expovariate(1 / 15))),
        ),
        (
            "PARETO(1.25, 15)",
            sample(lambda: random.paretovariate(1.25) * 15),
        ),
    ]

    print(f"cluster_size = {cluster_size} nodes, targets = +[], removed = -[]")
    for title, stores in scenarios:
        print(f"\n=========[ {title} ]=========")
        test_distribution(stores)
    print("\n")
# Fixed seed so the report is reproducible from run to run.
random.seed(0x666)

# Produce one report per simulated cluster size, smallest first.
for cluster_size in (3, 5, 9, 15, 27, 41):
    run(cluster_size)
""" | |
❯ python avg_threshold.py | |
cluster_size = 3 nodes, targets = +[], removed = -[] | |
=========[ UNIFORM(15,45) ]========= | |
mean: +[34, 37, 39] -[] | |
percentile_50: +[34] -[37, 39] | |
percentile_75: +[34, 37] -[39] | |
percentile_90: +[34, 37] -[39] | |
percentile_95: +[34, 37] -[39] | |
=========[ GAUSSIAN(30,10) ]========= | |
mean: +[16, 25] -[42] | |
percentile_50: +[16] -[25, 42] | |
percentile_75: +[16, 25] -[42] | |
percentile_90: +[16, 25] -[42] | |
percentile_95: +[16, 25] -[42] | |
=========[ EXPONENTIAL(15)+10 ]========= | |
mean: +[10] -[31, 33] | |
percentile_50: +[10] -[31, 33] | |
percentile_75: +[10, 31] -[33] | |
percentile_90: +[10, 31] -[33] | |
percentile_95: +[10, 31] -[33] | |
=========[ PARETO(1.25, 15) ]========= | |
mean: +[15, 18] -[23] | |
percentile_50: +[15, 18] -[23] | |
percentile_75: +[15, 18] -[23] | |
percentile_90: +[15, 18] -[23] | |
percentile_95: +[15, 18] -[23] | |
cluster_size = 5 nodes, targets = +[], removed = -[] | |
=========[ UNIFORM(15,45) ]========= | |
mean: +[19, 20, 21, 25] -[34] | |
percentile_50: +[19, 20] -[21, 25, 34] | |
percentile_75: +[19, 20, 21] -[25, 34] | |
percentile_90: +[19, 20, 21, 25] -[34] | |
percentile_95: +[19, 20, 21, 25] -[34] | |
=========[ GAUSSIAN(30,10) ]========= | |
mean: +[15, 31] -[34, 34, 35] | |
percentile_50: +[15, 31] -[34, 34, 35] | |
percentile_75: +[15, 31, 34] -[34, 35] | |
percentile_90: +[15, 31, 34, 34] -[35] | |
percentile_95: +[15, 31, 34, 34] -[35] | |
=========[ EXPONENTIAL(15)+10 ]========= | |
mean: +[10, 12, 13, 17] -[23] | |
percentile_50: +[10, 12, 13, 17] -[23] | |
percentile_75: +[10, 12, 13, 17] -[23] | |
percentile_90: +[10, 12, 13, 17] -[23] | |
percentile_95: +[10, 12, 13, 17] -[23] | |
=========[ PARETO(1.25, 15) ]========= | |
mean: +[18, 22, 31, 35] -[76] | |
percentile_50: +[18, 22] -[31, 35, 76] | |
percentile_75: +[18, 22, 31] -[35, 76] | |
percentile_90: +[18, 22, 31, 35] -[76] | |
percentile_95: +[18, 22, 31, 35] -[76] | |
cluster_size = 9 nodes, targets = +[], removed = -[] | |
=========[ UNIFORM(15,45) ]========= | |
mean: +[16, 16, 16, 28, 28] -[33, 34, 35, 42] | |
percentile_50: +[16, 16, 16, 28] -[28, 33, 34, 35, 42] | |
percentile_75: +[16, 16, 16, 28, 28, 33] -[34, 35, 42] | |
percentile_90: +[16, 16, 16, 28, 28, 33, 34, 35] -[42] | |
percentile_95: +[16, 16, 16, 28, 28, 33, 34, 35] -[42] | |
=========[ GAUSSIAN(30,10) ]========= | |
mean: +[17, 25, 30, 31, 33, 35] -[37, 41, 42] | |
percentile_50: +[17, 25, 30, 31] -[33, 35, 37, 41, 42] | |
percentile_75: +[17, 25, 30, 31, 33, 35] -[37, 41, 42] | |
percentile_90: +[17, 25, 30, 31, 33, 35, 37, 41] -[42] | |
percentile_95: +[17, 25, 30, 31, 33, 35, 37, 41] -[42] | |
=========[ EXPONENTIAL(15)+10 ]========= | |
mean: +[12, 13, 16, 18, 19, 20, 21] -[24, 49] | |
percentile_50: +[12, 13, 16, 18, 19, 20] -[21, 24, 49] | |
percentile_75: +[12, 13, 16, 18, 19, 20] -[21, 24, 49] | |
percentile_90: +[12, 13, 16, 18, 19, 20, 21, 24] -[49] | |
percentile_95: +[12, 13, 16, 18, 19, 20, 21, 24] -[49] | |
=========[ PARETO(1.25, 15) ]========= | |
mean: +[15, 15, 16, 16, 16, 18, 20] -[35, 36] | |
percentile_50: +[15, 15, 16, 16, 16, 18, 20] -[35, 36] | |
percentile_75: +[15, 15, 16, 16, 16, 18, 20] -[35, 36] | |
percentile_90: +[15, 15, 16, 16, 16, 18, 20, 35] -[36] | |
percentile_95: +[15, 15, 16, 16, 16, 18, 20, 35] -[36] | |
cluster_size = 15 nodes, targets = +[], removed = -[] | |
=========[ UNIFORM(15,45) ]========= | |
mean: +[17, 18, 18, 27, 28, 29, 29, 31, 32, 33] -[36, 38, 38, 40, 41] | |
percentile_50: +[17, 18, 18, 27, 28, 29, 29] -[31, 32, 33, 36, 38, 38, 40, 41] | |
percentile_75: +[17, 18, 18, 27, 28, 29, 29, 31, 32, 33, 36] -[38, 38, 40, 41] | |
percentile_90: +[17, 18, 18, 27, 28, 29, 29, 31, 32, 33, 36, 38, 38] -[40, 41] | |
percentile_95: +[17, 18, 18, 27, 28, 29, 29, 31, 32, 33, 36, 38, 38, 40] -[41] | |
=========[ GAUSSIAN(30,10) ]========= | |
mean: +[4, 14, 21, 22, 23, 25, 27, 28, 30, 31] -[32, 33, 35, 49, 51] | |
percentile_50: +[4, 14, 21, 22, 23, 25, 27] -[28, 30, 31, 32, 33, 35, 49, 51] | |
percentile_75: +[4, 14, 21, 22, 23, 25, 27, 28, 30, 31, 32] -[33, 35, 49, 51] | |
percentile_90: +[4, 14, 21, 22, 23, 25, 27, 28, 30, 31, 32, 33, 35] -[49, 51] | |
percentile_95: +[4, 14, 21, 22, 23, 25, 27, 28, 30, 31, 32, 33, 35, 49] -[51] | |
=========[ EXPONENTIAL(15)+10 ]========= | |
mean: +[10, 10, 11, 13, 14, 18, 19, 23, 24, 25] -[33, 35, 38, 50, 81] | |
percentile_50: +[10, 10, 11, 13, 14, 18, 19] -[23, 24, 25, 33, 35, 38, 50, 81] | |
percentile_75: +[10, 10, 11, 13, 14, 18, 19, 23, 24, 25, 33] -[35, 38, 50, 81] | |
percentile_90: +[10, 10, 11, 13, 14, 18, 19, 23, 24, 25, 33, 35, 38] -[50, 81] | |
percentile_95: +[10, 10, 11, 13, 14, 18, 19, 23, 24, 25, 33, 35, 38, 50] -[81] | |
=========[ PARETO(1.25, 15) ]========= | |
mean: +[15, 16, 16, 18, 23, 24, 29, 35, 36, 38, 44, 47, 69] -[399, 461] | |
percentile_50: +[15, 16, 16, 18, 23, 24, 29] -[35, 36, 38, 44, 47, 69, 399, 461] | |
percentile_75: +[15, 16, 16, 18, 23, 24, 29, 35, 36, 38, 44] -[47, 69, 399, 461] | |
percentile_90: +[15, 16, 16, 18, 23, 24, 29, 35, 36, 38, 44, 47, 69] -[399, 461] | |
percentile_95: +[15, 16, 16, 18, 23, 24, 29, 35, 36, 38, 44, 47, 69, 399] -[461] | |
cluster_size = 27 nodes, targets = +[], removed = -[] | |
=========[ UNIFORM(15,45) ]========= | |
mean: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31, 33, 33, 33] -[34, 34, 35, 37, 38, 39, 39, 39, 40, 41, 44] | |
percentile_50: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31] -[33, 33, 33, 34, 34, 35, 37, 38, 39, 39, 39, 40, 41, 44] | |
percentile_75: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31, 33, 33, 33, 34, 34, 35, 37] -[38, 39, 39, 39, 40, 41, 44] | |
percentile_90: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31, 33, 33, 33, 34, 34, 35, 37, 38, 39, 39, 39] -[40, 41, 44] | |
percentile_95: +[15, 16, 17, 17, 20, 22, 25, 25, 27, 27, 29, 30, 31, 33, 33, 33, 34, 34, 35, 37, 38, 39, 39, 39, 40] -[41, 44] | |
=========[ GAUSSIAN(30,10) ]========= | |
mean: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21, 22, 24, 24] -[30, 30, 30, 33, 33, 35, 36, 41, 44, 50, 50] | |
percentile_50: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21] -[22, 24, 24, 30, 30, 30, 33, 33, 35, 36, 41, 44, 50, 50] | |
percentile_75: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21, 22, 24, 24, 30, 30, 30, 33] -[33, 35, 36, 41, 44, 50, 50] | |
percentile_90: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21, 22, 24, 24, 30, 30, 30, 33, 33, 35, 36, 41] -[44, 50, 50] | |
percentile_95: +[-1, 7, 9, 11, 13, 15, 16, 18, 19, 19, 19, 19, 21, 22, 24, 24, 30, 30, 30, 33, 33, 35, 36, 41, 44] -[50, 50] | |
=========[ EXPONENTIAL(15)+10 ]========= | |
mean: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20] -[22, 24, 32, 33, 43, 51] | |
percentile_50: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20] -[22, 24, 32, 33, 43, 51] | |
percentile_75: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20] -[22, 24, 32, 33, 43, 51] | |
percentile_90: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20, 22, 24, 32] -[33, 43, 51] | |
percentile_95: +[10, 10, 10, 11, 11, 11, 11, 12, 14, 15, 15, 15, 16, 16, 16, 17, 17, 19, 19, 20, 20, 22, 24, 32, 33] -[43, 51] | |
=========[ PARETO(1.25, 15) ]========= | |
mean: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22, 23, 24, 24, 26, 26, 26, 27, 27] -[36, 36, 38, 43, 47, 70] | |
percentile_50: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22] -[23, 24, 24, 26, 26, 26, 27, 27, 36, 36, 38, 43, 47, 70] | |
percentile_75: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22, 23, 24, 24, 26, 26, 26, 27] -[27, 36, 36, 38, 43, 47, 70] | |
percentile_90: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22, 23, 24, 24, 26, 26, 26, 27, 27, 36, 36, 38] -[43, 47, 70] | |
percentile_95: +[15, 15, 16, 16, 17, 17, 18, 18, 20, 20, 21, 21, 22, 23, 24, 24, 26, 26, 26, 27, 27, 36, 36, 38, 43] -[47, 70] | |
cluster_size = 41 nodes, targets = +[], removed = -[] | |
=========[ UNIFORM(15,45) ]========= | |
mean: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 29, 30, 30, 31, 32, 32, 33] -[34, 35, 36, 37, 38, 40, 41, 41, 43, 43, 43, 44, 44, 44] | |
percentile_50: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28] -[29, 30, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 40, 41, 41, 43, 43, 43, 44, 44, 44] | |
percentile_75: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 29, 30, 30, 31, 32, 32, 33, 34, 35, 36] -[37, 38, 40, 41, 41, 43, 43, 43, 44, 44, 44] | |
percentile_90: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 29, 30, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 40, 41, 41, 43] -[43, 43, 44, 44, 44] | |
percentile_95: +[15, 15, 16, 17, 18, 21, 21, 22, 22, 23, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 29, 30, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 40, 41, 41, 43, 43, 43] -[44, 44, 44] | |
=========[ GAUSSIAN(30,10) ]========= | |
mean: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 31, 31, 31, 32, 32] -[33, 33, 33, 33, 33, 33, 36, 36, 36, 39, 39, 40, 40, 41, 41, 43] | |
percentile_50: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30] -[31, 31, 31, 32, 32, 33, 33, 33, 33, 33, 33, 36, 36, 36, 39, 39, 40, 40, 41, 41, 43] | |
percentile_75: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 31, 31, 31, 32, 32, 33, 33, 33, 33, 33] -[33, 36, 36, 36, 39, 39, 40, 40, 41, 41, 43] | |
percentile_90: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 31, 31, 31, 32, 32, 33, 33, 33, 33, 33, 33, 36, 36, 36, 39, 39] -[40, 40, 41, 41, 43] | |
percentile_95: +[6, 11, 19, 19, 20, 21, 21, 21, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 31, 31, 31, 32, 32, 33, 33, 33, 33, 33, 33, 36, 36, 36, 39, 39, 40, 40] -[41, 41, 43] | |
=========[ EXPONENTIAL(15)+10 ]========= | |
mean: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19, 21, 22, 23, 26, 28, 30] -[32, 32, 36, 43, 43, 43, 51, 57, 64, 71, 71, 73, 77] | |
percentile_50: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19] -[21, 22, 23, 26, 28, 30, 32, 32, 36, 43, 43, 43, 51, 57, 64, 71, 71, 73, 77] | |
percentile_75: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19, 21, 22, 23, 26, 28, 30, 32, 32] -[36, 43, 43, 43, 51, 57, 64, 71, 71, 73, 77] | |
percentile_90: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19, 21, 22, 23, 26, 28, 30, 32, 32, 36, 43, 43, 43, 51, 57] -[64, 71, 71, 73, 77] | |
percentile_95: +[10, 10, 10, 10, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 15, 15, 15, 16, 16, 19, 21, 22, 23, 26, 28, 30, 32, 32, 36, 43, 43, 43, 51, 57, 64, 71] -[71, 73, 77] | |
=========[ PARETO(1.25, 15) ]========= | |
mean: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25, 25, 26, 26, 28, 30, 30, 36, 40, 43, 45, 63, 65, 71, 73, 75] -[82, 122, 142, 203, 335, 987] | |
percentile_50: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25] -[25, 26, 26, 28, 30, 30, 36, 40, 43, 45, 63, 65, 71, 73, 75, 82, 122, 142, 203, 335, 987] | |
percentile_75: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25, 25, 26, 26, 28, 30, 30, 36, 40, 43, 45] -[63, 65, 71, 73, 75, 82, 122, 142, 203, 335, 987] | |
percentile_90: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25, 25, 26, 26, 28, 30, 30, 36, 40, 43, 45, 63, 65, 71, 73, 75, 82] -[122, 142, 203, 335, 987] | |
percentile_95: +[16, 17, 17, 17, 17, 18, 18, 19, 19, 20, 21, 22, 22, 22, 23, 23, 23, 24, 24, 25, 25, 26, 26, 28, 30, 30, 36, 40, 43, 45, 63, 65, 71, 73, 75, 82, 122, 142] -[203, 335, 987] | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment