Skip to content

Instantly share code, notes, and snippets.

@prophile
Created May 7, 2020 19:08
Show Gist options
  • Save prophile/6f15c0f673ab4a1c7bb67b27a6d521b5 to your computer and use it in GitHub Desktop.
Save prophile/6f15c0f673ab4a1c7bb67b27a6d521b5 to your computer and use it in GitHub Desktop.
import math
import pickle
import random
import tqdm
known_adjacent = set()
known_non_adjacent = set()
try:
with open('/tmp/adjacencies.pkl', 'rb') as f:
known_adjacent, known_non_adjacent = pickle.load(f)
except FileNotFoundError:
pass
def is_adjacent(x, y):
if (x, y) in known_adjacent:
return True
if (x, y) in known_non_adjacent:
return False
selection = input(f"Are {x} and {y} adjacent? [y/n] ").lower()
if selection and selection[0] == 'y':
known_adjacent.add((x, y))
known_adjacent.add((y, x))
with open('/tmp/adjacencies.pkl', 'wb') as f:
pickle.dump((known_adjacent, known_non_adjacent), f)
return True
else:
known_non_adjacent.add((x, y))
known_non_adjacent.add((y, x))
with open('/tmp/adjacencies.pkl', 'wb') as f:
pickle.dump((known_adjacent, known_non_adjacent), f)
return False
ORIGINAL_BOROUGHS = [
(373, "Croydon"),
(369, "Barnet"),
(342, "Ealing"),
(321, "Enfield"),
(318, "Newham"),
(318, "Bromley"),
(317, "Brent"),
(314, "Lambeth"),
(311, "Wandsworth"),
(298, "Southwark"),
(288, "Redbridge"),
(287, "Hillingdon"),
(286, "Lewisham"),
(273, "Tower Hamlets"),
(266, "Waltham Forest"),
(264, "Greenwich"),
(263, "Haringey"),
(262, "Hounslow"),
(257, "Hackney"),
(243, "Harrow"),
(242, "Havering"),
(237, "Bexley"),
(229, "Camden"),
(227, "Westminster"),
(216, "Islington"),
(203, "Merton"),
(196, "Sutton"),
(194, "Barking and Dagenham"),
(191, "Richmond upon Thames"),
(179, "Hammersmith & Fulham"),
(167, "Kingston upon Thames"),
(156, "Kensington & Chelsea"),
]
TARGET_BOROUGHS = 13
COUNT = 10_000
def is_superborough_adjacent(x, y):
return any(is_adjacent(p, q) for p in x for q in y)
def tform_pop(pop):
#return pop
return int(random.gauss(pop, pop * 0.1))
#return math.log(pop)
def find_solution(boroughs):
boroughs = [
(count, [name])
for count, name in boroughs
]
while len(boroughs) > TARGET_BOROUGHS:
best_pop = float('inf')
best_combo = None
for index, (left_count, left_borough) in reversed(list(enumerate(boroughs))):
for right_count, right_borough in reversed(boroughs[index + 1:]):
combined_population = tform_pop(left_count + right_count)
if combined_population < best_pop and is_superborough_adjacent(left_borough, right_borough):
best_pop = combined_population
best_combo = (left_borough, right_borough)
# Do the merge
original_total = sum(x[0] for x in boroughs)
boroughs = [
(count, superborough)
for count, superborough in boroughs
if superborough is not best_combo[0] and superborough is not best_combo[1]
]
new_total = sum(x[0] for x in boroughs)
new_superborough = best_combo[0] + best_combo[1]
#print("Merged ", best_combo[0], "and", best_combo[1], " - down to", len(boroughs) + 1, "boroughs")
boroughs.append((original_total - new_total, new_superborough))
boroughs.sort(reverse=True)
return boroughs
def entropy(solution):
total_count = sum(x[0] for x in solution)
return -sum((count / total_count) * math.log(count / total_count, 2) for count, _ in solution)
best_solution = [
(
sum(x[0] for x in ORIGINAL_BOROUGHS),
[x[1] for x in ORIGINAL_BOROUGHS],
)
]
best_entropy = entropy(best_solution)
for _ in tqdm.trange(COUNT):
solution = find_solution(ORIGINAL_BOROUGHS)
ent = entropy(solution)
if ent > best_entropy:
best_entropy = ent
best_solution = solution
for count, superborough in best_solution:
print(count, ', '.join(superborough))
print("Total entropy: {} b".format(best_entropy))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment