Skip to content

Instantly share code, notes, and snippets.

@TimSC
Last active February 22, 2023 20:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TimSC/7be7cb2efefaf908e6e9c23c55b0fe80 to your computer and use it in GitHub Desktop.
Save TimSC/7be7cb2efefaf908e6e9c23c55b0fe80 to your computer and use it in GitHub Desktop.
A script to group delivery areas into routes of roughly similar size.
# A script to group delivery areas into routes of roughly similar size.
import csv
from sklearn import cluster
from pyproj import Proj, transform
import numpy as np
from matplotlib import pyplot as plt
class EqualWeightClustering(object):
# A greedy clustering approach
def __init__(self, n_clusters):
self.n_clusters = int(n_clusters)
self.uneven_weight_cost = 1.5
self.n_restarts = 10
def fit(self, samples, sample_weight=None):
samples = np.array(samples)
if sample_weight is None:
sample_weight = np.ones(sample.shape[0])
else:
sample_weight = np.array(sample_weight)
final_assignment = None
final_cost = None
for restart_num in range(self.n_restarts):
print ("restart", restart_num, "of", self.n_restarts)
assignment = np.array(np.random.rand(samples.shape[0]) * self.n_clusters, dtype=np.uint16)
self.best_fit = assignment
self.best_cost = self.cost_func(samples, sample_weight, assignment)
#print ("initial cost", self.best_cost)
prev_score = None
prev_best = None
iter_score = None
iter_best = None
while prev_score is None or iter_score < prev_score:
prev_score = iter_score
prev_best = iter_best
iter_best = None
iter_score = None
#Try various changes to assignments and see what is best
for i in range(assignment.shape[0]):
candidate_assignment = assignment.copy()
for j in range(self.n_clusters):
if j == candidate_assignment[i]: continue
candidate_assignment[i] = j
iter_cost = self.cost_func(samples, sample_weight, candidate_assignment)
#print (i, j, candidate_assignment, iter_cost)
if iter_score is None or iter_cost < iter_score:
iter_score = iter_cost
iter_best = candidate_assignment.copy()
print ("iter_score", iter_score)
assignment = iter_best
if final_cost is None or iter_score < final_cost:
final_cost = iter_score
final_assignment = iter_best
if prev_score < final_cost:
final_cost = prev_score
final_assignment = prev_best
print ("final_cost", final_cost)
return final_assignment
def cost_func(self, samples, sample_weight, assignment):
total_cost = 0.0
for c in range(self.n_clusters):
cmask = assignment == c
csamples = samples[cmask]
cweights = sample_weight[cmask]
#Calc cluster centre
totalWeight = np.sum(cweights)
if csamples.shape[0] > 0 and totalWeight > 0.0:
r = np.average(csamples, axis=0, weights=cweights)
else:
r = np.zeros(csamples.shape[1])
#Calc distances from centre
if csamples.shape[0] > 0:
cost = np.sum(np.power(np.sum(np.power(csamples - r, 2.0), axis=1), 0.5))
else:
cost = 0.0
total_cost += cost
average_weight = np.sum(sample_weight) / self.n_clusters
for c in range(self.n_clusters):
cmask = assignment == c
cweights = sample_weight[cmask]
cost = np.abs(self.uneven_weight_cost * (np.sum(cweights) - average_weight))
total_cost += cost
return total_cost
def ClusterZone(data, areaCode="IA", n_clusters=4):
#kmeans = cluster.KMeans(n_clusters=n_clusters, algorithm='lloyd')
kmeans = EqualWeightClustering(n_clusters=n_clusters)
deliveriesCollect = []
posCollect = []
inProj = Proj(init='epsg:4326')
outProj = Proj(init='epsg:27700')
processedRows = []
for li in data:
if li['Area'] != areaCode: continue
try:
deliveries = int(li['Deliveries (estimate)'])
except ValueError:
print (0, pos, None)
deliveriesCollect.append(0)
posCollect.append((0, 0))
processedRows.append(li)
continue
pos = list(map(float, li['Lat/Lon'].split(",")))
x2,y2 = transform(inProj,outProj,pos[1],pos[0])
print (deliveries, pos, (x2, y2))
deliveriesCollect.append(deliveries)
posCollect.append((x2,y2))
processedRows.append(li)
print (deliveriesCollect)
model = kmeans.fit(posCollect, sample_weight = deliveriesCollect)
predicted = model
#predicted = model.predict(posCollect, sample_weight = deliveriesCollect)
#predicted = model.predict(posCollect)
if 0:
for i in range(n_clusters):
filteredXY = np.array(posCollect)[predicted == i]
filteredWeights = np.array(deliveriesCollect)[predicted == i]
plt.scatter(filteredXY[:,0] , filteredXY[:,1], label="Route {}".format(i))
print ("route", i, filteredWeights.sum())
out = csv.writer(open("out.csv", "wt"))
out.writerow(predicted)
del out
for pred, row in zip(predicted, data):
print ("{}\t{}".format(pred, row['Address Range'], row['Deliveries (estimate)']))
plt.legend()
plt.show()
return predicted, processedRows
if __name__=="__main__":
data = list(csv.DictReader(open("/home/tim/Desktop/NelsonRoads.csv", "rt")))
overall_n_clusters=34
areaTotals = {}
for li in data:
areaCode = li['Area']
try:
numDeliveries = int(li['Deliveries (estimate)'])
except ValueError:
numDeliveries = None
if numDeliveries is not None:
if areaCode in areaTotals:
areaTotals[areaCode] += numDeliveries
else:
areaTotals[areaCode] = numDeliveries
#Scale area deliveies to number of routes
out = csv.writer(open("out.csv", "wt"))
out.writerow(list(data[0].keys())+["route"])
totalDeliveries = sum(areaTotals.values())
totalRoutesGen = 0
for areaCode in areaTotals:
#if areaCode != "ID": continue
numRoutes = int(round(areaTotals[areaCode] * overall_n_clusters / totalDeliveries))
predicted, processedRows = ClusterZone(data, areaCode=areaCode, n_clusters=numRoutes)
for pred, row in zip(predicted, processedRows):
rowMod = list(row.values()) + [pred+totalRoutesGen+1]
out.writerow(rowMod)
totalRoutesGen += numRoutes
print (areaCode, numRoutes, totalRoutesGen)
del out
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment