Created
June 27, 2014 17:32
-
-
Save ryanjoneil/adfb2449370902f4ee3d to your computer and use it in GitHub Desktop.
preprocessing for routing problems, part 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code for the adventures in optimization post: | |
# preprocessing for routing problems: part 1 | |
library(datasets) | |
library(zipcode) | |
data(zipcode) | |
# Alexandria, VA is not in Normandy, France. | |
zipcode[zipcode$zip=='22350', c('latitude', 'longitude')] <- c(38.863930, -77.055547) | |
# New York City, NY is not in Kyrgyzstan. | |
zipcode$longitude[zipcode$zip=='10200'] <- -zipcode$longitude[zipcode$zip=='10200'] | |
# Pare down to zip codes in the continental US. | |
states_continental <- state.abb[!(state.abb %in% c('AK', 'HI'))] | |
zips_continental <- subset(zipcode, state %in% states_continental) | |
zips_deduped <- zips_continental[!duplicated(zips_continental[, c('latitude', 'longitude')]), ] | |
# Geographic information for top 25 cities in the country. | |
library(maps) | |
data(us.cities) | |
largest_cities <- subset( | |
us.cities[order(us.cities$pop, decreasing=T),][1:25,], | |
select = c('name', 'lat', 'long') | |
) | |
# Plot our corporate headquarters and every unique zip code location. | |
map('state') | |
points(zips_deduped$longitude, zips_deduped$latitude, pch=21, col=rgb(0, 0, 1, .5), cex=0.1) | |
points(largest_cities$long, largest_cities$lat, pch=21, bg=rgb(1, 0, 0, .75), col='black', cex=1.5) | |
# Euclidean distance from each HQ to each zip code. | |
library(SpatialTools) | |
zips_to_hqs_dist <- dist2( | |
matrix(c(zips_deduped$longitude, zips_deduped$latitude), ncol=2), | |
matrix(c(largest_cities$long, largest_cities$lat), ncol=2) | |
) | |
# Rank HQs by their distance to each unique zip code location. | |
hqs_to_zips_rank <- matrix(nrow=nrow(largest_cities), ncol=nrow(zips_deduped)) | |
for (i in 1:nrow(zips_deduped)) { | |
hqs_to_zips_rank[,i] <- rank(zips_to_hqs_dist[i,], ties.method='first') | |
} | |
# Now we draw regions for which Dallas is one of the closest 3 HQs. | |
hq_idx <- which(largest_cities$name == 'Dallas TX') | |
redundancy_levels <- c(3, 2, 1) | |
fill_alpha <- c(0.15, 0.30, 0.45) | |
map('state') | |
for (i in 1:length(redundancy_levels)) { | |
# Find every zip for which this HQ is within n in distance rank. | |
within_n <- hqs_to_zips_rank[hq_idx,] <= redundancy_levels[i] | |
# Convex hull of zip code points. | |
hull_order <- chull( | |
zips_deduped$longitude[within_n], | |
zips_deduped$latitude[within_n] | |
) | |
hull_x <- zips_deduped$longitude[within_n][hull_order] | |
hull_y <- zips_deduped$latitude[within_n][hull_order] | |
polygon(hull_x, hull_y, border='blue', col=rgb(0, 0, 1, fill_alpha[i])) | |
} | |
# The other HQs. | |
other_hqs = 1:nrow(largest_cities) != hq_idx | |
points( | |
largest_cities$long[other_hqs], | |
largest_cities$lat[other_hqs], | |
pch = 21, | |
bg = rgb(0.4, 0.4, 0.4, 0.6), | |
col = 'black', | |
cex = 1.5 | |
) | |
# This HQ. | |
points( | |
largest_cities$long[hq_idx], | |
largest_cities$lat[hq_idx], | |
pch = 21, | |
bg = rgb(1, 0, 0, .85), | |
col = 'black', | |
cex = 1.5 | |
) | |
# Map of regions where every zip is served only by its closest HQ. | |
map('usa') | |
for (hq_idx in 1:nrow(largest_cities)) { | |
# Find every zip for which this HQ is the closest. | |
within_1 <- hqs_to_zips_rank[hq_idx,] == 1 | |
within_1[is.na(within_1)] <- F | |
# Convex hull of zip code points. | |
hull_order <- chull( | |
zips_deduped$longitude[within_1], | |
zips_deduped$latitude[within_1] | |
) | |
hull_x <- zips_deduped$longitude[within_1][hull_order] | |
hull_y <- zips_deduped$latitude[within_1][hull_order] | |
polygon( | |
hull_x, | |
hull_y, | |
border = 'black', | |
col = rgb(0, 0, 1, 0.25) | |
) | |
} | |
# All HQs | |
points( | |
largest_cities$long, | |
largest_cities$lat, | |
pch = 21, | |
bg = rgb(1, 0, 0, .75), | |
col = 'black', | |
cex = 1.5 | |
) | |
# Write out data for the next script. | |
write.csv(largest_cities, 'largest_cities.csv') | |
write.csv(zips_deduped, 'zips_deduped.csv') | |
write.csv(hqs_to_zips_rank, 'hqs_to_zips_rank.csv', row.names=F) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment