Created
July 7, 2018 05:29
-
-
Save jrwiebe/f61121a17123645a5de68c9b7cb03f39 to your computer and use it in GitHub Desktop.
7-Elevens by city (an exercise in learning R). Generate list of North American stores ("all-na.csv") with "getsevs.sh" before running R scripts.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Canadian 7-Eleven analysis, part 1: load city populations and count the
# stores in each Canadian city. Expects "all-na.csv" in the working directory.
library(dplyr)

ca_cities <- read.csv(url("https://simplemaps.com/static/data/country-cities/ca/ca.csv"))

# Add cities to the table that will be in the top 10 list.
# The rows are built as a typed data frame (not an rbind of character vectors)
# so that appending them does not coerce the population columns to character.
# NA is used for the fields we have no value for because it is type-neutral.
missing_df <- data.frame(
  c("Burnaby", "Surrey"),
  NA,
  NA,
  "Canada",
  "CA",
  "British Columbia",
  NA,
  c(232755, 517887),
  c(232755, 517887),
  stringsAsFactors = FALSE
)
names(missing_df) <- names(ca_cities)
ca_cities <- rbind.data.frame(ca_cities, missing_df)

# Lookup from the full province/territory name found in `admin` to its
# two-letter abbreviation, which is what the store list uses.
prov_names <- c(
  "British Columbia", "Alberta", "Saskatchewan", "Manitoba", "Ontario",
  "Québec", "New Brunswick", "Nova Scotia", "Prince Edward Island",
  "Newfoundland and Labrador", "Nunavut", "Northwest Territories", "Yukon"
)
prov_abb <- c(
  "BC", "AB", "SK", "MB", "ON", "QC", "NB", "NS", "PE", "NL", "NU", "NT", "YT"
)
names(prov_abb) <- prov_names

# Column 5 holds the country abbreviation ("CA" in the rows added above).
names(ca_cities)[5] <- "country_abb"
# Names not present in prov_names map to NA.
ca_cities$state_prov_abb <- unname(prov_abb[as.character(ca_cities$admin)])

cols <- c("city", "state_prov_abb", "country_abb", "population", "population_proper")
ca_cities_pop <- subset(ca_cities, select = cols)

# The store list has no header row, so the column names are assigned here.
na_stores <- read.csv(file = "all-na.csv", header = FALSE, sep = ",")
names(na_stores) <- c("id", "address", "city", "state_prov_abb", "country_abb")

# Number of stores per Canadian city, largest first. Ungrouped at the end so
# later row-wise operations are not affected by leftover grouping.
city_totals <- na_stores %>%
  filter(country_abb == "CA") %>%
  group_by(city, state_prov_abb, country_abb) %>%
  tally(sort = TRUE) %>%
  ungroup()
#' Look up the population of one city.
#'
#' @param city City name to match against `cities$city`.
#' @param state_prov Province/state abbreviation to match against
#'   `cities$state_prov_abb`.
#' @param country Country abbreviation to match against `cities$country_abb`.
#' @param cities Data frame with columns `city`, `state_prov_abb`,
#'   `country_abb` and `population`. Defaults to the script-level
#'   `ca_cities_pop` table.
#' @return A single numeric value: the population of the first matching row,
#'   or `NA` when no row matches.
get_pop <- function(city, state_prov, country, cities = ca_cities_pop) {
  # which() drops NA comparisons, so rows with a missing key never match.
  hits <- which(
    cities$city == city &
      cities$state_prov_abb == state_prov &
      cities$country_abb == country
  )
  if (length(hits) == 0) {
    return(NA_real_)
  }
  # Always return exactly one value: several rows can share the same
  # city/province/country key, and a longer result would break callers that
  # expect one population per city.
  pop <- cities$population[hits[1]]
  if (is.factor(pop)) {
    # Convert via the label, not the internal level code.
    pop <- as.character(pop)
  }
  as.numeric(pop)
}
# Canadian 7-Eleven analysis, part 2: stores per 100,000 residents and the
# top-10 bar chart.

# One population per city. vapply() walks the rows directly instead of
# apply(), which would first coerce the whole data frame to a character
# matrix, and it guarantees a plain numeric vector even if get_pop() hands
# back more than one value for a city.
city_pops <- vapply(
  seq_len(nrow(city_totals)),
  function(i) {
    pop <- get_pop(
      as.character(city_totals$city[i]),
      as.character(city_totals$state_prov_abb[i]),
      as.character(city_totals$country_abb[i])
    )
    as.numeric(pop[1])
  },
  numeric(1)
)
# Alternative metric: city_totals$pop_per_store <- city_pops / city_totals$n
city_totals$stores_per_100k <- city_totals$n / (city_pops / 10^5)

library(ggplot2)

# city_totals is sorted by store count, so the first rows are the top cities.
top_cities <- head(city_totals, 10)
# "City, PR" is used as the x value itself, so each bar carries its own label
# rather than relying on a positional label vector that can drift out of step
# with the bar order when store counts tie.
top_cities$label <- paste(top_cities$city, top_cities$state_prov_abb, sep = ", ")

g <- ggplot(top_cities, aes(x = reorder(label, -n), y = n)) +
  geom_col(aes(fill = label)) +
  # Store count above each bar.
  geom_text(aes(label = n), vjust = -0.5, size = 3) +
  guides(fill = "none") +
  # Bubble at mid-height of each bar, sized by stores per 100,000 people;
  # cities with no known population are dropped silently.
  geom_point(
    aes(x = label, y = n / 2, size = stores_per_100k),
    shape = 21, colour = "#FF6A6A", fill = "#FAFAD2", alpha = 0.75,
    na.rm = TRUE
  ) +
  geom_text(
    aes(
      x = label, y = n / 2,
      label = ifelse(
        stores_per_100k > 0,
        sprintf("%0.2f", round(stores_per_100k, digits = 2)),
        ""
      )
    ),
    colour = "#FF6A6A", size = 2.5, na.rm = TRUE
  ) +
  # A single unlabelled break: the legend shows one bubble as a key for the
  # measure, the actual values are printed inside the bubbles.
  scale_size_continuous(
    range = c(8, 16), name = "Stores per 100,000 people",
    breaks = c(5), labels = c("")
  ) +
  theme_minimal() +
  theme(
    text = element_text(colour = "grey40"),
    axis.text.x = element_text(
      angle = 90, hjust = 1, vjust = 1, margin = margin(-15, 0, 0, 0)
    ),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, margin = margin(10, 250, 0, 50)),
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    legend.title = element_text(size = 10),
    legend.position = "bottom",
    legend.justification = c(1, 0),
    legend.margin = margin(-10, 5, 0, 0)
  ) +
  labs(
    title = "7-Eleven stores by city in Canada (Top 10)",
    caption = "Sources: https://www.7-eleven.com/locator, https://simplemaps.com/data/ca-cities"
  )
# ggsave("chart-ca.png", plot = g)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# North American 7-Eleven analysis, part 1: build one Canada + US city
# population table and count the stores in each city.
# Expects "all-na.csv" in the working directory.
library(dplyr)

# --- Canadian cities ---------------------------------------------------------
ca_cities <- read.csv(url("https://simplemaps.com/static/data/country-cities/ca/ca.csv"))

# Full province/territory names and their two-letter abbreviations; prov_abb
# is a named vector used as a name -> abbreviation lookup.
prov_names <- c(
  "British Columbia", "Alberta", "Saskatchewan", "Manitoba", "Ontario",
  "Québec", "New Brunswick", "Nova Scotia", "Prince Edward Island",
  "Newfoundland and Labrador", "Nunavut", "Northwest Territories", "Yukon"
)
prov_abb <- setNames(
  c("BC", "AB", "SK", "MB", "ON", "QC", "NB", "NS", "PE", "NL", "NU", "NT", "YT"),
  prov_names
)

# Give the fifth column and the new abbreviation column the names shared with
# the store list.
colnames(ca_cities)[5] <- "country_abb"
ca_cities$state_prov_abb <- prov_abb[as.character(ca_cities$admin)]

# --- US cities ---------------------------------------------------------------
us_cities <- read.csv(url("https://simplemaps.com/static/data/us-cities/uscitiesv1.4.csv"))
colnames(us_cities)[3] <- "state_prov_abb"
us_cities[["country_abb"]] <- "US"

# --- Combined population table ----------------------------------------------
cols <- c("city", "state_prov_abb", "country_abb", "population", "population_proper")
na_cities_pop <- rbind(ca_cities[cols], us_cities[cols])

# --- Store list (no header row in the file) ----------------------------------
na_stores <- setNames(
  read.csv("all-na.csv", header = FALSE),
  c("id", "address", "city", "state_prov_abb", "country_abb")
)

# Stores per city across both countries, largest first.
city_totals <- na_stores %>%
  group_by(city, state_prov_abb, country_abb) %>%
  tally(sort = TRUE)
#' Look up the population of one city.
#'
#' @param city City name to match against `cities$city`.
#' @param state_prov Province/state abbreviation to match against
#'   `cities$state_prov_abb`.
#' @param country Country abbreviation to match against `cities$country_abb`.
#' @param cities Data frame with columns `city`, `state_prov_abb`,
#'   `country_abb` and `population`. Defaults to the script-level
#'   `na_cities_pop` table.
#' @return A single numeric value: the population of the first matching row,
#'   or `NA` when no row matches.
get_pop <- function(city, state_prov, country, cities = na_cities_pop) {
  # which() drops NA comparisons, so rows with a missing key never match.
  hits <- which(
    cities$city == city &
      cities$state_prov_abb == state_prov &
      cities$country_abb == country
  )
  if (length(hits) == 0) {
    return(NA_real_)
  }
  # Always return exactly one value: several rows can share the same
  # city/state/country key, and a longer result would break callers that
  # expect one population per city.
  pop <- cities$population[hits[1]]
  if (is.factor(pop)) {
    # Convert via the label, not the internal level code.
    pop <- as.character(pop)
  }
  as.numeric(pop)
}
# North American 7-Eleven analysis, part 2: stores per 100,000 residents and
# the top-25 bar chart.

# One population per city. vapply() walks the rows directly instead of
# apply(), which would first coerce the whole data frame to a character
# matrix, and it guarantees a plain numeric vector even if get_pop() hands
# back more than one value for a city.
city_pops <- vapply(
  seq_len(nrow(city_totals)),
  function(i) {
    pop <- get_pop(
      as.character(city_totals$city[i]),
      as.character(city_totals$state_prov_abb[i]),
      as.character(city_totals$country_abb[i])
    )
    as.numeric(pop[1])
  },
  numeric(1)
)
# Alternative metric: city_totals$pop_per_store <- city_pops / city_totals$n
city_totals$stores_per_100k <- city_totals$n / (city_pops / 10^5)

library(ggplot2)

# city_totals is sorted by store count, so the first rows are the top cities.
top_cities <- head(city_totals, 25)
# "City, ST" is used as the x value itself: same-named cities in different
# states/provinces stay separate bars, and labels cannot drift out of step
# with the bar order when store counts tie.
top_cities$label <- paste(top_cities$city, top_cities$state_prov_abb, sep = ", ")

g <- ggplot(top_cities, aes(x = reorder(label, -n), y = n)) +
  geom_col(aes(fill = label)) +
  # Store count above each bar.
  geom_text(aes(label = n), vjust = -0.5, size = 3) +
  guides(fill = "none") +
  # Bubble at mid-height of each bar, sized by stores per 100,000 people;
  # cities with no known population are dropped silently.
  geom_point(
    aes(x = label, y = n / 2, size = stores_per_100k),
    shape = 21, colour = "#FF6A6A", fill = "#FAFAD2", alpha = 0.75,
    na.rm = TRUE
  ) +
  geom_text(
    aes(
      x = label, y = n / 2,
      label = ifelse(
        stores_per_100k > 0,
        sprintf("%0.2f", round(stores_per_100k, digits = 2)),
        ""
      )
    ),
    colour = "#FF6A6A", size = 2.5, na.rm = TRUE
  ) +
  # A single unlabelled break: the legend shows one bubble as a key for the
  # measure, the actual values are printed inside the bubbles.
  scale_size_continuous(
    range = c(6, 12), name = "Stores per 100,000 people",
    breaks = c(5), labels = c("")
  ) +
  theme_minimal() +
  theme(
    text = element_text(colour = "grey40"),
    axis.text.x = element_text(
      angle = 90, hjust = 1, vjust = 1, margin = margin(-15, 0, 0, 0)
    ),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, margin = margin(10, 250, 0, 50)),
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    legend.title = element_text(size = 10),
    legend.position = "bottom",
    legend.justification = c(1, 0),
    legend.margin = margin(-10, 5, 0, 0)
  ) +
  labs(
    title = "7-Eleven stores by city in North America (Top 25)",
    caption = "Sources: https://www.7-eleven.com/locator, https://simplemaps.com/data/ca-cities, https://simplemaps.com/data/us-cities"
  )
# ggsave("chart-na.png", plot = g)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Print every store returned by the 7-Eleven locator API as CSV rows
# (id, address, city, state, country) on stdout. Requires curl and jq.
#
# Usage: ./getsevs.sh > all-na.csv
#
# Exits non-zero (with a message on stderr) if a token cannot be obtained or
# a page request fails, instead of looping forever on an error response.

# Make a failing curl fail the whole `curl | jq` pipeline.
set -o pipefail

limit=500
offset=0

# id and secret appear to be hard coded in https://www.7-eleven.com/locator
client_id='sl3rgdU5c5ZvsYj95FGIuexau5Nt7J5OTf7VRPfV'
client_secret='11BBlWqIeLenwAmPOKqz8WN5NIZRCCSBSEcBtp9DikLh90WL217OlaCvghuDJu'
client_secret+='cGP5wG12VW2vQ7FRAzUMcYtOOrLtcd4eMqShsOJJKZnJOL5snAnih0uyUN8ZEURXPh'

# Exchange the client credentials for a bearer token.
token_url='https://api.7-eleven.com/auth/token/'
token_body="client_id=${client_id}&client_secret=${client_secret}"
token_body+="&grant_type=client_credentials"
token="$(curl -sf "$token_url" --data "$token_body" \
  | jq -r '.access_token // empty')"
if [[ -z "$token" ]]; then
  echo "getsevs: could not obtain an access token" >&2
  exit 1
fi

stores_url="https://api.7-eleven.com/v4/stores/?features=&limit=${limit}&offset="
declare -a request_args=(
  -H "Authorization: Bearer $token"
  -H "Accept-Encoding: gzip, deflate, br"
  --compressed
)

# Walk the result pages until the API returns an empty list.
while :; do
  if ! page="$(curl -sf "${stores_url}${offset}" "${request_args[@]}" \
    | jq -c '.results')"; then
    echo "getsevs: request failed at offset ${offset}" >&2
    exit 1
  fi
  # A missing "results" key (e.g. an error payload) shows up as null.
  if [[ -z "$page" || "$page" == "null" ]]; then
    echo "getsevs: unexpected response at offset ${offset}" >&2
    exit 1
  fi
  if [[ "$page" == "[]" ]]; then
    break
  fi
  # printf with a quoted variable keeps the JSON intact; an unquoted echo
  # would word-split it and expand any glob characters in addresses.
  printf '%s\n' "$page" \
    | jq -r '.[] | [.id, .address, .city, .state, .country] | @csv'
  offset=$((offset + limit))
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment