Skip to content

Instantly share code, notes, and snippets.

@dggoldst
Last active August 29, 2015 14:26
#population density
#Dan Goldstein 2015
#LIBS
library("rvest")
library("ggplot2")
library("dplyr")
library("animation")
###HARDCODES
# Edge length of the plotted square; the plot titles report it in meters.
EDGE_M <- 1000
# Point size handed to geom_point(): small dots for the 1000-unit square,
# larger dots for any other edge length.
PS <- if (EDGE_M == 1000) .2 else 2
# Number of top-ranked counties that get animated and charted.
CUTOFF <- 20
#PULL DOWN DATA IF YOU HAVE TO
# Load the county table into `df`, scraping it from Wikipedia on the first run
# and re-using a local CSV cache afterwards.
#
# The same path is used for the existence check, the write and the read, so
# the cache that gets written is the cache that gets read back (previously the
# read used a machine-specific absolute path that the check never looked at).
counties_csv <- "counties.csv"
if (!file.exists(counties_csv)) {
  #H/T http://blog.corynissen.com/2015/01/using-rvest-to-scrape-html-table.html
  # read_html() is the replacement for rvest's deprecated html().
  counties <-
    "https://en.wikipedia.org/wiki/List_of_the_most_populous_counties_in_the_United_States" %>%
    read_html() %>%
    html_nodes(xpath = '//*[@id="mw-content-text"]/table[2]') %>%
    html_table()
  counties <- counties[[1]]
  # Keep the first 100 rows and only the columns named below. Coerce to a
  # plain data.frame so a fresh scrape and a cached read give the same type.
  df <- as.data.frame(counties[1:100, c(2:5, 12:14, 17)])
  names(df) <- c(
    "county",
    "state",
    "sq_km",
    "sq_mi",
    "pop_2014",
    "density_km",
    "density_mi",
    "city"
  )
  # Columns 3:7 arrive as text with thousands separators; strip the commas
  # and convert to numeric.
  cols <- 3:7
  df[, cols] <- lapply(df[, cols], function(x) {
    as.numeric(gsub(",", "", x))
  })
  #Was a little bug in the wikipedia today
  # NOTE(review): point-in-time override of row 5's density_mi; confirm it is
  # still appropriate for whatever the page contains when this is re-run.
  df[5, 7] <- 776
  write.csv(df, counties_csv, row.names = FALSE)
} else {
  df <- read.csv(counties_csv, stringsAsFactors = FALSE)
}
#help people with some county names
# Show "Manhattan" as the city label for New York County.
df[df$county == "New York County", "city"] <- "Manhattan"
# Put the densest county (per square mile) in row 1; the code below relies on
# that ordering when it takes row 1 as the starting point.
df <- arrange(df, -density_mi)
# Row coordinate of a cell on a square grid.
#
# Args:
#   index: zero-based cell number(s); may be a vector.
#   edge:  number of cells along one side of the grid. Defaults to the
#          script-wide EDGE_M, so existing one-argument calls are unchanged.
#
# Returns:
#   The integer quotient index %/% edge, one value per element of `index`.
get_row <- function(index, edge = EDGE_M) {
  index %/% edge
}
# Column coordinate of a cell on a square grid.
#
# Args:
#   index: zero-based cell number(s); may be a vector.
#   edge:  number of cells along one side of the grid. Defaults to the
#          script-wide EDGE_M, so existing one-argument calls are unchanged.
#
# Returns:
#   The remainder index %% edge, one value per element of `index`.
get_column <- function(index, edge = EDGE_M) {
  index %% edge
}
#Do the densest city and progressively downsample from it
# Take row 1 of df and rescale its density_km value to the
# EDGE_M x EDGE_M plotting square (1000 ^ 2 / EDGE_M ^ 2 is the divisor),
# rounded to a whole number of people.
gpeople_to_draw <- round(df[1, "density_km"] / (1000 ^ 2 / EDGE_M ^ 2), 0)
# Pick that many distinct cells out of the EDGE_M * EDGE_M grid (numbered
# from 0) ...
people_ids <- sample(
  0:(EDGE_M * EDGE_M - 1),
  gpeople_to_draw,
  replace = FALSE
)
# ... and turn each cell number into plot coordinates. Every county is later
# drawn from the leading rows of this one table.
pdf <- data.frame(x = get_row(people_ids))
pdf$y <- get_column(people_ids)
#Graph one county
# Build the dot plot for one county.
#
# Args:
#   row: row number in the global `df` of the county to draw.
#
# Returns:
#   A ggplot object with one point per person in an EDGE_M x EDGE_M square;
#   the caller is responsible for printing or saving it.
#
# Reads the globals df, pdf, EDGE_M and PS.
draw_a_row <- function(row) {
  # Rescale the county's density_km value to the plotted square.
  people_to_draw <- df[row, "density_km"]
  people_to_draw <- round(people_to_draw / (1000 ^ 2 / EDGE_M ^ 2), 0) #convert to appropriate units (hectare, miles, etc)
  # The count above is per EDGE_M x EDGE_M square, so the label states both
  # sides rather than "EDGE_M square meters".
  title <- sprintf(
    "#%d: %s, %s\nHome of: %s\n%d people per %d x %d meter square",
    row,
    df[row, "county"],
    df[row, "state"],
    df[row, "city"],
    people_to_draw,
    EDGE_M,
    EDGE_M
  )
  # seq_len() gives an empty selection for a count of 0, whereas 1:0 would
  # select row 1 and draw a spurious point.
  spdf <- pdf[seq_len(people_to_draw), ]
  p <- ggplot(spdf, aes(x = x, y = y)) +
    geom_point(size = PS) +
    xlim(c(0, EDGE_M)) +
    ylim(c(0, EDGE_M)) +
    theme(aspect.ratio = 1) +
    theme(plot.title = element_text(size = rel(1))) +
    labs(x = NULL, y = NULL, title = title)
  # To also write each frame to disk, uncomment:
  #filen = sprintf("figures/%03d_%s_%s.png", row, df[row,"county"], df[row,"state"])
  #ggsave(filename=filen,plot=p,width=4, height=4,dpi=250)
  p
}
#Animate thanks to Yuhui
# Tell the animation package where the ImageMagick converter lives and set
# the frame interval option to 1.
ani.options(
  interval = 1,
  convert = "C:\\ImageMagick\\convert.exe"
)
# Render one frame per county for the first CUTOFF rows of df and assemble
# them into a GIF whose name is prefixed with the edge length.
saveGIF(
  {
    for (i in 1:CUTOFF) {
      print(draw_a_row(i))
      ani.pause()
    }
  },
  movie.name = paste(EDGE_M, "pop_dens.gif", sep = "_"),
  img.name = "onehui",
  clean = FALSE
)
#Create the leading graph
# Ranking chart: density per square mile for the first CUTOFF counties of df,
# in their current row order, saved as pop_rank.png.
#
# head() never returns more rows than df has, so a CUTOFF larger than the
# table does not introduce all-NA rows.
top_counties <- head(df, CUTOFF)
top_counties$rank <- seq_len(nrow(top_counties))
p <- ggplot(
  top_counties,
  aes(x = rank, y = density_mi, color = state, group = 1)
) +
  geom_point(size = 4) +
  # The x aesthetic is numeric, so use a continuous scale and place one
  # labelled break per county; a discrete scale is intended for
  # factor/character data.
  scale_x_continuous(
    breaks = top_counties$rank,
    labels = top_counties$county,
    minor_breaks = NULL
  ) +
  scale_y_continuous(breaks = seq(10000, 80000, by = 10000)) +
  theme(
    plot.title = element_text(size = rel(1)),
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  labs(x = "Counties Ranked by Density", y = "People Per Square Mile")
p
ggsave(
  filename = "pop_rank.png", plot = p, width = 6, height = 6
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment