Skip to content

Instantly share code, notes, and snippets.

@mittenchops
Created May 1, 2014 14:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mittenchops/09aee25e5041f567729d to your computer and use it in GitHub Desktop.
Save mittenchops/09aee25e5041f567729d to your computer and use it in GitHub Desktop.
College-age-population from census
library(XML)
library(ggplot2)
#url = paste("http://www.census.gov/population/international/data/idb/region.php?N=%20Results%20&T=15&A=separate&RT=0&Y=","1980","&R=-1&C=US", sep="")
# Years to query and the matching Census IDB single-year-of-age report URLs
# (one URL per year; `paste0` is vectorised over `years`).
# NOTE(review): confirm this endpoint is still served before re-running.
years <- seq(1980, 2030, 1)
urls <- paste0(
  "http://www.census.gov/population/international/data/idb/region.php?N=%20Results%20&T=15&A=separate&RT=0&Y=",
  years,
  "&R=-1&C=US"
)

# Download one report page and total the population aged 18-21.
#
# url: a single report URL (one element of `urls`).
# Returns a one-row data.frame with columns `year` (taken from the first
# cell of the parsed table) and `college_age` (sum of column 3 over the rows
# whose `Age` is 18, 19, 20 or 21). Also prints the pair as progress output.
fetch_college_age <- function(url) {
  # First table on the page; keep text as character so commas can be stripped.
  dd <- readHTMLTable(url, header = TRUE, stringsAsFactors = FALSE)[[1]]
  # Column 3 holds counts formatted with thousands separators, e.g. "4,321".
  x <- gsub(",", "", dd[dd$Age %in% 18:21, 3])
  year <- dd[1, 1]
  college_age <- sum(as.numeric(x))
  print(c(year, college_age))
  data.frame(year, college_age)
}

# One row per requested year. Rows are bound once at the end instead of
# growing `dat` inside a loop, and the number of requests follows
# length(urls) rather than a hard-coded upper bound.
dat <- do.call(rbind, lapply(urls, fetch_college_age))
# Observed population aged 18-21, one value per year from 1980 to 2014,
# padded with NA for 2015-2020 so the vector spans 1980-2020 (41 entries).
# Presumably these are the totals printed by the scrape above -- verify
# before relying on them.
college_age <- c(
  17386534, 17341780, 17247510, 16942353, 16498293, # 1980-1984
  16006873, 15506789, 15243414, 15306448, 15549318, # 1985-1989
  15588574, 15236959, 14746012, 14317793, 14118348, # 1990-1994
  14285144, 14560048, 14878046, 15411520, 15776626, # 1995-1999
  16193746, 16525402, 16578517, 16649489, 16768581, # 2000-2004
  16873446, 17073589, 17229801, 17568600, 17837266, # 2005-2009
  18015350, 18100420, 17947995, 17652535, 17363580, # 2010-2014
  rep(NA, 6)                                        # 2015-2020
)

# Forecast series on the same 1980-2020 axis: NA for 1980-2013, then values
# for 2014-2020. The 2014 entry repeats the last observed value so the two
# series share a point.
fcast_age <- c(
  rep(NA, 34), # 1980-2013
  17363580,    # 2014
  17154541,    # 2015
  16995028,    # 2016
  16916891,    # 2017
  16995547,    # 2018
  17040619,    # 2019
  16995030     # 2020
)

# Plot data: one row per year, both series rescaled to millions.
df <- data.frame(
  date = seq(from = 1980, to = 2020),
  college = college_age / 1e6,
  forecast = fcast_age / 1e6
)
# Build the chart from `df`: observed values as points joined by a solid
# line, forecast values as a thinner dashed line, plus a labelled arrow
# pointing at the 1994 low point. Requires ggplot2 to be attached.
#
# Both series are NA-padded to a common year axis, so every layer drops its
# missing rows with na.rm = TRUE instead of warning about them.
cplot <- ggplot(df, aes(x = date, y = college)) +
  theme_bw() +
  geom_point(colour = "black", size = 2, na.rm = TRUE) +
  geom_line(size = 1, na.rm = TRUE) +
  geom_line(aes(y = forecast), size = 0.5, linetype = 2, na.rm = TRUE) +
  xlab("Year") +
  ylab("Millions of Students") +
  ggtitle("College Age Population by Year (in Millions)") +
  annotate("text", label = "Bottom (1994)", x = 2005, y = 14, size = 3) +
  # arrow() and unit() live in grid; qualifying them avoids depending on
  # grid being attached or on ggplot2 re-exporting them.
  annotate("segment", x = 2003, y = 14, xend = 1995, yend = 14.1, size = 0.75,
           arrow = grid::arrow(length = grid::unit(0.2, "cm")))

# Write the chart to students.png in the working directory (12in x 5in).
ggsave(filename = "students.png", plot = cplot,
       height = 5, width = 12, units = "in")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment