Skip to content

Instantly share code, notes, and snippets.

@andybega
Last active December 17, 2015 02:09
Show Gist options
  • Save andybega/5533454 to your computer and use it in GitHub Desktop.
Save andybega/5533454 to your computer and use it in GitHub Desktop.
Plot Duke average GPA from 1932 on
library(XML)
library(plyr)
# Download the Duke grade-inflation page and parse it into an HTML document.
# readLines() returns one string per line of the page; that character vector
# is handed to htmlParse().
duke.html <- readLines("http://www.gradeinflation.com/Duke.html")
duke.doc <- htmlParse(duke.html)
# Read the table(s) on the page without converting them to data frames
# (as.data.frame = FALSE), then bind the result into a single data frame.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
# NOTE(review): assumes the page holds exactly one two-column table
# (year, GPA) -- confirm against the live page.
duke <- readHTMLTable(duke.doc, header = FALSE, as.data.frame = FALSE)
duke <- data.frame(duke, stringsAsFactors = FALSE)
colnames(duke) <- c("year", "gpa")
# Convert both columns to numeric; the literal "n.d." in the GPA column is
# mapped to NA before conversion.
duke$year <- as.numeric(duke$year)
duke$gpa <- as.numeric(ifelse(duke$gpa == "n.d.", NA, duke$gpa))
# Some years are absent from the table; add a row with gpa = NA for each of
# them so the series has a row for every year between the first and the last.
years_covered <- seq(min(duke$year), max(duke$year))
missing_years <- years_covered[!years_covered %in% duke$year]
# Build the NA column with rep() so it always has the same length as the year
# column: data.frame(year = <empty>, gpa = NA) stops with "arguments imply
# differing number of rows" when no year is missing.
missing_years <- data.frame(
  year = missing_years,
  gpa = rep(NA_real_, length(missing_years))
)
duke <- rbind(duke, missing_years)
# Restore chronological order after appending the filler rows.
duke <- duke[order(duke$year), ]
# Scatter plot of GPA against year. The default x axis is suppressed
# (xaxt = "n") so that custom tick positions can be drawn below.
plot(
  x = duke$year, y = duke$gpa,
  type = "p", pch = 20,
  ylim = c(2, 4),
  xlab = "year", ylab = "GPA",
  xaxt = "n"
)
# Tick positions: both ends of the covered range passed through
# round_any(., 10), stepping by 10 in between.
x_ticks <- seq(
  from = round_any(min(years_covered), 10),
  to = round_any(max(years_covered), 10),
  by = 10
)
axis(side = 1, at = x_ticks)
grid(col = "gray50", lty = 3)
# Interpolate GPA with a spline so that years without a value are filled in.
# The spline is fitted on (year, gpa) for the observed rows only and evaluated
# at every year through xout, so each fitted value belongs to its own row.
# Calling spline(duke$gpa, n = ...) instead uses row positions as x and spreads
# n points evenly between the first and last non-missing positions, which no
# longer lines up with duke$year when the first or last row has no GPA.
has_gpa <- !is.na(duke$gpa)
duke$gpa_inter <- spline(
  duke$year[has_gpa], duke$gpa[has_gpa],
  xout = duke$year
)$y
lines(duke$year, duke$gpa_inter, col = "gray80")
# Year-to-year change in the interpolated GPA, drawn as vertical bars
# (type = "h"). diff() returns one value fewer than there are rows, so the
# first year is dropped to pair each change with the later of the two years.
# Negative indexing replaces 2:dim(duke)[1], which counts downwards (2, 1)
# when the data frame has a single row.
plot(duke$year[-1], diff(duke$gpa_inter), col = "gray50", pch = 20,
     xlab = "year", ylab = "Change from prev. year", type = "h", lwd = 2,
     xaxt = "n")
axis(1, at = x_ticks)
grid(col = "gray50", lty = 3)
# Red horizontal reference line at zero change.
abline(h = 0, col = "red")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment