Last active
December 10, 2019 17:22
-
-
Save jmcastagnetto/59c26c2624e621a13582 to your computer and use it in GitHub Desktop.
Reproducing the analysis and graph in http://www.tylervigen.com/view_correlation?id=3890
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is quick and dirty code that tries to reproduce the analysis and graph from the post: | |
# "Per capita consumption of mozzarella cheese (US) correlates with | |
# Civil engineering doctorates awarded (US)" | |
# URL: http://www.tylervigen.com/view_correlation?id=3890 | |
# get the data from the article | |
library(XML) | |
url <- "http://www.tylervigen.com/view_correlation?id=3890" | |
tables <- readHTMLTable(url) | |
# we need table 3, minus a couple of extra columns | |
tab3 <- tables[[3]][1:2,-c(1,12)] | |
years <- as.numeric(colnames(tab3)) | |
# per capita consumption of mozzarella | |
cheese <- as.numeric(t(tab3)[,1]) | |
# civil engineering doctorates | |
engdoc <-as.numeric(t(tab3)[,2]) | |
# code stolen+adapted from http://robjhyndman.com/hyndsight/r-graph-with-two-y-axes/ | |
par(mar=c(5,4,4,5)+.1) | |
plot(years, cheese, type="b", lty="dashed", col="blue") | |
par(new=TRUE) | |
plot(years, engdoc, type="b", col="red",xaxt="n",yaxt="n",xlab="",ylab="") | |
axis(4) | |
mtext("engdoc",side=4,line=3) | |
legend("topleft",col=c("blue","red"),lty=1,legend=c("cheese","engdoc")) | |
# calculate the correlation | |
cor(cheese, engdoc) | |
# Made to answer a question from a friend. (Hi César!) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment