Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Creepypasta.com stories, Votes vs. Rating
# http://www.everydayanalytics.ca/2014/02/creepypasta-learning-ggplot.html
library(ggplot2)
# Read in the data (expects at least the numeric columns Rating and Votes,
# which are the only ones used below)
data <- read.csv(file = 'creepypasta_ratings.csv', header = TRUE, sep = ',')
# Base Package
# Plot: translucent points so overlapping stories show up as darker regions;
# the y axis (Votes) is drawn on a log scale
plot(data$Rating, data$Votes, pch = 16, cex.main = 1, cex.axis = 0.8, cex = 0.8,
     col = rgb(0, 0, 1, 0.25), log = 'y',
     ylab = 'Votes', xlab = 'Rating', main = 'Creepypasta Stories, Votes vs. Ratings')
# Fit second order polynomial. The model is fitted on the raw Votes; only the
# axis above is log-scaled. Using data= (rather than data$ columns in the
# formula) lets predict() evaluate the fit at new Rating values.
l <- lm(Votes ~ Rating + I(Rating^2), data = data)
# Draw the fitted curve over the sorted, unique ratings. Connecting the
# predictions in data-row order would join them with chords across the plot
# whenever the rows are not already sorted by Rating, and predicting on a grid
# avoids a length mismatch when lm() dropped rows containing NA.
rating_grid <- sort(unique(data$Rating))
lines(rating_grid, predict(l, newdata = data.frame(Rating = rating_grid)))
# ggplot2 counterpart of the scatter plot: log(Votes) against Rating as
# translucent points, overlaid with a linear-model smooth using a second
# order polynomial in x, on the black-and-white theme with enlarged text.
gplot <- ggplot(data, aes(x = Rating, y = log(Votes))) +
  geom_point(colour = rgb(0, 0, 1, 0.25), shape = 16, size = 2) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2)) +
  labs(title = "Creepypasta Stories, Votes vs. Ratings") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, face = "bold")
  )
# Evaluating the object at top level prints (renders) the plot
gplot
# Data density with hexbin: same axes as the scatter plot, but the points are
# binned into hexagons so that crowded regions remain readable.
hexbin <- ggplot(data, aes(x = Rating, y = log(Votes))) +
  stat_binhex() +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  )
# Evaluating the object at top level prints (renders) the plot
hexbin
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment