# mylesmharrison/creepypasta1.R

## creepypasta1.R
# Creepypasta.com stories, Votes vs. Rating
# http://www.everydayanalytics.ca/2014/02/creepypasta-learning-ggplot.html

library(ggplot2)

# Read in the data (comma-separated; the first row holds the column names)
data <- read.csv(file = "creepypasta_ratings.csv", header = TRUE, sep = ",")

# Base Package
# Scatter plot of Votes against Rating; the y axis is log-scaled
plot(data$Rating, data$Votes,
     pch = 16, cex = 0.8, cex.main = 1, cex.axis = 0.8,
     col = rgb(0, 0, 1, 0.25), log = "y",
     xlab = "Rating", ylab = "Votes",
     main = "Creepypasta Stories, Votes vs. Ratings")

# Fit second order polynomial of Votes on Rating (fitted on the raw Votes
# scale, not the log scale). Using `data =` instead of `data$` in the formula
# lets predict() accept new Rating values below.
l <- lm(Votes ~ Rating + I(Rating^2), data = data)

# Draw the fitted curve over an evenly spaced, increasing grid of ratings.
# Plotting predict(l) against data$Rating with type = "l" joins the points in
# row order, which zig-zags unless the rows are already sorted by Rating, and
# errors when rows with missing values were dropped from the fit (the x and y
# lengths then differ).
rating_grid <- seq(min(data$Rating, na.rm = TRUE),
                   max(data$Rating, na.rm = TRUE),
                   length.out = 200)
lines(rating_grid, predict(l, newdata = data.frame(Rating = rating_grid)))

# ggplot version: log(Votes) against Rating, with a second order polynomial
# lm smoother fitted to the plotted (log) values
gplot <- ggplot(data, aes(Rating, log(Votes))) +
  geom_point(colour = rgb(0, 0, 1, 0.25), shape = 16, size = 2) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2)) +
  labs(title = "Creepypasta Stories, Votes vs. Ratings") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, face = "bold")
  )
# Print explicitly: a bare object name is not auto-printed when the script is
# run through source(), so the plot would silently not be drawn.
print(gplot)

# Data density with hexbin: hexagonal bins of log(Votes) against Rating
hexbin <- ggplot(data, aes(Rating, log(Votes))) +
  stat_binhex() +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  )
# Print explicitly: a bare object name is not auto-printed when the script is
# run through source(), so the plot would silently not be drawn.
print(hexbin)
	# Creepypasta.com stories, Votes vs. Rating
	# http://www.everydayanalytics.ca/2014/02/creepypasta-learning-ggplot.html

	library(ggplot2)

	# NOTE(review): this tab-indented section repeats the script above it
	# step for step, so every plot is produced twice -- confirm whether the
	# duplicate is intentional.
	# Read in the data (comma-separated; the first row holds the column names)
	data <- read.csv(file = "creepypasta_ratings.csv", header = TRUE, sep = ",")

	# Base Package
	# Scatter plot of Votes against Rating; the y axis is log-scaled
	plot(data$Rating, data$Votes,
	     pch = 16, cex = 0.8, cex.main = 1, cex.axis = 0.8,
	     col = rgb(0, 0, 1, 0.25), log = "y",
	     xlab = "Rating", ylab = "Votes",
	     main = "Creepypasta Stories, Votes vs. Ratings")

	# Fit second order polynomial of Votes on Rating (fitted on the raw Votes
	# scale, not the log scale). Using `data =` instead of `data$` in the
	# formula lets predict() accept new Rating values below.
	l <- lm(Votes ~ Rating + I(Rating^2), data = data)

	# Draw the fitted curve over an evenly spaced, increasing grid of ratings.
	# Plotting predict(l) against data$Rating with type = "l" joins the points
	# in row order, which zig-zags unless the rows are already sorted by
	# Rating, and errors when rows with missing values were dropped from the
	# fit (the x and y lengths then differ).
	rating_grid <- seq(min(data$Rating, na.rm = TRUE),
	                   max(data$Rating, na.rm = TRUE),
	                   length.out = 200)
	lines(rating_grid, predict(l, newdata = data.frame(Rating = rating_grid)))

	# ggplot version: log(Votes) against Rating, with a second order polynomial
	# lm smoother fitted to the plotted (log) values
	gplot <- ggplot(data, aes(Rating, log(Votes))) +
	  geom_point(colour = rgb(0, 0, 1, 0.25), shape = 16, size = 2) +
	  geom_smooth(method = "lm", formula = y ~ poly(x, 2)) +
	  labs(title = "Creepypasta Stories, Votes vs. Ratings") +
	  theme_bw() +
	  theme(
	    axis.text = element_text(size = 14),
	    axis.title = element_text(size = 14),
	    plot.title = element_text(size = 16, face = "bold")
	  )
	# Print explicitly: a bare object name is not auto-printed when the script
	# is run through source(), so the plot would silently not be drawn.
	print(gplot)

	# Data density with hexbin: hexagonal bins of log(Votes) against Rating
	hexbin <- ggplot(data, aes(Rating, log(Votes))) +
	  stat_binhex() +
	  theme_bw() +
	  theme(
	    axis.text = element_text(size = 14),
	    axis.title = element_text(size = 14)
	  )
	# Print explicitly: a bare object name is not auto-printed when the script
	# is run through source(), so the plot would silently not be drawn.
	print(hexbin)