Skip to content

Instantly share code, notes, and snippets.

@yabyzq
Last active September 16, 2016 13:11
Show Gist options
  • Save yabyzq/cdf0ddf2c6f16320a8bfc453ed620cdb to your computer and use it in GitHub Desktop.
Save yabyzq/cdf0ddf2c6f16320a8bfc453ed620cdb to your computer and use it in GitHub Desktop.
R Tutorial - qplot
library(ggplot2)
# Work on a random 500-row sample of diamonds rather than the full data set.
dsample <- diamonds[sample(nrow(diamonds), size = 500), ]

# Basic scatter plot: qplot(x, y, data).
qplot(x = carat, y = price, data = dsample)

# Bring in two more variables through point colour and point shape.
qplot(x = carat, y = price, data = dsample, color = color, shape = cut)

# Wrapping a value in I() sets it as a constant; here every point gets size 3.
qplot(
  x = carat, y = price, data = dsample,
  color = color, shape = cut, size = I(3)
)

# Semi-transparent points make overlapping observations visible.
qplot(
  x = carat, y = price, data = dsample,
  color = color, shape = cut, size = I(3), alpha = I(3/10)
)

# Overlay a smoothed trend on top of the points.
qplot(
  x = carat, y = price, data = dsample,
  size = I(3), alpha = I(3/10),
  geom = c("point", "smooth")
)

# Same trend without the confidence band.
qplot(
  x = carat, y = price, data = dsample,
  size = I(3), alpha = I(3/10),
  geom = c("point", "smooth"), se = FALSE
)

# A very small span makes the smoother chase the noise (overfitting).
qplot(
  x = carat, y = price, data = dsample,
  size = I(3), alpha = I(3/10),
  geom = c("point", "smooth"), se = FALSE, span = 0.1
)

# Fit the trend with a linear model instead.
qplot(
  x = carat, y = price, data = dsample,
  size = I(3), alpha = I(3/10),
  geom = c("point", "smooth"), method = "lm"
)
# qplot is not limited to scatter plots: jittered points per colour grade.
qplot(x = color, y = carat, data = diamonds, geom = "jitter")

# Price per carat by colour; heavy transparency exposes where points pile up.
qplot(
  x = color, y = price / carat, data = diamonds,
  geom = "jitter", alpha = I(1/15)
)

# Summary-statistic view of the same comparison: one box plot per colour.
qplot(x = color, y = carat, data = diamonds, geom = "boxplot", color = color)
# Histogram of carat with the default binning.
qplot(x = carat, data = diamonds, geom = "histogram")

# Choose the bin width explicitly.
qplot(x = carat, data = diamonds, geom = "histogram", binwidth = 0.1)

# Restrict the x axis to 0-3 carats.
qplot(
  x = carat, data = diamonds, geom = "histogram",
  binwidth = 0.1, xlim = c(0, 3)
)

# Supply explicit bin edges with breaks instead of binwidth plus xlim.
qplot(
  x = carat, data = diamonds, geom = "histogram",
  breaks = seq(1, 3, by = 0.1)
)

# Much narrower bins give a finer-grained picture.
qplot(
  x = carat, data = diamonds, geom = "histogram",
  binwidth = 0.01, xlim = c(0, 3)
)

# Fill the bars by diamond colour to add another dimension.
qplot(
  x = carat, data = diamonds, geom = "histogram",
  binwidth = 0.1, xlim = c(0, 3), fill = color
)

# Density curve: shows the distribution rather than raw counts.
qplot(x = carat, data = diamonds, geom = "density")

# One filled density curve per colour.
qplot(x = carat, data = diamonds, geom = "density", fill = color)

# Same curves, coloured outline only.
qplot(x = carat, data = diamonds, geom = "density", color = color)

# Small multiples: one histogram panel (row) per colour.
qplot(
  x = carat, data = diamonds, facets = color ~ .,
  geom = "histogram", binwidth = 0.1, xlim = c(0, 3)
)

# Same panels with the computed density on the y axis instead of the count.
qplot(
  x = carat, y = ..density.., data = diamonds, facets = color ~ .,
  geom = "histogram", binwidth = 0.1, xlim = c(0, 3)
)
# Time series: line charts over the economics data set.
# Share of the population that is unemployed, by date.
qplot(
  date, unemploy / pop,
  data = economics, geom = "line",
  ylab = "unemployment ratio"
)
# uempmed by date, labelled as weeks in unemployment.
qplot(
  date, uempmed,
  data = economics, geom = "line",
  ylab = "weeks in unemployment"
)
# Base-graphics version of the two time series, drawn side by side.
# par() returns the settings it replaces; they are restored after the two
# plots so the 1 x 2 layout does not carry over to later base-graphics plots.
old_par <- par(mfrow = c(1, 2))
plot(economics$date, economics$unemploy / economics$pop, type = "l")
plot(economics$date, economics$uempmed, type = "l")
par(old_par)
# Lay out two ggplot2 charts side by side with grid / gridExtra.
library(grid)
library(gridExtra)

# Build each panel as its own plot object first ...
p1 <- qplot(x = date, y = unemploy / pop, data = economics, geom = "line")
p2 <- qplot(x = date, y = uempmed, data = economics, geom = "line")

# ... then place them in a single row of two columns.
grid.arrange(p1, p2, ncol = 2)
# Relationship between the unemployment ratio and uempmed.
qplot(x = unemploy / pop, y = uempmed, data = economics, geom = "point")

# Same points (size 3) with a smoothed trend and no confidence band.
qplot(
  x = unemploy / pop, y = uempmed, data = economics,
  geom = c("point", "smooth"), se = FALSE, size = I(3)
)
#Longer time to find a job, what does this mean?
# Return the calendar year of a date-like value `x`.
# The `year` field of a POSIXlt object counts years since 1900, so 1900 is
# added back to obtain the full year.
year <- function(x) {
  broken_down <- as.POSIXlt(x)
  broken_down$year + 1900
}
# Colour each observation by its year so the change over time is visible.
qplot(
  x = unemploy / pop,
  y = uempmed,
  data = economics,
  geom = "point",
  color = year(date),
  size = I(4),
  alpha = I(1/2),
  xlab = 'unemployment rate',
  ylab = 'weeks in unemployment'
)
# ggplot(): build the same carat histogram in three ways.

# 1. Fully explicit layer(). Geom and stat settings are passed together in
#    `params`, and the position adjustment must be named; "stack" matches
#    what geom_histogram() uses.
ggplot(diamonds, aes(x = carat)) +
  layer(
    geom = "bar",
    stat = "bin",
    position = "stack",
    params = list(fill = "steelblue", binwidth = 0.5)
  )

# 2. The geom_histogram() shortcut for the layer above.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram(binwidth = 0.5, fill = "steelblue")

# 3. Step by step: start from a plot object that has data and aesthetics
#    but no layers yet ...
p <- ggplot(diamonds, aes(x = carat))
p

# ... add the histogram layer and print again ...
p <- p + geom_histogram(binwidth = 0.5, fill = "steelblue")
p

# ... and inspect the structure of the finished plot object.
summary(p)
# A layer can be stored in a variable and added to several plots.
library(scales)

# Semi-transparent linear-model fit line without a confidence band.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
bestfit <- geom_smooth(
  method = "lm",
  se = FALSE,
  color = alpha("steelblue", 0.5),
  size = 2
)

# Reuse the stored layer on two different msleep scatter plots.
qplot(sleep_rem, sleep_total, data = msleep) + bestfit
qplot(awake, brainwt, data = msleep, log = "y") + bestfit
# Scatter plot of wt against mpg, coloured by cyl.
p <- ggplot(mtcars, aes(mpg, wt, color = cyl)) + geom_point()
p

# %+% replaces the data behind an existing plot. The transformed copy gets
# its own name so the built-in mtcars is not shadowed; the plots below
# therefore keep showing the real mpg values.
mtcars_squared <- transform(mtcars, mpg = mpg^2)
p %+% mtcars_squared

# Aesthetics declared in ggplot() are the defaults for every layer.
p <- ggplot(mtcars, aes(x = mpg, y = wt))
p + geom_point()
# A layer can add an aesthetic on top of the defaults ...
p + geom_point(aes(colour = factor(cyl)))
# ... or override one of them (y becomes disp for this layer only).
p + geom_point(aes(y = disp))

# Setting vs. mapping: color = "darkblue" outside aes() sets a constant colour.
# The commented-out variant puts it inside aes(), which maps the string as if
# it were a data value instead.
p <- ggplot(mtcars, aes(mpg, wt))
p + geom_point(color = "darkblue") # p + geom_point(aes(color = "darkblue"))
# Histogram with the computed density on the y axis and a bin width of 0.1.
ggplot(diamonds, aes(carat)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.1)

# Base plot limited to 0-3 carats, reused by the stat_bin() variants below.
d <- ggplot(diamonds, aes(carat)) + xlim(0, 3)

# The same binning statistic drawn with three different geoms; ..count.. and
# ..density.. refer to variables computed by stat_bin().
d + stat_bin(aes(ymax = ..count..), binwidth = 0.1, geom = "area")
d + stat_bin(
  aes(size = ..density..),
  binwidth = 0.1, geom = "point", position = "identity"
)
# NOTE(review): recent ggplot2 releases may reject a y aesthetic combined
# with stat_bin() - confirm against the installed version.
d + stat_bin(
  aes(y = 1, fill = ..count..),
  binwidth = 0.1, geom = "tile", position = "identity"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment