Created
May 21, 2012 23:58
-
-
Save wch/2765515 to your computer and use it in GitHub Desktop.
R meetup ggplot2 + plyr presentation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Useful online resources
# ggplot2 website: http://had.co.nz/ggplot2/
# Mailing list: https://groups.google.com/forum/?fromgroups#!forum/ggplot2
# Cookbook for R: http://wiki.stdout.org/rcookbook/

# Packages this script loads with library(): ggplot2 here, reshape2 and plyr
# further down. Install only the ones that are missing, so that re-running
# the script does not reinstall ggplot2 every time.
pkgs <- c("ggplot2", "reshape2", "plyr")
is_installed <- vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
if (!all(is_installed)) {
  install.packages(pkgs[!is_installed])
}
# If you already have them installed, make sure they are up-to-date.
# oldPkgs restricts the update to these packages instead of the whole library.
update.packages(oldPkgs = pkgs)
library(ggplot2)
# ===== Demonstrate some basic plots =====
# View the built-in mtcars dataset
mtcars
# Scatterplot
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) + geom_point()
# Same, but with implicit argument names
ggplot(mtcars, aes(x = wt, y = mpg)) + geom_point()
# Histogram
ggplot(mtcars, aes(x = wt)) + geom_histogram(binwidth = 0.25)
# View the pressure dataset
pressure
# Points
ggplot(pressure, aes(x = temperature, y = pressure)) + geom_point()
# Points with lines (this has multiple _layers_)
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_point() +
  geom_line()
# Bar graph (with continuous x axis)
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_bar(stat = "identity")
# Bar graph with discrete x axis doesn't require stat="identity"
# continuous/numeric vs. discrete/categorical/factor variables are treated
# very differently!
str(mtcars)
str(pressure)
str(PlantGrowth)
# View the dataset that the following plots use
PlantGrowth
ggplot(PlantGrowth, aes(x = group, y = weight)) + geom_point()
# Wilkinson-style dot plot
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_dotplot(binaxis = "y", stackdir = "center")
# Box plot
ggplot(PlantGrowth, aes(x = group, y = weight)) + geom_boxplot()
# ====== Setting aesthetic properties ======
# Aesthetic properties include x, y, colour, fill, shape, size.
# "Setting" gives the geom a fixed value, passed outside of aes().
# Default point appearance
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
# Every point red
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(colour = "red")
# Every point red and larger
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(colour = "red", size = 4)
# ====== Mapping variables to aesthetic properties ======
# Setting is different from mapping aesthetic properties:
# a mapping goes inside aes() and ties the aesthetic to a data column.
# Map disp to colour
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, colour = disp)) +
  geom_point()
# The mapping can also be given in the geom;
# the geom inherits the remaining aesthetics (x, y) from ggplot()
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(mapping = aes(colour = disp))
# Swap some of the variables around
ggplot(data = mtcars, mapping = aes(x = wt, y = disp, colour = mpg)) +
  geom_point()
# Map disp to colour and am (as a factor) to shape, both in the geom;
# x and y are still inherited from ggplot()
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(mapping = aes(colour = disp, shape = factor(am)))
# Treat cyl as discrete
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = factor(cyl), shape = factor(am))
) +
  geom_point()
# Discrete cyl on the x axis, wt mapped to colour
ggplot(
  data = mtcars,
  mapping = aes(colour = wt, y = mpg, x = factor(cyl), shape = factor(am))
) +
  geom_point()
# ====== Layers (using multiple geometric objects) =======
# Example from before: a point layer followed by a line layer
ggplot(data = pressure, mapping = aes(x = temperature, y = pressure)) +
  geom_point() +
  geom_line()
# Large dark blue points, with a red line drawn on top of them
ggplot(data = pressure, mapping = aes(x = temperature, y = pressure)) +
  geom_point(colour = "darkblue", size = 4) +
  geom_line(colour = "red")
# Layers are drawn in the order they are added:
# adding the points last puts them above the line
ggplot(data = pressure, mapping = aes(x = temperature, y = pressure)) +
  geom_line(colour = "red") +
  geom_point(colour = "darkblue", size = 4)
# Violin plot with a semi-transparent dot plot on top
ggplot(data = PlantGrowth, mapping = aes(x = group, y = weight)) +
  geom_violin() +
  geom_dotplot(binaxis = "y", stackdir = "center", alpha = .4)
# ====== Grouping on variables ======
# Map cyl to colour (cyl is numeric here)
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, colour = cyl)) +
  geom_point()
# Map cyl to colour, but treat it as a discrete variable
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_point()
# Use facets with a variable: one row of panels per value of cyl
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  facet_grid(cyl ~ .)
# Faceting on two variables: cyl in rows, am in columns
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  facet_grid(cyl ~ am)
# ====== Stats (transformations on data) =======
# Base scatterplot that the smoothers below are added to
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
# LOESS regression
p + stat_smooth()
# Linear regression
p + stat_smooth(method = "lm")
# Linear regression without confidence region
p + stat_smooth(method = "lm", se = FALSE)
# Colour mapped to factor(cyl), with the linear fit added to the same plot
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)
# Same linear fit, with one facet row per value of cyl
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  facet_grid(cyl ~ .)
# ====== The Grammar of Graphics =======
# ====== Using reshape2 and plyr ======
# With ggplot2, data is always in a data frame, with each column
# representing a separate variable.
# Sometimes the challenge is to get it in this format.
USJudgeRatings
# Take a subset: first six rows, first four columns
usj <- USJudgeRatings[seq_len(6), seq_len(4)]
usj
# == Convert to long format ==
library(reshape2)
melt(usj)
# We lost the names because they were row names instead of a data column
# Let's save the row names
usj
usj[["name"]] <- rownames(usj)
usj # Notice new column
# Now it looks good
melt(usj)
# Explicitly set the id variable(s), and specify column names
# (get help with ?melt.data.frame)
usjw <- melt(
  usj,
  id.vars = "name",
  variable.name = "measure",
  value.name = "number"
)
usjw
# Now plot it
ggplot(data = usjw, mapping = aes(x = measure, y = number)) +
  geom_violin()
# == Convert to wide format ==
dcast(usjw, name ~ measure, value.var = "number")
# ====== Data transformations with plyr ======
library(plyr)
# == Summarizing by group ==
# Basic example of summarise: collapses the data frame to the
# requested summary columns
summarise(usjw, m = mean(number))
summarise(usjw, m = mean(number), s = sum(number))
# How to summarise each group, split on 'measure'
ddply(usjw, .(measure), summarise, m = mean(number))
ddply(usjw, .(measure), summarise, m = mean(number), n = length(number))
# Summarizing splitting on 'name' and 'measure' (but it's not very interesting)
ddply(
  usjw, .(name, measure), summarise,
  m = mean(number),
  n = length(number)
)
# == Transforming by group ==
# Basic example of transform: keeps every row and adds new columns
transform(usjw, minus5 = number - 5)
transform(usjw, m = mean(number))
transform(usjw, normalized = number - mean(number))
# How to transform each group, split on 'measure'
# (the mean is now computed within each measure, not over the whole column)
ddply(
  usjw, .(measure), transform,
  m = mean(number),
  normalized = number - mean(number)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment