Skip to content

Instantly share code, notes, and snippets.

@wch
Created May 21, 2012 23:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wch/2765515 to your computer and use it in GitHub Desktop.
Save wch/2765515 to your computer and use it in GitHub Desktop.
R meetup ggplot2 + plyr presentation
# Useful online resources
# ggplot2 website: http://had.co.nz/ggplot2/
# Mailing list: https://groups.google.com/forum/?fromgroups#!forum/ggplot2
# Cookbook for R: http://wiki.stdout.org/rcookbook/
# Packages used by this script: ggplot2 for the plots, plus reshape2 and plyr
# for the reshaping and summarising sections near the end.
script_pkgs <- c("ggplot2", "reshape2", "plyr")
# Install only what is missing, so re-running the script does not reinstall
# packages that are already present. installed.packages() is used for the
# check because it does not load the packages being inspected.
missing_pkgs <- setdiff(script_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
# If you already have them installed, make sure they are up-to-date.
# Passing oldPkgs limits the update check to the packages this script needs
# instead of every package in the library.
update.packages(oldPkgs = script_pkgs)
library(ggplot2)
# ===== Demonstrate some basic plots =====
# Show the built-in mtcars data frame
mtcars
# Scatterplot of mpg against wt, naming every argument explicitly
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
# Same plot, but with data and mapping matched by argument position
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()
# Histogram of wt using bins that are 0.25 wide
ggplot(mtcars, aes(x = wt)) +
  geom_histogram(binwidth = 0.25)
# Show the built-in pressure data frame
pressure
# Points only
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_point()
# Points joined by a line (this plot has multiple _layers_)
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_point() +
  geom_line()
# Bar graph (with continuous x axis)
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_bar(stat = "identity")
# Bar graph with discrete x axis doesn't require stat="identity"
# Continuous/numeric vs. discrete/categorical/factor variables are treated
# very differently! Use str() to check the type of each column.
str(mtcars)
str(pressure)
str(PlantGrowth)
# View the dataset used by the plots below.
# (This used to print ToothGrowth, which none of the following plots use.)
PlantGrowth
# Points with a discrete x axis
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_point()
# Wilkinson-style dot plot
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_dotplot(binaxis = "y", stackdir = "center")
# Box plot
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_boxplot()
# ====== Setting aesthetic properties ======
# Aesthetic properties include x, y, colour, fill, shape, size
# Default point appearance
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()
# One fixed colour for every point
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(colour = "red")
# Fixed colour and fixed size
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(colour = "red", size = 4)
# ====== Mapping variables to aesthetic properties ======
# Setting gives an aesthetic a fixed value; mapping ties it to a variable
# Map disp to colour
ggplot(mtcars, aes(x = wt, y = mpg, colour = disp)) +
  geom_point()
# The mapping can also be specified in the geom;
# the geom inherits the other aesthetic properties from ggplot()
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(aes(colour = disp))
# Swap some of the variables around
ggplot(mtcars, aes(x = wt, y = disp, colour = mpg)) +
  geom_point()
# Map disp to colour and am (as a factor) to shape, both inside the geom
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(aes(colour = disp, shape = factor(am)))
# Treat cyl as discrete
ggplot(
  mtcars,
  aes(x = wt, y = mpg, colour = factor(cyl), shape = factor(am))
) +
  geom_point()
# Put cyl (as a factor) on the x axis and map wt to colour instead
ggplot(
  mtcars,
  aes(colour = wt, y = mpg, x = factor(cyl), shape = factor(am))
) +
  geom_point()
# ====== Layers (using multiple geometric objects) =======
# Example from before: a point layer followed by a line layer
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_point() +
  geom_line()
# Red line, with larger dark blue points
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_point(colour = "darkblue", size = 4) +
  geom_line(colour = "red")
# Add the line first so that the points end up on the layer above it
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_line(colour = "red") +
  geom_point(colour = "darkblue", size = 4)
# Violin plot with a semi-transparent dot plot on top
ggplot(PlantGrowth, aes(x = group, y = weight)) +
  geom_violin() +
  geom_dotplot(binaxis = "y", stackdir = "center", alpha = 0.4)
# ====== Grouping on variables ======
# Map cyl to colour (numeric, so it is treated as continuous)
ggplot(mtcars, aes(x = wt, y = mpg, colour = cyl)) +
  geom_point()
# Map cyl to colour, but treat it as a discrete variable
ggplot(mtcars, aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_point()
# Facet on one variable: one row of panels per value of cyl
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  facet_grid(cyl ~ .)
# Facet on two variables: cyl in rows, am in columns
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  facet_grid(cyl ~ am)
# ====== Stats (transformations on data) =======
# Base scatterplot that the smoothers below are added to
p <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()
# LOESS regression
p + stat_smooth()
# Linear regression
p + stat_smooth(method = "lm")
# Linear regression without the confidence region
p + stat_smooth(method = "lm", se = FALSE)
# One linear fit per cyl group, because colour is mapped to factor(cyl)
ggplot(mtcars, aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)
# One linear fit per facet
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  facet_grid(cyl ~ .)
# ====== The Grammar of Graphics =======
# ====== Using reshape2 and plyr ======
# ggplot2 always takes its data from a data frame in which each column
# is a separate variable.
# Getting the data into that shape is sometimes the hard part.
USJudgeRatings
# Work with a small subset: the first 6 rows and first 4 columns
usj <- USJudgeRatings[1:6, 1:4]
usj
# == Convert to long format ==
library(reshape2)
melt(usj)
# The names are gone: they were row names rather than a data column
# Copy the row names into a regular column first
usj
usj$name <- rownames(usj)
usj # Notice new column
# Now the melted result keeps the names
melt(usj)
# Explicitly set the id variable(s), and specify column names
melt(
  usj,
  id.vars = "name",
  variable.name = "measure",
  value.name = "number"
)
# Get help with ?melt.data.frame
# Keep the long-format result and plot it
usjw <- melt(
  usj,
  id.vars = "name",
  variable.name = "measure",
  value.name = "number"
)
ggplot(usjw, aes(x = measure, y = number)) +
  geom_violin()
# == Convert to wide format ==
dcast(usjw, name ~ measure, value.var = "number")
# ====== Data transformations with plyr ======
library(plyr)
# == Summarizing by group ==
# Basic example of summarise: collapses the whole data frame to one row
summarise(usjw, m = mean(number))
summarise(usjw, m = mean(number), s = sum(number))
# How to summarise each group, split on 'measure'
ddply(usjw, .(measure), summarise, m = mean(number))
ddply(usjw, .(measure), summarise, m = mean(number), n = length(number))
# Summarizing splitting on 'name' and 'measure' (but it's not very interesting)
ddply(usjw, .(name, measure), summarise, m = mean(number), n = length(number))
# == Transforming by group ==
# Basic example of transform: keeps every row and adds columns
transform(usjw, minus5 = number - 5)
transform(usjw, m = mean(number))
transform(usjw, normalized = number - mean(number))
# How to transform each group, split on 'measure'
# (The original repeated the same call twice; the first call now adds only
# the group mean, matching the step-by-step summarise examples above.)
ddply(usjw, .(measure), transform, m = mean(number))
ddply(
  usjw, .(measure), transform,
  m = mean(number), normalized = number - mean(number)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment