Skip to content

Instantly share code, notes, and snippets.

@renkun-ken
Last active August 29, 2015 14:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save renkun-ken/5c3acfc699ce75e2df8a to your computer and use it in GitHub Desktop.
Save renkun-ken/5c3acfc699ce75e2df8a to your computer and use it in GitHub Desktop.
A Pipe version with recommended practice

A Pipe version of Zev Ross's example.

An alternative version of timelyportfolio's and smbache's.

If you are not familiar with pipeR, you can read the still-in-progress pipeR tutorial. The best practices of pipeR's Pipe():

  • Keep side effects as small as possible; use them only when necessary.
  • Separate Pipes into concentrated pieces.
  • Don't avoid necessary intermediate variables.
  • The purpose is always readability, not throwing everything into a big pipe.

After all, using a pipeline does not automatically make your code better!

library(pipeR)
library(dplyr)
library(ggplot2)
library(mgcv)
# Data preparation ----
# Read the CSV from the URL (as.is = TRUE keeps character columns as
# character), convert `date` to Date, keep rows dated after 1996-12-31,
# and add `year` as the first four characters of the date.
pdata <- Pipe(
  "http://zevross.com/blog/wp-content/uploads/2014/08/chicago-nmmaps.csv"
)$
  read.csv(as.is = TRUE)$
  mutate(date = as.Date(date))$
  filter(date > as.Date("1996-12-31"))$
  mutate(year = substring(date, 1, 4))

# Model ----
# Fit o3 as a smooth function of temp with a gaussian family; the result
# stays wrapped in a Pipe so later steps can chain from it.
pgam <- pdata$gam(formula = o3 ~ s(temp), family = gaussian())

# Default plot of the fitted model with residuals overlaid.
pgam$plot(residuals = TRUE, main = "Yuck, not a nice plot")
# Prediction grid ----
# Build a one-column data frame of 300 evenly spaced temperatures spanning
# the observed range of `temp` in pdata.
# `length.out` is spelled out rather than relying on partial matching of
# the abbreviated `length` argument.
pnewdata <- pdata$
  with(data.frame(temp = seq(min(temp), max(temp), length.out = 300)))

# Predictions ----
# Term-wise predictions on the grid; se.fit = TRUE requests standard errors
# alongside the fit. `$value` extracts the plain result from the Pipe so
# that `pred$fit` and `pred$se.fit` can be used directly below.
pred <- pgam$
  predict(type = "terms", newdata = pnewdata$value, se.fit = TRUE)$
  value
# Empty plotting frame ----
# type = "n" sets up axes, title and labels without drawing any points; the
# later polygon/lines/points/rug calls draw onto this frame.
# The y label reports the sum of the model's `edf` values excluding the
# first element. "%.2f" prints that number with two decimals; the previous
# "%f" printed six decimals (e.g. 3.380000) even though the value had been
# rounded to two places.
pnewdata$.(temp)$
  plot(pred$fit, type = "n", lwd = 3, xlim = c(-3, 90), ylim = c(-20, 30),
    main = "Ahhh, definitely better",
    ylab = sprintf("s(temp, %.2f)", pgam$with(edf[-1])$sum()$value))
# Band ----
# Grey polygon between fit + 1.96 * se.fit and fit - 1.96 * se.fit. The x
# values run forward then backward over the grid so the outline closes.
pnewdata$with(c(temp, rev(temp)))$
  polygon(
    with(pred, c(fit + 1.96 * se.fit, rev(fit - 1.96 * se.fit))),
    col = "grey",
    border = NA
  )

# Fitted curve ----
pnewdata$.(temp)$lines(pred$fit, lwd = 2)

# Observations ----
# Each point is the model's term prediction plus its residual, drawn in
# semi-transparent blue against the observed temperatures.
pdata$.(temp)$
  points(
    pgam$predict(type = "terms")$value + pgam$residuals()$value,
    pch = 16,
    col = rgb(0, 0, 1, 0.25)
  )

# Rug ----
# Tick marks along the axis at the observed temperatures.
pdata$.(temp)$rug()
library(pipeR)
library(dplyr)
library(ggplot2)
library(mgcv)
# Data preparation ----
# Read the CSV from the URL (as.is = TRUE keeps character columns as
# character), convert `date` to Date, keep rows dated after 1996-12-31,
# and add `year` as the first four characters of the date.
pdata <- Pipe(
  "http://zevross.com/blog/wp-content/uploads/2014/08/chicago-nmmaps.csv"
)$
  read.csv(as.is = TRUE)$
  mutate(date = as.Date(date))$
  filter(date > as.Date("1996-12-31"))$
  mutate(year = substring(date, 1, 4))

# Model ----
# Fit o3 as a smooth function of temp with a gaussian family, keeping the
# fitted model wrapped in a Pipe.
pgam <- pdata$
  gam(formula = o3 ~ s(temp), family = gaussian())

# Default plot of the fitted model with residuals overlaid.
pgam$
  plot(residuals = TRUE, main = "Yuck, not a nice plot")
# Prediction grid ----
# Build a one-column data frame of 300 evenly spaced temperatures spanning
# the observed range of `temp` in pdata.
# `length.out` is spelled out rather than relying on partial matching of
# the abbreviated `length` argument.
pnewdata <- pdata$
  with(data.frame(temp = seq(min(temp), max(temp), length.out = 300)))

# Predictions ----
# Term-wise predictions on the grid; se.fit = TRUE requests standard errors
# alongside the fit. `$value` extracts the plain result from the Pipe so
# that `pred$fit` and `pred$se.fit` can be used directly below.
pred <- pgam$
  predict(type = "terms", newdata = pnewdata$value, se.fit = TRUE)$
  value
# Fitted smooth with its band ----
# Inside the braces `.` is the grid of temperatures taken from pnewdata.
pnewdata$.(temp)$.({
  # Empty frame: type = "n" sets up axes and labels without drawing points.
  # The y label reports the sum of the model's `edf` values excluding the
  # first element; "%.2f" prints it with two decimals (the previous "%f"
  # printed six, e.g. 3.380000, even after rounding to two places).
  plot(., pred$fit, type = "n", lwd = 3, xlim = c(-3, 90), ylim = c(-20, 30),
    main = "Ahhh, definitely better",
    ylab = sprintf("s(temp, %.2f)", pgam$with(edf[-1])$sum()$value))
  # Grey band between fit + 1.96 * se.fit and fit - 1.96 * se.fit.
  # The upper and lower edges must be combined with c() so that y has the
  # same length as x = c(., rev(.)). Without c(), with() evaluated only the
  # first expression and polygon() received x and y of different lengths.
  polygon(c(., rev(.)),
    with(pred, c(fit + 1.96 * se.fit, rev(fit - 1.96 * se.fit))),
    col = "grey", border = NA)
  # Fitted curve on top of the band.
  lines(., pred$fit, lwd = 2)
})
# Observations and rug ----
# Inside the braces `.` is the observed `temp` column taken from pdata.
pdata$.(temp)$.({
  # Each point is the model's term prediction plus its residual, drawn in
  # semi-transparent blue.
  points(
    .,
    pgam$predict(type = "terms")$value + pgam$residuals()$value,
    pch = 16,
    col = rgb(0, 0, 1, 0.25)
  )
  # Tick marks along the axis at the observed temperatures.
  rug(.)
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment