# Source: johnmyleswhite/gist:4195980

## gistfile1.jl
# A top priority for making DataFrames useful in Julia is the development of
# good documentation and a nice API for doing plyr+reshape style operations
# in Julia. This Gist is a draft of such documentation.

load("DataFrames")
using DataFrames

load("RDatasets")

# Fetch the "baseball" dataset of the "plyr" package through RDatasets.
baseball = RDatasets.data("plyr", "baseball")

# Pull out the rows whose `id` is "ruthba01". The test uses the elementwise `.==`,
# consistent with the `.>=` filter at the end of this section, so that the quoted
# expression describes a per-row comparison of the column against the string.
baberuth = subset(baseball, :(id .== "ruthba01"))
# Add `cyear`: each `year` minus the smallest `year` in the table, plus 1, so the
# earliest year maps to 1. `within!` is the mutating form; its result is rebound too.
baberuth = within!(baberuth, :(cyear = year - min(year) + 1))

# The same `cyear` expression applied per `id` group, first with the non-mutating
# `within`, then with the mutating `within!`.
# NOTE(review): neither result is assigned, so whether `baseball` itself ends up with
# a `cyear` column depends on how `by`/`within!` treat the group frames — confirm.
by(baseball, "id", df -> within(df, :(cyear = year - min(year) + 1)))
by(baseball, "id", df -> within!(df, :(cyear = year - min(year) + 1)))

# Keep only the rows where `ab` is at least 25.
baseball = subset(baseball, :(ab .>= 25))

#
# Still needs to be implemented
#

#xlim = range(baseball["cyear"], na.rm = TRUE)
#ylim = range(baseball["rbi"] ./ baseball["ab"], na.rm = TRUE)

#
# Translations needed
#

# R> model <- function(df) {lm(rbi / ab ~ cyear, data = df)}
# R> model(baberuth)

# R> bmodels <- dlply(baseball, .(id), model)

# R> rsq <- function(x) summary(x)$r.squared
# R> bcoefs <- ldply(bmodels, function(x) c(coef(x), rsquare = rsq(x)))
# R> names(bcoefs)[2:3] <- c("intercept", "slope")

# R> baseballcoef <- merge(baseball, bcoefs, by = "id")
# R> subset(baseballcoef, rsquare > 0.999)$id
	# A top priority for making DataFrames useful in Julia is the development of
	# good documentation and a nice API for doing plyr+reshape style operations
	# in Julia. This Gist is a draft of such documentation.

	load("DataFrames")
	using DataFrames

	load("RDatasets")

	# Fetch the "baseball" dataset of the "plyr" package through RDatasets.
	baseball = RDatasets.data("plyr", "baseball")

	# Pull out the rows whose `id` is "ruthba01". The test uses the elementwise `.==`,
	# consistent with the `.>=` filter at the end of this section, so that the quoted
	# expression describes a per-row comparison of the column against the string.
	baberuth = subset(baseball, :(id .== "ruthba01"))
	# Add `cyear`: each `year` minus the smallest `year` in the table, plus 1, so the
	# earliest year maps to 1. `within!` is the mutating form; its result is rebound too.
	baberuth = within!(baberuth, :(cyear = year - min(year) + 1))

	# The same `cyear` expression applied per `id` group, first with the non-mutating
	# `within`, then with the mutating `within!`.
	# NOTE(review): neither result is assigned, so whether `baseball` itself ends up
	# with a `cyear` column depends on how `by`/`within!` treat the group frames —
	# confirm.
	by(baseball, "id", df -> within(df, :(cyear = year - min(year) + 1)))
	by(baseball, "id", df -> within!(df, :(cyear = year - min(year) + 1)))

	# Keep only the rows where `ab` is at least 25.
	baseball = subset(baseball, :(ab .>= 25))

	#
	# Still needs to be implemented
	#

	#xlim = range(baseball["cyear"], na.rm = TRUE)
	#ylim = range(baseball["rbi"] ./ baseball["ab"], na.rm = TRUE)

	#
	# Translations needed
	#

	# R> model <- function(df) {lm(rbi / ab ~ cyear, data = df)}
	# R> model(baberuth)

	# R> bmodels <- dlply(baseball, .(id), model)

	# R> rsq <- function(x) summary(x)$r.squared
	# R> bcoefs <- ldply(bmodels, function(x) c(coef(x), rsquare = rsq(x)))
	# R> names(bcoefs)[2:3] <- c("intercept", "slope")

	# R> baseballcoef <- merge(baseball, bcoefs, by = "id")
	# R> subset(baseballcoef, rsquare > 0.999)$id