Skip to content

Instantly share code, notes, and snippets.

@joshbode
Last active December 11, 2015 17:58
Show Gist options
  • Save joshbode/4638136 to your computer and use it in GitHub Desktop.
Save joshbode/4638136 to your computer and use it in GitHub Desktop.
SQL analytic-like functions in R
library(plyr)
# compute columns - similar to transform and plyr::mutate
compute = function(.data, ..., .append=FALSE) {
cols = as.list(substitute(list(...))[-1])
cols = cols[names(cols) != '']
env = parent.frame()
# evaluate the columns
for (name in names(cols)) {
.data[[name]] = eval(cols[[name]], .data, env)
}
if (!.append) {
# copy input data to result if required in output
.data = .data[names(cols)]
}
return(.data)
}
# test data
n = 10000
d = data.frame(x=0:(n-1) %/% 5 + 1, y=sample(1:(2*n), n), z=trunc(runif(n, 1, 4)))
# rank the data, partitioned by x, ordered by z, y
print(system.time({
r = unlist(dlply(d, .(x), function(r) {
order(with(r, order(z, y)))
}))
}))
# same, but with additional calculations, returning all cols in d
print(system.time({
d = ddply(d, .(x), transform,
r=order(order(z, y)),
s=order(order(y)),
u=max(y)
)
}))
print(system.time({
d = ddply(d, .(x), mutate,
r=order(order(z, y)),
s=order(order(y)),
u=max(y)
)
}))
print(system.time({
d = ddply(d, .(x), compute,
r=order(order(z, y)),
s=order(order(y)),
u=min(y),
l=u
)
}))
print(system.time({
d = ddply(d, .(x), compute,
r=order(order(z, y)),
s=order(order(y)),
u=min(y),
.append=TRUE
)
}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment