Skip to content

Instantly share code, notes, and snippets.

@hafen
Created March 31, 2015 08:11
Show Gist options
  • Save hafen/bcb77bbd6c6d23447a05 to your computer and use it in GitHub Desktop.
Save hafen/bcb77bbd6c6d23447a05 to your computer and use it in GitHub Desktop.
# datadr quick start with RHIPE backend
# Load RHIPE and initialise it for this session.
library(Rhipe)
rhinit()
# Site-specific RHIPE options.
# NOTE(review): the paths below look specific to one cluster/user; adjust them
# for your environment.
# Presumably `zips` names an archive made available to the Hadoop tasks and
# `runner` is the command that starts RHIPE from that unpacked archive --
# confirm against the RHIPE documentation.
rhoptions(zips = "/ln/share/RhipeLib.tar.gz")
rhoptions(runner = "sh ./RhipeLib/library/Rhipe/bin/RhipeMapReduce.sh")
# Regex of bookkeeping paths (_meta, _SUCCESS, logs, ...); presumably entries
# matching it are skipped when RHIPE lists/reads HDFS directories -- TODO confirm.
rhoptions(file.types.remove.regex = "(/_meta|/_rh_meta|/_outputs|/_SUCCESS|/_LOG|/_log|rhipe_debug|rhipe_merged_index_db)")
# Set the HDFS working directory; presumably the relative HDFS locations used
# later in this script resolve against it.
hdfs.setwd("/ln/rhafen")
# Load datadr, read the raw housing CSV text from HDFS, divide it by county
# and state, and take a first look at the divided data.
library(datadr)

# Read the text input under /tmp/housing/ and write the parsed result to the
# "housing_raw" HDFS connection. Column classes are given positionally:
# three character columns, one Date column, then three numeric columns.
housing <- drRead.csv(
  hdfsConn("/tmp/housing/", type = "text"),
  output = hdfsConn("housing_raw"),
  colClasses = c(rep("character", 3), "Date", rep("numeric", 3))
)

# Split the data by the "county" and "state" variables, writing the result to
# the "by_county" HDFS connection.
# NOTE(review): update = TRUE presumably computes the summary attributes used
# by summary() and drQuantile() below -- confirm in the datadr docs.
byCounty <- divide(
  housing,
  by = c("county", "state"),
  update = TRUE,
  output = hdfsConn("by_county")
)

# Print the divided object and its summary.
byCounty
summary(byCounty)

# Quantiles of medListPriceSqft; the result is plotted below using its
# `q` and `fval` columns.
priceQ <- drQuantile(byCounty, var = "medListPriceSqft")

# xyplot() comes from the lattice package, which this script never attaches
# itself. Namespace-qualify the call so it does not depend on some other
# package happening to attach lattice. The y axis is drawn on a log10 scale.
lattice::xyplot(q ~ fval, data = priceQ, scales = list(y = list(log = 10)))

# Inspect the first element of the divided data.
byCounty[[1]]
# Fit medListPriceSqft ~ time by ordinary least squares on one data frame and
# return the second coefficient of the fit (the term after the intercept,
# i.e. the slope on `time`).
#
# x: a data frame with columns `medListPriceSqft` and `time`.
# Returns a length-one named numeric vector.
lmCoef <- function(x) {
  fit <- lm(medListPriceSqft ~ time, data = x)
  coef(fit)[2]
}
# Attach lmCoef as a transformation of the divided data.
# NOTE(review): presumably the transformation is applied per subset and
# evaluated lazily -- confirm in the datadr documentation.
byCountySlope <- addTransform(byCounty, lmCoef)
# Inspect the first element of the transformed object.
byCountySlope[[1]]
# Recombine the per-subset results with combRbind (presumably row-binding them
# into a single data frame -- TODO confirm).
countySlopes <- recombine(byCountySlope, combRbind)
# Show the first rows of the recombined result.
head(countySlopes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment