Skip to content

Instantly share code, notes, and snippets.

@kenthzhang
Created April 26, 2017 21:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kenthzhang/e701f1235b5a81ef8c62b4ebf7960df2 to your computer and use it in GitHub Desktop.
Save kenthzhang/e701f1235b5a81ef8c62b4ebf7960df2 to your computer and use it in GitHub Desktop.
# Open a Spark compute context; the handle is kept in `cc` so the session can
# be closed at the end of the script. (`reset = TRUE` is passed through to
# rxSparkConnect -- see its documentation for the exact semantics.)
cc <- rxSparkConnect(reset = TRUE)

# File-system object handed to the data source below so the path is resolved
# on HDFS.
hdfsFileSystem <- RxHdfsFileSystem()

# Column metadata: read DayOfWeek as a factor with an explicit level order
# (Monday first, Sunday last).
colInfo <- list(
  DayOfWeek = list(
    type = "factor",
    levels = c("Monday", "Tuesday", "Wednesday", "Thursday",
               "Friday", "Saturday", "Sunday")
  )
)

# Parquet data source for the AirlineDemoSmall sample, using the HDFS file
# system and the column metadata defined above.
parquetData <- RxParquetData(
  file = "/share/AirlineDemoSmall/AirlineDemoSmallParquet",
  fileSystem = hdfsFileSystem,
  colInfo = colInfo
)
# Per-group callback intended for rxExecBy.
#
# Arguments:
#   keys       - list of key values for the current group; only the first
#                element is used, as an index into `anotherArg`.
#   data       - data source for the current group; imported with rxImport.
#   arg        - row-count threshold; a warning "rows > <arg>" is raised when
#                the group has more rows than this.
#   anotherArg - list of labels, indexed by keys[[1]].
#
# Returns a single string of the form "<label> has <n> rows".
.Param <- function(keys, data, arg, anotherArg)
{
  # Pull the group's rows in and count them.
  groupRows <- nrow(rxImport(data))

  # Flag oversized groups without aborting; the result is still returned.
  if (groupRows > arg)
  {
    warning(paste("rows >", arg))
  }

  # Look up the label for this group via its first key value.
  groupLabel <- anotherArg[[keys[[1]]]]
  paste(groupLabel, "has", groupRows, "rows")
}
# Extra arguments passed by name to .Param for every group:
#   arg        - row-count threshold above which .Param raises a warning.
#   anotherArg - day names that .Param indexes with the group's DayOfWeek key.
params <- list(
  arg = 90000,
  anotherArg = list("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
)

# Run .Param once per DayOfWeek group. `parquetData` is the only data source
# this script creates, so it is what must be passed as inData.
result <- rxExecBy(
  inData = parquetData,
  keys = c("DayOfWeek"),
  func = .Param,
  funcParams = params
)

# Show the structure of the per-group results (keys, result, status).
str(result)
#List of 7
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 3
# ..$ result: chr "Wednesday has 78875 rows"
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 5
# ..$ result: chr "Friday has 82987 rows"
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 4
# ..$ result: chr "Thursday has 81304 rows"
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 6
# ..$ result: chr "Saturday has 86159 rows"
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 1
# ..$ result: chr "Monday has 97975 rows"
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : chr "rows > 90000"
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 2
# ..$ result: chr "Tuesday has 77725 rows"
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 7
# ..$ result: chr "Sunday has 94975 rows"
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : chr "rows > 90000"
# Close the Spark connection that was opened with rxSparkConnect and stored in `cc`.
rxSparkDisconnect(cc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment