# GitHub Gist: kenthzhang/e7859c7188897511927041f6e4339440
# Author: @kenthzhang
# Last active: April 26, 2017 05:57
# Connect to Spark; the connection object is kept in `cc` so it can be passed
# to rxSparkDisconnect() later.
cc <- rxSparkConnect(reset = TRUE)

# Describe the airline demo CSV as a text data source read through an HDFS
# file system object. "M" is declared as the missing-value string and string
# columns are requested as factors.
hdfsFileSystem <- RxHdfsFileSystem()
textData <- RxTextData(
  file = "/share/AirlineDemoSmall/AirlineDemoSmall.csv",
  missingValueString = "M",
  stringsAsFactors = TRUE,
  fileSystem = hdfsFileSystem
)
# Callback handed to rxExecBy(): imports the data it is given and returns the
# number of rows imported. `keys` is part of the callback signature but is not
# used here.
.Summary <- function(keys, data) {
  nrow(rxImport(data))
}
# Single key: apply .Summary to the data grouped by DayOfWeek, then print the
# structure of the returned list (recorded output follows).
result <- rxExecBy(
  inData = textData,
  keys = "DayOfWeek",
  func = .Summary
)
str(result)
#List of 7
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 4
# ..$ result: int 78875
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 6
# ..$ result: int 82987
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 5
# ..$ result: int 81304
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 7
# ..$ result: int 86159
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 1
# ..$ result: int 97975
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 3
# ..$ result: int 77725
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Sunday",..: 2
# ..$ result: int 94975
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# Multiple keys: group on the combination of DayOfWeek and ArrDelay, then
# report how many groups came back and inspect the first one.
result <- rxExecBy(
  inData = textData,
  keys = c("DayOfWeek", "ArrDelay"),
  func = .Summary
)
length(result)
#[1] 3233
str(result[[1]])
#List of 3
# $ keys :List of 2
# ..$ : Factor w/ 7 levels "Monday","Sunday",..: 4
# ..$ : int 388
# $ result: int 2
# $ status:List of 3
# ..$ : chr "OK"
# ..$ : NULL
# ..$ : NULL

# Release the Spark connection held in `cc`.
rxSparkDisconnect(cc)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment