Skip to content

Instantly share code, notes, and snippets.

@kenthzhang
Created April 26, 2017 20:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kenthzhang/ec7770a8451284bf1a39a077fe5f1adc to your computer and use it in GitHub Desktop.
Save kenthzhang/ec7770a8451284bf1a39a077fe5f1adc to your computer and use it in GitHub Desktop.
# Open a Spark compute context. The handle is kept in `cc` so the connection
# can be closed with rxSparkDisconnect(cc) at the end of the script.
# NOTE(review): reset = TRUE presumably discards existing Spark sessions -
# confirm against the rxSparkConnect documentation.
cc <- rxSparkConnect(reset = TRUE)
# HDFS file system object used to locate the input data set.
hdfsFileSystem <- RxHdfsFileSystem()
# XDF data source pointing at the AirlineDemoSmall sample on that file system.
xdfData <- RxXdfData(file = "/share/AirlineDemoSmall/AirlineDemoSmallXDF", fileSystem = hdfsFileSystem)
# Per-group function passed to rxExecBy: counts the rows of one group and
# signals a condition for every size threshold the group exceeds.
#
# Arguments:
#   keys  Key values of the group (not used by this function).
#   data  The group's data object as supplied by rxExecBy; it is loaded
#         with rxImport() before counting.
#
# Returns the number of rows in the imported data.
# Warns once for each of 80000, 85000 and 90000 that the row count exceeds,
# and stops with an error when the row count exceeds 95000.
.Message <- function(keys, data)
{
  row_count <- nrow(rxImport(data))

  # Ascending order keeps the warnings in the same sequence as the thresholds.
  for (limit in c(80000L, 85000L, 90000L)) {
    if (row_count > limit) {
      warning(sprintf("rows > %d", limit))
    }
  }

  # The largest threshold is fatal rather than a warning.
  if (row_count > 95000L) {
    stop("rows > 95000")
  }

  row_count
}
# Apply .Message to xdfData grouped by the DayOfWeek key.
# Judging by the sample output below, the returned list has one element per
# key value, each holding `keys`, `result` (NULL when the function failed)
# and `status`: "OK" or "Error", the error message, and the warning messages
# raised for that group.
result <- rxExecBy(inData = xdfData, keys = c("DayOfWeek"), func = .Message)
# Print the structure of the result; sample output follows as comments.
str(result)
#List of 7
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 3
# ..$ result: int 78875
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 5
# ..$ result: int 82987
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : chr "rows > 80000"
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 4
# ..$ result: int 81304
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : chr "rows > 80000"
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 6
# ..$ result: int 86159
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : chr [1:2] "rows > 80000" "rows > 85000"
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 1
# ..$ result: NULL
# ..$ status:List of 3
# .. ..$ : chr "Error"
# .. ..$ : chr "rows > 95000"
# .. ..$ : chr [1:3] "rows > 80000" "rows > 85000" "rows > 90000"
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 2
# ..$ result: int 77725
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : NULL
# $ :List of 3
# ..$ keys :List of 1
# .. ..$ : Factor w/ 7 levels "Monday","Tuesday",..: 7
# ..$ result: int 94975
# ..$ status:List of 3
# .. ..$ : chr "OK"
# .. ..$ : NULL
# .. ..$ : chr [1:3] "rows > 80000" "rows > 85000" "rows > 90000"
# Close the Spark connection that was opened into `cc` at the start of the script.
rxSparkDisconnect(cc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment