Last active
April 27, 2017 00:00
-
-
Save kenthzhang/e62d53780b74ffb41560401e771353d7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Open a Spark compute context; the handle is kept in `cc` so the session can
# be closed with rxSparkDisconnect() at the end of the script.
cc <- rxSparkConnect(reset = TRUE)

# Column metadata for the Hive table.
# "Saturday" is not among the DayOfWeek levels; the script later inspects the
# partition that corresponds to that day.
colInfo <- list(
  ArrDelay = list(type = "numeric"),
  DayOfWeek = list(
    type = "factor",
    levels = c(
      "Monday",
      "Sunday",
      "Wednesday",
      "Thursday",
      "Friday",
      "Tuesday"
    )
  )
)

# Hive data source for the "AirlineDemoSmall" table, using the metadata above.
hiveData <- RxHiveData(table = "AirlineDemoSmall", colInfo = colInfo)
# Per-partition callback passed to rxExecBy().
#
# keys: the partition's key values; accepted as part of the callback
#       signature but not used here.
# data: the data source holding the rows of one partition.
#
# Returns the rxSummary() result for the formula `~.` on that partition.
.Factor <- function(keys, data) {
  rxSummary(~., data)
}
# Apply .Factor to each partition of the Hive data, partitioned by DayOfWeek.
result <- rxExecBy(inData = hiveData, keys = c("DayOfWeek"), func = .Factor)

# Partition result of "Saturday", which is not in the factor level list.
# NOTE(review): index 4 is where this partition appeared in the run recorded
# in the output below; confirm the partition order before relying on it.
result[[4]]
#$keys | |
#$keys[[1]] | |
#[1] <NA> | |
#Levels: Monday Sunday Wednesday Thursday Friday Tuesday | |
# | |
# | |
#$result | |
#Call: | |
#rxSummary(formula = ~., data = data) | |
# | |
#Summary Statistics Results for: ~. | |
#Data: data (RxXdfData Data Source) | |
#File name: /dev/shm/MRS-sshuser/2729766045047878893/PXDF0 | |
#Number of valid observations: 86159 | |
# | |
# Name Mean StdDev Min Max ValidObs MissingObs | |
# ArrDelay 11.87533 45.245402 -73.000000 1370.00000 83851 2308 | |
# CRSDepTime 13.15288 4.598508 0.083333 23.98333 86159 0 | |
# | |
#Category Counts for DayOfWeek | |
#Number of categories: 6 | |
#Number of valid observations: 0 | |
#Number of missing observations: 86159 | |
# | |
# DayOfWeek Counts | |
# Monday 0 | |
# Sunday 0 | |
# Wednesday 0 | |
# Thursday 0 | |
# Friday 0 | |
# Tuesday 0 | |
# | |
#$status | |
#$status[[1]] | |
#[1] "OK" | |
# | |
#$status[[2]] | |
#NULL | |
# | |
#$status[[3]] | |
#NULL | |
# Partition result of "Friday".
# NOTE(review): index 2 is where this partition appeared in the run recorded
# in the output below; confirm the partition order before relying on it.
result[[2]]
#$keys | |
#$keys[[1]] | |
#[1] Friday | |
#Levels: Monday Sunday Wednesday Thursday Friday Tuesday | |
# | |
# | |
#$result | |
#Call: | |
#rxSummary(formula = ~., data = data) | |
# | |
#Summary Statistics Results for: ~. | |
#Data: data (RxXdfData Data Source) | |
#File name: /dev/shm/MRS-sshuser/2397107652729461182/PXDF0 | |
#Number of valid observations: 82987 | |
# | |
# Name Mean StdDev Min Max ValidObs MissingObs | |
# ArrDelay 14.80433 41.792601 -78.000000 1490.00000 80142 2845 | |
# CRSDepTime 13.50271 4.739651 0.083333 23.98333 82987 0 | |
# | |
#Category Counts for DayOfWeek | |
#Number of categories: 6 | |
#Number of valid observations: 82987 | |
#Number of missing observations: 0 | |
# | |
# DayOfWeek Counts | |
# Monday 0 | |
# Sunday 0 | |
# Wednesday 0 | |
# Thursday 0 | |
# Friday 82987 | |
# Tuesday 0 | |
# | |
#$status | |
#$status[[1]] | |
#[1] "OK" | |
# | |
#$status[[2]] | |
#NULL | |
# | |
#$status[[3]] | |
#NULL | |
# Close the Spark compute context held in `cc`.
rxSparkDisconnect(cc)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment