Last active
November 18, 2016 19:22
-
-
Save ramnov/943892f4b0390c5e56298578b9a77da0 to your computer and use it in GitHub Desktop.
Columnar operations like Min/Max of rows/columns in xdf file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample XDF file located in RevoScaleR's configured sample-data directory.
airline_demo_xdf_file <- file.path(
  rxGetOption("sampleDataDir"),
  "AirlineDemoSmall.xdf"
)

# Min/max of a single column (CRSDepTime) of the airline XDF file.
#
# Pass 1: fold the range of the data handed to the transform into the running
# .min/.max transform objects. The transform returns NULL (it only updates the
# accumulators); returnTransformObjects = TRUE hands the final values back.
single_column_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transformFunc = function(chunk) {
    dep_time_range <- range(chunk[["CRSDepTime"]], na.rm = TRUE)
    # The numeric(0) seeds contribute nothing to min()/max(), so the first
    # values seen become the initial extremes.
    .min <<- min(dep_time_range[1], .min)
    .max <<- max(dep_time_range[2], .max)
    NULL
  },
  transformVars = "CRSDepTime",
  transformObjects = list(.min = numeric(0), .max = numeric(0)),
  returnTransformObjects = TRUE
)

# Pass 2: attach the overall min/max to every row as constant columns.
airline_single_column_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transforms = list(
    min = rep(.min, .rxNumRows),
    max = rep(.max, .rxNumRows)
  ),
  transformEnvir = list2env(single_column_minmax)
)
head(airline_single_column_minmax)
# Combined min/max across a set of columns (ArrDelay and CRSDepTime).
#
# Pass 1: a single range is taken over the values of both columns together and
# folded into the running .min/.max transform objects. The transform returns
# NULL; the accumulators come back via returnTransformObjects = TRUE.
multiple_columns_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transformFunc = function(chunk) {
    combined_range <- range(
      chunk[["ArrDelay"]],
      chunk[["CRSDepTime"]],
      na.rm = TRUE
    )
    # The numeric(0) seeds contribute nothing to min()/max().
    .min <<- min(combined_range[1], .min)
    .max <<- max(combined_range[2], .max)
    NULL
  },
  transformVars = c("CRSDepTime", "ArrDelay"),
  transformObjects = list(.min = numeric(0), .max = numeric(0)),
  returnTransformObjects = TRUE
)

# Pass 2: attach the combined min/max to every row as constant columns.
airline_multiple_columns_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transforms = list(
    min = rep(.min, .rxNumRows),
    max = rep(.max, .rxNumRows)
  ),
  transformEnvir = list2env(multiple_columns_minmax)
)
head(airline_multiple_columns_minmax)
# Combined min/max of ArrDelay and CRSDepTime over a window of rows:
# 1000 rows starting at row 2000 (i.e. rows 2000-2999).
#
# Pass 1: only the rows selected by startRow/numRows are read. A single range
# is taken over both columns together and folded into the running .min/.max
# transform objects. The transform returns NULL; the accumulators come back
# via returnTransformObjects = TRUE.
rows_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transformFunc = function(varlst) {
    rng <- range(
      data.frame(varlst$ArrDelay, varlst$CRSDepTime),
      na.rm = TRUE
    )
    # The numeric(0) seeds contribute nothing to min()/max().
    .min <<- min(rng[1], .min)
    .max <<- max(rng[2], .max)
    NULL
  },
  # Restrict the transform to the two columns it uses, matching the other
  # min/max computations in this script.
  transformVars = c("CRSDepTime", "ArrDelay"),
  transformObjects = list(.min = numeric(0), .max = numeric(0)),
  returnTransformObjects = TRUE,
  startRow = 2000,
  numRows = 1000
)

# Pass 2: attach the window's min/max to every row as constant columns.
# NOTE: this pass runs over the whole file, not just the row window above.
airline_rows_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transforms = list(
    min = rep(.min, .rxNumRows),
    max = rep(.max, .rxNumRows)
  ),
  transformEnvir = list2env(rows_minmax)
)
head(airline_rows_minmax)
# Combined min/max of ArrDelay and CRSDepTime over a filtered set of rows
# (here: DayOfWeek equal to "Sunday").
#
# Step 1: materialise the filtered rows.
airline_filtered <- rxDataStep(
  airline_demo_xdf_file,
  rowSelection = DayOfWeek == "Sunday"
)

# Step 2: take a single range over both columns together and fold it into the
# running .min/.max transform objects. The transform returns NULL; the
# accumulators come back via returnTransformObjects = TRUE.
filtered_rows_minmax <- rxDataStep(
  airline_filtered,
  transformFunc = function(chunk) {
    combined_range <- range(
      chunk[["ArrDelay"]],
      chunk[["CRSDepTime"]],
      na.rm = TRUE
    )
    # The numeric(0) seeds contribute nothing to min()/max().
    .min <<- min(combined_range[1], .min)
    .max <<- max(combined_range[2], .max)
    NULL
  },
  transformVars = c("CRSDepTime", "ArrDelay"),
  transformObjects = list(.min = numeric(0), .max = numeric(0)),
  returnTransformObjects = TRUE
)

# Step 3: attach the min/max to every filtered row as constant columns.
airline_filtered_rows_minmax <- rxDataStep(
  airline_filtered,
  transforms = list(
    min = rep(.min, .rxNumRows),
    max = rep(.max, .rxNumRows)
  ),
  transformEnvir = list2env(filtered_rows_minmax)
)
head(airline_filtered_rows_minmax)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment