Skip to content

Instantly share code, notes, and snippets.

@ramnov
Last active November 18, 2016 19:22
Show Gist options
  • Save ramnov/943892f4b0390c5e56298578b9a77da0 to your computer and use it in GitHub Desktop.
Save ramnov/943892f4b0390c5e56298578b9a77da0 to your computer and use it in GitHub Desktop.
Columnar operations like Min/Max of rows/columns in xdf file
# Path to the AirlineDemoSmall.xdf file inside the configured sample data
# directory; every section of this script reads from it.
airline_demo_xdf_file <- file.path(
  rxGetOption("sampleDataDir"),
  "AirlineDemoSmall.xdf"
)

# Min/max of a single column (CRSDepTime) of the XDF file ----
#
# Pass 1: the transform function folds the range of the data it is handed
# into the running .min/.max transform objects and returns NULL. Both
# objects start as numeric(0), so the first min()/max() call simply yields
# the first range. returnTransformObjects = TRUE makes the call hand back
# the transform objects (used below via list2env()).
single_column_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transformVars = "CRSDepTime",
  transformObjects = list(.min = numeric(0), .max = numeric(0)),
  transformFunc = function(chunk) {
    chunk_range <- range(chunk$CRSDepTime, na.rm = TRUE)
    # `<<-` writes to the transform objects so the values persist between
    # calls of this function.
    .min <<- min(chunk_range[1], .min)
    .max <<- max(chunk_range[2], .max)
    NULL
  },
  returnTransformObjects = TRUE
)

# Pass 2: add the overall minimum and maximum as constant columns `min` and
# `max`; .min/.max are looked up in an environment built from pass 1.
airline_single_column_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transformEnvir = list2env(single_column_minmax),
  transforms = list(
    min = rep(.min, times = .rxNumRows),
    max = rep(.max, times = .rxNumRows)
  )
)
head(airline_single_column_minmax)
# Min/max across a set of columns (ArrDelay and CRSDepTime) ----
#
# Pass 1: a single combined minimum and maximum is taken over both columns
# together (not one pair per column). The transform function accumulates
# it in the .min/.max transform objects and returns NULL.
multiple_columns_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transformVars = c("CRSDepTime", "ArrDelay"),
  transformObjects = list(.min = numeric(0), .max = numeric(0)),
  transformFunc = function(chunk) {
    both_columns <- data.frame(chunk$ArrDelay, chunk$CRSDepTime)
    chunk_range <- range(both_columns, na.rm = TRUE)
    # `<<-` writes to the transform objects so the values persist between
    # calls of this function.
    .min <<- min(chunk_range[1], .min)
    .max <<- max(chunk_range[2], .max)
    NULL
  },
  returnTransformObjects = TRUE
)

# Pass 2: add the combined minimum and maximum as constant columns `min`
# and `max`; .min/.max are looked up in an environment built from pass 1.
airline_multiple_columns_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transformEnvir = list2env(multiple_columns_minmax),
  transforms = list(
    min = rep(.min, times = .rxNumRows),
    max = rep(.max, times = .rxNumRows)
  )
)
head(airline_multiple_columns_minmax)
# Min/max over a particular range of rows (rows 2000 to 3000) ----
#
# Pass 1: only the selected rows are read. A single combined minimum and
# maximum is taken over ArrDelay and CRSDepTime together; the transform
# function accumulates it in the .min/.max transform objects (both start as
# numeric(0)) and returns NULL.
rows_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transformFunc = function(varlst) {
    rng <- range(
      data.frame(varlst$ArrDelay, varlst$CRSDepTime),
      na.rm = TRUE
    )
    # `<<-` writes to the transform objects so the values persist between
    # calls of this function.
    .min <<- min(rng[1], .min)
    .max <<- max(rng[2], .max)
    NULL
  },
  # Only these two columns are used by the transform function, so request
  # just them (matches the other sections of this script).
  transformVars = c("CRSDepTime", "ArrDelay"),
  transformObjects = list(.min = numeric(0), .max = numeric(0)),
  returnTransformObjects = TRUE,
  startRow = 2000,
  # 1001 rows starting at row 2000 covers rows 2000 through 3000 inclusive;
  # numRows = 1000 would stop at row 2999.
  numRows = 1001
)

# Pass 2: add the minimum and maximum found above as constant columns `min`
# and `max`. Note this call has no startRow/numRows, so the columns are
# added to every row of the file, not only to rows 2000-3000.
airline_rows_minmax <- rxDataStep(
  airline_demo_xdf_file,
  transforms = list(
    min = rep(.min, .rxNumRows),
    max = rep(.max, .rxNumRows)
  ),
  transformEnvir = list2env(rows_minmax)
)
head(airline_rows_minmax)
# Min/max over a filtered set of rows (DayOfWeek equal to "Sunday") ----
#
# Step 0: keep only the rows that satisfy the rowSelection expression; the
# two passes below work on this filtered result instead of the XDF file.
airline_filtered <- rxDataStep(
  airline_demo_xdf_file,
  rowSelection = DayOfWeek == "Sunday"
)

# Pass 1: a single combined minimum and maximum is taken over ArrDelay and
# CRSDepTime together. The transform function accumulates it in the
# .min/.max transform objects and returns NULL.
filtered_rows_minmax <- rxDataStep(
  airline_filtered,
  transformVars = c("CRSDepTime", "ArrDelay"),
  transformObjects = list(.min = numeric(0), .max = numeric(0)),
  transformFunc = function(chunk) {
    both_columns <- data.frame(chunk$ArrDelay, chunk$CRSDepTime)
    chunk_range <- range(both_columns, na.rm = TRUE)
    # `<<-` writes to the transform objects so the values persist between
    # calls of this function.
    .min <<- min(chunk_range[1], .min)
    .max <<- max(chunk_range[2], .max)
    NULL
  },
  returnTransformObjects = TRUE
)

# Pass 2: add the minimum and maximum as constant columns `min` and `max`
# on the filtered rows; .min/.max are looked up in an environment built
# from pass 1.
airline_filtered_rows_minmax <- rxDataStep(
  airline_filtered,
  transformEnvir = list2env(filtered_rows_minmax),
  transforms = list(
    min = rep(.min, times = .rxNumRows),
    max = rep(.max, times = .rxNumRows)
  )
)
head(airline_filtered_rows_minmax)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment