Last active
October 29, 2019 14:14
-
-
Save nk773/a2ed7cd0ce8020647f5e7711f749b3b5 to your computer and use it in GitHub Desktop.
Generate Lag Features
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
lf1 <- function(dataset2, valcol2, numfeats) {
  # Appends lag features for a single column of a data frame.
  #
  # Lag i holds the value the source column had i rows earlier. The first i
  # rows of lag i have no earlier row, so they are padded with the first
  # value of the source column. Rows are assumed to be sorted by time already.
  #
  # Args:
  #   dataset2: data frame to which the new features are added.
  #   valcol2: index of the column for which lag features are created.
  #   numfeats: number of lag features to create. For example, 3 appends
  #     lags -1, -2 and -3. Values below 1 add nothing.
  #
  # Returns:
  #   dataset2 with numfeats extra columns appended on the right, named
  #   "<column name> lag  -i".
  stopifnot(is.numeric(numfeats), length(numfeats) == 1L, !is.na(numfeats))
  if (numfeats < 1) {
    # Nothing to add; 1:numfeats would wrongly iterate over c(1, 0).
    return(dataset2)
  }

  orig_names <- names(dataset2)
  n_cols <- ncol(dataset2)
  values <- dataset2[[valcol2]]
  row_ids <- seq_len(nrow(dataset2))

  for (i in seq_len(numfeats)) {
    # Row r of lag i reads source row r - i; clamping at 1 pads the leading
    # rows with the first value and stays valid even when i >= nrow.
    dataset2[[n_cols + i]] <- values[pmax(row_ids - i, 1L)]
  }

  # Column names keep the historical "<name> lag  -i" spelling.
  new_names <- paste(orig_names[valcol2], "lag ", -seq_len(numfeats))
  names(dataset2) <- c(orig_names, new_names)
  dataset2
}
GenerateLagFeatures <- function(dataset1, valcol, numfeats) {
  # Adds lag features for every column of dataset1 whose name is in valcol.
  #
  # Each matching column is handed to lf1 in turn, so the result accumulates
  # numfeats new columns per matching column. Rows are assumed to be sorted
  # by time already.
  #
  # Args:
  #   dataset1: dataset to which the new features are added.
  #   valcol: character vector with the names of the columns to featurize.
  #   numfeats: number of lag features to create per column. For example,
  #     3 creates 3 new lag features for each matching column.
  #
  # Returns:
  #   The dataset with the additional features, or dataset1 unchanged (after
  #   printing a notice) when no column name matches valcol.
  target_cols <- which(names(dataset1) %in% valcol)

  # Guard: none of the requested names exist in the dataset.
  if (length(target_cols) == 0) {
    print(paste("No columns matching with ", valcol, " found"))
    return(dataset1)
  }

  # Fold lf1 over the matching column indices, threading the growing dataset.
  Reduce(
    function(acc, col_idx) lf1(acc, col_idx, numfeats),
    target_cols,
    dataset1
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment