Skip to content

Instantly share code, notes, and snippets.

@nk773
Last active October 29, 2019 14:14
Show Gist options
  • Save nk773/a2ed7cd0ce8020647f5e7711f749b3b5 to your computer and use it in GitHub Desktop.
Save nk773/a2ed7cd0ce8020647f5e7711f749b3b5 to your computer and use it in GitHub Desktop.
Generate Lag Features
lf1 <- function(dataset2, valcol2, numfeats) {
  # Appends lag features for one column of a data frame.
  #
  # Lag i holds, at row r, the value the source column had at row r - i.
  # The first i rows have no such predecessor, so they are back-filled with
  # the first observation of the source column. The dataset is assumed to be
  # sorted by time already.
  #
  # Args:
  #   dataset2: data frame to which the new features are added.
  #   valcol2: index of the column for which lag features are created.
  #   numfeats: number of lag features to create. For example, 3 creates the
  #     lags -1, -2 and -3. A value of 0 returns dataset2 unchanged.
  #
  # Returns:
  #   dataset2 with numfeats extra columns appended on the right, named
  #   "<column name> lag  -1", "<column name> lag  -2", ...
  stopifnot(
    is.numeric(numfeats),
    length(numfeats) == 1L,
    !is.na(numfeats),
    numfeats >= 0
  )
  if (numfeats == 0) {
    return(dataset2)
  }

  orig_names <- names(dataset2)
  n_rows <- nrow(dataset2)
  n_cols <- ncol(dataset2)
  source_values <- dataset2[[valcol2]]
  row_idx <- seq_len(n_rows)

  for (i in seq_len(numfeats)) {
    # Clamping the shifted index at 1 back-fills the first i rows with the
    # first observation, and stays valid even when i >= n_rows.
    dataset2[[n_cols + i]] <- source_values[pmax(row_idx - i, 1L)]
  }

  # paste() inserts its default " " separator after "lag ", so the names
  # contain two spaces before the offset; kept as-is because callers may
  # select the generated columns by name.
  new_names <- paste(orig_names[valcol2], "lag ", -seq_len(numfeats))
  names(dataset2) <- c(orig_names, new_names)
  dataset2
}
GenerateLagFeatures <- function(dataset1, valcol, numfeats) {
  # Appends lag features for every column of dataset1 whose name is listed in
  # valcol. Each matching column is handed to lf1(), which appends numfeats
  # new columns for it. The dataset is assumed to be sorted by time already.
  #
  # Args:
  #   dataset1: dataset to which the new features are added.
  #   valcol: character vector with the names of the columns for which lag
  #     features are created. Names that match no column are ignored.
  #   numfeats: number of lag features to create per column. For example, 3
  #     creates 3 new lag features for each matching column.
  #
  # Returns:
  #   dataset1 with the additional lag columns appended. When no column name
  #   matches, a warning is raised and dataset1 is returned unchanged.
  orig_names <- names(dataset1)
  bases <- which(orig_names %in% valcol) # positions of the columns to featurize

  if (length(bases) == 0) {
    # Collapse valcol so that several requested names yield one message
    # instead of one message per name.
    warning(
      "No columns matching ", paste(valcol, collapse = ", "), " found",
      call. = FALSE
    )
    return(dataset1)
  }

  # lf1() only appends columns on the right, so the positions in bases stay
  # valid while the dataset grows.
  for (base in bases) {
    dataset1 <- lf1(dataset1, base, numfeats)
  }
  dataset1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment