Skip to content

Instantly share code, notes, and snippets.

@bgall
Created December 1, 2019 21:01
Show Gist options
  • Save bgall/8bd26f20a5941626f7a14588137a418f to your computer and use it in GitHub Desktop.
Save bgall/8bd26f20a5941626f7a14588137a418f to your computer and use it in GitHub Desktop.
Function to generate dummy variables from the possible sets of values
#########################################################
# Define function: create_value_dummies
# Create dummy variables indicating if a given
# profile takes on a specific value from its
# possible values. A dummy is created for every
# value in the range of the variable, independent of
# whether the actual random sampling does (not) draw
# that value and assign it to a profile.
#
# Arguments:
#   attr_df     - data frame with one column per attribute.
#   attr_levels - list with one vector of possible levels
#                 per column of attr_df, in column order.
#                 A bare vector is treated as the levels
#                 of a single attribute.
#
# Returns:
#   attr_df with one extra 0/1 column per (attribute,
#   level) pair, named "<column name>_<level>". Rows whose
#   attribute value is NA get NA in that attribute's
#   dummies, because the comparison with `==` yields NA.
#########################################################
create_value_dummies <-
  function(attr_df,
           attr_levels) {
    # Convert attribute levels to a list for
    # easier referencing if not already a list
    if (!is.list(attr_levels)) {
      attr_levels <- list(attr_levels)
    }

    # Every column needs its own set of levels; fail with a clear
    # message instead of an opaque "subscript out of bounds"
    n_attrs <- ncol(attr_df)
    if (length(attr_levels) < n_attrs) {
      stop(
        "`attr_levels` must supply one set of levels per column of `attr_df`",
        call. = FALSE
      )
    }

    attr_names <- colnames(attr_df)

    # Build the dummies of a single attribute as a named list of
    # 0/1 vectors, one element per possible level
    make_attr_dummies <- function(i) {
      this_attr_levels <- attr_levels[[i]]
      # paste0() recycles zero-length input to "", so an attribute
      # without levels must be short-circuited before naming
      if (length(this_attr_levels) == 0L) {
        return(list())
      }
      # drop = TRUE yields a plain vector for data frames and tibbles alike
      this_attr_values <- attr_df[, i, drop = TRUE]
      dummies <- lapply(
        seq_along(this_attr_levels),
        function(j) ifelse(this_attr_values == this_attr_levels[j], 1, 0)
      )
      names(dummies) <- paste0(attr_names[i], "_", this_attr_levels)
      dummies
    }

    # Flatten the per-attribute lists into one named list of columns;
    # seq_len() makes a zero-column attr_df skip the work entirely
    dummy_cols <- unlist(
      lapply(seq_len(n_attrs), make_attr_dummies),
      recursive = FALSE
    )

    # Nothing to add: hand the input back unchanged
    if (length(dummy_cols) == 0L) {
      return(attr_df)
    }

    # Collapse all dummy columns into a single data frame. The columns
    # are already named, so no placeholder names have to be repaired.
    dummy_df <- dplyr::bind_cols(dummy_cols)

    # Join the dummy columns to the original data
    dplyr::bind_cols(attr_df, dummy_df)
  }
###################################
# Example
###################################
# Generate example data: 2 attributes,
# different number of levels per
# attribute
#df <- data.frame(foo = rep(1:4,10), bar = rep(c("a","b"),20))
#dflevels <- list(c(1:4), c("a","b"))
# Produce dummies
#create_value_dummies(attr_df = df, attr_levels = dflevels)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment