Skip to content

Instantly share code, notes, and snippets.

@bgall
Last active December 1, 2019 20:49
Show Gist options
  • Save bgall/9111603604c5b21320f54a0218286aad to your computer and use it in GitHub Desktop.
Save bgall/9111603604c5b21320f54a0218286aad to your computer and use it in GitHub Desktop.
Function to generate arbitrary conjoint attributes with specified probabilities values are selected
##############################################################
# Define function: create_attributes
#
# Creates randomly generated vectors of values from sets
# of potential values for a specified number of attributes
#
# *Arguments*
#
# attr_names (optional)
# vector of attribute names of attr_n length. If no values
# provided, attribute named attrib1, attrib2, etc.
#
# attr_levels
# vector or list of vectors of length N, each containing the
# unique levels each attribute can take. May include NA values.
#
# n_profiles
# number of profiles, i.e. # of values to draw for each
# attribute
#
# probs
# A vector or list of numeric vectors containing probabilities
# for sampling from the set of unique attribute values. Probs
# are assigned to each element in the attrinute value set based
# on vector index. If no value supplied, defaults to assigning
# with equal probability.
#
# NOTE: Currently no test for whether the number of probabilities
# passed to probs() is equal to the number of levels for the
# attribute, only that the number of probability vectors
# passed is equal to the number of attributes. Also, no check
# the probabilities sum to 1.
##############################################################
create_attributes <-
function(attr_levels,
n_profiles,
attr_names = NULL,
probs = NULL) {
# Convert attr_levels and probs to list to work with lists
# throughout for simplicity/speed. Since vectors are list
# but lists not necessarily vectors, use is.list(X) rather
# than is.vector(X) since, passing a list to is.vector is
# also TRUE.
if (!is.list(attr_levels)) {
attr_levels <- list(attr_levels)
}
if (!is.null(probs) & !is.list(probs)) {
probs <- list(probs)
}
# Number of attributes to generate
attr_n <- length(attr_levels)
# Test inputs are correct length
stopifnot(is.null(attr_names) | attr_n == length(attr_names))
stopifnot(is.null(probs) | attr_n == length(probs))
# If attribute names not provided, generate
# default attribute names attrib1, attrib2, ...
if (is.null(attr_names)) {
attr_names <- paste0("attrib", seq_len(attr_n))
}
# If probs not provided, generate default
# equal probabilities of assignment
if (is.null(probs)) {
probs <- list()
for (i in 1:attr_n) {
attr_i_levels_n <- length(attr_levels[[i]])
probs[[i]] <- rep(1 / attr_i_levels_n, attr_i_levels_n)
}
}
# Initialize list to store each attribute vector
attrib_list <- list()
# Generate attributes
for (i in 1:length(attr_names)) {
# If an attribute has only one level, is
# numeric, and >0, the x = argument in
# sample() thinks the level refers to the
# number of levels, incorrectly. So,
# so, separate these out since prob = 1
# for the single value. See ?sample
# Details for more info on this issue.
if (is.numeric(attr_levels[[i]]) &
length(attr_levels[[i]]) == 1) {
attrib_list[[i]] <- rep(attr_levels[[i]], n_profiles)
} else {
attrib_list[[i]] <- sample(
x = attr_levels[[i]],
size = n_profiles,
replace = TRUE,
prob = probs[[i]]
)
}
}
# Collapse list into data frame, replace names
attrib_df <- dplyr::bind_cols(attrib_list)
colnames(attrib_df) <- attr_names
# Return attribute data frame
attrib_df
}
##############################################################
# EXAMPLES WITH 50 PROFILES
##############################################################
# n_profiles <- 50
#
# # One attribute, default probs
# attr_levels <- c("a","b","c")
# create_attributes(attr_levels = attr_levels,
# n_profiles = n_profiles)
#
# # One attribute,specified probs
# attr_levels <- c("a","b","c")
# probs <- c(0.10,0.10,0.80)
# create_attributes(attr_levels = attr_levels,
# n_profiles = n_profiles,
# probs = probs)
#
# # Two attributes, default probs
# attr_levels <- list(c("a","b","c"),
# c(seq_len(6)))
#
# create_attributes(attr_levels = attr_levels,
# n_profiles = n_profiles)
#
# # Two attributes, specified probs
# attr_levels <- list(c("a","b","c"),
# c(seq_len(6)))
# probs <- list(c(0.50, 0.30, 0.20),
# c(0.05, 0.05, 0.05, 0.05, 0.40, 0.40))
#
# create_attributes(attr_levels = attr_levels,
# n_profiles = n_profiles,
# probs = probs)
# two attributes, specified probs and names
# attr_levels <- list(c("a","b","c"),
# c(seq_len(6)))
#
# probs <- list(c(0.50, 0.30, 0.20),
# c(0.05, 0.05, 0.05, 0.05, 0.40, 0.40))
#
# attr_names <- c("foo", "bar")
#
# attr_df<- create_attributes(attr_levels = attr_levels,
# n_profiles = n_profiles,
# attr_names = attr_names,
# probs = probs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment