bgall/social_desirability_bias.R

## social_desirability_bias.R
# Show that social desirability not only produces biased
# "descriptive statistics" (e.g. group means), but can
# produce treatment effects of the opposite sign of
# the true effect. You cannot simply ignore social
# desirability under the assumption that it does
# not affect estimates of causal parameters since
# measures pre-treatment and post-treatment are
# both subject to social desirability.
#
# Note that this arises due to social desirability
# pushing people toward the maximum and minimum of
# the theoretical range of the outcome variable, such
# as when people state the probability they vote is very
# high. Because the range of the outcome truncates treatment
# effects for which the treatment would push an observation's
# outcome value outside of the range, social desirability
# that pushes outcome values toward the ceiling or floor of
# the measure increases the risk that a treatment effect is
# truncated. In the extreme case, it could cause all positive
# treatment effects to be truncated at a positive value close
# to zero and none of the negative treatment effects to be
# truncated. The consequence is that nearly all of our
# treatment effects are negative and large treatment effects
# are only found in the negative effects, so the ATE is negative.
# This does not happen when the measure is unbounded.

# There are at least two ways of conceptualizing social
# desirability bias and the below looks at both:
# - Additive SBD: true response + constant
# - Multiplicative SDB: t9rue response)*(constant)

########################################
# Set-up
########################################

# Load packages
library(dplyr)
library(magrittr)

# Randomization seed
set.seed(123)

########################################
# Parameters
########################################

# Number of participants completing study
N <- 5000

# Average treatment effect ~ Normal(mu,sigma^2)
mu <- 0.10 # positive!
sigma <- 0.10
sdb_add <- 0.9
sdb_multi <- .8

########################################
# Generate data
########################################

# Initialize "empty" data frame to store data
# with dplyr. No good way to do this with
# dplyr.
df <- data.frame(y_t0_true = rep(NA_real_, N))

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generate control data
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

df %<>% dplyr::mutate(
  # Draw "true" vote probability for each
  # participant from Unif(0,1)
  y_t0_true = runif(n = N, min = 0, max = 1),

  # Assume there is some social desirability
  # where everyone reports a higher probability
  # of voting than they would otherwise. We can
  # think of 2 different types of SDB: additive
  # effects and multiplicative effects.

  # Additive effect: true probability + sdb,
  # where sdb in [0,1]
  y_t0_observed_add = ifelse(y_t0_true + sdb_add > 1,
                             1,
                             y_t0_true + sdb_add),

  # Multiplicative effect: true probability*(1+sdb),
  # where sdb > 0
  y_t0_observed_multi = ifelse((y_t0_true * (1 + sdb_multi)) > 1,
                               1,
                               y_t0_true * (1 + sdb_multi))
)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generate treated data
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Get treatment effects
df %<>% rnorm(n = N, mean = mu, sd = sigma)

# Treated outcome, no social desirability,
# round to [0,1] interval
df %<>% dplyr::mutate(
  y_t1_true = y_t0_true + fx,
  y_t1_true = dplyr::case_when(y_t1_true < 0 ~ 0,
                               y_t1_true > 1 ~ 1,
                               TRUE ~ y_t1_true)
)

# Treated outcome, social desirability
# round to [0,1] interval, additive
df %<>% dplyr::mutate(
  y_t1_observed_add = y_t0_observed_add + fx,
  y_t1_observed_add = dplyr::case_when(
    y_t1_observed_add < 0 ~ 0,
    y_t1_observed_add > 1 ~ 1,
    TRUE ~ y_t1_observed_add
  )
)

# Treated outcome, social desirability
# round to [0,1] interval, multiplicative

df %<>% dplyr::mutate(
  y_t1_observed_multi = y_t0_observed_multi + fx*(1 + sdb_multi),
  y_t1_observed_multi = dplyr::case_when(
    y_t1_observed_multi < 0 ~ 0,
    y_t1_observed_multi > 1 ~ 1,
    TRUE ~ y_t1_observed_multi
  )
)

########################################
# Calculate ATE for different measures
########################################

mean(df$y_t1_true - df$y_t0_true) # true
mean(df$y_t1_observed_add - df$y_t0_observed_add) # additive
mean(df$y_t1_observed_multi - df$y_t0_observed_multi) # multiplicative
	# Show that social desirability not only produces biased
	# "descriptive statistics" (e.g. group means), but can
	# produce treatment effects of the opposite sign of
	# the true effect. You cannot simply ignore social
	# desirability under the assumption that it does
	# not affect estimates of causal parameters since
	# measures pre-treatment and post-treatment are
	# both subject to social desirability.
	#
	# Note that this arises due to social desirability
	# pushing people toward the maximum and minimum of
	# the theoretical range of the outcome variable, such
	# as when people state the probability they vote is very
	# high. Because the range of the outcome truncates treatment
	# effects for which the treatment would push an observation's
	# outcome value outside of the range, social desirability
	# that pushes outcome values toward the ceiling or floor of
	# the measure increases the risk that a treatment effect is
	# truncated. In the extreme case, it could cause all positive
	# treatment effects to be truncated at a positive value close
	# to zero and none of the negative treatment effects to be
	# truncated. The consequence is that nearly all of our
	# treatment effects are negative and large treatment effects
	# are only found in the negative effects, so the ATE is negative.
	# This does not happen when the measure is unbounded.

	# There are at least two ways of conceptualizing social
	# desirability bias and the below looks at both:
	# - Additive SBD: true response + constant
	# - Multiplicative SDB: t9rue response)*(constant)

	########################################
	# Set-up
	########################################

	# Load packages
	library(dplyr)
	library(magrittr)

	# Randomization seed
	set.seed(123)

	########################################
	# Parameters
	########################################

	# Number of participants completing study
	N <- 5000

	# Average treatment effect ~ Normal(mu,sigma^2)
	mu <- 0.10 # positive!
	sigma <- 0.10
	sdb_add <- 0.9
	sdb_multi <- .8

	########################################
	# Generate data
	########################################

	# Initialize "empty" data frame to store data
	# with dplyr. No good way to do this with
	# dplyr.
	df <- data.frame(y_t0_true = rep(NA_real_, N))

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Generate control data
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	df %<>% dplyr::mutate(
	# Draw "true" vote probability for each
	# participant from Unif(0,1)
	y_t0_true = runif(n = N, min = 0, max = 1),

	# Assume there is some social desirability
	# where everyone reports a higher probability
	# of voting than they would otherwise. We can
	# think of 2 different types of SDB: additive
	# effects and multiplicative effects.

	# Additive effect: true probability + sdb,
	# where sdb in [0,1]
	y_t0_observed_add = ifelse(y_t0_true + sdb_add > 1,
	1,
	y_t0_true + sdb_add),

	# Multiplicative effect: true probability*(1+sdb),
	# where sdb > 0
	y_t0_observed_multi = ifelse((y_t0_true * (1 + sdb_multi)) > 1,
	1,
	y_t0_true * (1 + sdb_multi))
	)

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Generate treated data
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	# Get treatment effects
	df %<>% rnorm(n = N, mean = mu, sd = sigma)

	# Treated outcome, no social desirability,
	# round to [0,1] interval
	df %<>% dplyr::mutate(
	y_t1_true = y_t0_true + fx,
	y_t1_true = dplyr::case_when(y_t1_true < 0 ~ 0,
	y_t1_true > 1 ~ 1,
	TRUE ~ y_t1_true)
	)

	# Treated outcome, social desirability
	# round to [0,1] interval, additive
	df %<>% dplyr::mutate(
	y_t1_observed_add = y_t0_observed_add + fx,
	y_t1_observed_add = dplyr::case_when(
	y_t1_observed_add < 0 ~ 0,
	y_t1_observed_add > 1 ~ 1,
	TRUE ~ y_t1_observed_add
	)
	)

	# Treated outcome, social desirability
	# round to [0,1] interval, multiplicative

	df %<>% dplyr::mutate(
	y_t1_observed_multi = y_t0_observed_multi + fx*(1 + sdb_multi),
	y_t1_observed_multi = dplyr::case_when(
	y_t1_observed_multi < 0 ~ 0,
	y_t1_observed_multi > 1 ~ 1,
	TRUE ~ y_t1_observed_multi
	)
	)

	########################################
	# Calculate ATE for different measures
	########################################

	mean(df$y_t1_true - df$y_t0_true) # true
	mean(df$y_t1_observed_add - df$y_t0_observed_add) # additive
	mean(df$y_t1_observed_multi - df$y_t0_observed_multi) # multiplicative