Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save phillip-burger-sculpturearts/9012633 to your computer and use it in GitHub Desktop.
Save phillip-burger-sculpturearts/9012633 to your computer and use it in GitHub Desktop.
Interviewer effect analysis including several tests. Groups are not normally distributed. Variances are unequal. Code can be used as skeleton for other analysis of interviewer effect.
# Name: interviewer-effect-analysis.R
# Author: Phillip Burger
# Date: December 11, 2013
# Purpose: Takes an already munged and cleaned data set. Does some exploration
# of the population and the groups. The does a series of tests. This is a good
# skeleton set of code and test for testing for interviewer effect in survey
# results. A .Rmd containing analysis and code chunks is also available.
# Posted: http://www.phillipburger.net/wordpress/2014/02/10/interviewer-effect-in-survey-results/
# initializespace
suppressPackageStartupMessages(require(data.table))
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(ggplot2))
suppressPackageStartupMessages(require(gridExtra))
suppressPackageStartupMessages(require(lawstat))
set.seed(38249842)
# privatehist
# Builds a histogram. It is ok to extend this function to make it more flexible.
# Parameters: vector of data to explore, the number of breaks as either
# a string or a numeric scaler, and the title to display.
# Preconditions: Valid input from the calling program. Caller must want the
# color values used in this function.
# Post conditions: None
# Returns: None
privateHist <- function(x = x, n = 20, mainTitle = mainTitle) {
hist(x,
prob = FALSE,
breaks = 20,
col = rgb(0.2, 0.1, 0.4, 0.15),
xlim = c(min(x) - 0.01, 1.0),
xlab = "Score",
ylab = "Count",
main = mainTitle)
abline(v = mean(x), col = "red", lwd = 3)
abline(v = median(x), col = "blue", lwd = 3)
}
# loadandsanitize
load("dfResponse.Rdata")
dt <- data.table(unique(dfResponse[,c("surveyId", "surveyorLastName", "surveyMean")]))
setnames(dt, c(1, 2, 3), c("surveyId", "interviewer", "score"))
# sanitize the names (names already came in in dfResponse.RData)
dt[interviewer == "A_interviewer", interviewer := "A"]
dt[interviewer == "B_interviewer", interviewer := "B"]
dt[interviewer == "C_interviewer", interviewer := "C"]
dt[interviewer == "D_interviewer", interviewer := "D"]
dt$interviewer <- droplevels(dt)$interviewer
# summarystats
(means <- tapply(dt$score, dt$interviewer, mean))
(var <- tapply(dt$score, dt$interviewer, var))
# popplots
plot1 <- ggplot(dt, aes(x = factor(interviewer), y = score))
plot1 <- plot1 + geom_boxplot(fill = rgb(0.2, 0.1, 0.4),
colour = "black",
alpha = 0.15,
notch = TRUE,
width = 0.75,
outlier.size = 3.0,
outlier.shape = 21,
outlier.colour = "black") +
stat_summary(fun.y = "mean",
geom = "point",
shape = 23,
size = 3,
fill = "white") +
labs(x = "Interviewer",
y = "Score",
title = "Boxplot of Scores by Interviewer") +
theme(plot.title = element_text(size = rel(1.3),
face = 'bold'),
axis.title.y = element_text(face = 'bold',
size = 12),
axis.title.x = element_text(face = 'bold',
size = 12),
legend.position = "none")
# histogram
# values of x in the annotations will vary depending on aspect ratio
plot2 <- ggplot(dt, aes(x = score))
plot2 <- plot2 + geom_histogram(
colour = "black",
alpha = 0.15) +
labs(x = "Score",
y = "Count",
title = "Histogram of Scores (N=375)") +
theme(plot.title = element_text(size = rel(1.3),
face = 'bold'),
axis.title.y = element_text(face = 'bold',
size = 12),
axis.title.x = element_text(face = 'bold',
size = 12),
legend.position = "none") +
scale_y_continuous(limits = c(0, 39)) +
annotate("text",
x = 0.84,
y = 39,
label = "mean",
size = 4.5,
color = "#2a2a2a",
fontface = 3) +
annotate("text",
x = 0.955,
y = 39,
label = "median",
size = 4.5,
color = "#2a2a2a",
fontface = 3) +
geom_vline(xintercept = mean(dt$score),
linetype = "dashed",
color = "red",
size = 1) +
geom_vline(xintercept = median(dt$score),
linetype = "dashed",
color = "blue",
size = 1)
# output to dev or file
grid.arrange(plot1, plot2, ncol = 2)
# png("interviewer-population-charts.png",
# width = 650,
# units = "px",
# bg = "white")
# grouphists
classNames <- c("A", "B", "C", "D")
par(mfrow=c(2,2), oma=c(0,0,2,0))
for(i in 1:4) {
x = filter(dt, interviewer == classNames[i])$score
privateHist(x, 20, paste0("Histogram for Group ", classNames[i]))
}
mtext("Count By Interviewer/Group Histograms",
side = 3,
line = 0,
cex = 1.2,
col = "black",
outer = TRUE)
# getsamples
# obtain n for smallest group
interviewerGroup1 <- group_by(dt, interviewer)
interviewerGroup2 <- group_size(interviewerGroup1)
cat("size of smallest group is: ", (lowestN <- min(interviewerGroup2)))
# sample the groups with no replacement
# A
A <- as.data.frame(sample(dt[interviewer == "A", ]$score,
lowestN, replace = FALSE))
setnames(A, c(1), c("score"))
A$interviewer <- "A"
# B
B <- as.data.frame(sample(dt[interviewer == "B", ]$score,
lowestN, replace = FALSE))
setnames(B, c(1), c("score"))
B$interviewer <- "B"
# C
C <- as.data.frame(sample(dt[interviewer == "C", ]$score,
lowestN, replace = FALSE))
setnames(C, c(1), c("score"))
C$interviewer <- "C"
# D
D <- as.data.frame(sample(dt[interviewer == "D", ]$score,
lowestN, replace = FALSE))
setnames(D, c(1), c("score"))
D$interviewer <- "D"
# housekeping
rm(interviewerGroup1, interviewerGroup2)
# testonewayanova
oneway.test(rbind(A, B, C, D)$score ~ rbind(A, B, C, D)$interviewer)
# testbartlett
lawstat::levene.test(dt$score, group = dt$interviewer, location = c("mean"))
# testkruskal
kruskal.test(dt$score ~ dt$interviewer)
# testaov
aovOut <- aov(rbind(A, B, C, D)$score ~ rbind(A, B, C, D)$interviewer)
summary(aovOut)
# testtukeyhsd
TukeyHSD(aovOut)
# pairwiseunadj
pairwise.t.test(dt$score, dt$interviewer, p.adj = "none")
# pairwiseadjbonf
pairwise.t.test(dt$score, dt$interviewer, p.adj = "bonf")
# pairwiseadjholm, cache=TRUE, eval=TRUE, warning=FALSE, echo=FALSE, message=FALSE}
pairwise.t.test(dt$score, dt$interviewer, p.adj = "holm")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment