Created
February 15, 2014 00:47
-
-
Save phillip-burger-sculpturearts/9012633 to your computer and use it in GitHub Desktop.
Interviewer effect analysis including several tests. Groups are not normally distributed. Variances are unequal. Code can be used as skeleton for other analysis of interviewer effect.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Name: interviewer-effect-analysis.R | |
# Author: Phillip Burger | |
# Date: December 11, 2013 | |
# Purpose: Takes an already munged and cleaned data set. Does some exploration | |
# of the population and the groups. The does a series of tests. This is a good | |
# skeleton set of code and test for testing for interviewer effect in survey | |
# results. A .Rmd containing analysis and code chunks is also available. | |
# Posted: http://www.phillipburger.net/wordpress/2014/02/10/interviewer-effect-in-survey-results/ | |
# initializespace | |
suppressPackageStartupMessages(require(data.table)) | |
suppressPackageStartupMessages(require(dplyr)) | |
suppressPackageStartupMessages(require(ggplot2)) | |
suppressPackageStartupMessages(require(gridExtra)) | |
suppressPackageStartupMessages(require(lawstat)) | |
set.seed(38249842) | |
# privatehist | |
# Builds a histogram. It is ok to extend this function to make it more flexible. | |
# Parameters: vector of data to explore, the number of breaks as either | |
# a string or a numeric scaler, and the title to display. | |
# Preconditions: Valid input from the calling program. Caller must want the | |
# color values used in this function. | |
# Post conditions: None | |
# Returns: None | |
privateHist <- function(x = x, n = 20, mainTitle = mainTitle) { | |
hist(x, | |
prob = FALSE, | |
breaks = 20, | |
col = rgb(0.2, 0.1, 0.4, 0.15), | |
xlim = c(min(x) - 0.01, 1.0), | |
xlab = "Score", | |
ylab = "Count", | |
main = mainTitle) | |
abline(v = mean(x), col = "red", lwd = 3) | |
abline(v = median(x), col = "blue", lwd = 3) | |
} | |
# loadandsanitize | |
load("dfResponse.Rdata") | |
dt <- data.table(unique(dfResponse[,c("surveyId", "surveyorLastName", "surveyMean")])) | |
setnames(dt, c(1, 2, 3), c("surveyId", "interviewer", "score")) | |
# sanitize the names (names already came in in dfResponse.RData) | |
dt[interviewer == "A_interviewer", interviewer := "A"] | |
dt[interviewer == "B_interviewer", interviewer := "B"] | |
dt[interviewer == "C_interviewer", interviewer := "C"] | |
dt[interviewer == "D_interviewer", interviewer := "D"] | |
dt$interviewer <- droplevels(dt)$interviewer | |
# summarystats | |
(means <- tapply(dt$score, dt$interviewer, mean)) | |
(var <- tapply(dt$score, dt$interviewer, var)) | |
# popplots | |
plot1 <- ggplot(dt, aes(x = factor(interviewer), y = score)) | |
plot1 <- plot1 + geom_boxplot(fill = rgb(0.2, 0.1, 0.4), | |
colour = "black", | |
alpha = 0.15, | |
notch = TRUE, | |
width = 0.75, | |
outlier.size = 3.0, | |
outlier.shape = 21, | |
outlier.colour = "black") + | |
stat_summary(fun.y = "mean", | |
geom = "point", | |
shape = 23, | |
size = 3, | |
fill = "white") + | |
labs(x = "Interviewer", | |
y = "Score", | |
title = "Boxplot of Scores by Interviewer") + | |
theme(plot.title = element_text(size = rel(1.3), | |
face = 'bold'), | |
axis.title.y = element_text(face = 'bold', | |
size = 12), | |
axis.title.x = element_text(face = 'bold', | |
size = 12), | |
legend.position = "none") | |
# histogram | |
# values of x in the annotations will vary depending on aspect ratio | |
plot2 <- ggplot(dt, aes(x = score)) | |
plot2 <- plot2 + geom_histogram( | |
colour = "black", | |
alpha = 0.15) + | |
labs(x = "Score", | |
y = "Count", | |
title = "Histogram of Scores (N=375)") + | |
theme(plot.title = element_text(size = rel(1.3), | |
face = 'bold'), | |
axis.title.y = element_text(face = 'bold', | |
size = 12), | |
axis.title.x = element_text(face = 'bold', | |
size = 12), | |
legend.position = "none") + | |
scale_y_continuous(limits = c(0, 39)) + | |
annotate("text", | |
x = 0.84, | |
y = 39, | |
label = "mean", | |
size = 4.5, | |
color = "#2a2a2a", | |
fontface = 3) + | |
annotate("text", | |
x = 0.955, | |
y = 39, | |
label = "median", | |
size = 4.5, | |
color = "#2a2a2a", | |
fontface = 3) + | |
geom_vline(xintercept = mean(dt$score), | |
linetype = "dashed", | |
color = "red", | |
size = 1) + | |
geom_vline(xintercept = median(dt$score), | |
linetype = "dashed", | |
color = "blue", | |
size = 1) | |
# output to dev or file | |
grid.arrange(plot1, plot2, ncol = 2) | |
# png("interviewer-population-charts.png", | |
# width = 650, | |
# units = "px", | |
# bg = "white") | |
# grouphists | |
classNames <- c("A", "B", "C", "D") | |
par(mfrow=c(2,2), oma=c(0,0,2,0)) | |
for(i in 1:4) { | |
x = filter(dt, interviewer == classNames[i])$score | |
privateHist(x, 20, paste0("Histogram for Group ", classNames[i])) | |
} | |
mtext("Count By Interviewer/Group Histograms", | |
side = 3, | |
line = 0, | |
cex = 1.2, | |
col = "black", | |
outer = TRUE) | |
# getsamples | |
# obtain n for smallest group | |
interviewerGroup1 <- group_by(dt, interviewer) | |
interviewerGroup2 <- group_size(interviewerGroup1) | |
cat("size of smallest group is: ", (lowestN <- min(interviewerGroup2))) | |
# sample the groups with no replacement | |
# A | |
A <- as.data.frame(sample(dt[interviewer == "A", ]$score, | |
lowestN, replace = FALSE)) | |
setnames(A, c(1), c("score")) | |
A$interviewer <- "A" | |
# B | |
B <- as.data.frame(sample(dt[interviewer == "B", ]$score, | |
lowestN, replace = FALSE)) | |
setnames(B, c(1), c("score")) | |
B$interviewer <- "B" | |
# C | |
C <- as.data.frame(sample(dt[interviewer == "C", ]$score, | |
lowestN, replace = FALSE)) | |
setnames(C, c(1), c("score")) | |
C$interviewer <- "C" | |
# D | |
D <- as.data.frame(sample(dt[interviewer == "D", ]$score, | |
lowestN, replace = FALSE)) | |
setnames(D, c(1), c("score")) | |
D$interviewer <- "D" | |
# housekeping | |
rm(interviewerGroup1, interviewerGroup2) | |
# testonewayanova | |
oneway.test(rbind(A, B, C, D)$score ~ rbind(A, B, C, D)$interviewer) | |
# testbartlett | |
lawstat::levene.test(dt$score, group = dt$interviewer, location = c("mean")) | |
# testkruskal | |
kruskal.test(dt$score ~ dt$interviewer) | |
# testaov | |
aovOut <- aov(rbind(A, B, C, D)$score ~ rbind(A, B, C, D)$interviewer) | |
summary(aovOut) | |
# testtukeyhsd | |
TukeyHSD(aovOut) | |
# pairwiseunadj | |
pairwise.t.test(dt$score, dt$interviewer, p.adj = "none") | |
# pairwiseadjbonf | |
pairwise.t.test(dt$score, dt$interviewer, p.adj = "bonf") | |
# pairwiseadjholm, cache=TRUE, eval=TRUE, warning=FALSE, echo=FALSE, message=FALSE} | |
pairwise.t.test(dt$score, dt$interviewer, p.adj = "holm") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment