Last active
August 29, 2015 14:06
-
-
Save dalejbarr/2ebbf142c75a6e9d2312 to your computer and use it in GitHub Desktop.
Datahowler reanalysis of Fredrickson et al. (2013)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# you will need to install the package RR53 to run this script | |
# to download and install, use the following two commands: | |
# library(devtools) | |
# install_github("RR53", "dalejbarr") | |
library(RR53) | |
# download Brown et al.'s version of the data from PNAS | |
# you need only do this once to store a local copy | |
# comment out from the script after first use | |
downloadRR53Data(outfile="rr53.csv") | |
# load in and preprocess the data from the local copy | |
datalist <- readTidyData("rr53.csv") | |
# perform original analysis | |
betas <- rr53(datalist$Genetics, | |
datalist$Predictors, | |
datalist$GeneSigns) | |
betas %>% group_by(Pred) %>% do(oneSampTest(.)) | |
# gene independence analysis | |
# reshape the gene data into a matrix | |
# we need this to calculate correlations using cor() | |
gmx <- datalist$Genetics %>% group_by(SID) %>% spread(Gene, Value) %>% | |
ungroup() %>% select(-SID) %>% as.matrix | |
# cor(gmx) # uncomment to compute the correlation matrix | |
orig.mac <- meanAbsCor(gmx) | |
geneIndependenceTest <- replicate(10000, shuffleAndRecomputeMAC(gmx)) | |
# calculate p, the probability of the original data under independence | |
p.data_under_ind <- | |
sum(c(orig.mac, geneIndependenceTest) >= orig.mac) / | |
(length(geneIndependenceTest) + 1) | |
print(round(p.data_under_ind, 4)) | |
# permutation test | |
# merge the tables so we can fit the models | |
# also, apply gene contrast codes before residulization | |
dat <- inner_join(datalist$Genetics, datalist$Predictors, "SID") %>% | |
inner_join(datalist$GeneSigns, by="Gene") %>% | |
mutate(Value=Value*Sign) %>% select(-Sign) | |
datr <- dat %>% group_by(Gene) %>% do(getGeneResids(.)) %>% | |
arrange(SID, Gene) | |
pred2 <- select(datalist$Predictors, SID, ZScore_Hed, ZScore_Eud) | |
# calculate original test statistic | |
orig <- rr53Resids(datr, pred2, datalist$GeneSigns) | |
# warning! takes a long time... | |
ptest <- replicate(10000, | |
rr53Resids(datr, | |
shuffleSubjects(pred2), | |
datalist$GeneSigns)) | |
ptest.pval <- | |
sum(abs(c(orig, ptest))>=abs(orig))/ | |
length(c(orig, ptest)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment