Created
April 18, 2011 20:35
-
-
Save johncolby/926103 to your computer and use it in GitHub Desktop.
Updates to between group stats code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
exptDir = '/Users/jcolby/Documents/LONI/along-tract-stats/along-tract-stats.git/example/between_group' | |
grpLabs = c('Control', 'FASD') | |
thresh = 0.05 | |
nPerms = 100 | |
library(nlme) # Mixed-effects models | |
library(ggplot2) # Plotting tools | |
library(plyr) # Data manipulation | |
library(RColorBrewer) # Color tables | |
################################################################################ | |
# Import and format data | |
# Read in demographics | |
demog = read.table(file.path(exptDir, 'Demographics.txt'), header=T) | |
demog$Group = factor(demog$Group, levels=rev(levels(demog$Group)), labels=grpLabs) | |
# Read in whole-track properties (ex: streamlines) and merge with demographics | |
trk_props_long = read.table(file.path(exptDir, 'trk_props_long.txt'), header=T) | |
trk_props_long = merge(trk_props_long, demog) | |
# Read in length-parameterized track data (ex: FA) and merge with demographics | |
trk_data = read.table(file.path(exptDir, 'trk_data.txt'), header=T) | |
trk_data$Point = factor(trk_data$Point) | |
trk_data[trk_data==0] = NA | |
trk_data = merge(trk_data, trk_props_long) | |
# Add a Position column for easier plotting | |
trk_data = ddply(trk_data, c("Tract", "Hemisphere"), | |
transform, Position = (as.numeric(Point)-1) * 100/(max(as.numeric(Point))-1)) | |
################################################################################ | |
# Fit LME models | |
# Overall ANOVA for Group and Point:Group effects | |
fit_trk_model1 <- function(df){ | |
lme.trk = lme(FA ~ Point*Group, data=df, random = ~ 1 | ID, na.action=na.omit) | |
data.frame(Term = rownames(anova(lme.trk)), anova(lme.trk)) | |
} | |
models = list() | |
models$anova = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model1) | |
# Fit a cell-means version to get effect sizes relative to controls | |
fit_trk_model2 <- function(df){ | |
lme.trk = tryCatch(lme(FA ~ Point/Group - 1, data=df, random = ~ 1 | ID, na.action=na.omit), error = function(e) data.frame()) | |
if(length(lme.trk)!=0){ | |
term.RE = paste('Point[0-9]+:', 'Group', '.+', sep='') | |
term.rows = grep(term.RE, row.names(summary(lme.trk)$tTable)) | |
data.frame(Point = as.numeric(levels(factor(df$Point))), | |
summary(lme.trk)$tTable[term.rows,]) | |
} else data.frame() | |
} | |
models$tTable = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model2) | |
# Extract a subset like this: subset(models$tTable, Tract=='ILF' & Hemisphere=='L') | |
# Add weights=varFunc(~I(1/SD^2)) to do weighted fitting | |
# Model number of streamlines | |
summary(lm(Streamlines ~ Group + Hemisphere + Tract, data=trk_props_long)) | |
################################################################################ | |
# Permutation testing | |
# Function to permute groups and return max t-stat distribution | |
doPerms <- function(df, demog, nPerms=100){ | |
cat('Permuting data:\n') | |
pb = txtProgressBar(1,nPerms,1, style=3) | |
maxT = NULL | |
df = df[,!colnames(df) %in% colnames(demog)[-1]] | |
for(i in 1:nPerms){ | |
# Permute group membership, but respect ID groupings | |
perm.labs = sample(nrow(demog)) | |
perm.demog = data.frame(ID=demog$ID[perm.labs], demog[,2:ncol(demog)]) | |
perm.df = merge(df, perm.demog) | |
# Fit models and obtain max t-stat | |
tTable = ddply(perm.df, c("Tract", "Hemisphere"), fit_trk_model2) | |
maxT = c(maxT, max(abs(tTable$t))) | |
setTxtProgressBar(pb,i) | |
} | |
close(pb) | |
maxT | |
} | |
# Only correct for multiple comparisons across the tracts with a significant | |
# Point:Group interaction F-test. Set the corrected p-values of other tracts to 1 | |
Ftests = subset(models$anova, Term=='Point:Group') | |
sigFtests = which(Ftests$p.value<0.05) | |
sigF.Tract = Ftests$Tract[sigFtests] | |
sigF.Hemisphere = Ftests$Hemisphere[sigFtests] | |
sigF.trk_data = subset(trk_data, Tract==sigF.Tract & | |
Hemisphere==sigF.Hemisphere) | |
maxT = doPerms(sigF.trk_data, demog, nPerms) | |
# Execute in a terminal to utilize multiple cores and do lots of perms! | |
#library(multicore) | |
#doPerm <- function(iPerm, df, demog) { | |
# df = df[,!colnames(df) %in% colnames(demog)[-1]] | |
# perm.labs = sample(nrow(demog)) | |
# perm.demog = data.frame(ID=demog$ID[perm.labs], demog[,2:ncol(demog)]) | |
# perm.df = merge(df, perm.demog) | |
# tTable = ddply(perm.df, c("Tract", "Hemisphere"), fit_trk_model2) | |
# max(abs(tTable$t)) | |
#} | |
#maxT = as.numeric(mclapply(1:10000, doPerm, df=sigF.trk_data, demog=demog, mc.cores=7)) | |
#save(maxT, file=file.path(exptDir, 'maxT.Rdata')) | |
#load(file.path(exptDir, 'maxT.Rdata')) | |
# Function to adjust point:group p-values according to where a given t-stat lies | |
# along the max t-stat distribution | |
getP <- function(testT, maxT){ | |
# If you do 1000 permutations and the result from the real dataset is the | |
# most extreme, then the empirical p-value is 1/1001 | |
(1+length(maxT[abs(testT) < maxT]))/(length(maxT)+1) | |
} | |
maxT.no.na = maxT[is.na(maxT)==F] | |
crit = floor((1-thresh)*length(maxT.no.na)) | |
models$tTable$p.val.adj = sapply(models$tTable$t, getP, maxT.no.na) | |
models$tTable$p.val.adj[-as.numeric(row.names(subset(models$tTable, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere)))] = 1 | |
# Add a Position column for easier plotting | |
models$tTable = ddply(models$tTable, c("Tract", "Hemisphere"), | |
transform, Position = (as.numeric(Point)-1) * 100/(max(as.numeric(Point))-1)) | |
################################################################################ | |
# Figures | |
######## | |
# Plot the distribution of the max t-stat across all points/tracts under | |
# the null hypothesis of no group effect | |
dev.new(width=4, height=3) | |
p1 = ggplot(data.frame(maxT.no.na), aes(x=maxT.no.na, y=..density..)) | |
p1 + geom_histogram(binwidth=0.1) + geom_density(color='blue') + geom_vline(xint = sort(maxT.no.na)[crit], color='red') + xlim(0,5) + ylim(0,1.75) + labs(x=expression(paste('Empirical max ', group('|', italic(t), '|'))), y='Density') | |
######## | |
# Plot # of streamlines by Group, tract, and hemisphere | |
p2 = ggplot(trk_props_long, aes(x=Group, y=Streamlines, fill=Group)) + geom_bar(stat='summary', fun.y=mean) + facet_grid(Tract~Hemisphere) | |
# Set colors | |
brew_fill = scale_fill_manual(values=rev(brewer.pal(2, 'Set1')[1:2])) | |
# Generate error bars (pointwise 95% CIs) | |
stderr <- function(x) sqrt(var(x)/length(x)) | |
get_bars <- function(y.in){ | |
data.frame(y=mean(y.in), ymax=mean(y.in)+1.96*stderr(y.in), ymin=mean(y.in)-1.96*stderr(y.in)) | |
} | |
error_bars = geom_errorbar(stat='summary', fun.data=get_bars, size=0.25, width=0.25) | |
# Set scales and coordinate systems | |
set_coords = c(scale_y_continuous(breaks=100*c(0:3)), scale_x_discrete('', breaks='')) | |
# Add points for individual subjects | |
jitter = position_jitter(width=0.2) | |
sub_pts = geom_point(fill=NA, position=jitter, alpha=0.3) | |
# Options | |
set_opts = ylab("Streamlines") | |
# Make final plot | |
dev.new(width=3.5, height=4.9) | |
p2 + set_coords + brew_fill + sub_pts + error_bars + set_opts | |
######## | |
# Plot FA vs. position, conditioned on hemisphere, tract, and group | |
brew_cols = scale_colour_manual(values=rev(brewer.pal(2, 'Set1')[1:2])) | |
p3 = qplot(Position, FA, group=ID, colour=Group, size=Streamlines, alpha=I(0.3), data=trk_data, facets = Tract~Hemisphere, geom='line', xlab='Position along tract (%)') + scale_size(to=c(0.25,3)) + brew_cols | |
# Add group means | |
means_smooth = stat_smooth(aes(ymax=..y..+1.96*..se.., ymin=..y..-1.96*..se.., group=Group), alpha=0.8, span=0.5) | |
means = stat_summary(fun.y=mean, geom='line', size=0.6, aes(group=Group)) | |
# Add an asterisk if there is a significant main group effect | |
anova_grp = subset(models$anova, Term=="Group") | |
Caption = rep('', nrow(anova_grp)) | |
Caption[anova_grp$p.value<thresh] = '*' | |
anova_grp = data.frame(anova_grp, Caption) | |
grp_effect = geom_text(aes(x=0, y=0.2, label=Caption, group=NULL, size=NULL), data=anova_grp, colour='black') | |
# If the F-test across the Point:Group terms in a panel is significant, plot a bar at the bottom to indicate which pointwise t-tests are significant | |
get_breaks <- function(df, thresh=0.05){ | |
sig = df$p.value < thresh | |
dsig = c(diff(sig), 0) | |
if(subset(models$anova, Term=="Point:Group" & Tract==df$Tract[1] & Hemisphere==df$Hemisphere[1])$p.value < thresh){ | |
onpts = df$Point[dsig==1]+0.5 | |
offpts = df$Point[dsig==-1]+0.5 | |
# Check for any unclosed segments | |
if(dsig[which(dsig != 0)[1]] == -1) { | |
onpts = c(1, onpts) | |
} | |
if(dsig[rev(which(dsig != 0))[1]] == 1) { | |
offpts = c(offpts, length(dsig)) | |
} | |
data.frame(on = (onpts-1) * 100/(length(df$Point)-1), | |
off = (offpts-1) * 100/(length(df$Point)-1)) | |
} else data.frame(on=0, off=0) | |
} | |
break_list = ddply(models$tTable, c("Tract", "Hemisphere"), get_breaks, thresh=thresh) | |
sig_bars = geom_segment(aes(x=on, y=0.2, xend=off, yend=0.2, group=NULL, size=NULL), data=break_list, colour='black') | |
# Make final plot | |
dev.new(width=7, height=5) | |
p3 + means_smooth + grp_effect + sig_bars | |
######## | |
# Plot p-values | |
# Generate a version of stats results with finer spacing for highlighting | |
# significant areas in green in p/t-value plots | |
lin_interp = function(x, spacing=0.01) { | |
approx(1:length(x), x, xout=seq(1,length(x), spacing))$y | |
} | |
models$tTable.interp = ddply(models$tTable, c("Tract", "Hemisphere"), colwise(lin_interp, c('Point','t.value', 'p.value', 'Position'))) | |
p4 = ggplot(data=models$tTable.interp, aes(x=Position, y=p.value, color=p.value < 0.05, group=1)) + geom_line() + facet_grid(facets=Tract~Hemisphere) + xlab('Position along tract (%)') + scale_color_manual('p.value < 0.05', values=c('red', 'green')) | |
# Gray out non-significant areas | |
grayout_p = annotate('rect', xmin=0, xmax=100, ymin=0.05, ymax=1, alpha=0.25) | |
# Make final plot | |
dev.new(width=7, height=5) | |
p4 + grayout_p + scale_y_log10(limits=c(0.001, 1), breaks=c(0.001, 0.01, 0.1, 1)) | |
######## | |
# Plot t-statistics | |
# Gray out non-significant areas | |
tcrits = ddply(models$tTable, c("Tract", "Hemisphere"), summarize, t.crit=qt(0.05/2, DF[1])) | |
grayout_t = geom_rect(aes(x=1, y=1, ymin=t.crit, ymax=-t.crit), color=NA, xmin=0, xmax=100, alpha=0.25, data=tcrits) | |
# Make final plot | |
dev.new(width=7, height=5) | |
p4 + grayout_t + aes(y=t.value, color=t.value < tcrits$t.crit | t.value > -tcrits$t.crit) + ylim(c(-4, 4)) + geom_hline(yint=0, linetype=3) | |
################################################################################ | |
# Output statistical results for import into MATLAB and overlay onto mean tract # geometry | |
write.table(models$tTable, file=file.path(exptDir, 'effects_table.txt'), quote=F, row.names=F) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
index febbe9e..ea586b5 100644 | |
--- a/Users/jcolby/Downloads/between_group 2/stats_along_tracts.R | |
+++ b/Users/jcolby/Documents/LONI/along-tract-stats/along-tract-stats.git/example/between_group/between_grp.R | |
@@ -1,7 +1,7 @@ | |
exptDir = '/Users/jcolby/Documents/LONI/along-tract-stats/along-tract-stats.git/example/between_group' | |
grpLabs = c('Control', 'FASD') | |
thresh = 0.05 | |
-nPerms = 1000 | |
+nPerms = 100 | |
library(nlme) # Mixed-effects models | |
library(ggplot2) # Plotting tools | |
@@ -11,18 +11,21 @@ library(RColorBrewer) # Color tables | |
################################################################################ | |
# Import and format data | |
# Read in demographics | |
-demog = read.table(file.path(exptDir, 'Demographics.txt'), row.names=1, header=T) | |
+demog = read.table(file.path(exptDir, 'Demographics.txt'), header=T) | |
demog$Group = factor(demog$Group, levels=rev(levels(demog$Group)), labels=grpLabs) | |
# Read in whole-track properties (ex: streamlines) and merge with demographics | |
trk_props_long = read.table(file.path(exptDir, 'trk_props_long.txt'), header=T) | |
-trk_props_long = data.frame(trk_props_long, demog[as.character(trk_props_long$ID),]) | |
+trk_props_long = merge(trk_props_long, demog) | |
# Read in length-parameterized track data (ex: FA) and merge with demographics | |
trk_data = read.table(file.path(exptDir, 'trk_data.txt'), header=T) | |
trk_data$Point = factor(trk_data$Point) | |
trk_data[trk_data==0] = NA | |
trk_data = merge(trk_data, trk_props_long) | |
+# Add a Position column for easier plotting | |
+trk_data = ddply(trk_data, c("Tract", "Hemisphere"), | |
+ transform, Position = (as.numeric(Point)-1) * 100/(max(as.numeric(Point))-1)) | |
################################################################################ | |
# Fit LME models | |
@@ -38,7 +41,10 @@ models$anova = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model1) | |
fit_trk_model2 <- function(df){ | |
lme.trk = tryCatch(lme(FA ~ Point/Group - 1, data=df, random = ~ 1 | ID, na.action=na.omit), error = function(e) data.frame()) | |
if(length(lme.trk)!=0){ | |
- data.frame(Point = as.numeric(levels(trk_data$Point)), summary(lme.trk)$tTable[-(1:length(levels(trk_data$Point))),]) | |
+ term.RE = paste('Point[0-9]+:', 'Group', '.+', sep='') | |
+ term.rows = grep(term.RE, row.names(summary(lme.trk)$tTable)) | |
+ data.frame(Point = as.numeric(levels(factor(df$Point))), | |
+ summary(lme.trk)$tTable[term.rows,]) | |
} else data.frame() | |
} | |
models$tTable = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model2) | |
@@ -46,25 +52,24 @@ models$tTable = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model2) | |
# Add weights=varFunc(~I(1/SD^2)) to do weighted fitting | |
-# Model number of streamlines, just for fun | |
+# Model number of streamlines | |
summary(lm(Streamlines ~ Group + Hemisphere + Tract, data=trk_props_long)) | |
################################################################################ | |
# Permutation testing | |
# Function to permute groups and return max t-stat distribution | |
-doPerms <- function(df, nPerms=nPerms){ | |
+doPerms <- function(df, demog, nPerms=100){ | |
cat('Permuting data:\n') | |
- pb = txtProgressBar(1,nPerms,1, style=3) | |
- maxT = NULL | |
- permGrp = df[duplicated(df$ID)==F,] | |
- row.names(permGrp) = permGrp$ID | |
+ pb = txtProgressBar(1,nPerms,1, style=3) | |
+ maxT = NULL | |
+ df = df[,!colnames(df) %in% colnames(demog)[-1]] | |
for(i in 1:nPerms){ | |
# Permute group membership, but respect ID groupings | |
- permGrp$Group = sample(permGrp$Group) | |
- row.names(permGrp) = permGrp$ID | |
- df$Group = factor(permGrp[as.character(df$ID),]$Group) | |
+ perm.labs = sample(nrow(demog)) | |
+ perm.demog = data.frame(ID=demog$ID[perm.labs], demog[,2:ncol(demog)]) | |
+ perm.df = merge(df, perm.demog) | |
# Fit models and obtain max t-stat | |
- tTable = ddply(df, c("Tract", "Hemisphere"), fit_trk_model2) | |
+ tTable = ddply(perm.df, c("Tract", "Hemisphere"), fit_trk_model2) | |
maxT = c(maxT, max(abs(tTable$t))) | |
setTxtProgressBar(pb,i) | |
} | |
@@ -72,31 +77,34 @@ doPerms <- function(df, nPerms=nPerms){ | |
maxT | |
} | |
-# Only correct for multiple comparisons across the tracts with a significant Point:Group interaction F-test | |
+# Only correct for multiple comparisons across the tracts with a significant | |
+# Point:Group interaction F-test. Set the corrected p-values of other tracts to 1 | |
Ftests = subset(models$anova, Term=='Point:Group') | |
sigFtests = which(Ftests$p.value<0.05) | |
sigF.Tract = Ftests$Tract[sigFtests] | |
sigF.Hemisphere = Ftests$Hemisphere[sigFtests] | |
-sigF.trk_data = subset(trk_data, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere) | |
+sigF.trk_data = subset(trk_data, Tract==sigF.Tract & | |
+ Hemisphere==sigF.Hemisphere) | |
-maxT = doPerms(sigF.trk_data, 100) | |
+maxT = doPerms(sigF.trk_data, demog, nPerms) | |
# Execute in a terminal to utilize multiple cores and do lots of perms! | |
#library(multicore) | |
-#doPerm <- function(iPerm, df) { | |
-# permGrp = df[duplicated(df$ID)==F,] | |
-# permGrp$Group = sample(permGrp$Group) | |
-# df$Group = factor(permGrp[as.character(df$ID),]$Group) | |
-# tTable = ddply(df, c("Tract", "Hemisphere"), fit_trk_model2) | |
+#doPerm <- function(iPerm, df, demog) { | |
+# df = df[,!colnames(df) %in% colnames(demog)[-1]] | |
+# perm.labs = sample(nrow(demog)) | |
+# perm.demog = data.frame(ID=demog$ID[perm.labs], demog[,2:ncol(demog)]) | |
+# perm.df = merge(df, perm.demog) | |
+# tTable = ddply(perm.df, c("Tract", "Hemisphere"), fit_trk_model2) | |
# max(abs(tTable$t)) | |
#} | |
-#maxT = mclapply(1:10000, doPerm, df=trk_data, mc.cores=7) | |
+#maxT = as.numeric(mclapply(1:10000, doPerm, df=sigF.trk_data, demog=demog, mc.cores=7)) | |
#save(maxT, file=file.path(exptDir, 'maxT.Rdata')) | |
#load(file.path(exptDir, 'maxT.Rdata')) | |
-# Function to adjust p-values according to where a given t-stat lies along the | |
-# max t-stat distribution | |
+# Function to adjust point:group p-values according to where a given t-stat lies | |
+# along the max t-stat distribution | |
getP <- function(testT, maxT){ | |
# If you do 1000 permutations and the result from the real dataset is the | |
# most extreme, then the empirical p-value is 1/1001 | |
@@ -107,6 +115,10 @@ crit = floor((1-thresh)*length(maxT.no.na)) | |
models$tTable$p.val.adj = sapply(models$tTable$t, getP, maxT.no.na) | |
models$tTable$p.val.adj[-as.numeric(row.names(subset(models$tTable, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere)))] = 1 | |
+# Add a Position column for easier plotting | |
+models$tTable = ddply(models$tTable, c("Tract", "Hemisphere"), | |
+ transform, Position = (as.numeric(Point)-1) * 100/(max(as.numeric(Point))-1)) | |
+ | |
################################################################################ | |
# Figures | |
######## | |
@@ -146,9 +158,8 @@ p2 + set_coords + brew_fill + sub_pts + error_bars + set_opts | |
######## | |
# Plot FA vs. position, conditioned on hemisphere, tract, and group | |
-scale = 100/(max(as.numeric(trk_data$Point))-1) | |
brew_cols = scale_colour_manual(values=rev(brewer.pal(2, 'Set1')[1:2])) | |
-p3 = qplot((as.numeric(Point)-1)*scale, FA, group=ID, colour=Group, size=Streamlines, alpha=I(0.3), data=trk_data, facets = Tract~Hemisphere, geom='line', xlab='Position along tract (%)') + scale_size(to=c(0.25,3)) + brew_cols | |
+p3 = qplot(Position, FA, group=ID, colour=Group, size=Streamlines, alpha=I(0.3), data=trk_data, facets = Tract~Hemisphere, geom='line', xlab='Position along tract (%)') + scale_size(to=c(0.25,3)) + brew_cols | |
# Add group means | |
means_smooth = stat_smooth(aes(ymax=..y..+1.96*..se.., ymin=..y..-1.96*..se.., group=Group), alpha=0.8, span=0.5) | |
@@ -164,17 +175,58 @@ grp_effect = geom_text(aes(x=0, y=0.2, label=Caption, group=NULL, size=NULL), da | |
# If the F-test across the Point:Group terms in a panel is significant, plot a bar at the bottom to indicate which pointwise t-tests are significant | |
get_breaks <- function(df, thresh=0.05){ | |
sig = df$p.value < thresh | |
- dsig = diff(sig) | |
+ dsig = c(diff(sig), 0) | |
if(subset(models$anova, Term=="Point:Group" & Tract==df$Tract[1] & Hemisphere==df$Hemisphere[1])$p.value < thresh){ | |
- data.frame(on = df$Point[dsig==1]+0.5, off = df$Point[dsig==-1]+0.5) | |
- } else data.frame() | |
+ onpts = df$Point[dsig==1]+0.5 | |
+ offpts = df$Point[dsig==-1]+0.5 | |
+ | |
+ # Check for any unclosed segments | |
+ if(dsig[which(dsig != 0)[1]] == -1) { | |
+ onpts = c(1, onpts) | |
+ } | |
+ if(dsig[rev(which(dsig != 0))[1]] == 1) { | |
+ offpts = c(offpts, length(dsig)) | |
+ } | |
+ | |
+ data.frame(on = (onpts-1) * 100/(length(df$Point)-1), | |
+ off = (offpts-1) * 100/(length(df$Point)-1)) | |
+ } else data.frame(on=0, off=0) | |
} | |
break_list = ddply(models$tTable, c("Tract", "Hemisphere"), get_breaks, thresh=thresh) | |
-sig_bars = geom_segment(aes(x = (on-1)*scale, y=0.2, xend=(off-1)*scale, yend=0.2, group=NULL, size=NULL), data=break_list, colour='black') | |
+sig_bars = geom_segment(aes(x=on, y=0.2, xend=off, yend=0.2, group=NULL, size=NULL), data=break_list, colour='black') | |
# Make final plot | |
dev.new(width=7, height=5) | |
p3 + means_smooth + grp_effect + sig_bars | |
+######## | |
+# Plot p-values | |
+# Generate a version of stats results with finer spacing for highlighting | |
+# significant areas in green in p/t-value plots | |
+lin_interp = function(x, spacing=0.01) { | |
+ approx(1:length(x), x, xout=seq(1,length(x), spacing))$y | |
+} | |
+models$tTable.interp = ddply(models$tTable, c("Tract", "Hemisphere"), colwise(lin_interp, c('Point','t.value', 'p.value', 'Position'))) | |
+ | |
+p4 = ggplot(data=models$tTable.interp, aes(x=Position, y=p.value, color=p.value < 0.05, group=1)) + geom_line() + facet_grid(facets=Tract~Hemisphere) + xlab('Position along tract (%)') + scale_color_manual('p.value < 0.05', values=c('red', 'green')) | |
+ | |
+# Gray out non-significant areas | |
+grayout_p = annotate('rect', xmin=0, xmax=100, ymin=0.05, ymax=1, alpha=0.25) | |
+ | |
+# Make final plot | |
+dev.new(width=7, height=5) | |
+p4 + grayout_p + scale_y_log10(limits=c(0.001, 1), breaks=c(0.001, 0.01, 0.1, 1)) | |
+ | |
+######## | |
+# Plot t-statistics | |
+# Gray out non-significant areas | |
+tcrits = ddply(models$tTable, c("Tract", "Hemisphere"), summarize, t.crit=qt(0.05/2, DF[1])) | |
+grayout_t = geom_rect(aes(x=1, y=1, ymin=t.crit, ymax=-t.crit), color=NA, xmin=0, xmax=100, alpha=0.25, data=tcrits) | |
+ | |
+# Make final plot | |
+dev.new(width=7, height=5) | |
+p4 + grayout_t + aes(y=t.value, color=t.value < tcrits$t.crit | t.value > -tcrits$t.crit) + ylim(c(-4, 4)) + geom_hline(yint=0, linetype=3) | |
+ | |
+################################################################################ | |
# Output statistical results for import into MATLAB and overlay onto mean tract # geometry | |
write.table(models$tTable, file=file.path(exptDir, 'effects_table.txt'), quote=F, row.names=F) | |
\ No newline at end of file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
exptDir = '/Users/jcolby/Documents/LONI/along-tract-stats/along-tract-stats.git/example/between_group' | |
grpLabs = c('Control', 'FASD') | |
thresh = 0.05 | |
nPerms = 1000 | |
library(nlme) # Mixed-effects models | |
library(ggplot2) # Plotting tools | |
library(plyr) # Data manipulation | |
library(RColorBrewer) # Color tables | |
################################################################################ | |
# Import and format data | |
# Read in demographics | |
demog = read.table(file.path(exptDir, 'Demographics.txt'), row.names=1, header=T) | |
demog$Group = factor(demog$Group, levels=rev(levels(demog$Group)), labels=grpLabs) | |
# Read in whole-track properties (ex: streamlines) and merge with demographics | |
trk_props_long = read.table(file.path(exptDir, 'trk_props_long.txt'), header=T) | |
trk_props_long = data.frame(trk_props_long, demog[as.character(trk_props_long$ID),]) | |
# Read in length-parameterized track data (ex: FA) and merge with demographics | |
trk_data = read.table(file.path(exptDir, 'trk_data.txt'), header=T) | |
trk_data$Point = factor(trk_data$Point) | |
trk_data[trk_data==0] = NA | |
trk_data = merge(trk_data, trk_props_long) | |
################################################################################ | |
# Fit LME models | |
# Overall ANOVA for Group and Point:Group effects | |
fit_trk_model1 <- function(df){ | |
lme.trk = lme(FA ~ Point*Group, data=df, random = ~ 1 | ID, na.action=na.omit) | |
data.frame(Term = rownames(anova(lme.trk)), anova(lme.trk)) | |
} | |
models = list() | |
models$anova = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model1) | |
# Fit a cell-means version to get effect sizes relative to controls | |
fit_trk_model2 <- function(df){ | |
lme.trk = tryCatch(lme(FA ~ Point/Group - 1, data=df, random = ~ 1 | ID, na.action=na.omit), error = function(e) data.frame()) | |
if(length(lme.trk)!=0){ | |
data.frame(Point = as.numeric(levels(trk_data$Point)), summary(lme.trk)$tTable[-(1:length(levels(trk_data$Point))),]) | |
} else data.frame() | |
} | |
models$tTable = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model2) | |
# Extract a subset like this: subset(models$tTable, Tract=='ILF' & Hemisphere=='L') | |
# Add weights=varFunc(~I(1/SD^2)) to do weighted fitting | |
# Model number of streamlines, just for fun | |
summary(lm(Streamlines ~ Group + Hemisphere + Tract, data=trk_props_long)) | |
################################################################################ | |
# Permutation testing | |
# Function to permute groups and return max t-stat distribution | |
doPerms <- function(df, nPerms=nPerms){ | |
cat('Permuting data:\n') | |
pb = txtProgressBar(1,nPerms,1, style=3) | |
maxT = NULL | |
permGrp = df[duplicated(df$ID)==F,] | |
row.names(permGrp) = permGrp$ID | |
for(i in 1:nPerms){ | |
# Permute group membership, but respect ID groupings | |
permGrp$Group = sample(permGrp$Group) | |
row.names(permGrp) = permGrp$ID | |
df$Group = factor(permGrp[as.character(df$ID),]$Group) | |
# Fit models and obtain max t-stat | |
tTable = ddply(df, c("Tract", "Hemisphere"), fit_trk_model2) | |
maxT = c(maxT, max(abs(tTable$t))) | |
setTxtProgressBar(pb,i) | |
} | |
close(pb) | |
maxT | |
} | |
# Only correct for multiple comparisons across the tracts with a significant Point:Group interaction F-test | |
Ftests = subset(models$anova, Term=='Point:Group') | |
sigFtests = which(Ftests$p.value<0.05) | |
sigF.Tract = Ftests$Tract[sigFtests] | |
sigF.Hemisphere = Ftests$Hemisphere[sigFtests] | |
sigF.trk_data = subset(trk_data, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere) | |
maxT = doPerms(sigF.trk_data, 100) | |
# Execute in a terminal to utilize multiple cores and do lots of perms! | |
#library(multicore) | |
#doPerm <- function(iPerm, df) { | |
# permGrp = df[duplicated(df$ID)==F,] | |
# permGrp$Group = sample(permGrp$Group) | |
# df$Group = factor(permGrp[as.character(df$ID),]$Group) | |
# tTable = ddply(df, c("Tract", "Hemisphere"), fit_trk_model2) | |
# max(abs(tTable$t)) | |
#} | |
#maxT = mclapply(1:10000, doPerm, df=trk_data, mc.cores=7) | |
#save(maxT, file=file.path(exptDir, 'maxT.Rdata')) | |
#load(file.path(exptDir, 'maxT.Rdata')) | |
# Function to adjust p-values according to where a given t-stat lies along the | |
# max t-stat distribution | |
getP <- function(testT, maxT){ | |
# If you do 1000 permutations and the result from the real dataset is the | |
# most extreme, then the empirical p-value is 1/1001 | |
(1+length(maxT[abs(testT) < maxT]))/(length(maxT)+1) | |
} | |
maxT.no.na = maxT[is.na(maxT)==F] | |
crit = floor((1-thresh)*length(maxT.no.na)) | |
models$tTable$p.val.adj = sapply(models$tTable$t, getP, maxT.no.na) | |
models$tTable$p.val.adj[-as.numeric(row.names(subset(models$tTable, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere)))] = 1 | |
################################################################################ | |
# Figures | |
######## | |
# Plot the distribution of the max t-stat across all points/tracts under | |
# the null hypothesis of no group effect | |
dev.new(width=4, height=3) | |
p1 = ggplot(data.frame(maxT.no.na), aes(x=maxT.no.na, y=..density..)) | |
p1 + geom_histogram(binwidth=0.1) + geom_density(color='blue') + geom_vline(xint = sort(maxT.no.na)[crit], color='red') + xlim(0,5) + ylim(0,1.75) + labs(x=expression(paste('Empirical max ', group('|', italic(t), '|'))), y='Density') | |
######## | |
# Plot # of streamlines by Group, tract, and hemisphere | |
p2 = ggplot(trk_props_long, aes(x=Group, y=Streamlines, fill=Group)) + geom_bar(stat='summary', fun.y=mean) + facet_grid(Tract~Hemisphere) | |
# Set colors | |
brew_fill = scale_fill_manual(values=rev(brewer.pal(2, 'Set1')[1:2])) | |
# Generate error bars (pointwise 95% CIs) | |
stderr <- function(x) sqrt(var(x)/length(x)) | |
get_bars <- function(y.in){ | |
data.frame(y=mean(y.in), ymax=mean(y.in)+1.96*stderr(y.in), ymin=mean(y.in)-1.96*stderr(y.in)) | |
} | |
error_bars = geom_errorbar(stat='summary', fun.data=get_bars, size=0.25, width=0.25) | |
# Set scales and coordinate systems | |
set_coords = c(scale_y_continuous(breaks=100*c(0:3)), scale_x_discrete('', breaks='')) | |
# Add points for individual subjects | |
jitter = position_jitter(width=0.2) | |
sub_pts = geom_point(fill=NA, position=jitter, alpha=0.3) | |
# Options | |
set_opts = ylab("Streamlines") | |
# Make final plot | |
dev.new(width=3.5, height=4.9) | |
p2 + set_coords + brew_fill + sub_pts + error_bars + set_opts | |
######## | |
# Plot FA vs. position, conditioned on hemisphere, tract, and group | |
scale = 100/(max(as.numeric(trk_data$Point))-1) | |
brew_cols = scale_colour_manual(values=rev(brewer.pal(2, 'Set1')[1:2])) | |
p3 = qplot((as.numeric(Point)-1)*scale, FA, group=ID, colour=Group, size=Streamlines, alpha=I(0.3), data=trk_data, facets = Tract~Hemisphere, geom='line', xlab='Position along tract (%)') + scale_size(to=c(0.25,3)) + brew_cols | |
# Add group means | |
means_smooth = stat_smooth(aes(ymax=..y..+1.96*..se.., ymin=..y..-1.96*..se.., group=Group), alpha=0.8, span=0.5) | |
means = stat_summary(fun.y=mean, geom='line', size=0.6, aes(group=Group)) | |
# Add an asterisk if there is a significant main group effect | |
anova_grp = subset(models$anova, Term=="Group") | |
Caption = rep('', nrow(anova_grp)) | |
Caption[anova_grp$p.value<thresh] = '*' | |
anova_grp = data.frame(anova_grp, Caption) | |
grp_effect = geom_text(aes(x=0, y=0.2, label=Caption, group=NULL, size=NULL), data=anova_grp, colour='black') | |
# If the F-test across the Point:Group terms in a panel is significant, plot a bar at the bottom to indicate which pointwise t-tests are significant | |
get_breaks <- function(df, thresh=0.05){ | |
sig = df$p.value < thresh | |
dsig = diff(sig) | |
if(subset(models$anova, Term=="Point:Group" & Tract==df$Tract[1] & Hemisphere==df$Hemisphere[1])$p.value < thresh){ | |
data.frame(on = df$Point[dsig==1]+0.5, off = df$Point[dsig==-1]+0.5) | |
} else data.frame() | |
} | |
break_list = ddply(models$tTable, c("Tract", "Hemisphere"), get_breaks, thresh=thresh) | |
sig_bars = geom_segment(aes(x = (on-1)*scale, y=0.2, xend=(off-1)*scale, yend=0.2, group=NULL, size=NULL), data=break_list, colour='black') | |
# Make final plot | |
dev.new(width=7, height=5) | |
p3 + means_smooth + grp_effect + sig_bars | |
# Output statistical results for import into MATLAB and overlay onto mean tract # geometry | |
write.table(models$tTable, file=file.path(exptDir, 'effects_table.txt'), quote=F, row.names=F) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Improved robustness of significance bar plotting @ line 175 of new version (between_grp.R)