Skip to content

Instantly share code, notes, and snippets.

@johncolby
Created April 18, 2011 20:35
Show Gist options
  • Save johncolby/926103 to your computer and use it in GitHub Desktop.
Save johncolby/926103 to your computer and use it in GitHub Desktop.
Updates to between group stats code
exptDir = '/Users/jcolby/Documents/LONI/along-tract-stats/along-tract-stats.git/example/between_group'
grpLabs = c('Control', 'FASD')
thresh = 0.05
nPerms = 100
library(nlme) # Mixed-effects models
library(ggplot2) # Plotting tools
library(plyr) # Data manipulation
library(RColorBrewer) # Color tables
################################################################################
# Import and format data
# Read in demographics
demog = read.table(file.path(exptDir, 'Demographics.txt'), header=T)
demog$Group = factor(demog$Group, levels=rev(levels(demog$Group)), labels=grpLabs)
# Read in whole-track properties (ex: streamlines) and merge with demographics
trk_props_long = read.table(file.path(exptDir, 'trk_props_long.txt'), header=T)
trk_props_long = merge(trk_props_long, demog)
# Read in length-parameterized track data (ex: FA) and merge with demographics
trk_data = read.table(file.path(exptDir, 'trk_data.txt'), header=T)
trk_data$Point = factor(trk_data$Point)
trk_data[trk_data==0] = NA
trk_data = merge(trk_data, trk_props_long)
# Add a Position column for easier plotting
trk_data = ddply(trk_data, c("Tract", "Hemisphere"),
transform, Position = (as.numeric(Point)-1) * 100/(max(as.numeric(Point))-1))
################################################################################
# Fit LME models
# Overall ANOVA for Group and Point:Group effects
fit_trk_model1 <- function(df){
lme.trk = lme(FA ~ Point*Group, data=df, random = ~ 1 | ID, na.action=na.omit)
data.frame(Term = rownames(anova(lme.trk)), anova(lme.trk))
}
models = list()
models$anova = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model1)
# Fit a cell-means version to get effect sizes relative to controls
fit_trk_model2 <- function(df){
lme.trk = tryCatch(lme(FA ~ Point/Group - 1, data=df, random = ~ 1 | ID, na.action=na.omit), error = function(e) data.frame())
if(length(lme.trk)!=0){
term.RE = paste('Point[0-9]+:', 'Group', '.+', sep='')
term.rows = grep(term.RE, row.names(summary(lme.trk)$tTable))
data.frame(Point = as.numeric(levels(factor(df$Point))),
summary(lme.trk)$tTable[term.rows,])
} else data.frame()
}
models$tTable = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model2)
# Extract a subset like this: subset(models$tTable, Tract=='ILF' & Hemisphere=='L')
# Add weights=varFunc(~I(1/SD^2)) to do weighted fitting
# Model number of streamlines
summary(lm(Streamlines ~ Group + Hemisphere + Tract, data=trk_props_long))
################################################################################
# Permutation testing
# Function to permute groups and return max t-stat distribution
doPerms <- function(df, demog, nPerms=100){
cat('Permuting data:\n')
pb = txtProgressBar(1,nPerms,1, style=3)
maxT = NULL
df = df[,!colnames(df) %in% colnames(demog)[-1]]
for(i in 1:nPerms){
# Permute group membership, but respect ID groupings
perm.labs = sample(nrow(demog))
perm.demog = data.frame(ID=demog$ID[perm.labs], demog[,2:ncol(demog)])
perm.df = merge(df, perm.demog)
# Fit models and obtain max t-stat
tTable = ddply(perm.df, c("Tract", "Hemisphere"), fit_trk_model2)
maxT = c(maxT, max(abs(tTable$t)))
setTxtProgressBar(pb,i)
}
close(pb)
maxT
}
# Only correct for multiple comparisons across the tracts with a significant
# Point:Group interaction F-test. Set the corrected p-values of other tracts to 1
Ftests = subset(models$anova, Term=='Point:Group')
sigFtests = which(Ftests$p.value<0.05)
sigF.Tract = Ftests$Tract[sigFtests]
sigF.Hemisphere = Ftests$Hemisphere[sigFtests]
sigF.trk_data = subset(trk_data, Tract==sigF.Tract &
Hemisphere==sigF.Hemisphere)
maxT = doPerms(sigF.trk_data, demog, nPerms)
# Execute in a terminal to utilize multiple cores and do lots of perms!
#library(multicore)
#doPerm <- function(iPerm, df, demog) {
# df = df[,!colnames(df) %in% colnames(demog)[-1]]
# perm.labs = sample(nrow(demog))
# perm.demog = data.frame(ID=demog$ID[perm.labs], demog[,2:ncol(demog)])
# perm.df = merge(df, perm.demog)
# tTable = ddply(perm.df, c("Tract", "Hemisphere"), fit_trk_model2)
# max(abs(tTable$t))
#}
#maxT = as.numeric(mclapply(1:10000, doPerm, df=sigF.trk_data, demog=demog, mc.cores=7))
#save(maxT, file=file.path(exptDir, 'maxT.Rdata'))
#load(file.path(exptDir, 'maxT.Rdata'))
# Function to adjust point:group p-values according to where a given t-stat lies
# along the max t-stat distribution
getP <- function(testT, maxT){
# If you do 1000 permutations and the result from the real dataset is the
# most extreme, then the empirical p-value is 1/1001
(1+length(maxT[abs(testT) < maxT]))/(length(maxT)+1)
}
maxT.no.na = maxT[is.na(maxT)==F]
crit = floor((1-thresh)*length(maxT.no.na))
models$tTable$p.val.adj = sapply(models$tTable$t, getP, maxT.no.na)
models$tTable$p.val.adj[-as.numeric(row.names(subset(models$tTable, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere)))] = 1
# Add a Position column for easier plotting
models$tTable = ddply(models$tTable, c("Tract", "Hemisphere"),
transform, Position = (as.numeric(Point)-1) * 100/(max(as.numeric(Point))-1))
################################################################################
# Figures
########
# Plot the distribution of the max t-stat across all points/tracts under
# the null hypothesis of no group effect
dev.new(width=4, height=3)
p1 = ggplot(data.frame(maxT.no.na), aes(x=maxT.no.na, y=..density..))
p1 + geom_histogram(binwidth=0.1) + geom_density(color='blue') + geom_vline(xint = sort(maxT.no.na)[crit], color='red') + xlim(0,5) + ylim(0,1.75) + labs(x=expression(paste('Empirical max ', group('|', italic(t), '|'))), y='Density')
########
# Plot # of streamlines by Group, tract, and hemisphere
p2 = ggplot(trk_props_long, aes(x=Group, y=Streamlines, fill=Group)) + geom_bar(stat='summary', fun.y=mean) + facet_grid(Tract~Hemisphere)
# Set colors
brew_fill = scale_fill_manual(values=rev(brewer.pal(2, 'Set1')[1:2]))
# Generate error bars (pointwise 95% CIs)
stderr <- function(x) sqrt(var(x)/length(x))
get_bars <- function(y.in){
data.frame(y=mean(y.in), ymax=mean(y.in)+1.96*stderr(y.in), ymin=mean(y.in)-1.96*stderr(y.in))
}
error_bars = geom_errorbar(stat='summary', fun.data=get_bars, size=0.25, width=0.25)
# Set scales and coordinate systems
set_coords = c(scale_y_continuous(breaks=100*c(0:3)), scale_x_discrete('', breaks=''))
# Add points for individual subjects
jitter = position_jitter(width=0.2)
sub_pts = geom_point(fill=NA, position=jitter, alpha=0.3)
# Options
set_opts = ylab("Streamlines")
# Make final plot
dev.new(width=3.5, height=4.9)
p2 + set_coords + brew_fill + sub_pts + error_bars + set_opts
########
# Plot FA vs. position, conditioned on hemisphere, tract, and group
brew_cols = scale_colour_manual(values=rev(brewer.pal(2, 'Set1')[1:2]))
p3 = qplot(Position, FA, group=ID, colour=Group, size=Streamlines, alpha=I(0.3), data=trk_data, facets = Tract~Hemisphere, geom='line', xlab='Position along tract (%)') + scale_size(to=c(0.25,3)) + brew_cols
# Add group means
means_smooth = stat_smooth(aes(ymax=..y..+1.96*..se.., ymin=..y..-1.96*..se.., group=Group), alpha=0.8, span=0.5)
means = stat_summary(fun.y=mean, geom='line', size=0.6, aes(group=Group))
# Add an asterisk if there is a significant main group effect
anova_grp = subset(models$anova, Term=="Group")
Caption = rep('', nrow(anova_grp))
Caption[anova_grp$p.value<thresh] = '*'
anova_grp = data.frame(anova_grp, Caption)
grp_effect = geom_text(aes(x=0, y=0.2, label=Caption, group=NULL, size=NULL), data=anova_grp, colour='black')
# If the F-test across the Point:Group terms in a panel is significant, plot a bar at the bottom to indicate which pointwise t-tests are significant
get_breaks <- function(df, thresh=0.05){
sig = df$p.value < thresh
dsig = c(diff(sig), 0)
if(subset(models$anova, Term=="Point:Group" & Tract==df$Tract[1] & Hemisphere==df$Hemisphere[1])$p.value < thresh){
onpts = df$Point[dsig==1]+0.5
offpts = df$Point[dsig==-1]+0.5
# Check for any unclosed segments
if(dsig[which(dsig != 0)[1]] == -1) {
onpts = c(1, onpts)
}
if(dsig[rev(which(dsig != 0))[1]] == 1) {
offpts = c(offpts, length(dsig))
}
data.frame(on = (onpts-1) * 100/(length(df$Point)-1),
off = (offpts-1) * 100/(length(df$Point)-1))
} else data.frame(on=0, off=0)
}
break_list = ddply(models$tTable, c("Tract", "Hemisphere"), get_breaks, thresh=thresh)
sig_bars = geom_segment(aes(x=on, y=0.2, xend=off, yend=0.2, group=NULL, size=NULL), data=break_list, colour='black')
# Make final plot
dev.new(width=7, height=5)
p3 + means_smooth + grp_effect + sig_bars
########
# Plot p-values
# Generate a version of stats results with finer spacing for highlighting
# significant areas in green in p/t-value plots
lin_interp = function(x, spacing=0.01) {
approx(1:length(x), x, xout=seq(1,length(x), spacing))$y
}
models$tTable.interp = ddply(models$tTable, c("Tract", "Hemisphere"), colwise(lin_interp, c('Point','t.value', 'p.value', 'Position')))
p4 = ggplot(data=models$tTable.interp, aes(x=Position, y=p.value, color=p.value < 0.05, group=1)) + geom_line() + facet_grid(facets=Tract~Hemisphere) + xlab('Position along tract (%)') + scale_color_manual('p.value < 0.05', values=c('red', 'green'))
# Gray out non-significant areas
grayout_p = annotate('rect', xmin=0, xmax=100, ymin=0.05, ymax=1, alpha=0.25)
# Make final plot
dev.new(width=7, height=5)
p4 + grayout_p + scale_y_log10(limits=c(0.001, 1), breaks=c(0.001, 0.01, 0.1, 1))
########
# Plot t-statistics
# Gray out non-significant areas
tcrits = ddply(models$tTable, c("Tract", "Hemisphere"), summarize, t.crit=qt(0.05/2, DF[1]))
grayout_t = geom_rect(aes(x=1, y=1, ymin=t.crit, ymax=-t.crit), color=NA, xmin=0, xmax=100, alpha=0.25, data=tcrits)
# Make final plot
dev.new(width=7, height=5)
p4 + grayout_t + aes(y=t.value, color=t.value < tcrits$t.crit | t.value > -tcrits$t.crit) + ylim(c(-4, 4)) + geom_hline(yint=0, linetype=3)
################################################################################
# Output statistical results for import into MATLAB and overlay onto mean tract # geometry
write.table(models$tTable, file=file.path(exptDir, 'effects_table.txt'), quote=F, row.names=F)
index febbe9e..ea586b5 100644
--- a/Users/jcolby/Downloads/between_group 2/stats_along_tracts.R
+++ b/Users/jcolby/Documents/LONI/along-tract-stats/along-tract-stats.git/example/between_group/between_grp.R
@@ -1,7 +1,7 @@
exptDir = '/Users/jcolby/Documents/LONI/along-tract-stats/along-tract-stats.git/example/between_group'
grpLabs = c('Control', 'FASD')
thresh = 0.05
-nPerms = 1000
+nPerms = 100
library(nlme) # Mixed-effects models
library(ggplot2) # Plotting tools
@@ -11,18 +11,21 @@ library(RColorBrewer) # Color tables
################################################################################
# Import and format data
# Read in demographics
-demog = read.table(file.path(exptDir, 'Demographics.txt'), row.names=1, header=T)
+demog = read.table(file.path(exptDir, 'Demographics.txt'), header=T)
demog$Group = factor(demog$Group, levels=rev(levels(demog$Group)), labels=grpLabs)
# Read in whole-track properties (ex: streamlines) and merge with demographics
trk_props_long = read.table(file.path(exptDir, 'trk_props_long.txt'), header=T)
-trk_props_long = data.frame(trk_props_long, demog[as.character(trk_props_long$ID),])
+trk_props_long = merge(trk_props_long, demog)
# Read in length-parameterized track data (ex: FA) and merge with demographics
trk_data = read.table(file.path(exptDir, 'trk_data.txt'), header=T)
trk_data$Point = factor(trk_data$Point)
trk_data[trk_data==0] = NA
trk_data = merge(trk_data, trk_props_long)
+# Add a Position column for easier plotting
+trk_data = ddply(trk_data, c("Tract", "Hemisphere"),
+ transform, Position = (as.numeric(Point)-1) * 100/(max(as.numeric(Point))-1))
################################################################################
# Fit LME models
@@ -38,7 +41,10 @@ models$anova = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model1)
fit_trk_model2 <- function(df){
lme.trk = tryCatch(lme(FA ~ Point/Group - 1, data=df, random = ~ 1 | ID, na.action=na.omit), error = function(e) data.frame())
if(length(lme.trk)!=0){
- data.frame(Point = as.numeric(levels(trk_data$Point)), summary(lme.trk)$tTable[-(1:length(levels(trk_data$Point))),])
+ term.RE = paste('Point[0-9]+:', 'Group', '.+', sep='')
+ term.rows = grep(term.RE, row.names(summary(lme.trk)$tTable))
+ data.frame(Point = as.numeric(levels(factor(df$Point))),
+ summary(lme.trk)$tTable[term.rows,])
} else data.frame()
}
models$tTable = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model2)
@@ -46,25 +52,24 @@ models$tTable = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model2)
# Add weights=varFunc(~I(1/SD^2)) to do weighted fitting
-# Model number of streamlines, just for fun
+# Model number of streamlines
summary(lm(Streamlines ~ Group + Hemisphere + Tract, data=trk_props_long))
################################################################################
# Permutation testing
# Function to permute groups and return max t-stat distribution
-doPerms <- function(df, nPerms=nPerms){
+doPerms <- function(df, demog, nPerms=100){
cat('Permuting data:\n')
- pb = txtProgressBar(1,nPerms,1, style=3)
- maxT = NULL
- permGrp = df[duplicated(df$ID)==F,]
- row.names(permGrp) = permGrp$ID
+ pb = txtProgressBar(1,nPerms,1, style=3)
+ maxT = NULL
+ df = df[,!colnames(df) %in% colnames(demog)[-1]]
for(i in 1:nPerms){
# Permute group membership, but respect ID groupings
- permGrp$Group = sample(permGrp$Group)
- row.names(permGrp) = permGrp$ID
- df$Group = factor(permGrp[as.character(df$ID),]$Group)
+ perm.labs = sample(nrow(demog))
+ perm.demog = data.frame(ID=demog$ID[perm.labs], demog[,2:ncol(demog)])
+ perm.df = merge(df, perm.demog)
# Fit models and obtain max t-stat
- tTable = ddply(df, c("Tract", "Hemisphere"), fit_trk_model2)
+ tTable = ddply(perm.df, c("Tract", "Hemisphere"), fit_trk_model2)
maxT = c(maxT, max(abs(tTable$t)))
setTxtProgressBar(pb,i)
}
@@ -72,31 +77,34 @@ doPerms <- function(df, nPerms=nPerms){
maxT
}
-# Only correct for multiple comparisons across the tracts with a significant Point:Group interaction F-test
+# Only correct for multiple comparisons across the tracts with a significant
+# Point:Group interaction F-test. Set the corrected p-values of other tracts to 1
Ftests = subset(models$anova, Term=='Point:Group')
sigFtests = which(Ftests$p.value<0.05)
sigF.Tract = Ftests$Tract[sigFtests]
sigF.Hemisphere = Ftests$Hemisphere[sigFtests]
-sigF.trk_data = subset(trk_data, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere)
+sigF.trk_data = subset(trk_data, Tract==sigF.Tract &
+ Hemisphere==sigF.Hemisphere)
-maxT = doPerms(sigF.trk_data, 100)
+maxT = doPerms(sigF.trk_data, demog, nPerms)
# Execute in a terminal to utilize multiple cores and do lots of perms!
#library(multicore)
-#doPerm <- function(iPerm, df) {
-# permGrp = df[duplicated(df$ID)==F,]
-# permGrp$Group = sample(permGrp$Group)
-# df$Group = factor(permGrp[as.character(df$ID),]$Group)
-# tTable = ddply(df, c("Tract", "Hemisphere"), fit_trk_model2)
+#doPerm <- function(iPerm, df, demog) {
+# df = df[,!colnames(df) %in% colnames(demog)[-1]]
+# perm.labs = sample(nrow(demog))
+# perm.demog = data.frame(ID=demog$ID[perm.labs], demog[,2:ncol(demog)])
+# perm.df = merge(df, perm.demog)
+# tTable = ddply(perm.df, c("Tract", "Hemisphere"), fit_trk_model2)
# max(abs(tTable$t))
#}
-#maxT = mclapply(1:10000, doPerm, df=trk_data, mc.cores=7)
+#maxT = as.numeric(mclapply(1:10000, doPerm, df=sigF.trk_data, demog=demog, mc.cores=7))
#save(maxT, file=file.path(exptDir, 'maxT.Rdata'))
#load(file.path(exptDir, 'maxT.Rdata'))
-# Function to adjust p-values according to where a given t-stat lies along the
-# max t-stat distribution
+# Function to adjust point:group p-values according to where a given t-stat lies
+# along the max t-stat distribution
getP <- function(testT, maxT){
# If you do 1000 permutations and the result from the real dataset is the
# most extreme, then the empirical p-value is 1/1001
@@ -107,6 +115,10 @@ crit = floor((1-thresh)*length(maxT.no.na))
models$tTable$p.val.adj = sapply(models$tTable$t, getP, maxT.no.na)
models$tTable$p.val.adj[-as.numeric(row.names(subset(models$tTable, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere)))] = 1
+# Add a Position column for easier plotting
+models$tTable = ddply(models$tTable, c("Tract", "Hemisphere"),
+ transform, Position = (as.numeric(Point)-1) * 100/(max(as.numeric(Point))-1))
+
################################################################################
# Figures
########
@@ -146,9 +158,8 @@ p2 + set_coords + brew_fill + sub_pts + error_bars + set_opts
########
# Plot FA vs. position, conditioned on hemisphere, tract, and group
-scale = 100/(max(as.numeric(trk_data$Point))-1)
brew_cols = scale_colour_manual(values=rev(brewer.pal(2, 'Set1')[1:2]))
-p3 = qplot((as.numeric(Point)-1)*scale, FA, group=ID, colour=Group, size=Streamlines, alpha=I(0.3), data=trk_data, facets = Tract~Hemisphere, geom='line', xlab='Position along tract (%)') + scale_size(to=c(0.25,3)) + brew_cols
+p3 = qplot(Position, FA, group=ID, colour=Group, size=Streamlines, alpha=I(0.3), data=trk_data, facets = Tract~Hemisphere, geom='line', xlab='Position along tract (%)') + scale_size(to=c(0.25,3)) + brew_cols
# Add group means
means_smooth = stat_smooth(aes(ymax=..y..+1.96*..se.., ymin=..y..-1.96*..se.., group=Group), alpha=0.8, span=0.5)
@@ -164,17 +175,58 @@ grp_effect = geom_text(aes(x=0, y=0.2, label=Caption, group=NULL, size=NULL), da
# If the F-test across the Point:Group terms in a panel is significant, plot a bar at the bottom to indicate which pointwise t-tests are significant
get_breaks <- function(df, thresh=0.05){
sig = df$p.value < thresh
- dsig = diff(sig)
+ dsig = c(diff(sig), 0)
if(subset(models$anova, Term=="Point:Group" & Tract==df$Tract[1] & Hemisphere==df$Hemisphere[1])$p.value < thresh){
- data.frame(on = df$Point[dsig==1]+0.5, off = df$Point[dsig==-1]+0.5)
- } else data.frame()
+ onpts = df$Point[dsig==1]+0.5
+ offpts = df$Point[dsig==-1]+0.5
+
+ # Check for any unclosed segments
+ if(dsig[which(dsig != 0)[1]] == -1) {
+ onpts = c(1, onpts)
+ }
+ if(dsig[rev(which(dsig != 0))[1]] == 1) {
+ offpts = c(offpts, length(dsig))
+ }
+
+ data.frame(on = (onpts-1) * 100/(length(df$Point)-1),
+ off = (offpts-1) * 100/(length(df$Point)-1))
+ } else data.frame(on=0, off=0)
}
break_list = ddply(models$tTable, c("Tract", "Hemisphere"), get_breaks, thresh=thresh)
-sig_bars = geom_segment(aes(x = (on-1)*scale, y=0.2, xend=(off-1)*scale, yend=0.2, group=NULL, size=NULL), data=break_list, colour='black')
+sig_bars = geom_segment(aes(x=on, y=0.2, xend=off, yend=0.2, group=NULL, size=NULL), data=break_list, colour='black')
# Make final plot
dev.new(width=7, height=5)
p3 + means_smooth + grp_effect + sig_bars
+########
+# Plot p-values
+# Generate a version of stats results with finer spacing for highlighting
+# significant areas in green in p/t-value plots
+lin_interp = function(x, spacing=0.01) {
+ approx(1:length(x), x, xout=seq(1,length(x), spacing))$y
+}
+models$tTable.interp = ddply(models$tTable, c("Tract", "Hemisphere"), colwise(lin_interp, c('Point','t.value', 'p.value', 'Position')))
+
+p4 = ggplot(data=models$tTable.interp, aes(x=Position, y=p.value, color=p.value < 0.05, group=1)) + geom_line() + facet_grid(facets=Tract~Hemisphere) + xlab('Position along tract (%)') + scale_color_manual('p.value < 0.05', values=c('red', 'green'))
+
+# Gray out non-significant areas
+grayout_p = annotate('rect', xmin=0, xmax=100, ymin=0.05, ymax=1, alpha=0.25)
+
+# Make final plot
+dev.new(width=7, height=5)
+p4 + grayout_p + scale_y_log10(limits=c(0.001, 1), breaks=c(0.001, 0.01, 0.1, 1))
+
+########
+# Plot t-statistics
+# Gray out non-significant areas
+tcrits = ddply(models$tTable, c("Tract", "Hemisphere"), summarize, t.crit=qt(0.05/2, DF[1]))
+grayout_t = geom_rect(aes(x=1, y=1, ymin=t.crit, ymax=-t.crit), color=NA, xmin=0, xmax=100, alpha=0.25, data=tcrits)
+
+# Make final plot
+dev.new(width=7, height=5)
+p4 + grayout_t + aes(y=t.value, color=t.value < tcrits$t.crit | t.value > -tcrits$t.crit) + ylim(c(-4, 4)) + geom_hline(yint=0, linetype=3)
+
+################################################################################
# Output statistical results for import into MATLAB and overlay onto mean tract # geometry
write.table(models$tTable, file=file.path(exptDir, 'effects_table.txt'), quote=F, row.names=F)
\ No newline at end of file
exptDir = '/Users/jcolby/Documents/LONI/along-tract-stats/along-tract-stats.git/example/between_group'
grpLabs = c('Control', 'FASD')
thresh = 0.05
nPerms = 1000
library(nlme) # Mixed-effects models
library(ggplot2) # Plotting tools
library(plyr) # Data manipulation
library(RColorBrewer) # Color tables
################################################################################
# Import and format data
# Read in demographics
demog = read.table(file.path(exptDir, 'Demographics.txt'), row.names=1, header=T)
demog$Group = factor(demog$Group, levels=rev(levels(demog$Group)), labels=grpLabs)
# Read in whole-track properties (ex: streamlines) and merge with demographics
trk_props_long = read.table(file.path(exptDir, 'trk_props_long.txt'), header=T)
trk_props_long = data.frame(trk_props_long, demog[as.character(trk_props_long$ID),])
# Read in length-parameterized track data (ex: FA) and merge with demographics
trk_data = read.table(file.path(exptDir, 'trk_data.txt'), header=T)
trk_data$Point = factor(trk_data$Point)
trk_data[trk_data==0] = NA
trk_data = merge(trk_data, trk_props_long)
################################################################################
# Fit LME models
# Overall ANOVA for Group and Point:Group effects
fit_trk_model1 <- function(df){
lme.trk = lme(FA ~ Point*Group, data=df, random = ~ 1 | ID, na.action=na.omit)
data.frame(Term = rownames(anova(lme.trk)), anova(lme.trk))
}
models = list()
models$anova = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model1)
# Fit a cell-means version to get effect sizes relative to controls
fit_trk_model2 <- function(df){
lme.trk = tryCatch(lme(FA ~ Point/Group - 1, data=df, random = ~ 1 | ID, na.action=na.omit), error = function(e) data.frame())
if(length(lme.trk)!=0){
data.frame(Point = as.numeric(levels(trk_data$Point)), summary(lme.trk)$tTable[-(1:length(levels(trk_data$Point))),])
} else data.frame()
}
models$tTable = ddply(trk_data, c("Tract", "Hemisphere"), fit_trk_model2)
# Extract a subset like this: subset(models$tTable, Tract=='ILF' & Hemisphere=='L')
# Add weights=varFunc(~I(1/SD^2)) to do weighted fitting
# Model number of streamlines, just for fun
summary(lm(Streamlines ~ Group + Hemisphere + Tract, data=trk_props_long))
################################################################################
# Permutation testing
# Function to permute groups and return max t-stat distribution
doPerms <- function(df, nPerms=nPerms){
cat('Permuting data:\n')
pb = txtProgressBar(1,nPerms,1, style=3)
maxT = NULL
permGrp = df[duplicated(df$ID)==F,]
row.names(permGrp) = permGrp$ID
for(i in 1:nPerms){
# Permute group membership, but respect ID groupings
permGrp$Group = sample(permGrp$Group)
row.names(permGrp) = permGrp$ID
df$Group = factor(permGrp[as.character(df$ID),]$Group)
# Fit models and obtain max t-stat
tTable = ddply(df, c("Tract", "Hemisphere"), fit_trk_model2)
maxT = c(maxT, max(abs(tTable$t)))
setTxtProgressBar(pb,i)
}
close(pb)
maxT
}
# Only correct for multiple comparisons across the tracts with a significant Point:Group interaction F-test
Ftests = subset(models$anova, Term=='Point:Group')
sigFtests = which(Ftests$p.value<0.05)
sigF.Tract = Ftests$Tract[sigFtests]
sigF.Hemisphere = Ftests$Hemisphere[sigFtests]
sigF.trk_data = subset(trk_data, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere)
maxT = doPerms(sigF.trk_data, 100)
# Execute in a terminal to utilize multiple cores and do lots of perms!
#library(multicore)
#doPerm <- function(iPerm, df) {
# permGrp = df[duplicated(df$ID)==F,]
# permGrp$Group = sample(permGrp$Group)
# df$Group = factor(permGrp[as.character(df$ID),]$Group)
# tTable = ddply(df, c("Tract", "Hemisphere"), fit_trk_model2)
# max(abs(tTable$t))
#}
#maxT = mclapply(1:10000, doPerm, df=trk_data, mc.cores=7)
#save(maxT, file=file.path(exptDir, 'maxT.Rdata'))
#load(file.path(exptDir, 'maxT.Rdata'))
# Function to adjust p-values according to where a given t-stat lies along the
# max t-stat distribution
getP <- function(testT, maxT){
# If you do 1000 permutations and the result from the real dataset is the
# most extreme, then the empirical p-value is 1/1001
(1+length(maxT[abs(testT) < maxT]))/(length(maxT)+1)
}
maxT.no.na = maxT[is.na(maxT)==F]
crit = floor((1-thresh)*length(maxT.no.na))
models$tTable$p.val.adj = sapply(models$tTable$t, getP, maxT.no.na)
models$tTable$p.val.adj[-as.numeric(row.names(subset(models$tTable, Tract==sigF.Tract & Hemisphere==sigF.Hemisphere)))] = 1
################################################################################
# Figures
########
# Plot the distribution of the max t-stat across all points/tracts under
# the null hypothesis of no group effect
dev.new(width=4, height=3)
p1 = ggplot(data.frame(maxT.no.na), aes(x=maxT.no.na, y=..density..))
p1 + geom_histogram(binwidth=0.1) + geom_density(color='blue') + geom_vline(xint = sort(maxT.no.na)[crit], color='red') + xlim(0,5) + ylim(0,1.75) + labs(x=expression(paste('Empirical max ', group('|', italic(t), '|'))), y='Density')
########
# Plot # of streamlines by Group, tract, and hemisphere
p2 = ggplot(trk_props_long, aes(x=Group, y=Streamlines, fill=Group)) + geom_bar(stat='summary', fun.y=mean) + facet_grid(Tract~Hemisphere)
# Set colors
brew_fill = scale_fill_manual(values=rev(brewer.pal(2, 'Set1')[1:2]))
# Generate error bars (pointwise 95% CIs)
stderr <- function(x) sqrt(var(x)/length(x))
get_bars <- function(y.in){
data.frame(y=mean(y.in), ymax=mean(y.in)+1.96*stderr(y.in), ymin=mean(y.in)-1.96*stderr(y.in))
}
error_bars = geom_errorbar(stat='summary', fun.data=get_bars, size=0.25, width=0.25)
# Set scales and coordinate systems
set_coords = c(scale_y_continuous(breaks=100*c(0:3)), scale_x_discrete('', breaks=''))
# Add points for individual subjects
jitter = position_jitter(width=0.2)
sub_pts = geom_point(fill=NA, position=jitter, alpha=0.3)
# Options
set_opts = ylab("Streamlines")
# Make final plot
dev.new(width=3.5, height=4.9)
p2 + set_coords + brew_fill + sub_pts + error_bars + set_opts
########
# Plot FA vs. position, conditioned on hemisphere, tract, and group
scale = 100/(max(as.numeric(trk_data$Point))-1)
brew_cols = scale_colour_manual(values=rev(brewer.pal(2, 'Set1')[1:2]))
p3 = qplot((as.numeric(Point)-1)*scale, FA, group=ID, colour=Group, size=Streamlines, alpha=I(0.3), data=trk_data, facets = Tract~Hemisphere, geom='line', xlab='Position along tract (%)') + scale_size(to=c(0.25,3)) + brew_cols
# Add group means
means_smooth = stat_smooth(aes(ymax=..y..+1.96*..se.., ymin=..y..-1.96*..se.., group=Group), alpha=0.8, span=0.5)
means = stat_summary(fun.y=mean, geom='line', size=0.6, aes(group=Group))
# Add an asterisk if there is a significant main group effect
anova_grp = subset(models$anova, Term=="Group")
Caption = rep('', nrow(anova_grp))
Caption[anova_grp$p.value<thresh] = '*'
anova_grp = data.frame(anova_grp, Caption)
grp_effect = geom_text(aes(x=0, y=0.2, label=Caption, group=NULL, size=NULL), data=anova_grp, colour='black')
# If the F-test across the Point:Group terms in a panel is significant, plot a bar at the bottom to indicate which pointwise t-tests are significant
get_breaks <- function(df, thresh=0.05){
sig = df$p.value < thresh
dsig = diff(sig)
if(subset(models$anova, Term=="Point:Group" & Tract==df$Tract[1] & Hemisphere==df$Hemisphere[1])$p.value < thresh){
data.frame(on = df$Point[dsig==1]+0.5, off = df$Point[dsig==-1]+0.5)
} else data.frame()
}
break_list = ddply(models$tTable, c("Tract", "Hemisphere"), get_breaks, thresh=thresh)
sig_bars = geom_segment(aes(x = (on-1)*scale, y=0.2, xend=(off-1)*scale, yend=0.2, group=NULL, size=NULL), data=break_list, colour='black')
# Make final plot
dev.new(width=7, height=5)
p3 + means_smooth + grp_effect + sig_bars
# Output statistical results for import into MATLAB and overlay onto mean tract # geometry
write.table(models$tTable, file=file.path(exptDir, 'effects_table.txt'), quote=F, row.names=F)
@johncolby
Copy link
Author

Improved robustness of significance bar plotting @ line 175 of new version (between_grp.R)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment