Skip to content

Instantly share code, notes, and snippets.

@pipitone
Last active August 29, 2015 14:02
Show Gist options
  • Save pipitone/9437cad86ad5ceef1381 to your computer and use it in GitHub Desktop.
Save pipitone/9437cad86ad5ceef1381 to your computer and use it in GitHub Desktop.
RMarkdown plots for Nikhil's secret project
Bhagwat-Brain
=============
To view the plots in this document, run the following:
module load R/3.0.2-precise64
From within R, run:
install.packages('knitr')
Then, at the command line, run:
R -e 'library(knitr);knit2html("charts.Rmd")'
gnome-open charts.html
## Set up and load in the data
```{r}
library(ggplot2) # for plotting
library(plyr) # for dataframe gymnastics
# Inline data for testing.
# Columns are:
# - fold      - fold identifier (e.g. fold1, fold2, fold3...)
# - atlases   - number of atlases used
# - templates - number of templates used
# - subject   - subject identifier
# - lbl       - structure label (e.g. 1/2 or L/R)
# - k.mj      - kappa via majority vote
# - k.bb      - kappa via Bhagwat-brain vote
#
# The text is held in `inline_data` (not `str`) so that utils::str() is not
# shadowed in the knitting session.
inline_data <- "
fold atlases templates subject lbl k.mj k.bb
fold1 1 1 ADNI001 1 0.759 0.779
fold1 1 1 ADNI001 2 0.794 0.824
fold1 3 1 ADNI001 1 0.786 0.796
fold1 3 1 ADNI001 2 0.824 0.834
fold1 5 1 ADNI001 1 0.829 0.839
fold1 5 1 ADNI001 2 0.828 0.838
fold1 1 1 ADNI001 1 0.844 0.854
fold1 1 1 ADNI001 2 0.837 0.847
fold1 3 1 ADNI001 1 0.824 0.814
fold1 3 1 ADNI001 2 0.836 0.846
fold1 5 1 ADNI001 1 0.824 0.834
fold1 5 1 ADNI001 2 0.836 0.846
fold1 1 3 ADNI001 1 0.824 0.834
fold1 1 3 ADNI001 2 0.836 0.846
fold1 3 3 ADNI001 1 0.824 0.834
fold1 3 3 ADNI001 2 0.836 0.846
fold2 1 1 ADNI001 1 0.739 0.771
fold2 1 1 ADNI001 2 0.734 0.821
fold2 3 1 ADNI001 1 0.736 0.791
fold2 3 1 ADNI001 2 0.834 0.831
fold2 5 1 ADNI001 1 0.839 0.831
fold2 5 1 ADNI001 2 0.838 0.831
fold2 1 1 ADNI001 1 0.834 0.851
fold2 1 1 ADNI001 2 0.837 0.841
fold2 3 1 ADNI001 1 0.834 0.811
fold2 3 1 ADNI001 2 0.836 0.841
fold2 5 1 ADNI001 1 0.834 0.831
fold2 5 1 ADNI001 2 0.836 0.841
fold2 1 3 ADNI001 1 0.834 0.831
fold2 1 3 ADNI001 2 0.836 0.841
fold2 3 3 ADNI001 1 0.834 0.831
fold2 3 3 ADNI001 2 0.836 0.841
"
# read.table(text = ...) opens and closes its own connection, so no
# textConnection is left open behind us.
x <- read.table(text = inline_data, header = TRUE)
#
# Want to read from a CSV file? Just do this:
# x <- read.csv('/path/to/file.csv')
#
# Keep only the rows whose majority-vote kappa is not exactly 1.
x <- subset(x, k.mj != 1)
```
## Kappa of PAWS
```{r}
# Mean Bhagwat-brain kappa (k.bb) plotted against templates * 4 - 1, with
# one coloured line per atlases * 2 - 1 level.
paws_plot <- ggplot(
  x,
  aes(x = templates * 4 - 1, y = k.bb, colour = as.factor(atlases * 2 - 1))
)

# Layers: the per-x mean of k.bb joined as a line for each atlas level,
# followed by a dashed horizontal reference line.
# NOTE(review): yintercept = 0 lies outside the y limits (0.75-0.9) set
# below, so this reference line is presumably never visible -- confirm
# whether it is wanted here.
# NOTE(review): alpha = 0.5 is mapped inside aes() rather than set as a
# constant; kept as-is so the rendered output does not change.
paws_plot <- paws_plot +
  stat_summary(
    mapping = aes(y = k.bb, weight = 1, group = as.factor(atlases)),
    fun.y = mean,
    geom = "line"
  ) +
  geom_hline(aes(alpha = 0.5, yintercept = 0), linetype = "dashed")

# Axis breaks, limits and titles.
paws_plot <- paws_plot +
  scale_x_continuous(breaks = seq(1, 20, 2)) +
  scale_y_continuous(limits = c(0.75, 0.9), breaks = seq(0, 1, by = 0.01)) +
  labs(x = "Number of Templates", y = "Mean similarity (DSC)")

# Legend: only the colour legend is shown, laid out horizontally under
# the panel.
paws_plot <- paws_plot +
  scale_colour_hue(name = "Number of Atlases") +
  scale_alpha_continuous(guide = "none") +
  scale_linetype_discrete(guide = "none") +
  theme(legend.direction = "horizontal", legend.position = "bottom")

print(paws_plot)
```
## Difference in mean kappa over all folds
This is the improvement of the Bhagwat-Brain vote over the majority vote.
```{r fig.width=7, fig.height=6}
# Average both kappas over every row of x that shares a subject, atlas
# count, template count and label (fold is not a grouping variable, so
# folds are averaged together), then take the gain of the Bhagwat-brain
# vote over majority vote for each combination.
x_mean <- aggregate(cbind(k.mj, k.bb) ~ subject + atlases + templates + lbl,
                    data = x, FUN = mean)
x_mean$diff <- x_mean$k.bb - x_mean$k.mj

# Every layer inherits x, y and colour from the top-level aes(), so the
# summaries below do not repeat them.
ggplot(x_mean, aes(x = templates, y = diff, colour = as.factor(atlases))) +
  # Error bars from mean_cl_boot; the bar width is a fixed setting, not a
  # data mapping, so it is passed outside aes().
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
  stat_summary(fun.y = mean, geom = "point") +
  stat_summary(fun.y = mean, geom = "line") +
  # Zero-gain reference line. alpha is set as a constant here; mapping it
  # inside aes() would create an alpha scale and legend instead of simply
  # fading the line.
  geom_hline(yintercept = 0, alpha = 0.5, linetype = "dashed") +
  scale_x_continuous(breaks = seq(1, 20, 2)) +
  scale_y_continuous(breaks = seq(-1, 1, by = 0.01)) +
  scale_colour_hue(name = "Number of Atlases") +
  xlab("Number of Templates") +
  ylab("Increase in mean similarity (DSC)") +
  theme(legend.direction = "horizontal", legend.position = "bottom")
```
## Variability across folds
```{r fig.width=7, fig.height=6}
# Spread of each kappa over the rows of x that share a subject, label,
# atlas count and template count: one row per combination, holding the
# standard deviation and variance of k.mj and of k.bb.
stats <- ddply(
  x,
  .variables = c("subject", "lbl", "atlases", "templates"),
  .fun = function(cell) {
    with(cell, data.frame(
      mj.sd = sd(k.mj),
      bb.sd = sd(k.bb),
      mj.var = var(k.mj),
      bb.var = var(k.bb)
    ))
  }
)

# For every atlas/template combination, compare the mj spreads against the
# bb spreads with a two-sample t-test, once on variances and once on
# standard deviations.
#
# The t statistic and `direction` (mean of mj minus bb) are positive when
# the mj spreads are larger on average, i.e. when bb has the smaller mean
# spread and so is doing "better".
#
# NOTE(review): the mj and bb values in each row come from the same
# subject/label, so a paired test may be more appropriate -- confirm.
stats.tests <- ddply(
  stats,
  .variables = c("atlases", "templates"),
  .fun = function(grp) {
    var_test <- t.test(grp$mj.var, grp$bb.var)
    sd_test <- t.test(grp$mj.sd, grp$bb.sd)
    data.frame(
      stat = c("Variance", "Standard Deviation"),
      statistic = c(var_test$statistic, sd_test$statistic),
      p.value = c(var_test$p.value, sd_test$p.value),
      direction = c(
        mean(grp$mj.var - grp$bb.var),
        mean(grp$mj.sd - grp$bb.sd)
      )
    )
  }
)
# p-values of the variance t-tests, restricted to the atlas/template
# combinations where the mean mj variance exceeds the mean bb variance
# (direction > 0), drawn as one coloured line per atlas count.
ggplot(subset(stats.tests, direction > 0 & stat == "Variance"),
       aes(x = templates, y = p.value, colour = as.factor(atlases))) +
  geom_line(size = 0.5) +
  geom_point(size = 2) +
  # Dashed reference lines at p = 0.05 and p = 0.01. alpha is set as a
  # constant rather than mapped inside aes(), so no alpha scale or legend
  # is created and no guide needs to be suppressed.
  geom_hline(yintercept = 0.05, alpha = 0.5, linetype = "dashed") +
  geom_hline(yintercept = 0.01, alpha = 0.5, linetype = "dashed") +
  scale_x_continuous(breaks = seq(1, 20, by = 2)) +
  xlab("Number of Templates") +
  ylab("Variability (p)") +
  scale_colour_hue(name = "Atlases") +
  theme(legend.direction = "horizontal", legend.position = "bottom")
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment