pipitone/charts.Rmd

## charts.Rmd
Bhagwat-Brain
=============

To view the plots in this document, run the following:

    module load R/3.0.2-precise64

From within R, run:

    install.packages(c('knitr', 'ggplot2', 'plyr', 'Hmisc'))

Then, at the command line, run:

    R -e 'library(knitr);knit2html("charts.Rmd")'
    gnome-open charts.html


## Set up and load in the data
```{r}
library(ggplot2)   # for plotting
library(plyr)      # for dataframe gymnastics


# Inline data for testing
# Columns are:
#   - fold         - fold identifier (e.g. fold1, fold2, fold3...)
#   - atlases      - number of atlases used
#   - templates    - number of templates used
#   - subject      - subject identifier
#   - lbl          - structure label (e.g. 1/2 or L/R)
#   - k.mj         - kappa via majority vote
#   - k.bb         - kappa via Bhagwat-brain vote
inline_data <- "
fold   atlases  templates  subject  lbl  k.mj   k.bb
fold1   1        1          ADNI001  1    0.759  0.779
fold1   1        1          ADNI001  2    0.794  0.824
fold1   3        1          ADNI001  1    0.786  0.796
fold1   3        1          ADNI001  2    0.824  0.834
fold1   5        1          ADNI001  1    0.829  0.839
fold1   5        1          ADNI001  2    0.828  0.838
fold1   1        1          ADNI001  1    0.844  0.854
fold1   1        1          ADNI001  2    0.837  0.847
fold1   3        1          ADNI001  1    0.824  0.814
fold1   3        1          ADNI001  2    0.836  0.846
fold1   5        1          ADNI001  1    0.824  0.834
fold1   5        1          ADNI001  2    0.836  0.846
fold1   1        3          ADNI001  1    0.824  0.834
fold1   1        3          ADNI001  2    0.836  0.846
fold1   3        3          ADNI001  1    0.824  0.834
fold1   3        3          ADNI001  2    0.836  0.846
fold2   1        1          ADNI001  1    0.739  0.771
fold2   1        1          ADNI001  2    0.734  0.821
fold2   3        1          ADNI001  1    0.736  0.791
fold2   3        1          ADNI001  2    0.834  0.831
fold2   5        1          ADNI001  1    0.839  0.831
fold2   5        1          ADNI001  2    0.838  0.831
fold2   1        1          ADNI001  1    0.834  0.851
fold2   1        1          ADNI001  2    0.837  0.841
fold2   3        1          ADNI001  1    0.834  0.811
fold2   3        1          ADNI001  2    0.836  0.841
fold2   5        1          ADNI001  1    0.834  0.831
fold2   5        1          ADNI001  2    0.836  0.841
fold2   1        3          ADNI001  1    0.834  0.831
fold2   1        3          ADNI001  2    0.836  0.841
fold2   3        3          ADNI001  1    0.834  0.831
fold2   3        3          ADNI001  2    0.836  0.841
"
# Parse the whitespace-separated table straight from the string. Using
# `text =` avoids opening a textConnection() that would have to be closed
# explicitly, and the variable name no longer masks base::str().
x <- read.table(text = inline_data, header = TRUE)
#
# Want to read from a CSV file? Just do this:
#     x <- read.csv('/path/to/file.csv')
#

# Take a subset of the data: keep only rows whose majority-vote kappa is not
# exactly 1.
x <- subset(x, k.mj != 1)
```

## Kappa of PAWS

```{r}
# Mean Bhagwat-Brain kappa (k.bb) versus template count, one coloured line per
# atlas count.
# NOTE(review): the x position is templates*4-1 and the legend level is
# atlases*2-1; presumably this converts index-coded counts into real counts --
# confirm against the data source.
# NOTE(review): `fun.y` is deprecated in ggplot2 >= 3.3.0 in favour of `fun`;
# it is kept because this document targets an R 3.0.2-era toolchain.
ggplot(x, aes(x = templates * 4 - 1, y = k.bb,
              colour = as.factor(atlases * 2 - 1))) +
  # y is inherited from the plot-level mapping; only the grouping is added here
  stat_summary(fun.y = mean, geom = "line",
               aes(group = as.factor(atlases))) +
  # alpha is a constant, so it is set on the layer rather than mapped in aes().
  # NOTE(review): y = 0 lies outside the y limits set below, so this line is
  # presumably never visible -- confirm whether it is still wanted.
  geom_hline(yintercept = 0, alpha = 0.5, linetype = "dashed") +
  scale_x_continuous(breaks = seq(1, 20, 2)) +
  scale_y_continuous(limits = c(0.75, 0.9), breaks = seq(0, 1, by = 0.01)) +
  scale_colour_hue(name = "Number of Atlases") +
  xlab("Number of Templates") +
  ylab("Mean similarity (DSC)") +
  theme(legend.direction = "horizontal", legend.position = "bottom")
```

## Difference in mean kappa over all folds

This is the improvement in mean kappa of the Bhagwat-Brain vote over the majority vote.


```{r fig.width=7, fig.height=6}
# Average both kappa scores over all rows (i.e. across folds) that share the
# same subject / atlases / templates / lbl combination, then compute the gain
# of the Bhagwat-Brain vote over the majority vote for each combination.
x_mean <- aggregate(cbind(k.mj, k.bb) ~ subject + atlases + templates + lbl,
                    data = x, FUN = mean)
x_mean$diff <- x_mean$k.bb - x_mean$k.mj

# Mean gain per template count, coloured by atlas count, with bootstrapped
# confidence limits. All layers inherit x / y / colour from the plot-level
# mapping.
# NOTE(review): `fun.y` is deprecated in ggplot2 >= 3.3.0 in favour of `fun`;
# it is kept because this document targets an R 3.0.2-era toolchain.
ggplot(x_mean, aes(x = templates, y = diff, colour = as.factor(atlases))) +
  # mean_cl_boot wraps Hmisc::smean.cl.boot, so the Hmisc package must be
  # installed; the limits are resampled and vary slightly between runs.
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
  stat_summary(fun.y = mean, geom = "point") +
  stat_summary(fun.y = mean, geom = "line") +
  # Reference line at "no improvement"; alpha is a constant, so it is set on
  # the layer rather than mapped inside aes().
  geom_hline(yintercept = 0, alpha = 0.5, linetype = "dashed") +
  scale_x_continuous(breaks = seq(1, 20, 2)) +
  scale_y_continuous(breaks = seq(-1, 1, by = 0.01)) +
  scale_colour_hue(name = "Number of Atlases") +
  xlab("Number of Templates") +
  ylab("Increase in mean similarity (DSC)") +
  theme(legend.direction = "horizontal", legend.position = "bottom")
```

## Variability across folds
```{r fig.width=7, fig.height=6}
# Spread of each kappa score over the rows (folds) of every
# subject / lbl / atlases / templates combination.
stats <- ddply(x, c("subject", "lbl", "atlases", "templates"), function(df) {
  data.frame(
    mj.sd  = sd(df$k.mj),
    bb.sd  = sd(df$k.bb),
    mj.var = var(df$k.mj),
    bb.var = var(df$k.bb)
  )
})

# For every atlases / templates combination, compare the majority-vote spread
# against the Bhagwat-Brain spread with a two-sample t-test, once on the
# variances and once on the standard deviations.
stats.tests <- ddply(stats, c("atlases", "templates"), function(df) {
  t.var <- t.test(df$mj.var, df$bb.var)
  t.sd  <- t.test(df$mj.sd, df$bb.sd)
  diff_mean_var <- mean(df$mj.var - df$bb.var)
  diff_mean_sd  <- mean(df$mj.sd - df$bb.sd)

  # test statistic is positive if the mj mean is larger than the bb mean
  # i.e. that mj has a greater mean of variances/SDs
  # i.e. that bb has a smaller mean, and so is doing "better"
  data.frame(
    stat      = c("Variance", "Standard Deviation"),
    statistic = c(t.var$statistic, t.sd$statistic),
    p.value   = c(t.var$p.value, t.sd$p.value),
    direction = c(diff_mean_var, diff_mean_sd)
  )
})

# p-value of the variance comparison per template count, restricted to the
# combinations where bb shows the smaller mean variance (direction > 0).
# NOTE(review): `size` on geom_line is superseded by `linewidth` in
# ggplot2 >= 3.4.0; kept for compatibility with the R 3.0.2-era toolchain.
ggplot(subset(stats.tests, direction > 0 & stat == "Variance"),
       aes(x = templates, y = p.value, colour = as.factor(atlases))) +
  geom_line(size = 0.5) +
  geom_point(size = 2) +
  # Reference lines at p = 0.05 and p = 0.01; alpha is a constant, so it is
  # set on the layer rather than mapped inside aes().
  geom_hline(yintercept = c(0.05, 0.01), alpha = 0.5, linetype = "dashed") +
  scale_x_continuous(breaks = seq(1, 20, by = 2)) +
  xlab("Number of Templates") +
  ylab("Variability (p)") +
  scale_colour_hue(name = "Atlases") +
  theme(legend.direction = "horizontal", legend.position = "bottom")
```
	Bhagwat-Brain
	=============

	To view the plots in this document, run the following:

	module load R/3.0.2-precise64

	From within R, run:

	install.packages('knitr')

	Then, at the command line, run:

	R -e 'library(knitr);knit2html("charts.Rmd")'
	gnome-open charts.html


	## Set up and load in the data
	```{r}
	library(ggplot2) # for plotting
	library(plyr) # for dataframe gymnastics


	# Inline data for testing
	# Columns are:
	#   - fold         - fold identifier (e.g. fold1, fold2, fold3...)
	#   - atlases      - number of atlases used
	#   - templates    - number of templates used
	#   - subject      - subject identifier
	#   - lbl          - structure label (e.g. 1/2 or L/R)
	#   - k.mj         - kappa via majority vote
	#   - k.bb         - kappa via Bhagwat-brain vote
	inline_data <- "
	fold atlases templates subject lbl k.mj k.bb
	fold1 1 1 ADNI001 1 0.759 0.779
	fold1 1 1 ADNI001 2 0.794 0.824
	fold1 3 1 ADNI001 1 0.786 0.796
	fold1 3 1 ADNI001 2 0.824 0.834
	fold1 5 1 ADNI001 1 0.829 0.839
	fold1 5 1 ADNI001 2 0.828 0.838
	fold1 1 1 ADNI001 1 0.844 0.854
	fold1 1 1 ADNI001 2 0.837 0.847
	fold1 3 1 ADNI001 1 0.824 0.814
	fold1 3 1 ADNI001 2 0.836 0.846
	fold1 5 1 ADNI001 1 0.824 0.834
	fold1 5 1 ADNI001 2 0.836 0.846
	fold1 1 3 ADNI001 1 0.824 0.834
	fold1 1 3 ADNI001 2 0.836 0.846
	fold1 3 3 ADNI001 1 0.824 0.834
	fold1 3 3 ADNI001 2 0.836 0.846
	fold2 1 1 ADNI001 1 0.739 0.771
	fold2 1 1 ADNI001 2 0.734 0.821
	fold2 3 1 ADNI001 1 0.736 0.791
	fold2 3 1 ADNI001 2 0.834 0.831
	fold2 5 1 ADNI001 1 0.839 0.831
	fold2 5 1 ADNI001 2 0.838 0.831
	fold2 1 1 ADNI001 1 0.834 0.851
	fold2 1 1 ADNI001 2 0.837 0.841
	fold2 3 1 ADNI001 1 0.834 0.811
	fold2 3 1 ADNI001 2 0.836 0.841
	fold2 5 1 ADNI001 1 0.834 0.831
	fold2 5 1 ADNI001 2 0.836 0.841
	fold2 1 3 ADNI001 1 0.834 0.831
	fold2 1 3 ADNI001 2 0.836 0.841
	fold2 3 3 ADNI001 1 0.834 0.831
	fold2 3 3 ADNI001 2 0.836 0.841
	"
	# Parse the whitespace-separated table straight from the string. Using
	# `text =` avoids opening a textConnection() that would have to be closed
	# explicitly, and the variable name no longer masks base::str().
	x <- read.table(text = inline_data, header = TRUE)
	#
	# Want to read from a CSV file? Just do this:
	#     x <- read.csv('/path/to/file.csv')
	#

	# Take a subset of the data: keep only rows whose majority-vote kappa is
	# not exactly 1.
	x <- subset(x, k.mj != 1)
	```

	## Kappa of PAWS

	```{r}
	# Mean Bhagwat-Brain kappa (k.bb) versus template count, one coloured line
	# per atlas count. The multiplications are written out explicitly
	# (templates * 4 - 1, atlases * 2 - 1): without the `*` the expressions
	# would refer to the undefined objects `templates4` and `atlases2`.
	# NOTE(review): presumably these transforms convert index-coded counts into
	# real counts -- confirm against the data source.
	# NOTE(review): `fun.y` is deprecated in ggplot2 >= 3.3.0 in favour of
	# `fun`; it is kept because this document targets an R 3.0.2-era toolchain.
	ggplot(x, aes(x = templates * 4 - 1, y = k.bb,
	              colour = as.factor(atlases * 2 - 1))) +
	  # y is inherited from the plot-level mapping; only the grouping is added
	  stat_summary(fun.y = mean, geom = "line",
	               aes(group = as.factor(atlases))) +
	  # alpha is a constant, so it is set on the layer rather than mapped in
	  # aes().
	  # NOTE(review): y = 0 lies outside the y limits set below, so this line
	  # is presumably never visible -- confirm whether it is still wanted.
	  geom_hline(yintercept = 0, alpha = 0.5, linetype = "dashed") +
	  scale_x_continuous(breaks = seq(1, 20, 2)) +
	  scale_y_continuous(limits = c(0.75, 0.9), breaks = seq(0, 1, by = 0.01)) +
	  scale_colour_hue(name = "Number of Atlases") +
	  xlab("Number of Templates") +
	  ylab("Mean similarity (DSC)") +
	  theme(legend.direction = "horizontal", legend.position = "bottom")
	```

	## Difference in mean kappa over all folds

	This is the improvement by Bhagwat Brain over majority vote.


	```{r fig.width=7, fig.height=6}
	# Average both kappa scores over all rows (i.e. across folds) that share
	# the same subject / atlases / templates / lbl combination, then compute
	# the gain of the Bhagwat-Brain vote over the majority vote.
	x_mean <- aggregate(cbind(k.mj, k.bb) ~ subject + atlases + templates + lbl,
	                    data = x, FUN = mean)
	x_mean$diff <- x_mean$k.bb - x_mean$k.mj

	# Mean gain per template count, coloured by atlas count, with bootstrapped
	# confidence limits. All layers inherit x / y / colour from the plot-level
	# mapping.
	# NOTE(review): `fun.y` is deprecated in ggplot2 >= 3.3.0 in favour of
	# `fun`; it is kept because this document targets an R 3.0.2-era toolchain.
	ggplot(x_mean, aes(x = templates, y = diff, colour = as.factor(atlases))) +
	  # mean_cl_boot wraps Hmisc::smean.cl.boot, so the Hmisc package must be
	  # installed; the limits are resampled and vary slightly between runs.
	  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
	  stat_summary(fun.y = mean, geom = "point") +
	  stat_summary(fun.y = mean, geom = "line") +
	  # Reference line at "no improvement"; alpha is a constant, so it is set
	  # on the layer rather than mapped inside aes().
	  geom_hline(yintercept = 0, alpha = 0.5, linetype = "dashed") +
	  scale_x_continuous(breaks = seq(1, 20, 2)) +
	  scale_y_continuous(breaks = seq(-1, 1, by = 0.01)) +
	  scale_colour_hue(name = "Number of Atlases") +
	  xlab("Number of Templates") +
	  ylab("Increase in mean similarity (DSC)") +
	  theme(legend.direction = "horizontal", legend.position = "bottom")
	```

	## Variability across folds
	```{r fig.width=7, fig.height=6}
	# Spread of each kappa score over the rows (folds) of every
	# subject / lbl / atlases / templates combination.
	stats <- ddply(x, c("subject", "lbl", "atlases", "templates"), function(df) {
	  data.frame(
	    mj.sd  = sd(df$k.mj),
	    bb.sd  = sd(df$k.bb),
	    mj.var = var(df$k.mj),
	    bb.var = var(df$k.bb)
	  )
	})

	# For every atlases / templates combination, compare the majority-vote
	# spread against the Bhagwat-Brain spread with a two-sample t-test, once on
	# the variances and once on the standard deviations.
	stats.tests <- ddply(stats, c("atlases", "templates"), function(df) {
	  t.var <- t.test(df$mj.var, df$bb.var)
	  t.sd  <- t.test(df$mj.sd, df$bb.sd)
	  diff_mean_var <- mean(df$mj.var - df$bb.var)
	  diff_mean_sd  <- mean(df$mj.sd - df$bb.sd)

	  # test statistic is positive if the mj mean is larger than the bb mean
	  # i.e. that mj has a greater mean of variances/SDs
	  # i.e. that bb has a smaller mean, and so is doing "better"
	  data.frame(
	    stat      = c("Variance", "Standard Deviation"),
	    statistic = c(t.var$statistic, t.sd$statistic),
	    p.value   = c(t.var$p.value, t.sd$p.value),
	    direction = c(diff_mean_var, diff_mean_sd)
	  )
	})

	# p-value of the variance comparison per template count, restricted to the
	# combinations where bb shows the smaller mean variance (direction > 0).
	# NOTE(review): `size` on geom_line is superseded by `linewidth` in
	# ggplot2 >= 3.4.0; kept for compatibility with the R 3.0.2-era toolchain.
	ggplot(subset(stats.tests, direction > 0 & stat == "Variance"),
	       aes(x = templates, y = p.value, colour = as.factor(atlases))) +
	  geom_line(size = 0.5) +
	  geom_point(size = 2) +
	  # Reference lines at p = 0.05 and p = 0.01; alpha is a constant, so it is
	  # set on the layer rather than mapped inside aes().
	  geom_hline(yintercept = c(0.05, 0.01), alpha = 0.5, linetype = "dashed") +
	  scale_x_continuous(breaks = seq(1, 20, by = 2)) +
	  xlab("Number of Templates") +
	  ylab("Variability (p)") +
	  scale_colour_hue(name = "Atlases") +
	  theme(legend.direction = "horizontal", legend.position = "bottom")
	```