## smacarthur/splitAndPlot.R

## splitAndPlot.R
if (!require(RColorBrewer)){
  install.packages("RColorBrewer")
  library(RColorBrewer)
}

### Build a random example data set of 100 rows: each row is labelled
### "Type2" when a uniform draw is >= 0.5 and "Type1" otherwise, and gets a
### value drawn from a normal distribution with mean 100 and sd 25
data <- data.frame(
  names = ifelse(runif(n = 100) >= 0.5, "Type2", "Type1"),
  data = rnorm(n = 100, mean = 100, sd = 25)
)

### Group means, approach 1: aggregate() splits the values by name and
### applies mean() to each group
aggregate(x = data[["data"]], by = list(data[["names"]]), FUN = mean)

### Group means, approach 2: split() the values into one vector per name,
### then sapply() mean() over the resulting list
s <- split(x = data[["data"]], f = data[["names"]])
sapply(X = s, FUN = mean)
# The same computation written as a single expression
sapply(X = split(x = data[["data"]], f = data[["names"]]), FUN = mean)

### Below is a function which is just a group of commands used to stop
### you having to type the same code in again for another dataset

### plotData: compare the groups of a data set visually and with a t-test.
### Draws two figures on the current graphics device:
###   1. a box plot of data$data split by data$names
###   2. overlaid kernel density curves (one per group) with a legend and a
###      title showing the t-test p-value and whether it is <= 0.05
###
### Arguments:
###   data  data frame with a grouping column `names` and a numeric column
###         `data`; t.test() needs exactly two groups
###   cols  vector of colours; group i is drawn with cols[i] (recycled if
###         fewer colours than groups are supplied)
###
### Returns NULL invisibly; the function is called for its plots.
plotData <- function(data,cols){
  ### Draw a box plot of the data
  plot(data$data ~ data$names,col=cols,pch=20)
  ### Run a t-test on the split data
  pval <- t.test(data$data ~ data$names)$p.value
  ### Are they significantly different ?
  areSig <- c("Not Significant","Significant")[as.numeric(pval<=0.05)+1]
  ### Calculate the density of the data, after splitting
  ### (drop=TRUE skips unused factor levels, which density() cannot handle)
  dens <- lapply(split(data$data,data$names,drop=TRUE),density)
  ### Work out limits that enclose every density curve. The x range is taken
  ### from the curves themselves rather than starting at 0, so negative or
  ### far-from-zero data are neither clipped nor squeezed
  xLimits <- range(unlist(lapply(dens,function(d) d$x)))
  yMax <- max(unlist(lapply(dens,function(d) d$y)))
  ### Draw an empty figure (type="n" plots no points) with those limits
  plot(1,type="n",xlim=xLimits,ylim=c(0,yMax),xlab="data",ylab="Density")
  ### One colour per group, so the curves and the legend always agree
  groupCols <- rep_len(cols,length(dens))
  ### Draw the density plots for each data type
  for (i in seq_along(dens)) {
    lines(dens[[i]],col=groupCols[i],lwd=3)
  }
  ### Add a legend
  legend("topleft",legend=names(dens),col=groupCols,lwd=4)
  ### Add a title with the p-value and whether it is significant or not
  title(paste("P-value=",format.pval(pval),areSig))
  invisible(NULL)
}

### Lay out the figures that follow on a 2 x 2 grid
par(mfrow = c(2, 2))
### Plot the current data object with the 8-colour "Dark2" palette
plotData(data, cols = brewer.pal(n = 8, name = "Dark2"))

### Replace the data object with two groups of 50 values drawn from normal
### distributions with different means (100 with sd 20, and 50 with sd 10),
### so the groups should come out as significantly different
data <- data.frame(
  names = rep(c("Type3", "Type4"), each = 50),
  data = c(
    rnorm(n = 50, mean = 100, sd = 20),
    rnorm(n = 50, mean = 50, sd = 10)
  )
)
### Plot the new data object with the 8-colour "Set1" palette
plotData(data, cols = brewer.pal(n = 8, name = "Set1"))
	if (!require(RColorBrewer)){
	install.packages("RColorBrewer")
	library(RColorBrewer)
	}

	### Build a random example data set of 100 rows: each row is labelled
	### "Type2" when a uniform draw is >= 0.5 and "Type1" otherwise, and gets a
	### value drawn from a normal distribution with mean 100 and sd 25
	data <- data.frame(
	  names = ifelse(runif(n = 100) >= 0.5, "Type2", "Type1"),
	  data = rnorm(n = 100, mean = 100, sd = 25)
	)

	### Group means, approach 1: aggregate() splits the values by name and
	### applies mean() to each group
	aggregate(x = data[["data"]], by = list(data[["names"]]), FUN = mean)

	### Group means, approach 2: split() the values into one vector per name,
	### then sapply() mean() over the resulting list
	s <- split(x = data[["data"]], f = data[["names"]])
	sapply(X = s, FUN = mean)
	# The same computation written as a single expression
	sapply(X = split(x = data[["data"]], f = data[["names"]]), FUN = mean)

	### Below is a function which is just a group of commands used to stop
	### you having to type the same code in again for another dataset

	### plotData: compare the groups of a data set visually and with a t-test.
	### Draws two figures on the current graphics device:
	###   1. a box plot of data$data split by data$names
	###   2. overlaid kernel density curves (one per group) with a legend and a
	###      title showing the t-test p-value and whether it is <= 0.05
	###
	### Arguments:
	###   data  data frame with a grouping column `names` and a numeric column
	###         `data`; t.test() needs exactly two groups
	###   cols  vector of colours; group i is drawn with cols[i] (recycled if
	###         fewer colours than groups are supplied)
	###
	### Returns NULL invisibly; the function is called for its plots.
	plotData <- function(data,cols){
	  ### Draw a box plot of the data
	  plot(data$data ~ data$names,col=cols,pch=20)
	  ### Run a t-test on the split data
	  pval <- t.test(data$data ~ data$names)$p.value
	  ### Are they significantly different ?
	  areSig <- c("Not Significant","Significant")[as.numeric(pval<=0.05)+1]
	  ### Calculate the density of the data, after splitting
	  ### (drop=TRUE skips unused factor levels, which density() cannot handle)
	  dens <- lapply(split(data$data,data$names,drop=TRUE),density)
	  ### Work out limits that enclose every density curve. The x range is taken
	  ### from the curves themselves rather than starting at 0, so negative or
	  ### far-from-zero data are neither clipped nor squeezed
	  xLimits <- range(unlist(lapply(dens,function(d) d$x)))
	  yMax <- max(unlist(lapply(dens,function(d) d$y)))
	  ### Draw an empty figure (type="n" plots no points) with those limits
	  plot(1,type="n",xlim=xLimits,ylim=c(0,yMax),xlab="data",ylab="Density")
	  ### One colour per group, so the curves and the legend always agree
	  groupCols <- rep_len(cols,length(dens))
	  ### Draw the density plots for each data type
	  for (i in seq_along(dens)) {
	    lines(dens[[i]],col=groupCols[i],lwd=3)
	  }
	  ### Add a legend
	  legend("topleft",legend=names(dens),col=groupCols,lwd=4)
	  ### Add a title with the p-value and whether it is significant or not
	  title(paste("P-value=",format.pval(pval),areSig))
	  invisible(NULL)
	}

	### Lay out the figures that follow on a 2 x 2 grid
	par(mfrow = c(2, 2))
	### Plot the current data object with the 8-colour "Dark2" palette
	plotData(data, cols = brewer.pal(n = 8, name = "Dark2"))

	### Replace the data object with two groups of 50 values drawn from normal
	### distributions with different means (100 with sd 20, and 50 with sd 10),
	### so the groups should come out as significantly different
	data <- data.frame(
	  names = rep(c("Type3", "Type4"), each = 50),
	  data = c(
	    rnorm(n = 50, mean = 100, sd = 20),
	    rnorm(n = 50, mean = 50, sd = 10)
	  )
	)
	### Plot the new data object with the 8-colour "Set1" palette
	plotData(data, cols = brewer.pal(n = 8, name = "Set1"))