sashaphanes

## ggplot2BoxplotGrouping.R
#A solution with ggplot2

#library(reshape2)
#test.m <- melt(test)

#sample of dataframe:
head(combined.data)
>  region expression species
1    NAc  0.4711284 macaque
2    NAc -0.1003201 macaque

## combinedANOVA.R
#read combined data
setwd("/home/rosseraa/work.dir/allen.brain/")
raw.data = read.csv("common.regions.ANOVA.csv")

#run ANOVA
aov.result = aov(expression ~ region * species, data=raw.data)
summary(aov.result)
TukeyHSD(aov.result)
summary(aov.result)[[1]][["Pr(>F)"]]
print(model.tables(aov.result, "means"), digits=4)

## sidebysideANOVAboxplots.R
#read macaque data
setwd("/home/rosseraa/work.dir/allen.brain/")
macaque.raw.data = read.csv("macaque.common.regions.ANOVA.csv")

#standardize (z-score) macaque expression values
macaque.raw.data$expression = scale(macaque.raw.data$expression)

#run macaque ANOVA
aov.result = aov(expression ~ region, data=macaque.raw.data)
summary(aov.result) #is there a significant difference between any of the group means? If so, move onto a post-hoc test

## human.maoa.ANOVA.R
#read human data
setwd("/home/rosseraa/work.dir/allen.brain/")
human.raw.data = read.csv("human.common.regions.ANOVA.csv")

#run human ANOVA
aov.result = aov(expression ~ region, data=human.raw.data)
summary(aov.result) #is there a significant difference between any of the group means? If so, move onto a post-hoc test
TukeyHSD(aov.result) #post-hoc for multiple pairwise comparisons to see where the significant differences lie.
summary(aov.result)[[1]][["Pr(>F)"]] #p-values
print(model.tables(aov.result, "means"), digits=4)

## macaque.ANOVA.R
#read macaque data
setwd("/home/rosseraa/work.dir/allen.brain/")
raw.data = read.csv("macaque.common.regions.ANOVA.csv")

#standardize expression values
raw.data$expression = scale(raw.data$expression)

#run ANOVA
aov.result = aov(expression ~ region, data=raw.data)
summary(aov.result) #is there a significant difference between any of the group means? If so, move onto a post-hoc test

## macaque.analysis.R
#Thsdf
#read macaque data
setwd("/home/rosseraa/work.dir/allen.brain/macaque")
raw.data = read.csv("maoaOut.csv")

#get pairwise combinations
com = combn(colnames(raw.data), 2)

#get p-values
p.values = apply(com, 2, function(x) t.test(raw.data[,x[1]], raw.data[,x[2]])$p.val)

## simple_web_form.php
#index.php
<html>
<form name="input" action="html_form_action.php" method="get">
Please enter your name: <input type="text" name="user_name">
<input type="submit" value="Submit">
</form>
</html>


## homozygousAltImpact.bash


lftp -c 'open -e "zcat mgp.v3.snps.rsIDdbSNPv137.vcf.gz” ftp-mouse.sanger.ac.uk/current_snps/' | perl -ne '{chomp; if (/^#CHROM/) {print "$_\n"; }else {@a = split (/\t/, $_); print "$_\n" if ($a[0] ==18 and $a[1] >= 12972252 and $a[1] <= 12992948 and $a[10] =~ /^1\/1|^1\|1/); } }' > mm10.129S1.Impact.altHom &

## qnormdata.R
http://156.40.185.32/pub/human.Hela.ALDH.ethanol.RNAseq/features/allHela.sorted.wig.exon.high50Ave.peakFeats.allChr.exonLen.subTotal.RPKM.highest_in_gene.transposed.txt.gz
README file: http://156.40.185.32/pub/human.Hela.ALDH.ethanol.RNAseq/README.human.Hela.ALDH.ethanol.RNAseq.txt

dataFile = "/mnt/xdat/pub/human.Hela.ALDH.ethanol.RNAseq/features/allHela.sorted.wig.exon.high50Ave.peakFeats.allChr.exonLen.subTotal.RPKM.highest_in_gene.transposed.txt.gz"
rawdata = read.delim(gzfile(dataFile), sep="\t", header=T)
rownames(rawdata) = paste(rawdata[,"Chr"],  rawdata[,"GeneSym"], sep = ".")
data.RPKM = rawdata[, grep("RPKM", colnames(rawdata))]
rowMax = do.call("pmax", c(data.RPKM[,1:ncol(data.RPKM)],na.rm=TRUE))
data.RPKM.rowMax.gt1 = data.RPKM[rowMax >1,]
data.RPKM.rowMax.gt1[is.na(data.RPKM.rowMax.gt1)] = 0

## binom.R
plot(x=c(0:2)/2, dbinom(c(0:2),2,0.5), pch=20, col="red", type ="b", xlim =c(0,1), ylim =c(0, 0.6), xlab = "AltFreq", ylab="Probability" )
lines (x=c(0:5)/5, dbinom(c(0:5),5,0.5), pch=20, col="brown",type ="b" )
lines (x=c(0:10)/10, dbinom(c(0:10),10,0.5), pch=20, col="green",type ="b")
lines (x=c(0:20)/20, dbinom(c(0:20),20,0.5), pch=20, col="blue",type ="b")

legend(0.8, 0.58, legend="2x", col="red",  pch=20,  lty = 1, bty="n" )
legend(0.8, 0.56, legend="5x", col="brown",  pch=20,  lty = 1, bty="n" )
legend(0.8, 0.54, legend="10x", col="green",  pch=20,  lty = 1, bty="n" )
legend(0.8, 0.52, legend="20x", col="blue",  pch=20,  lty = 1, bty="n" )
	#A solution with ggplot2

	#library(reshape2)
	#test.m <- melt(test)

	#sample of dataframe:
	head(combined.data)
	> region expression species
	1 NAc 0.4711284 macaque
	2 NAc -0.1003201 macaque
	#read combined data
	setwd("/home/rosseraa/work.dir/allen.brain/")
	raw.data = read.csv("common.regions.ANOVA.csv")

	#run ANOVA
	aov.result = aov(expression ~ region * species, data=raw.data)
	summary(aov.result)
	TukeyHSD(aov.result)
	summary(aov.result)[[1]][["Pr(>F)"]]
	print(model.tables(aov.result, "means"), digits=4)
	#read macaque data
	setwd("/home/rosseraa/work.dir/allen.brain/")
	macaque.raw.data = read.csv("macaque.common.regions.ANOVA.csv")

	#standardize (z-score) macaque expression values
	macaque.raw.data$expression = scale(macaque.raw.data$expression)

	#run macaque ANOVA
	aov.result = aov(expression ~ region, data=macaque.raw.data)
	summary(aov.result) #is there a significant difference between any of the group means? If so, move onto a post-hoc test
	#read human data
	setwd("/home/rosseraa/work.dir/allen.brain/")
	human.raw.data = read.csv("human.common.regions.ANOVA.csv")

	#run human ANOVA
	aov.result = aov(expression ~ region, data=human.raw.data)
	summary(aov.result) #is there a significant difference between any of the group means? If so, move onto a post-hoc test
	TukeyHSD(aov.result) #post-hoc for multiple pairwise comparisons to see where the significant differences lie.
	summary(aov.result)[[1]][["Pr(>F)"]] #p-values
	print(model.tables(aov.result, "means"), digits=4)
	#read macaque data
	setwd("/home/rosseraa/work.dir/allen.brain/")
	raw.data = read.csv("macaque.common.regions.ANOVA.csv")

	#standardize expression values
	raw.data$expression = scale(raw.data$expression)

	#run ANOVA
	aov.result = aov(expression ~ region, data=raw.data)
	summary(aov.result) #is there a significant difference between any of the group means? If so, move onto a post-hoc test
	#Thsdf
	#read macaque data
	setwd("/home/rosseraa/work.dir/allen.brain/macaque")
	raw.data = read.csv("maoaOut.csv")

	#get pairwise combinations
	com = combn(colnames(raw.data), 2)

	#get p-values
	p.values = apply(com, 2, function(x) t.test(raw.data[,x[1]], raw.data[,x[2]])$p.val)
	#index.php
	<html>
	<form name="input" action="html_form_action.php" method="get">
	Please enter your name: <input type="text" name="user_name">
	<input type="submit" value="Submit">
	</form>
	</html>


	lftp -c 'open -e "zcat mgp.v3.snps.rsIDdbSNPv137.vcf.gz” ftp-mouse.sanger.ac.uk/current_snps/' \| perl -ne '{chomp; if (/^#CHROM/) {print "$_\n"; }else {@a = split (/\t/, $_); print "$_\n" if ($a[0] ==18 and $a[1] >= 12972252 and $a[1] <= 12992948 and $a[10] =~ /^1\/1\|^1\\|1/); } }' > mm10.129S1.Impact.altHom &
	http://156.40.185.32/pub/human.Hela.ALDH.ethanol.RNAseq/features/allHela.sorted.wig.exon.high50Ave.peakFeats.allChr.exonLen.subTotal.RPKM.highest_in_gene.transposed.txt.gz
	README file: http://156.40.185.32/pub/human.Hela.ALDH.ethanol.RNAseq/README.human.Hela.ALDH.ethanol.RNAseq.txt

	dataFile = "/mnt/xdat/pub/human.Hela.ALDH.ethanol.RNAseq/features/allHela.sorted.wig.exon.high50Ave.peakFeats.allChr.exonLen.subTotal.RPKM.highest_in_gene.transposed.txt.gz"
	rawdata = read.delim(gzfile(dataFile), sep="\t", header=T)
	rownames(rawdata) = paste(rawdata[,"Chr"], rawdata[,"GeneSym"], sep = ".")
	data.RPKM = rawdata[, grep("RPKM", colnames(rawdata))]
	rowMax = do.call("pmax", c(data.RPKM[,1:ncol(data.RPKM)],na.rm=TRUE))
	data.RPKM.rowMax.gt1 = data.RPKM[rowMax >1,]
	data.RPKM.rowMax.gt1[is.na(data.RPKM.rowMax.gt1)] = 0
	plot(x=c(0:2)/2, dbinom(c(0:2),2,0.5), pch=20, col="red", type ="b", xlim =c(0,1), ylim =c(0, 0.6), xlab = "AltFreq", ylab="Probability" )
	lines (x=c(0:5)/5, dbinom(c(0:5),5,0.5), pch=20, col="brown",type ="b" )
	lines (x=c(0:10)/10, dbinom(c(0:10),10,0.5), pch=20, col="green",type ="b")
	lines (x=c(0:20)/20, dbinom(c(0:20),20,0.5), pch=20, col="blue",type ="b")

	legend(0.8, 0.58, legend="2x", col="red", pch=20, lty = 1, bty="n" )
	legend(0.8, 0.56, legend="5x", col="brown", pch=20, lty = 1, bty="n" )
	legend(0.8, 0.54, legend="10x", col="green", pch=20, lty = 1, bty="n" )
	legend(0.8, 0.52, legend="20x", col="blue", pch=20, lty = 1, bty="n" )