Skip to content

Instantly share code, notes, and snippets.

#A solution with ggplot2
#library(reshape2)
#test.m <- melt(test)
#sample of dataframe:
head(combined.data)
> region expression species
1 NAc 0.4711284 macaque
2 NAc -0.1003201 macaque
#read combined data
setwd("/home/rosseraa/work.dir/allen.brain/")
raw.data = read.csv("common.regions.ANOVA.csv")
#run ANOVA
aov.result = aov(expression ~ region * species, data=raw.data)
summary(aov.result)
TukeyHSD(aov.result)
summary(aov.result)[[1]][["Pr(>F)"]]
print(model.tables(aov.result, "means"), digits=4)
#read macaque data
setwd("/home/rosseraa/work.dir/allen.brain/")
macaque.raw.data = read.csv("macaque.common.regions.ANOVA.csv")
#standardize (z-score) macaque expression values
macaque.raw.data$expression = scale(macaque.raw.data$expression)
#run macaque ANOVA
aov.result = aov(expression ~ region, data=macaque.raw.data)
summary(aov.result) #is there a significant difference between any of the group means? If so, move onto a post-hoc test
#read human data
setwd("/home/rosseraa/work.dir/allen.brain/")
human.raw.data = read.csv("human.common.regions.ANOVA.csv")
#run human ANOVA
aov.result = aov(expression ~ region, data=human.raw.data)
summary(aov.result) #is there a significant difference between any of the group means? If so, move onto a post-hoc test
TukeyHSD(aov.result) #post-hoc for multiple pairwise comparisons to see where the significant differences lie.
summary(aov.result)[[1]][["Pr(>F)"]] #p-values
print(model.tables(aov.result, "means"), digits=4)
#read macaque data
setwd("/home/rosseraa/work.dir/allen.brain/")
raw.data = read.csv("macaque.common.regions.ANOVA.csv")
#standardize expression values
raw.data$expression = scale(raw.data$expression)
#run ANOVA
aov.result = aov(expression ~ region, data=raw.data)
summary(aov.result) #is there a significant difference between any of the group means? If so, move onto a post-hoc test
#Thsdf
#read macaque data
setwd("/home/rosseraa/work.dir/allen.brain/macaque")
raw.data = read.csv("maoaOut.csv")
#get pairwise combinations
com = combn(colnames(raw.data), 2)
#get p-values
p.values = apply(com, 2, function(x) t.test(raw.data[,x[1]], raw.data[,x[2]])$p.val)
@sashaphanes
sashaphanes / simple_web_form.php
Last active December 16, 2015 16:59
web form
#index.php
<html>
<form name="input" action="html_form_action.php" method="get">
Please enter your name: <input type="text" name="user_name">
<input type="submit" value="Submit">
</form>
</html>
@sashaphanes
sashaphanes / homozygousAltImpact.bash
Created April 19, 2013 18:55
Sanger recently released mouse SNPs (in VCF format) from next-generation sequencing for 18 strains. Parse all SNPs where the alternative alleles are homozygous for strain 129S1 for gene Impact, no local save #ftp://ftp-mouse.sanger.ac.uk/current_snps/mgp.v3.snps.rsIDdbSNPv137.vcf.gz
lftp -c 'open -e "zcat mgp.v3.snps.rsIDdbSNPv137.vcf.gz” ftp-mouse.sanger.ac.uk/current_snps/' | perl -ne '{chomp; if (/^#CHROM/) {print "$_\n"; }else {@a = split (/\t/, $_); print "$_\n" if ($a[0] ==18 and $a[1] >= 12972252 and $a[1] <= 12992948 and $a[10] =~ /^1\/1|^1\|1/); } }' > mm10.129S1.Impact.altHom &
@sashaphanes
sashaphanes / qnormdata.R
Created April 19, 2013 18:42
The following is a real data file and contains four different types of gene expression measurements (including RPKM) from one most recent finished RNAseq data. Find the file path from your Linux machine or rhdna account and load the data directly into R. Then select RPKM measurements for all samples. After excluding genes that no samples have RP…
http://156.40.185.32/pub/human.Hela.ALDH.ethanol.RNAseq/features/allHela.sorted.wig.exon.high50Ave.peakFeats.allChr.exonLen.subTotal.RPKM.highest_in_gene.transposed.txt.gz
README file: http://156.40.185.32/pub/human.Hela.ALDH.ethanol.RNAseq/README.human.Hela.ALDH.ethanol.RNAseq.txt
dataFile = "/mnt/xdat/pub/human.Hela.ALDH.ethanol.RNAseq/features/allHela.sorted.wig.exon.high50Ave.peakFeats.allChr.exonLen.subTotal.RPKM.highest_in_gene.transposed.txt.gz"
rawdata = read.delim(gzfile(dataFile), sep="\t", header=T)
rownames(rawdata) = paste(rawdata[,"Chr"], rawdata[,"GeneSym"], sep = ".")
data.RPKM = rawdata[, grep("RPKM", colnames(rawdata))]
rowMax = do.call("pmax", c(data.RPKM[,1:ncol(data.RPKM)],na.rm=TRUE))
data.RPKM.rowMax.gt1 = data.RPKM[rowMax >1,]
data.RPKM.rowMax.gt1[is.na(data.RPKM.rowMax.gt1)] = 0
@sashaphanes
sashaphanes / binom.R
Created April 19, 2013 18:41
RNAseq reads covered 5,000 biallelic human autosomal SNPs (all are heterogeneous, assume they are not in LD). Simple R statements plot the distribution of the alternative allelic frequency under sequencing coverage 2x, 5x, 10x, 20x, respectively. Calculate the p-values with the hypothesis that there are no differential allelic expression for the…
plot(x=c(0:2)/2, dbinom(c(0:2),2,0.5), pch=20, col="red", type ="b", xlim =c(0,1), ylim =c(0, 0.6), xlab = "AltFreq", ylab="Probability" )
lines (x=c(0:5)/5, dbinom(c(0:5),5,0.5), pch=20, col="brown",type ="b" )
lines (x=c(0:10)/10, dbinom(c(0:10),10,0.5), pch=20, col="green",type ="b")
lines (x=c(0:20)/20, dbinom(c(0:20),20,0.5), pch=20, col="blue",type ="b")
legend(0.8, 0.58, legend="2x", col="red", pch=20, lty = 1, bty="n" )
legend(0.8, 0.56, legend="5x", col="brown", pch=20, lty = 1, bty="n" )
legend(0.8, 0.54, legend="10x", col="green", pch=20, lty = 1, bty="n" )
legend(0.8, 0.52, legend="20x", col="blue", pch=20, lty = 1, bty="n" )