Last active
July 19, 2017 14:21
-
-
Save coralzhang/fc4e51609ff316486c1682feed6404a9 to your computer and use it in GitHub Desktop.
Code for Zebrafish-pde6c project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm(list=ls()) | |
setwd("/path/to/directory") | |
suppressMessages(library(DESeq2)) | |
packageVersion("DESeq2") | |
### DESeq 2 ### | |
dat1 = read.table("outtxtnamefromfeatureCounts.txt",header=TRUE) | |
outname = "fc-pval-wgtf.csv" | |
dim(dat1) | |
dat = floor(dat1[,c(7:12)]) | |
cond.a = factor(rep(c("PR","WR"), each=3),levels=c("WR","PR")) ### Need to check the order of files | |
coldata = data.frame(cond.a=cond.a) | |
dds <- DESeqDataSetFromMatrix(dat, DataFrame(cond.a), ~ cond.a) | |
dds <- estimateSizeFactors(dds) | |
dds <- estimateDispersions(dds) | |
deseq.obj = DESeq(dds) | |
res = results(deseq.obj) | |
logfc = (res$log2FoldChange) | |
pval = res$padj | |
datout=cbind(logfc,pval) | |
sampleDists <- dist(t(assay(rld))) | |
#### MA plot from DESeq2 here | |
pdf("MA.pdf",height=4,width=4) | |
plotMA(res, ylim=c(-2,2),main="MA plot") | |
dev.off() | |
pdf("heatmap.pdf",height=4,width=4) | |
library("RColorBrewer") | |
library("pheatmap") | |
rld <- rlog(dds, blind=FALSE) | |
sampleDists <- dist(t(assay(rld))) | |
sampleDistMatrix <- as.matrix(sampleDists) | |
rownames(sampleDistMatrix) <- paste(rld$cond.a,rep(1:3,2)) | |
colnames(sampleDistMatrix) <- paste(rld$cond.a,rep(1:3,2)) | |
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) | |
pheatmap(sampleDistMatrix, | |
clustering_distance_rows=sampleDists, | |
clustering_distance_cols=sampleDists, | |
col=colors) | |
dev.off() | |
pdf("PCA.pdf",height=4,width=4) | |
plotPCA(rld, intgroup=c("cond.a")) | |
dev.off() | |
########### RNA Seq & qRT-PCR correlation | |
fc = 2^logfc | |
gene.id1=c("ENSDARG00000044199","ENSDARG00000042529","ENSDARG00000044861","ENSDARG00000097008","ENSDARG00000017274","ENSDARG00000045677","ENSDARG00000002193") | |
gene.id = paste0("gene:",gene.id1) | |
gene.id.ind = vector(length=length(gene.id)) | |
for (i in 1:length(gene.id)){ | |
gene.id.ind[i]=which(as.character(dat1[,1])==gene.id[i]) | |
} | |
rnafc = fc[gene.id.ind] | |
gname.fai= c("gnat1", "rho", "gnat2", "uv", "blue", "green", "red") | |
qPCR.data.fai <- c(0.694142669, 0.659446831, 0.178241305,0.179138536, | |
0.448949859, 0.185187476, 0.64875601) | |
indswitch=c(1,3,7,6,5,4,2) | |
gname.fai[indswitch] | |
qPCR.data <- qPCR.data.fai[indswitch] | |
cor.test(rnafc,qPCR.data) | |
#gene.name=c("gnat1","gnat2","opn1lw2 (red opsin)","opn1mw1 (green opsin)","opn1sw2 (blue opsin)","opn1sw1 (uv opsin)","rhodopsin") | |
gene.name=c("gnat1","gnat2","red","green","blue","uv","rhodopsin") | |
min1=min(c(rnafc,qPCR.data));max1=max(c(rnafc,qPCR.data))+0.05 | |
col1=c("black","black","red","green","blue","purple","black") | |
pdf("validatation.pdf",width=5,height=5) | |
plot(rnafc,qPCR.data, xlim=c(min1,max1),ylim=c(min1,max1),col=col1, | |
xlab="Fold Change by RNA seq",ylab="Fold Change by RT-qPCR") | |
text(rnafc,qPCR.data+c(0.03,0.04,rep(0.03,5)), labels=as.character(gene.name),col=col1, srt=45,font=2) # text | |
abline(0,1,lty=3) | |
dev.off() | |
library(vioplot) | |
dat.fpkm1 = read.table("cufnorm_out/genes.fpkm_table_from_Cuffnorm",sep="\t",header=TRUE) | |
### cufnorm_out is the name of the cuffnorm output. | |
dat.fpkm1[1,] | |
summary(dat.fpkm[,-1]) | |
dat.fpkm = dat.fpkm1[,c(2,1,3:6)+1] | |
dat.fpkm[1,] | |
dim(dat.fpkm) | |
labvec = paste0(rep(c("PR","WR"),each=3),rep(1:3,2)) ## Make sure that the order of output matches. | |
par(mfrow=c(2,3)) | |
for (i in 1:6){vioplot(log(dat.fpkm[dat.fpkm[,i]!=0,i]),ylim=c(0,15),names=c(labvec[i]),main=perct[i])} | |
colnames(dat.fpkm)=labvec | |
jpeg("pair1PR.jpeg",width=400,height=400,quality=20) | |
pairs(log(dat.fpkm[,1:3],2)) | |
dev.off() | |
jpeg("pair2WR.jpeg",width=400,height=400,quality=20) | |
pairs(log(dat.fpkm[,4:6],2)) | |
dev.off() | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
############## Trimmomatic | |
java -jar /path/to/Trimmomatic/trimmomatic-0.36.jar PE -phred33 $ingzfile1 $ingzfile2 $outfile1 $unmapoutfile1 $outfile2 $unmapoutfile2 ILLUMINACLIP:/path/to/Trimmomatic/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 > $logname2 2>&1 | |
############## FastQC | |
/path/to//FastQC/fastqc $infqfile1 -f fastq --nogroup -o $outdirect | |
############## STAR: Generate Genome index | |
/path/to/STAR/bin/Linux_x86_64/STAR --runThreadN 6 --runMode genomeGenerate --genomeDir /file/to/store/index --genomeFastaFiles /path/to/genome/fastaq/Danio_rerio.GRCz10.dna.toplevel.fa --sjdbGTFfile /path/to/genome/gtf/Danio_rerio.GRCz10.89.gtf | |
############## STAR: Alignment | |
/path/to/STAR/bin/Linux_x86_64/STAR --runThreadN 16 --genomeDir $gnomedir --readFilesIn $infqgzR1 $infqgzR2 --readFilesCommand gunzip -c --outSAMstrandField intronMotif --limitBAMsortRAM 41000000000 --outFileNamePrefix $outname --outSAMtype BAM SortedByCoordinate --outBAMsortingThreadN 8 --outFilterMultimapNmax 50 --winAnchorMultimapNmax 50 --alignIntronMax 10000 --alignEndsType EndToEnd --outFilterScoreMinOverLread 0.3 --outFilterMatchNminOverLread 0.3 | |
############## Counting: with intron: not fixed yet | |
less Danio_rerio.GRCz10.89gene.gff3 | grep -v KN1 | grep -v MT | grep protein_coding > Danio_rerio.GRCz10.89gene2.gff3 | |
############## featureCount: Read counting | |
/path/to/subread-1.5.2-Linux-x86_64/bin/featureCounts -p -B -t gene -g ID -C -T 6 -a /path/to/genome/Danio_rerio.GRCz10.89gene2.gff3 -o $outtxtname $arrayofbamfiles | |
############## Resort to R for DESeq2 analysis | |
############## Run Cufflinks/Cuffdiff | |
/path/to/cufflinks/cufflinks -G /path/to/gtf/Danio_rerio.GRCz10.89.gtf $inbam -o $outdir | |
/path/to/cufflinks/cuffdiff -p 6 -o $outdir --emit-count-tables -b /path/to/genome/Danio_rerio.GRCz10.dna.toplevel.fa /path/to/gtf/Danio_rerio.GRCz10.89.gtf \ | |
$mutantbam1,$mutantbam2,$mutantbam3 \ | |
$wildbam1,$wildbam2,$wildbam3 > cufdiff_out.log 2>&1 | |
/path/to/cufflinks/cuffnorm -p 6 -o $outdir --emit-count-tables -b /path/to/genome/Danio_rerio.GRCz10.dna.toplevel.fa /path/to/gtf/Danio_rerio.GRCz10.89.gtf \ | |
$mutantbam1,$mutantbam2,$mutantbam3 \ | |
$wildbam1,$wildbam2,$wildbam3 > cufdiff_out.log 2>&1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment