Skip to content

Instantly share code, notes, and snippets.

@coralzhang
Last active July 19, 2017 14:21
Show Gist options
  • Save coralzhang/fc4e51609ff316486c1682feed6404a9 to your computer and use it in GitHub Desktop.
Save coralzhang/fc4e51609ff316486c1682feed6404a9 to your computer and use it in GitHub Desktop.
Code for Zebrafish-pde6c project
rm(list=ls())
setwd("/path/to/directory")
suppressMessages(library(DESeq2))
packageVersion("DESeq2")
### DESeq 2 ###
dat1 = read.table("outtxtnamefromfeatureCounts.txt",header=TRUE)
outname = "fc-pval-wgtf.csv"
dim(dat1)
dat = floor(dat1[,c(7:12)])
cond.a = factor(rep(c("PR","WR"), each=3),levels=c("WR","PR")) ### Need to check the order of files
coldata = data.frame(cond.a=cond.a)
dds <- DESeqDataSetFromMatrix(dat, DataFrame(cond.a), ~ cond.a)
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)
deseq.obj = DESeq(dds)
res = results(deseq.obj)
logfc = (res$log2FoldChange)
pval = res$padj
datout=cbind(logfc,pval)
sampleDists <- dist(t(assay(rld)))
#### MA plot from DESeq2 here
pdf("MA.pdf",height=4,width=4)
plotMA(res, ylim=c(-2,2),main="MA plot")
dev.off()
pdf("heatmap.pdf",height=4,width=4)
library("RColorBrewer")
library("pheatmap")
rld <- rlog(dds, blind=FALSE)
sampleDists <- dist(t(assay(rld)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(rld$cond.a,rep(1:3,2))
colnames(sampleDistMatrix) <- paste(rld$cond.a,rep(1:3,2))
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors)
dev.off()
pdf("PCA.pdf",height=4,width=4)
plotPCA(rld, intgroup=c("cond.a"))
dev.off()
########### RNA Seq & qRT-PCR correlation
fc = 2^logfc
gene.id1=c("ENSDARG00000044199","ENSDARG00000042529","ENSDARG00000044861","ENSDARG00000097008","ENSDARG00000017274","ENSDARG00000045677","ENSDARG00000002193")
gene.id = paste0("gene:",gene.id1)
gene.id.ind = vector(length=length(gene.id))
for (i in 1:length(gene.id)){
gene.id.ind[i]=which(as.character(dat1[,1])==gene.id[i])
}
rnafc = fc[gene.id.ind]
gname.fai= c("gnat1", "rho", "gnat2", "uv", "blue", "green", "red")
qPCR.data.fai <- c(0.694142669, 0.659446831, 0.178241305,0.179138536,
0.448949859, 0.185187476, 0.64875601)
indswitch=c(1,3,7,6,5,4,2)
gname.fai[indswitch]
qPCR.data <- qPCR.data.fai[indswitch]
cor.test(rnafc,qPCR.data)
#gene.name=c("gnat1","gnat2","opn1lw2 (red opsin)","opn1mw1 (green opsin)","opn1sw2 (blue opsin)","opn1sw1 (uv opsin)","rhodopsin")
gene.name=c("gnat1","gnat2","red","green","blue","uv","rhodopsin")
min1=min(c(rnafc,qPCR.data));max1=max(c(rnafc,qPCR.data))+0.05
col1=c("black","black","red","green","blue","purple","black")
pdf("validatation.pdf",width=5,height=5)
plot(rnafc,qPCR.data, xlim=c(min1,max1),ylim=c(min1,max1),col=col1,
xlab="Fold Change by RNA seq",ylab="Fold Change by RT-qPCR")
text(rnafc,qPCR.data+c(0.03,0.04,rep(0.03,5)), labels=as.character(gene.name),col=col1, srt=45,font=2) # text
abline(0,1,lty=3)
dev.off()
library(vioplot)
dat.fpkm1 = read.table("cufnorm_out/genes.fpkm_table_from_Cuffnorm",sep="\t",header=TRUE)
### cufnorm_out is the name of the cuffnorm output.
dat.fpkm1[1,]
summary(dat.fpkm[,-1])
dat.fpkm = dat.fpkm1[,c(2,1,3:6)+1]
dat.fpkm[1,]
dim(dat.fpkm)
labvec = paste0(rep(c("PR","WR"),each=3),rep(1:3,2)) ## Make sure that the order of output matches.
par(mfrow=c(2,3))
for (i in 1:6){vioplot(log(dat.fpkm[dat.fpkm[,i]!=0,i]),ylim=c(0,15),names=c(labvec[i]),main=perct[i])}
colnames(dat.fpkm)=labvec
jpeg("pair1PR.jpeg",width=400,height=400,quality=20)
pairs(log(dat.fpkm[,1:3],2))
dev.off()
jpeg("pair2WR.jpeg",width=400,height=400,quality=20)
pairs(log(dat.fpkm[,4:6],2))
dev.off()
#!/bin/bash
############## Trimmomatic
java -jar /path/to/Trimmomatic/trimmomatic-0.36.jar PE -phred33 $ingzfile1 $ingzfile2 $outfile1 $unmapoutfile1 $outfile2 $unmapoutfile2 ILLUMINACLIP:/path/to/Trimmomatic/adapters/TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 > $logname2 2>&1
############## FastQC
/path/to//FastQC/fastqc $infqfile1 -f fastq --nogroup -o $outdirect
############## STAR: Generate Genome index
/path/to/STAR/bin/Linux_x86_64/STAR --runThreadN 6 --runMode genomeGenerate --genomeDir /file/to/store/index --genomeFastaFiles /path/to/genome/fastaq/Danio_rerio.GRCz10.dna.toplevel.fa --sjdbGTFfile /path/to/genome/gtf/Danio_rerio.GRCz10.89.gtf
############## STAR: Alignment
/path/to/STAR/bin/Linux_x86_64/STAR --runThreadN 16 --genomeDir $gnomedir --readFilesIn $infqgzR1 $infqgzR2 --readFilesCommand gunzip -c --outSAMstrandField intronMotif --limitBAMsortRAM 41000000000 --outFileNamePrefix $outname --outSAMtype BAM SortedByCoordinate --outBAMsortingThreadN 8 --outFilterMultimapNmax 50 --winAnchorMultimapNmax 50 --alignIntronMax 10000 --alignEndsType EndToEnd --outFilterScoreMinOverLread 0.3 --outFilterMatchNminOverLread 0.3
############## Counting: with intron: not fixed yet
less Danio_rerio.GRCz10.89gene.gff3 | grep -v KN1 | grep -v MT | grep protein_coding > Danio_rerio.GRCz10.89gene2.gff3
############## featureCount: Read counting
/path/to/subread-1.5.2-Linux-x86_64/bin/featureCounts -p -B -t gene -g ID -C -T 6 -a /path/to/genome/Danio_rerio.GRCz10.89gene2.gff3 -o $outtxtname $arrayofbamfiles
############## Resort to R for DESeq2 analysis
############## Run Cufflinks/Cuffdiff
/path/to/cufflinks/cufflinks -G /path/to/gtf/Danio_rerio.GRCz10.89.gtf $inbam -o $outdir
/path/to/cufflinks/cuffdiff -p 6 -o $outdir --emit-count-tables -b /path/to/genome/Danio_rerio.GRCz10.dna.toplevel.fa /path/to/gtf/Danio_rerio.GRCz10.89.gtf \
$mutantbam1,$mutantbam2,$mutantbam3 \
$wildbam1,$wildbam2,$wildbam3 > cufdiff_out.log 2>&1
/path/to/cufflinks/cuffnorm -p 6 -o $outdir --emit-count-tables -b /path/to/genome/Danio_rerio.GRCz10.dna.toplevel.fa /path/to/gtf/Danio_rerio.GRCz10.89.gtf \
$mutantbam1,$mutantbam2,$mutantbam3 \
$wildbam1,$wildbam2,$wildbam3 > cufdiff_out.log 2>&1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment