Skip to content

Instantly share code, notes, and snippets.

@linnil1
Last active July 22, 2021 06:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save linnil1/3612800133b55d4d73dfc8d43731ae56 to your computer and use it in GitHub Desktop.
Save linnil1/3612800133b55d4d73dfc8d43731ae56 to your computer and use it in GitHub Desktop.
Python pipeline using docker follow this tutorial https://rnabio.org/module-01-inputs/0001/05/01/RNAseq_Data/
import os
import time
os.makedirs("data/", exist_ok=True)
hg38 = "data/GRCh38_latest_genomic"
image_fastqc = "quay.io/biocontainers/fastqc:0.11.9--0"
image_trimmomatic = "quay.io/biocontainers/trimmomatic:0.39--hdfd78af_2"
image_hisat = "quay.io/biocontainers/hisat2:2.2.1--he1b5a44_2"
image_samtool = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
image_stringtie = "quay.io/biocontainers/stringtie:2.1.7--h978d192_0"
image_deseq2 = "quay.io/biocontainers/bioconductor-deseq2:1.28.0--r40h5f743cb_0"
image_gffcompare = "quay.io/biocontainers/gffcompare:0.11.2--h7d875b9_2"
folder_fastqc = "data/fastqc"
folder_hisat = "hisat2/genome"
stringtie_list = "data/merge.list"
stringtie_merge = "data/merge.stringtie"
thread = 20
def run(cmd):
print(f'[{time.strftime("%c")}] ' + cmd)
os.system(cmd)
def docker_run(image, cmd):
run(f"docker run -it --rm --security-opt label=disable -v $PWD:/app "
f"-w /app {image} {cmd}")
def download():
run(f"wget https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.fna.gz "
f"-O {hg38}.fna.gz")
run(f"gunzip {hg38}.fna.gz")
run(f"wget https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gff.gz "
f"-O {hg38}.gff.gz")
run(f"gunzip {hg38}.gff.gz")
run(f"wget http://genomedata.org/rnaseq-tutorial/practical.tar "
f"-O data/practical.tar")
run(f"tar xf data/practical.tar -C data/")
def getSample():
return [
"data/hcc1395_normal_rep1",
"data/hcc1395_normal_rep2",
"data/hcc1395_normal_rep3",
"data/hcc1395_tumor_rep1",
"data/hcc1395_tumor_rep2",
"data/hcc1395_tumor_rep3",
]
def fastqc():
run(f"mkdir -p {folder_fastqc}")
fastq = map(lambda i: i + '*' + suffix + '.fastq.gz', getSample())
docker_run(image_fastqc,
f"fastqc -t {thread} {' '.join(fastq)} -o {folder_fastqc}")
def trimmomatic():
for name in getSample():
docker_run(image_trimmomatic,
f"trimmomatic PE -threads {thread} "
f"{name}_r1.fastq.gz {name}_r2.fastq.gz "
f"{name}_r1.clean.fastq.gz {name}_r1.unpair.fastq.gz "
f"{name}_r2.clean.fastq.gz {name}_r2.unpair.fastq.gz "
f"ILLUMINACLIP:./TruSeq3-PE.fa:2:30:10:2:keepBothReads "
f"LEADING:3 TRAILING:3 MINLEN:36")
def hisatInit():
run(f"mkdir -p {folder_hisat}")
docker_run(image_hisat,
f"hisat2-build -p {thread} {hg38}.fna {folder_hisat}")
def hisatRun():
for name in getSample():
docker_run(image_hisat,
f"hisat2 -p {thread} --dta -x {folder_hisat} "
f"-1 {name}_r1{suffix}.fastq.gz -2 {name}_r2{suffix}.fastq.gz "
f"-S {name}{suffix}.hisat.sam")
docker_run(image_samtool,
f"samtools sort -@ {thread} "
f"{name}{suffix}.hisat.sam -o {name}{suffix}.hisat.bam")
def stringtieRun():
for name in getSample():
docker_run(image_stringtie,
f"stringtie -p {thread} -G {hg38}.gff {name}{suffix}.bam "
f"-o {name}{suffix}.stringtie.gff")
open(stringtie_list, "w").writelines(
map(lambda i: i + suffix + ".stringtie.gff\n", getSample()))
docker_run(image_stringtie,
f"stringtie -p {thread} --merge -G {hg38}.gff {stringtie_list} "
f"-o {stringtie_merge}.gff")
for name in getSample():
docker_run(image_stringtie,
f"stringtie -e -B -p {thread} -G {stringtie_merge}.gff "
f"{name}{suffix}.bam -o {name}{suffix}.stringtie.merge.gff")
def gffcompareRun():
docker_run(image_gffcompare,
f"gffcompare -r {hg38}.gff -G {stringtie_merge}.gff "
f"-o {stringtie_merge}.compare")
def stringtieAfter():
open(stringtie_list, "w").writelines(
map(lambda i: f"{i}{suffix} {i}{suffix}.gff\n", getSample()))
run(f"python3 prepDE.py -i {stringtie_list} "
f"-g {stringtie_merge}.result.gene.csv -t {stringtie_merge}.result.tx.csv")
def deseqRun():
docker_run(image_deseq2, f"Rscript deseq2.R {stringtie_merge}.result.tx")
if __name__ == "__main__":
suffix = ""
download()
fastqc()
trimmomatic()
suffix = ".clean"
fastqc()
hisatInit()
hisatRun()
suffix = ".clean.hisat"
stringtieRun()
gffcompareRun()
suffix = ".clean.hisat.stringtie.merge"
stringtieAfter()
deseqRun()
@linnil1
Copy link
Author

linnil1 commented Jul 22, 2021

data/
├── fastqc
│   ├── hcc1395_normal_rep1_r1.clean_fastqc.html
│   ├── hcc1395_normal_rep1_r1.clean_fastqc.zip
│   ├── hcc1395_normal_rep1_r1_fastqc.html
│   ├── hcc1395_normal_rep1_r1_fastqc.zip
│   ├── hcc1395_normal_rep1_r2.clean_fastqc.html
│   ├── hcc1395_normal_rep1_r2.clean_fastqc.zip
│   ├── hcc1395_normal_rep1_r2_fastqc.html
│   ├── hcc1395_normal_rep1_r2_fastqc.zip
│   ├── hcc1395_normal_rep2_r1.clean_fastqc.html
│   ├── hcc1395_normal_rep2_r1.clean_fastqc.zip
│   ├── hcc1395_normal_rep2_r1_fastqc.html
│   ├── hcc1395_normal_rep2_r1_fastqc.zip
│   ├── hcc1395_normal_rep2_r2.clean_fastqc.html
│   ├── hcc1395_normal_rep2_r2.clean_fastqc.zip
│   ├── hcc1395_normal_rep2_r2_fastqc.html
│   ├── hcc1395_normal_rep2_r2_fastqc.zip
│   ├── hcc1395_normal_rep3_r1.clean_fastqc.html
│   ├── hcc1395_normal_rep3_r1.clean_fastqc.zip
│   ├── hcc1395_normal_rep3_r1_fastqc.html
│   ├── hcc1395_normal_rep3_r1_fastqc.zip
│   ├── hcc1395_normal_rep3_r2.clean_fastqc.html
│   ├── hcc1395_normal_rep3_r2.clean_fastqc.zip
│   ├── hcc1395_normal_rep3_r2_fastqc.html
│   ├── hcc1395_normal_rep3_r2_fastqc.zip
│   ├── hcc1395_tumor_rep1_r1.clean_fastqc.html
│   ├── hcc1395_tumor_rep1_r1.clean_fastqc.zip
│   ├── hcc1395_tumor_rep1_r1_fastqc.html
│   ├── hcc1395_tumor_rep1_r1_fastqc.zip
│   ├── hcc1395_tumor_rep1_r2.clean_fastqc.html
│   ├── hcc1395_tumor_rep1_r2.clean_fastqc.zip
│   ├── hcc1395_tumor_rep1_r2_fastqc.html
│   ├── hcc1395_tumor_rep1_r2_fastqc.zip
│   ├── hcc1395_tumor_rep2_r1.clean_fastqc.html
│   ├── hcc1395_tumor_rep2_r1.clean_fastqc.zip
│   ├── hcc1395_tumor_rep2_r1_fastqc.html
│   ├── hcc1395_tumor_rep2_r1_fastqc.zip
│   ├── hcc1395_tumor_rep2_r2.clean_fastqc.html
│   ├── hcc1395_tumor_rep2_r2.clean_fastqc.zip
│   ├── hcc1395_tumor_rep2_r2_fastqc.html
│   ├── hcc1395_tumor_rep2_r2_fastqc.zip
│   ├── hcc1395_tumor_rep3_r1.clean_fastqc.html
│   ├── hcc1395_tumor_rep3_r1.clean_fastqc.zip
│   ├── hcc1395_tumor_rep3_r1_fastqc.html
│   ├── hcc1395_tumor_rep3_r1_fastqc.zip
│   ├── hcc1395_tumor_rep3_r2.clean_fastqc.html
│   ├── hcc1395_tumor_rep3_r2.clean_fastqc.zip
│   ├── hcc1395_tumor_rep3_r2_fastqc.html
│   └── hcc1395_tumor_rep3_r2_fastqc.zip
├── GRCh38_latest_genomic.blast.nhr
├── GRCh38_latest_genomic.blast.nin
├── GRCh38_latest_genomic.blast.nog
├── GRCh38_latest_genomic.blast.nsd
├── GRCh38_latest_genomic.blast.nsi
├── GRCh38_latest_genomic.blast.nsq
├── GRCh38_latest_genomic.fna
├── GRCh38_latest_genomic.fna.gz
├── GRCh38_latest_genomic.gff
├── GRCh38_latest_rna.blast.nhr
├── GRCh38_latest_rna.blast.nin
├── GRCh38_latest_rna.blast.nog
├── GRCh38_latest_rna.blast.nsd
├── GRCh38_latest_rna.blast.nsi
├── GRCh38_latest_rna.blast.nsq
├── GRCh38_latest_rna.fna
├── GRCh38_latest_rna.fna.NM_007297.4.fa
├── hcc1395_normal_rep1.clean.hisat.bam
├── hcc1395_normal_rep1.clean.hisat.sam
├── hcc1395_normal_rep1.clean.hisat.stringtie.gff
├── hcc1395_normal_rep1.clean.hisat.stringtie.merge.gff
├── hcc1395_normal_rep1_r1.clean.fastq.gz
├── hcc1395_normal_rep1_r1.fastq.gz
├── hcc1395_normal_rep1_r1.unpair.fastq.gz
├── hcc1395_normal_rep1_r2.clean.fastq.gz
├── hcc1395_normal_rep1_r2.fastq.gz
├── hcc1395_normal_rep1_r2.unpair.fastq.gz
├── hcc1395_normal_rep2.clean.hisat.bam
├── hcc1395_normal_rep2.clean.hisat.sam
├── hcc1395_normal_rep2.clean.hisat.stringtie.gff
├── hcc1395_normal_rep2.clean.hisat.stringtie.merge.gff
├── hcc1395_normal_rep2_r1.clean.fastq.gz
├── hcc1395_normal_rep2_r1.fastq.gz
├── hcc1395_normal_rep2_r1.unpair.fastq.gz
├── hcc1395_normal_rep2_r2.clean.fastq.gz
├── hcc1395_normal_rep2_r2.fastq.gz
├── hcc1395_normal_rep2_r2.unpair.fastq.gz
├── hcc1395_normal_rep3.clean.hisat.bam
├── hcc1395_normal_rep3.clean.hisat.sam
├── hcc1395_normal_rep3.clean.hisat.stringtie.gff
├── hcc1395_normal_rep3.clean.hisat.stringtie.merge.gff
├── hcc1395_normal_rep3_r1.clean.fastq.gz
├── hcc1395_normal_rep3_r1.fastq.gz
├── hcc1395_normal_rep3_r1.unpair.fastq.gz
├── hcc1395_normal_rep3_r2.clean.fastq.gz
├── hcc1395_normal_rep3_r2.fastq.gz
├── hcc1395_normal_rep3_r2.unpair.fastq.gz
├── hcc1395_tumor_rep1.clean.hisat.bam
├── hcc1395_tumor_rep1.clean.hisat.sam
├── hcc1395_tumor_rep1.clean.hisat.stringtie.gff
├── hcc1395_tumor_rep1.clean.hisat.stringtie.merge.gff
├── hcc1395_tumor_rep1_r1.clean.fastq.gz
├── hcc1395_tumor_rep1_r1.fastq.gz
├── hcc1395_tumor_rep1_r1.unpair.fastq.gz
├── hcc1395_tumor_rep1_r2.clean.fastq.gz
├── hcc1395_tumor_rep1_r2.fastq.gz
├── hcc1395_tumor_rep1_r2.unpair.fastq.gz
├── hcc1395_tumor_rep2.clean.hisat.bam
├── hcc1395_tumor_rep2.clean.hisat.sam
├── hcc1395_tumor_rep2.clean.hisat.stringtie.gff
├── hcc1395_tumor_rep2.clean.hisat.stringtie.merge.gff
├── hcc1395_tumor_rep2_r1.clean.fastq.gz
├── hcc1395_tumor_rep2_r1.fastq.gz
├── hcc1395_tumor_rep2_r1.unpair.fastq.gz
├── hcc1395_tumor_rep2_r2.clean.fastq.gz
├── hcc1395_tumor_rep2_r2.fastq.gz
├── hcc1395_tumor_rep2_r2.unpair.fastq.gz
├── hcc1395_tumor_rep3.clean.hisat.bam
├── hcc1395_tumor_rep3.clean.hisat.sam
├── hcc1395_tumor_rep3.clean.hisat.stringtie.gff
├── hcc1395_tumor_rep3.clean.hisat.stringtie.merge.gff
├── hcc1395_tumor_rep3_r1.clean.fastq.gz
├── hcc1395_tumor_rep3_r1.fastq.gz
├── hcc1395_tumor_rep3_r1.unpair.fastq.gz
├── hcc1395_tumor_rep3_r2.clean.fastq.gz
├── hcc1395_tumor_rep3_r2.fastq.gz
├── hcc1395_tumor_rep3_r2.unpair.fastq.gz
├── merge.list
├── merge.stringtie.compare
├── merge.stringtie.compare.annotated.gtf
├── merge.stringtie.compare.loci
├── merge.stringtie.compare.merge.stringtie.gff.refmap
├── merge.stringtie.compare.merge.stringtie.gff.tmap
├── merge.stringtie.compare.tracking
├── merge.stringtie.gff
├── merge.stringtie.result.gene.csv
├── merge.stringtie.result.tx.csv
├── merge.stringtie.result.tx.deseq.csv
├── practical.tar
└── t_data.ctab

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment