Skip to content

Instantly share code, notes, and snippets.

@ccwang002
Last active November 17, 2017 19:36
Show Gist options
  • Save ccwang002/2686840e90574a67a673ec4b48e9f036 to your computer and use it in GitHub Desktop.
Save ccwang002/2686840e90574a67a673ec4b48e9f036 to your computer and use it in GitHub Desktop.
Example Snakefile for the new Tuxedo RNA-Seq pipeline (remote on Google Cloud Storage)
from pathlib import Path
from snakemake.remote.GS import RemoteProvider as GSRemoteProvider
GS = GSRemoteProvider()
GS_PREFIX = "lbwang-playground/snakemake_rnaseq"
GENOME_FA = GS.remote(f"{GS_PREFIX}/griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/chr22_ERCC92.fa")
GENOME_GTF = GS.remote(f"{GS_PREFIX}/griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/genes_chr22_ERCC92.gtf")
HISAT2_INDEX_PREFIX = "hisat2_index/chr22_ERCC92"
FULL_HISAT2_INDEX_PREFIX = "dinglab/lbwang/snakemake_demo/hisat2_index/chr22_ERCC92"
# # Use GS.glob_wildcards only after the following issues is resolved
# # https://bitbucket.org/snakemake/snakemake/issues/700
# SAMPLES, *_ = GS.glob_wildcards(GS_PREFIX + '/griffithlab_brain_vs_uhr/HBR_UHR_ERCC_ds_10pc/{sample}.read1.fastq.gz')
SAMPLES = [
'HBR_Rep1_ERCC-Mix2_Build37-ErccTranscripts-chr22',
'HBR_Rep2_ERCC-Mix2_Build37-ErccTranscripts-chr22',
'HBR_Rep3_ERCC-Mix2_Build37-ErccTranscripts-chr22',
'UHR_Rep1_ERCC-Mix1_Build37-ErccTranscripts-chr22',
'UHR_Rep2_ERCC-Mix1_Build37-ErccTranscripts-chr22',
'UHR_Rep3_ERCC-Mix1_Build37-ErccTranscripts-chr22',
]
rule extract_genome_splice_sites:
input: GENOME_GTF
output: "hisat2_index/chr22_ERCC92.ss"
shell: "hisat2_extract_splice_sites.py {input} > {output}"
rule extract_genome_exons:
input: GENOME_GTF
output: "hisat2_index/chr22_ERCC92.exon"
shell: "hisat2_extract_exons.py {input} > {output}"
rule build_hisat_index:
input:
genome_fa=GENOME_FA,
splice_sites="hisat2_index/chr22_ERCC92.ss",
exons="hisat2_index/chr22_ERCC92.exon",
output: expand(f"{HISAT2_INDEX_PREFIX}.{{ix}}.ht2", ix=range(1, 9))
log: "hisat2_index/build.log"
threads: 8
shell:
"hisat2-build -p {threads} {input.genome_fa} "
"--ss {input.splice_sites} --exon {input.exons} {FULL_HISAT2_INDEX_PREFIX} "
"2>{log}"
rule align_hisat:
input:
hisat2_index=expand(f"{HISAT2_INDEX_PREFIX}.{{ix}}.ht2", ix=range(1, 9)),
fastq1=GS.remote(GS_PREFIX + "/griffithlab_brain_vs_uhr/HBR_UHR_ERCC_ds_10pc/{sample}.read1.fastq.gz"),
fastq2=GS.remote(GS_PREFIX + "/griffithlab_brain_vs_uhr/HBR_UHR_ERCC_ds_10pc/{sample}.read2.fastq.gz"),
output: "align_hisat2/{sample}.bam"
log: "align_hisat2/{sample}.log"
threads: 4
shell:
"hisat2 -p {threads} --dta -x {FULL_HISAT2_INDEX_PREFIX} "
"-1 {input.fastq1} -2 {input.fastq2} 2>{log} | "
"samtools sort -@ {threads} -o {output}"
rule align_all_samples:
input: expand("align_hisat2/{sample}.bam", sample=SAMPLES)
rule stringtie_assemble:
input:
genome_gtf=GENOME_GTF,
bam="align_hisat2/{sample}.bam"
output: "stringtie/assembled/{sample}.gtf"
threads: 4
shell:
"stringtie -p {threads} -G {input.genome_gtf} "
"-o {output} -l {wildcards.sample} {input.bam}"
rule stringtie_merge_list:
input: expand("stringtie/assembled/{sample}.gtf", sample=SAMPLES)
output: "stringtie/merged_list.txt"
run:
with open(output[0], 'w') as f:
for gtf in input:
print(Path(gtf).resolve(), file=f)
rule stringtie_merge:
input:
genome_gtf=GENOME_GTF,
merged_list="stringtie/merged_list.txt",
sample_gtfs=expand("stringtie/assembled/{sample}.gtf", sample=SAMPLES)
output: "stringtie/merged.gtf"
threads: 4
shell:
"stringtie --merge -p {threads} -G {input.genome_gtf} "
"-o {output} {input.merged_list}"
rule stringtie_quant:
input:
merged_gtf="stringtie/merged.gtf",
sample_bam="align_hisat2/{sample}.bam"
output:
gtf="stringtie/quant/{sample}/{sample}.gtf",
ctabs=expand(
"stringtie/quant/{{sample}}/{name}.ctab",
name=['i2t', 'e2t', 'i_data', 'e_data', 't_data']
)
threads: 4
shell:
"stringtie -e -B -p {threads} -G {input.merged_gtf} "
"-o {output.gtf} {input.sample_bam}"
rule quant_all_samples:
input: expand("stringtie/quant/{sample}/{sample}.gtf", sample=SAMPLES)
rule clean:
shell:
"rm -rf align_hisat2 hisat2_index stringtie"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment