Skip to content

Instantly share code, notes, and snippets.

@hanfeisun
Created November 21, 2013 10:05
Show Gist options
  • Save hanfeisun/7579044 to your computer and use it in GitHub Desktop.
Save hanfeisun/7579044 to your computer and use it in GitHub Desktop.
def _macs2(workflow, conf):
# merge all treatments into one
merge_bams_treat = ShellCommand(
"{tool} merge {output[merged]} {param[bams]}",
tool="samtools",
input=[target + ".bam" for target in conf.treatment_targets],
output={"merged": conf.prefix + "_treatment.bam"})
merge_bams_treat.param = {"bams": " ".join(merge_bams_treat.input)}
if len(conf.treatment_targets) > 1:
attach_back(workflow, merge_bams_treat)
else:
# when there's only one treatment sample, use copying instead of merging
attach_back(workflow, make_copy_command(merge_bams_treat.input[0], merge_bams_treat.output["merged"]))
# merging step will be skipped if control sample does not exist
# So be careful to check whether there are control samples before using `_control.bam`
if len(conf.control_targets) > 1:
merge_bams_control = merge_bams_treat.clone
merge_bams_control.input = [target + ".bam" for target in conf.control_targets]
merge_bams_control.output = {"merged": conf.prefix + "_control.bam"}
merge_bams_control.param = {"bams": " ".join(merge_bams_control.input)}
attach_back(workflow, merge_bams_control)
elif len(conf.control_targets) == 1:
attach_back(workflow, make_copy_command(conf.control_targets[0] + ".bam", conf.prefix + "_control.bam"))
macs2_on_merged = attach_back(workflow, ShellCommand(
"{tool} callpeak -B -q 0.01 --keep-dup {param[keep_dup]} --shiftsize={param[shiftsize]} --nomodel \
{param[treat_opt]} {param[control_opt]} -n {param[description]}",
tool="macs2",
input={"treat": conf.prefix + "_treatment.bam"},
output={"peaks": conf.prefix + "_peaks.bed",
"summit": conf.prefix + "_summits.bed",
"treat_bdg": conf.prefix + "_treat_pileup.bdg",
"ENCODE": conf.prefix + "_peaks.encodePeak",
"peaks_xls": conf.prefix + "_peaks.xls",
"control_bdg": conf.prefix + "_control_lambda.bdg"},
param={"description": conf.prefix,
"keep_dup": 1,
"shiftsize": 73},
name="macs2_callpeak_merged"))
macs2_on_merged.param["treat_opt"] = "-t " + macs2_on_merged.input["treat"]
# control option is skipped if control samples does not exist
if len(conf.control_targets) >= 1:
macs2_on_merged.input["control"] = conf.prefix + "_control.bam"
macs2_on_merged.param["control_opt"] = "-c " + macs2_on_merged.input["control"]
else:
macs2_on_merged.param["control_opt"] = ""
macs2_on_merged.update(param=conf.items("macs2"))
# For bedGraphToBigwiggle bugs, we need to remove coordinates over-border coordinates
# As _control_lambda.bdg always exist. There are no need to check whether there are control samples.
bdg_trim_control = attach_back(workflow,
ShellCommand(
'{tool} intersect -a {input[bdg]} -b {input[chrom_bed]} -wa -f 1.00 > {output}',
tool="bedtools",
input={"bdg": conf.prefix + "_control_lambda.bdg",
'chrom_bed': conf.get_path("lib", "chrom_bed")},
output=conf.prefix + "_control_lambda.bdg.tmp",
name="bedGraph filtering"))
bdg_trim_treat = bdg_trim_control.clone
bdg_trim_treat.input["bdg"] = conf.prefix + "_treat_pileup.bdg"
bdg_trim_treat.output = conf.prefix + "_treat_pileup.bdg.tmp"
attach_back(workflow, bdg_trim_treat)
bdg2bw_treat = attach_back(workflow,
ShellCommand(
"{tool} {input[bdg]} {input[chrom_len]} {output[bw]}",
tool="bedGraphToBigWig",
input={"bdg": conf.prefix + "_control_lambda.bdg.tmp",
"chrom_len": conf.get("lib", "chrom_len")},
output={"bw": conf.prefix + "_control.bw"},
name="bdg_to_bw"))
# prototype used here to do the similar thing on treatment file
bdg2bw_control = bdg2bw_treat.clone
bdg2bw_control.input["bdg"] = conf.prefix + "_treat_pileup.bdg.tmp"
bdg2bw_control.output["bw"] = conf.prefix + "_treat.bw"
attach_back(workflow, bdg2bw_control)
attach_back(workflow, PythonCommand(
stat_macs2,
input={"macs2_peaks_xls": conf.prefix + "_peaks.xls",
"db": ChiLinQC_db,
"template": rlang_template},
output={"json": conf.json_prefix + "_macs2.json",
"R": conf.prefix + "_macs2.R",
"pdf": conf.prefix + "_macs2.pdf"},
param={"id": conf.id},
name="MACS2 summary"))
@johanneskoester
Copy link

Hi Hanfei,
I have created a Snakemake version of the workflow. Actually the logic control that you need here is very well possible with snakemake, using functions as input and some python code inside the run blocks. Further, I think the code is a bit less verbose, however this might very well be a matter of personal preference.
Please have a look:
https://gist.github.com/johanneskoester/7584671

Of course the stuff is untested and I might have misinterpreted some things.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment