Created
November 21, 2013 10:05
-
-
Save hanfeisun/7579044 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _macs2(workflow, conf): | |
# merge all treatments into one | |
merge_bams_treat = ShellCommand( | |
"{tool} merge {output[merged]} {param[bams]}", | |
tool="samtools", | |
input=[target + ".bam" for target in conf.treatment_targets], | |
output={"merged": conf.prefix + "_treatment.bam"}) | |
merge_bams_treat.param = {"bams": " ".join(merge_bams_treat.input)} | |
if len(conf.treatment_targets) > 1: | |
attach_back(workflow, merge_bams_treat) | |
else: | |
# when there's only one treatment sample, use copying instead of merging | |
attach_back(workflow, make_copy_command(merge_bams_treat.input[0], merge_bams_treat.output["merged"])) | |
# merging step will be skipped if control sample does not exist | |
# So be careful to check whether there are control samples before using `_control.bam` | |
if len(conf.control_targets) > 1: | |
merge_bams_control = merge_bams_treat.clone | |
merge_bams_control.input = [target + ".bam" for target in conf.control_targets] | |
merge_bams_control.output = {"merged": conf.prefix + "_control.bam"} | |
merge_bams_control.param = {"bams": " ".join(merge_bams_control.input)} | |
attach_back(workflow, merge_bams_control) | |
elif len(conf.control_targets) == 1: | |
attach_back(workflow, make_copy_command(conf.control_targets[0] + ".bam", conf.prefix + "_control.bam")) | |
macs2_on_merged = attach_back(workflow, ShellCommand( | |
"{tool} callpeak -B -q 0.01 --keep-dup {param[keep_dup]} --shiftsize={param[shiftsize]} --nomodel \ | |
{param[treat_opt]} {param[control_opt]} -n {param[description]}", | |
tool="macs2", | |
input={"treat": conf.prefix + "_treatment.bam"}, | |
output={"peaks": conf.prefix + "_peaks.bed", | |
"summit": conf.prefix + "_summits.bed", | |
"treat_bdg": conf.prefix + "_treat_pileup.bdg", | |
"ENCODE": conf.prefix + "_peaks.encodePeak", | |
"peaks_xls": conf.prefix + "_peaks.xls", | |
"control_bdg": conf.prefix + "_control_lambda.bdg"}, | |
param={"description": conf.prefix, | |
"keep_dup": 1, | |
"shiftsize": 73}, | |
name="macs2_callpeak_merged")) | |
macs2_on_merged.param["treat_opt"] = "-t " + macs2_on_merged.input["treat"] | |
# control option is skipped if control samples does not exist | |
if len(conf.control_targets) >= 1: | |
macs2_on_merged.input["control"] = conf.prefix + "_control.bam" | |
macs2_on_merged.param["control_opt"] = "-c " + macs2_on_merged.input["control"] | |
else: | |
macs2_on_merged.param["control_opt"] = "" | |
macs2_on_merged.update(param=conf.items("macs2")) | |
# For bedGraphToBigwiggle bugs, we need to remove coordinates over-border coordinates | |
# As _control_lambda.bdg always exist. There are no need to check whether there are control samples. | |
bdg_trim_control = attach_back(workflow, | |
ShellCommand( | |
'{tool} intersect -a {input[bdg]} -b {input[chrom_bed]} -wa -f 1.00 > {output}', | |
tool="bedtools", | |
input={"bdg": conf.prefix + "_control_lambda.bdg", | |
'chrom_bed': conf.get_path("lib", "chrom_bed")}, | |
output=conf.prefix + "_control_lambda.bdg.tmp", | |
name="bedGraph filtering")) | |
bdg_trim_treat = bdg_trim_control.clone | |
bdg_trim_treat.input["bdg"] = conf.prefix + "_treat_pileup.bdg" | |
bdg_trim_treat.output = conf.prefix + "_treat_pileup.bdg.tmp" | |
attach_back(workflow, bdg_trim_treat) | |
bdg2bw_treat = attach_back(workflow, | |
ShellCommand( | |
"{tool} {input[bdg]} {input[chrom_len]} {output[bw]}", | |
tool="bedGraphToBigWig", | |
input={"bdg": conf.prefix + "_control_lambda.bdg.tmp", | |
"chrom_len": conf.get("lib", "chrom_len")}, | |
output={"bw": conf.prefix + "_control.bw"}, | |
name="bdg_to_bw")) | |
# prototype used here to do the similar thing on treatment file | |
bdg2bw_control = bdg2bw_treat.clone | |
bdg2bw_control.input["bdg"] = conf.prefix + "_treat_pileup.bdg.tmp" | |
bdg2bw_control.output["bw"] = conf.prefix + "_treat.bw" | |
attach_back(workflow, bdg2bw_control) | |
attach_back(workflow, PythonCommand( | |
stat_macs2, | |
input={"macs2_peaks_xls": conf.prefix + "_peaks.xls", | |
"db": ChiLinQC_db, | |
"template": rlang_template}, | |
output={"json": conf.json_prefix + "_macs2.json", | |
"R": conf.prefix + "_macs2.R", | |
"pdf": conf.prefix + "_macs2.pdf"}, | |
param={"id": conf.id}, | |
name="MACS2 summary")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Hanfei,
I have created a Snakemake version of the workflow. Actually the logic control that you need here is very well possible with snakemake, using functions as input and some python code inside the run blocks. Further, I think the code is a bit less verbose, however this might very well be a matter of personal preference.
Please have a look:
https://gist.github.com/johanneskoester/7584671
Of course the stuff is untested and I might have misinterpreted some things.