Last active
February 2, 2017 20:50
-
-
Save ihodes/426666b3836929304c758687a033d268 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#use "topfind";; | |
#thread | |
#require "coclobas.ketrew_backend,biokepi,cmdliner,nonstd";; | |
(* You must have a biokepi_machine.ml file in this directory that defines
   biokepi_machine at the top level. *)
#use "biokepi_machine.ml" | |
open Nonstd | |
(** The pipeline proper, written against the abstract Biokepi EDSL so that any
    implementation of [Biokepi.EDSL.Semantics] can interpret it. *)
module Pipeline (Bfx : Biokepi.EDSL.Semantics) = struct

  (** [pipeline ~bam_path ~name ~reference_build] declares the BAM found at
      [bam_path] as sample [name], converts it to FASTQ with the [`PE] option,
      and groups three results in a single [Bfx.list]: FastQC and Kallisto
      (both computed from the FASTQ) and Cufflinks (computed from the BAM). *)
  let pipeline ~bam_path ~name ~reference_build =
    let source = Bfx.input_url bam_path in
    let aligned = Bfx.bam ~sample_name:name ~reference_build source in
    let reads = Bfx.bam_to_fastq `PE aligned in
    let qc_report = Bfx.fastqc reads in
    let kallisto_out = Bfx.kallisto ~reference_build reads in
    let cufflinks_out = Bfx.cufflinks ~reference_build aligned in
    (* Each result is erased to unit so they can share one list. *)
    Bfx.list
      [
        Bfx.to_unit qc_report;
        Bfx.to_unit kallisto_out;
        Bfx.to_unit cufflinks_out;
      ]

  (** [run ~bam_path ~name ~reference_build] passes the whole pipeline,
      collapsed to a single unit value, to [Bfx.observe]. *)
  let run ~bam_path ~name ~reference_build =
    Bfx.observe (fun () ->
        Bfx.to_unit (pipeline ~bam_path ~name ~reference_build))
end
(** Command-line argument declarations. Nothing here is specific to Biokepi;
    it only uses the Cmdliner library.

    [args f] applies the term [f] to the four parsed arguments, in this order:
    [`Name], [`Dry_run], [`Bam_path], [`Reference_build]. *)
let args f =
  let open Cmdliner in
  let open Cmdliner.Term in
  (* Every parsed value is wrapped in a polymorphic-variant tag so that the
     strings and the bool coming from the CLI are further typed and cannot be
     confused with one another by the consumer. *)
  let run_name_arg =
    pure (fun s -> `Name s)
    $ Arg.(
        required
        & opt (some string) None
        & info ["name"] ~doc:"Name of the run")
  in
  let dry_run_arg =
    pure (fun s -> `Dry_run s)
    $ Arg.(
        value
        & flag
        & info ["dry-run"] ~doc:"Don't submit this job to Ketrew")
  in
  let bam_path_arg =
    pure (fun s -> `Bam_path s)
    $ Arg.(
        required
        & opt (some string) None
        & info ["bam"] ~doc:"Path to the BAM")
  in
  let reference_build_arg =
    (* Accepted spellings on the command line, each mapped to itself. *)
    let references =
      ["b37decoy", "b37decoy"; "b37", "b37"; "b38", "b38"; "mm10", "mm10"]
    in
    pure (fun s -> `Reference_build s)
    $ Arg.(
        required
        & opt (some (enum references)) None
        & info ["reference-build"; "R"] ~doc:"Reference build to use.")
  in
  f $ run_name_arg $ dry_run_arg $ bam_path_arg $ reference_build_arg
(** Compile the pipeline and submit it to Ketrew.

    [run ()] returns a function of the four tagged CLI values ([`Name],
    [`Dry_run], [`Bam_path], [`Reference_build]). The workflow is always
    compiled; when [dry_run] is true it is not submitted and a notice is
    printed instead. *)
let run () =
  fun
    (`Name name)
    (`Dry_run dry_run)
    (`Bam_path bam_path)
    (`Reference_build reference_build)
  ->
    (* This Config module is required to pass configuration & the
       Biokepi.Machine.t (describing the compute infrastructure) to the
       To_workflow compiler. This is used to compile the above pipeline to
       a bunch of Ketrew jobs. *)
    let module Config = struct
      include Biokepi.EDSL.Compile.To_workflow.Defaults

      (* This is where all the files generated by the workflow will end up:
         a per-run sub-directory of the machine's work directory. *)
      let work_dir =
        let subname = sprintf "results-%s-%s" reference_build name in
        Filename.concat (Biokepi.Machine.work_dir biokepi_machine) subname

      let machine = biokepi_machine
    end in
    let module Workflow_compiler =
      Biokepi.EDSL.Compile.To_workflow.Make (Config) in
    let module Compiled_pipeline = Pipeline (Workflow_compiler) in
    let workflow =
      Compiled_pipeline.run ~bam_path ~name ~reference_build
      |> Biokepi.EDSL.Compile.To_workflow.File_type_specification
         .get_unit_workflow ~name
    in
    if dry_run then
      printf "dry run, not submitting\n"
    else
      (* Bind to [()] rather than [_] so the compiler rejects an accidental
         partial application of [submit_workflow]. *)
      let () = Ketrew.Client.submit_workflow workflow ~add_tags:[name] in
      printf "Submitted workflow %s\n" name
(** Build the complete CLI: the [run] function is lifted into a Cmdliner term
    and fed the arguments declared in [args]. Returns the [(term, info)] pair
    expected by [Cmdliner.Term.eval]. *)
let cli () =
  let open Cmdliner in
  let info = Term.info "Tavi's pipeline" ~doc:"Kallisto magic" in
  let lifted_run = Term.pure (run ()) in
  let term = args lifted_run in
  (term, info)
(** Entry point: evaluate the CLI and translate the outcome into an exit
    status. A successful evaluation has already run the pipeline and yields
    unit; an evaluation error exits with status 1; a help or version request
    exits with status 0. *)
let () =
  let open Cmdliner in
  (* Every case of the result is listed (no wildcard) so the compiler flags
     any constructor that is not handled. *)
  match Term.eval (cli ()) with
  | `Ok () -> ()
  | `Error _ -> exit 1
  | `Version | `Help -> exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment