Skip to content

Instantly share code, notes, and snippets.

@ihodes
Last active February 2, 2017 20:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ihodes/426666b3836929304c758687a033d268 to your computer and use it in GitHub Desktop.
Save ihodes/426666b3836929304c758687a033d268 to your computer and use it in GitHub Desktop.
#use "topfind";;
#thread
#require "coclobas.ketrew_backend,biokepi,cmdliner,nonstd";;
(* You must have a biokepi_machine.ml file in this directory that has a
biokepi_machine defined at the top level. *)
#use "biokepi_machine.ml"
open Nonstd
(** The analysis pipeline, written against the abstract
    [Biokepi.EDSL.Semantics] signature so that whichever implementation of
    that signature is supplied as [Bfx] decides how it is interpreted. *)
module Pipeline (Bfx : Biokepi.EDSL.Semantics) = struct
  (** [pipeline ~bam_path ~name ~reference_build] declares the BAM found at
      [bam_path] as sample [name] on [reference_build], converts it to FASTQ
      (using the [`PE] option), and returns a [Bfx.list] of three results,
      each passed through [Bfx.to_unit]:
      - FastQC, run on the FASTQ;
      - Kallisto, run on the FASTQ;
      - Cufflinks, run on the input BAM itself. *)
  let pipeline ~bam_path ~name ~reference_build =
    let input_bam =
      Bfx.(bam ~sample_name:name ~reference_build (input_url bam_path))
    in
    let reads = Bfx.bam_to_fastq `PE input_bam in
    let quality_report = Bfx.fastqc reads in
    let kallisto_output = Bfx.kallisto ~reference_build reads in
    let cufflinks_output = Bfx.cufflinks ~reference_build input_bam in
    Bfx.list
      [
        Bfx.to_unit quality_report;
        Bfx.to_unit kallisto_output;
        Bfx.to_unit cufflinks_output;
      ]

  (** [run ~bam_path ~name ~reference_build] hands the whole pipeline,
      collapsed with [Bfx.to_unit], to [Bfx.observe] as a thunk. *)
  let run ~bam_path ~name ~reference_build =
    let build_pipeline () =
      Bfx.to_unit (pipeline ~bam_path ~name ~reference_build)
    in
    Bfx.observe build_pipeline
end
(** [args f] applies the Cmdliner term [f] to the four command-line
    arguments of this script, in this order: [--name], [--dry-run], [--bam]
    and [--reference-build] (alias [-R]).

    This part only describes the CLI; it relies on the Cmdliner library and
    is not specific to Biokepi. *)
let args f =
  let open Cmdliner in
  let open Cmdliner.Term in
  (* Every parsed value is wrapped in its own polymorphic variant tag, so the
     plain strings and bools coming from the CLI cannot be mixed up once they
     are handed to [f]. *)
  let name_term =
    pure (fun value -> `Name value)
    $ Arg.(
        required
        & opt (some string) None
        & info ["name"] ~doc:"Name of the run")
  in
  let dry_run_term =
    pure (fun value -> `Dry_run value)
    $ Arg.(
        value & flag
        & info ["dry-run"] ~doc:"Don't submit this job to Ketrew")
  in
  let bam_path_term =
    pure (fun value -> `Bam_path value)
    $ Arg.(
        required
        & opt (some string) None
        & info ["bam"] ~doc:"Path to the BAM")
  in
  let reference_build_term =
    (* Accepted spellings on the command line, each mapped to itself. *)
    let known_builds =
      [
        ("b37decoy", "b37decoy");
        ("b37", "b37");
        ("b38", "b38");
        ("mm10", "mm10");
      ]
    in
    pure (fun value -> `Reference_build value)
    $ Arg.(
        required
        & opt (some (enum known_builds)) None
        & info ["reference-build"; "R"] ~doc:"Reference build to use.")
  in
  f $ name_term $ dry_run_term $ bam_path_term $ reference_build_term
(** [run ()] returns the function that compiles the pipeline and submits it.

    The returned function takes the four tagged CLI values ([`Name],
    [`Dry_run], [`Bam_path], [`Reference_build]). It always compiles the
    pipeline into a workflow; the workflow is then submitted through
    [Ketrew.Client.submit_workflow], tagged with the run name, unless the
    dry-run flag is set, in which case only a message is printed. *)
let run () (`Name name) (`Dry_run dry_run) (`Bam_path bam_path)
    (`Reference_build reference_build) =
  (* The [To_workflow] compiler is parameterised by this configuration
     module, which carries the [Biokepi.Machine.t] value [biokepi_machine]
     (loaded from biokepi_machine.ml) together with the directory receiving
     the files generated by the workflow. *)
  let module Config = struct
    include Biokepi.EDSL.Compile.To_workflow.Defaults

    (* Results go to "results-<reference_build>-<name>" under the machine's
       own work directory. *)
    let work_dir =
      let results_dir = sprintf "results-%s-%s" reference_build name in
      Filename.concat (Biokepi.Machine.work_dir biokepi_machine) results_dir

    let machine = biokepi_machine
  end in
  let module Workflow_compiler = Biokepi.EDSL.Compile.To_workflow.Make (Config)
  in
  let module Compiled_pipeline = Pipeline (Workflow_compiler) in
  let compiled = Compiled_pipeline.run ~bam_path ~name ~reference_build in
  let workflow =
    Biokepi.EDSL.Compile.To_workflow.File_type_specification.get_unit_workflow
      ~name compiled
  in
  match dry_run with
  | true -> printf "dry run, not submitting\n"
  | false ->
    (* Whatever the submission returns is discarded. *)
    let _ = Ketrew.Client.submit_workflow workflow ~add_tags:[name] in
    printf "Submitted workflow %s\n" name
(** [cli ()] assembles the complete command line: the [run] function lifted
    into a Cmdliner term and fed its arguments by [args], paired with the
    program's name and one-line description. *)
let cli () =
  let open Cmdliner in
  let term = args Term.(pure (run ())) in
  let info = Term.(info "Tavi's pipeline" ~doc:"Kallisto magic") in
  (term, info)
(** Script entry point: evaluates the CLI, which runs the pipeline submission
    as part of a successful evaluation, then picks the exit status. *)
let () =
  let outcome = Cmdliner.Term.eval (cli ()) in
  match outcome with
  (* The work was already done while evaluating the term. *)
  | `Ok () -> ()
  | `Error _ -> exit 1
  (* Cmdliner handled --version / --help itself. *)
  | `Version | `Help -> exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment