Skip to content

Instantly share code, notes, and snippets.

@willfurnass
Created January 11, 2017 12:27
Show Gist options
  • Save willfurnass/c5b5e82b198d8f468d55b7661e556c79 to your computer and use it in GitHub Desktop.
Save willfurnass/c5b5e82b198d8f468d55b7661e556c79 to your computer and use it in GitHub Desktop.
Simple test of the Ruffus workflow manager
#!/usr/bin/env python
from __future__ import print_function

from itertools import islice

from ruffus import transform, suffix, pipeline_run
# A test of using Ruffus to locally run a very simple pipeline comprised of
# tasks defined as Python functions
# This can be run from a conda environment created using:
# conda create -n drmaatest -c bioconda python=2.7 ruffus drmaa
# Pipeline inputs: each file contains multiple lines with one word per line.
starting_files = [
    "a.stg1",
    "b.stg1",
    "c.stg1",
]
@transform(starting_files, suffix(".stg1"), ".stg2")
def stg1_func(input_file, output_file):
    """Copy at most the first three lines of input_file to output_file.

    Registered as a Ruffus task that maps each ``*.stg1`` starting file to a
    ``*.stg2`` file of the same stem.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the text file to create (or overwrite).

    If the input has fewer than three lines, all of them are copied.
    """
    with open(input_file, 'r') as src, open(output_file, 'w') as dst:
        # islice stops after three lines without reading the rest of the file.
        dst.writelines(islice(src, 3))
@transform(stg1_func, suffix(".stg2"), ".stg3")
def stg2_func(input_file, output_file):
    """Join the stripped lines of input_file into one line of output_file.

    Each input line has its surrounding whitespace removed; the pieces are
    then joined with single spaces and written followed by a newline.
    """
    with open(input_file, 'r') as src, open(output_file, 'w') as dst:
        words = [text.strip() for text in src]
        dst.write(' '.join(words) + '\n')
if __name__ == '__main__':
    # Only the final stage is requested as a target; stg2_func takes
    # stg1_func's output as its input (see its @transform decorator).
    final_tasks = [stg2_func]
    pipeline_run(target_tasks=final_tasks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment