Skip to content

Instantly share code, notes, and snippets.

@alexiswl
Last active March 12, 2019 05:39
Show Gist options
  • Save alexiswl/6974927ab8da81f1e3dc2222db467486 to your computer and use it in GitHub Desktop.
Save alexiswl/6974927ab8da81f1e3dc2222db467486 to your computer and use it in GitHub Desktop.
Generate a Singularity container recipe, wrapper scripts and module files from a quay.io BioContainers image
#!/usr/bin/env python3
import yaml
import pandas as pd
import argparse
import os
import in_place
import logging
import shutil
import sys
import re
# Emit INFO-level (and more severe) log records
logging.basicConfig(level=logging.INFO)

# Maps a field name of a yaml entry to the placeholder token that is replaced
# by that field's value inside the template files
VALID_REPLACEMENTS = {
    "executable_name": "__EXECUTABLE_NAME__",
    "software": "__SOFTWARE__",
    "software_quay": "__SOFTWARE_QUAY__",
    "version": "__VERSION__",
    "version_quay": "__VERSION_QUAY__",
    "clean_env": "__CLEAN_ENV__",
}

# Globals
# Name of each sub-directory created underneath <output-dir>/<software>/<version>
subdirs = {
    "module_dir": "module",
    "shell_dir": "shell",
    "singularity_dir": "image",
    "binary_dir": "bin",
}

# Command-line attributes that may each hold the path to a template file
args_list = [
    "module_template",
    "bash_template",
    "singularity_template",
]

# Keys that are written out as sections of the singularity recipe
singularity_keys = [
    'runscript',
    'run',
    'labels',
    'help',
    'env',
    'environment',
    'post',
    'install',
]
def get_args():
    """
    Build the command-line parser and parse ``sys.argv``.

    :return: argparse namespace with the attributes ``yaml``, ``output_dir``,
             ``module_template``, ``bash_template`` and ``singularity_template``
    """
    # Each fragment ends with a space so that the implicitly concatenated
    # literals do not run into each other in the rendered help text
    parser = argparse.ArgumentParser(
        description="Import a yaml file and write out all of the "
                    "containers and modules you need. This doesn't "
                    "build the container but does: "
                    "1 Create a recipe for the container to be built "
                    "2 Creates bash scripts(s) to wrap around runscripts "
                    "in the container "
                    "3 Creates modules files that link to the container "
                    "4 Creates an installation file to also be built.")

    # Input and outputs
    parser.add_argument("--yaml", type=str, required=True,
                        help="Path to yaml file")
    parser.add_argument("--output-dir", type=str, required=False, default=os.getcwd(),
                        help="Directory to write the generated files into "
                             "(defaults to the current working directory)")

    # Specify template files
    parser.add_argument("--module-template", type=str, required=False, default=None,
                        help="Provide a module template")
    parser.add_argument("--bash-template", type=str, required=False, default=None,
                        help="Provide a bash template")
    parser.add_argument("--singularity-template", type=str, required=True, default=None,
                        help="Provide a singularity template")

    return parser.parse_args()
def check_args(args):
    """
    Validate the parsed command-line arguments and create the output directory.

    Exits the program with status 1 when the yaml file is missing, when the
    parent of the output directory does not exist, or when any template that
    was supplied on the command line is not an existing file.

    :param args: namespace returned by get_args()
    :return: None
    """
    # Make sure yaml exists
    if not os.path.isfile(args.yaml):
        logging.error("Error, yaml file must exists, cannot find %s", args.yaml)
        sys.exit(1)

    # Make sure output-dir can be created
    if not os.path.isdir(os.path.dirname(os.path.abspath(args.output_dir))):
        logging.error("Error, cannot create dir %s, make sure its parent exists", args.output_dir)
        sys.exit(1)

    # Check each supplied template exists before touching the filesystem.
    # A missing template is fatal: carrying on would only fail later when
    # the template is copied.
    for arg in args_list:
        template_path = getattr(args, arg, None)
        if template_path is not None and not os.path.isfile(template_path):
            logging.error("Error could not find file %s", template_path)
            sys.exit(1)

    # Create output
    if not os.path.isdir(args.output_dir):
        os.mkdir(args.output_dir)
def create_subdirs(output_dir, args):
    """
    Create the sub-directories needed for one software/version.

    A directory is only created for the templates that were actually given
    on the command line. When a bash template was given, the binary
    directory is created alongside the shell directory.

    :param output_dir: directory underneath which the sub-directories are made
    :param args: namespace returned by get_args()
    :return: dict mapping the sub-directory key to the path that was created
    """
    created = {}

    # One directory key per template argument, in matching order
    dir_keys = ["module_dir", "shell_dir", "singularity_dir"]

    for dir_key, template_arg in zip(dir_keys, args_list):
        # No template of this kind was supplied, nothing to create
        if getattr(args, template_arg, None) is None:
            continue

        target = os.path.join(output_dir, subdirs[dir_key])
        created[dir_key] = target
        if not os.path.isdir(target):
            os.makedirs(target)

        # Only the shell directory comes with a companion binary directory
        if dir_key != 'shell_dir':
            continue

        binary_target = os.path.join(output_dir, 'bin')
        created['binary_dir'] = binary_target
        if not os.path.isdir(binary_target):
            os.makedirs(binary_target)

    return created
def import_yaml(yaml_file):
    """
    Read the yaml configuration and flatten it into a pandas dataframe.

    :param yaml_file: path to the yaml file
    :return: dataframe produced by json_normalize on the parsed yaml
    """
    # safe_load only builds plain python objects; a bare yaml.load() can
    # construct arbitrary objects and newer PyYAML releases reject it
    # unless a Loader is passed explicitly
    with open(yaml_file, 'r') as f:
        config_data = yaml.safe_load(f)

    # pandas exposes json_normalize at the top level from 1.0 onwards; fall
    # back to the older location for earlier releases
    normalize = getattr(pd, "json_normalize", None)
    if normalize is None:
        normalize = pd.io.json.json_normalize

    # Load as pandas dataframe
    return normalize(config_data)
def modify_template(yaml_item, template_file, app=None):
    """
    Substitute the placeholder tokens of a template file, editing it in place.

    :param yaml_item: configuration of one software, indexable by field name
    :param template_file: path to the file that is rewritten line by line
    :param app: optional app mapping; its 'name' replaces the app placeholder,
                an empty string is used when no app is given
    :return: None
    """
    with in_place.InPlace(template_file) as handle:
        for text in handle:
            # Tokens that map directly onto a field of the yaml entry
            for field, placeholder in VALID_REPLACEMENTS.items():
                text = text.replace(str(placeholder), str(yaml_item[field]))

            # Upper-cased software name is derived rather than read from yaml
            if "__SOFTWARE_UPPER__" in text:
                text = text.replace("__SOFTWARE_UPPER__", yaml_item["software"].upper())

            # Both bind-path tokens render as "(path1 path2 ...)"; the yaml
            # field is only looked up when a token is actually present
            for bind_token in ("__BIND_PATHS_ARRAY__", "__BIND_PATHS__"):
                if bind_token in text:
                    rendered = ' '.join("%s" % bind_path
                                        for bind_path in yaml_item['bind_paths'])
                    text = text.replace(bind_token, "(%s)" % rendered)

            if '__APP__' in text:
                app_value = "" if app is None else app['name']
                text = text.replace("__APP__", app_value)

            handle.write(text)
def modify_singularity_template(yaml_item, template_file, app=None, app_name=None):
    """
    Append sections to the end of a singularity recipe.

    Without an app, one "%<key>" section is appended for every singularity
    key that is available as a non-None attribute of yaml_item. With an app,
    one "%app<key> <app_name>" section is appended for every singularity key
    that the app contains.

    :param yaml_item: configuration of one software (read via attribute access)
    :param template_file: path to the recipe that is appended to
    :param app: optional mapping holding the sections of a single app
    :param app_name: name written after each "%app<key>" header
    :return: None
    """
    with open(template_file, 'a') as recipe_h:
        # Work with app data
        if app is not None:
            for section in singularity_keys:
                if section not in app.keys():
                    continue
                recipe_h.write("%%app%s %s\n" % (section, app_name))
                recipe_h.write("%s\n" % app[section])
            return

        # Work with series data
        for section in singularity_keys:
            content = getattr(yaml_item, section, None)
            if content is None:
                continue
            recipe_h.write("%%%s\n" % section)
            recipe_h.write("%s\n" % content)
def _split_config(config_df):
    """Return one series per top-level yaml key, indexed by that key's field names."""
    # Everything before the last dot of a flattened column is the yaml key.
    # Sorted so that the processing order is the same on every run.
    items = sorted({col.rsplit('.', 1)[0] for col in config_df.columns})

    config_series = []
    for software_version in items:
        # Escape the key (it typically contains dots, e.g. "star_2.7.0") and
        # anchor on the trailing dot so that "star_2.7" cannot also pick up
        # the columns of "star_2.7.0"
        prefix = re.compile(r"^%s\." % re.escape(software_version))
        software_df = config_df.filter(axis='columns', regex=prefix.pattern)
        software_df = software_df.rename(columns=lambda name, prefix=prefix: prefix.sub("", name))
        # Rows belonging to other yaml keys are empty for these columns
        software_df = software_df.dropna(axis='rows').reset_index()
        config_series.append(software_df.transpose()[0])
    return config_series


def _install_bash_wrapper(bash_template, item, software_subdirs, executable_name, app=None):
    """Copy the bash template to the shell dir, fill it in and symlink it from the binary dir."""
    # Set names
    bash_file = os.path.join(software_subdirs['shell_dir'], executable_name + ".sh")
    binary_file = os.path.join(software_subdirs['binary_dir'], executable_name)
    # Copy file
    shutil.copy(bash_template, bash_file)
    # Modify bash
    modify_template(item, bash_file, app=app)
    # Link to binary, relative so that the whole tree can be relocated
    if os.path.islink(binary_file):
        os.unlink(binary_file)
    os.symlink(os.path.relpath(bash_file, os.path.dirname(binary_file)),
               binary_file)


def main():
    """
    Generate the module file, bash wrapper(s) and singularity recipe for
    every software listed in the yaml configuration.

    For each software the files are written underneath
    <output-dir>/<software>/<version>. Apps listed under a software get
    their own bash wrapper and have their sections appended to the recipe.
    """
    # Get args
    args = get_args()

    # Check files/directories exist
    check_args(args)

    # Import yaml
    config_df = import_yaml(args.yaml)

    # Break by name into list of series
    config_dfs = _split_config(config_df)

    # Iterate through each config and generate module, bash and singularity file
    for item in config_dfs:
        # str() because a version such as 2.7 is parsed by yaml as a float
        version = str(item.version)

        # Create subdirs for module, image and shell
        software_subdirs = create_subdirs(os.path.join(args.output_dir, item.software, version), args)

        # Copy and modify over module template
        if args.module_template is not None:
            module_file = os.path.join(software_subdirs['module_dir'], 'module')
            shutil.copy(args.module_template, module_file)
            modify_template(item, module_file)

        # Copy over bash template, link to binary and edit
        if args.bash_template is not None:
            _install_bash_wrapper(args.bash_template, item, software_subdirs,
                                  item.executable_name)

        # Copy over singularity template, and modify
        singularity_recipe = None
        if args.singularity_template is not None:
            # Set names
            image_stem = '_'.join([item.software, version])
            singularity_recipe = os.path.join(software_subdirs['singularity_dir'],
                                              image_stem + ".recipe")
            singularity_image = os.path.join(software_subdirs['singularity_dir'],
                                             image_stem + ".simg")
            # Copy template
            shutil.copy(args.singularity_template, singularity_recipe)
            # Modify template
            modify_template(item, singularity_recipe)
            modify_singularity_template(item, singularity_recipe, app=None)
            # Log executable
            logging.info("Now run 'sudo singularity build %s %s'", singularity_image, singularity_recipe)

        # Iterate through apps
        apps = getattr(item, 'apps', None)
        if apps is None:
            continue

        for app_item in apps:
            for app_name, app in app_item.items():
                # Create new executable for the app
                if args.bash_template is not None:
                    _install_bash_wrapper(args.bash_template, item, software_subdirs,
                                          app['executable_name'], app=app)

                # Write the app sections to the end of the recipe
                if args.singularity_template is not None:
                    modify_singularity_template(item, singularity_recipe, app=app, app_name=app_name)


if __name__ == "__main__":
    main()
# Example configuration read by the generator script through its --yaml option.
# The file is a list with one single-key mapping per software; in this example
# the key is <software>_<version>.
# The executable_name, software, software_quay, version, version_quay and
# clean_env fields fill the matching placeholders of the templates, and
# bind_paths is rendered as a bash array. Fields named after a recipe section
# (runscript, help, labels, ...) are appended to the singularity recipe.
# Each entry under apps gets its own bash wrapper and its own app sections in
# the recipe.
- star_2.7.0:
executable_name: STAR
runscript: |
exec STAR "${@}"
help: |
Run the STAR aligner
software: star
bind_paths:
- /data
- /Databases
version: 2.7.0
labels: |
MAINTAINER Alexis Lucattini
VERSION 2.7.0b
CONDA_VERSION 3
BUILD_DATE 11/03/2019
software_quay: star
version_quay: 2.7.0b--0
clean_env: "1"
apps:
- longSTAR:
name: long
executable_name: STARlong
run: |
exec STARlong "${@}"
help: |
Run the STARlong version
# Second software entry; this one defines no apps
- centrifuge_1.0.4:
clean_env: "1"
executable_name: centrifuge
runscript: |
exec centrifuge "${@}"
help: |
Run the centrifuge metagenomic classifier
software: centrifuge
bind_paths:
- /data
- /Databases
version: 1.0.4
labels: |
MAINTAINER Alexis Lucattini
VERSION 1.0.4
CONDA_VERSION 3
BUILD_DATE 11/03/2019
software_quay: centrifuge
version_quay: 1.0.4_beta--py36pl526he941832_2
#!/bin/bash

# Wrapper template: runs the software inside its singularity image.
# The double-underscore tokens below are filled in by the generator script.

# Write echo to stderr
echoerr() { echo "$@" 1>&2; }

# Global variables (dry-run mode is controlled by the DRY_RUN_<SOFTWARE_UPPER> env variable)
## Name of software (for container, this may be different to the command)
SOFTWARE="__SOFTWARE__"
## Software version
VERSION="__VERSION__"
## WHAT Bind paths are to be set by default
BIND_PATHS_ARRAY=__BIND_PATHS_ARRAY__
## Would we like a clean environment 0: No, 1: Yes
CLEAN_ENV=__CLEAN_ENV__
## Do we need a specific app to run
APP="__APP__"

# Local variables (Standard between images)
HERE=$(dirname "${BASH_SOURCE[0]}")
CONTAINER_DIR=$(dirname "${HERE}")
IMAGE="${CONTAINER_DIR}/image/${SOFTWARE}_${VERSION}.simg"
SOFTWARE_UPPER=__SOFTWARE_UPPER__

# Initialise command opts
# Kept as an array so that every option survives as exactly one word
COMMAND_OPTS=()

# Check for clean env
if [[ "${CLEAN_ENV}" == "1" ]]; then
  COMMAND_OPTS+=(--cleanenv)
elif [[ "${CLEAN_ENV}" != "0" ]]; then
  echoerr "Clean env toggle not specified correctly, must be between zero or one, not ${CLEAN_ENV}"
  exit 1
fi

# Check debugger
# Name of the env variable that toggles the dry run for this software
DRY_RUN_VAR="DRY_RUN_${SOFTWARE_UPPER}"
if [[ -v "${DRY_RUN_VAR}" ]]; then
  echoerr "This will be a dry-run of the software"
  # Indirect expansion reads the variable whose name is held in DRY_RUN_VAR
  DRY_RUN="${!DRY_RUN_VAR}"
  echoerr "Verbosity level has been set at ${DRY_RUN}"
else
  DRY_RUN=0
fi

# Check image exists
if [[ ! -f "${IMAGE}" ]]; then
  echoerr "Cannot find image. ${IMAGE} does not exist"
  exit 1
fi

# Check / add APP
if [[ -n "${APP}" ]]; then
  # Check app is inside the image by using the grep exit code
  if grep -q -- "${APP}" <(singularity apps "${IMAGE}"); then
    COMMAND_OPTS+=(--app "${APP}")
  else
    echoerr "Could not find app ${APP} in image ${IMAGE}"
    exit 1
  fi
fi

# Add in bind_paths
for BIND_PATH in "${BIND_PATHS_ARRAY[@]}"; do
  # Check path exists in filesystem before binding
  if [[ ! -d "${BIND_PATH}" ]]; then
    echoerr "Could not bind path ${BIND_PATH}. Exiting"
    exit 1
  fi
  COMMAND_OPTS+=(--bind "${BIND_PATH}:${BIND_PATH}")
done

# Do we need to unset the xdg runtime dir
if [[ -n "${XDG_RUNTIME_DIR:-}" ]]; then
  unset XDG_RUNTIME_DIR
fi

# Set command
# Built as an array and run without eval so that user arguments containing
# spaces or shell metacharacters reach the container unchanged
COMMAND=(singularity run "${COMMAND_OPTS[@]}" "${IMAGE}" "$@")

# Would you like to see the environment we're running it in
if [[ "${DRY_RUN}" == "2" ]]; then
  echoerr ""
  echoerr "### Printing out current environment ###"
  # Sent to stderr so that it stays together with the markers around it
  printenv 1>&2
  echoerr "### Completed printing of the environment ###"
  echoerr ""
fi

# Would you like to printout the command we're running
# Or run it instead?
if [[ "${DRY_RUN}" != "0" ]]; then
  echoerr ""
  echoerr "### Would have run command ###"
  echoerr "${COMMAND[*]}"
  echoerr "### Completed printing of the command ###"
  echoerr ""
else
  # Run command
  "${COMMAND[@]}"
fi
#%Module######################################################################
#
# __SOFTWARE__ __VERSION__ modulefile
#
# Template: the double-underscore tokens are filled in by the generator script.

# get software and version in 1
set components [ file split [ module-info name ] ]
set software [ lindex $components 0 ]
set version [ lindex $components 1 ]

# Get help
proc ModulesHelp { } {
    global version modroot
    puts stderr "Sets up the paths you need to use __SOFTWARE__ version $version."
    puts stderr "Note this application uses singularity containers and aliases please ensure that: "
    puts stderr "1. You own your own home directory"
    puts stderr "2. Your data is in either of __BIND_PATHS__"
    puts stderr "To use the DRY run mode, please run export DRY_RUN___SOFTWARE_UPPER__=1"
    puts stderr "To use verbose dry run mode (which also prints the system environment), please use export DRY_RUN___SOFTWARE_UPPER__=2"
}

# Conflict with other versions of this module
conflict $software

# Set the apps directory
# NOTE: the generator script does not substitute the container dir token
# below; it has to be set to the installation root by other means
set CONTAINER_DIR __CONTAINER_DIR__
set __SOFTWARE___CONTAINER_DIR $CONTAINER_DIR/$software/$version

# Prepend the path
# Braced so that the variable name is read in full whatever the software name is
prepend-path PATH ${__SOFTWARE___CONTAINER_DIR}/bin
# Recipe header template: bootstraps from a docker image hosted on quay.io.
# The image name and tag are filled in from the software_quay and version_quay
# fields of the yaml entry; the generator script appends the recipe sections
# after this header.
Bootstrap: docker
From: quay.io/biocontainers/__SOFTWARE_QUAY__:__VERSION_QUAY__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment