#!/bin/bash
#
# Run Galaxy and Pulsar in Docker for development.
#
# Galaxy and Pulsar should be cloned at ./galaxy and ./pulsar; override with the vars below. Necessary configs, as well
# as venvs for each, will be placed in ./config. Once created, these configs are not overwritten and can be modified as
# needed.
#
# To stop, run:
# docker kill pulsar galaxy-job galaxy-web rabbitmq postgres
#
# To restart Galaxy or Pulsar, `docker kill` the relevant service(s) and rerun this script.
#
# These users may be useful:
# - admin@example.org: Galaxy Admin
# - local@example.org: Runs all jobs locally
# - pulsar@example.org: Runs all jobs (except upload1) via Pulsar
#
# Galaxy's Pulsar lib, pulsar_galaxy_lib, will be built from the Pulsar clone and installed into Galaxy's venv if the
# galaxy-job container is not already running.
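#
# Example invocation (the script filename below is a placeholder; use whatever name you saved this gist under):
#   GALAXY_ROOT=$HOME/src/galaxy PULSAR_ROOT=$HOME/src/pulsar CONFIG_DIR=$HOME/gxp-config ./run-galaxy-pulsar.sh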
set -euo pipefail
: ${SKIP_PULSAR_GALAXY_LIB:='false'}
: ${POSTGRES_IMAGE:='postgres:latest'}
: ${RABBITMQ_IMAGE:='rabbitmq:latest'}
: ${PULSAR_IMAGE:='python:3.8-buster'}
: ${GALAXY_IMAGE:='python:3.8-buster'}
: ${GALAXY_ROOT:="$(pwd)/galaxy"}
: ${PULSAR_ROOT:="$(pwd)/pulsar"}
: ${CONFIG_DIR:="$(pwd)/config"}
: ${RABBITMQ_USER:='guest'}
: ${RABBITMQ_PASS:='guest'}
: ${POSTGRES_USER:='galaxy'}
: ${POSTGRES_PASS:='galaxy'}
: ${UID:="$(id -u)"}
: ${GID:="$(id -g)"}
function log() {
    [ -t 0 ] && echo -e '\033[1;32m#' "$@" '\033[0m' 1>&2 || echo '#' "$@" 1>&2
}
function extract_lines() {
    local marker="$1"
    local begin=$(awk "/^__BEGIN_${marker}__/ {print NR + 1; exit 0; }" "$0")
    local end=$(awk "/^__END_${marker}__/ {print NR; exit 0; }" "$0")
    tail -n+${begin} "$0" | head -n $(($end - $begin))
}
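# extract_lines reads this script itself ($0) and prints the template found between the matching __BEGIN_<MARKER>__ and
# __END_<MARKER>__ lines after `exit 0` below, e.g. `extract_lines APP_YML` prints the Pulsar app.yml template.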
if [ ! -d "$CONFIG_DIR" ]; then
log "Creating config dir '${CONFIG_DIR}'"
mkdir -p "$CONFIG_DIR"
fi
for cf in galaxy.yml job_conf.xml run-galaxy.sh run-pulsar.sh app.yml job_resource_params_conf.xml rule.py; do
    if [ ! -f "${CONFIG_DIR}/${cf}" ]; then
        log "Creating ${CONFIG_DIR}/${cf}"
        case "$cf" in
            galaxy.yml)
                extract_lines GALAXY_YML | sed -e "s/PGUSER/${POSTGRES_USER}/" -e "s/PGPASS/${POSTGRES_PASS}/" > "${CONFIG_DIR}/${cf}"
                ;;
            job_conf.xml)
                extract_lines JOB_CONF_XML | sed -e "s/MQUSER/${RABBITMQ_USER}/" -e "s/MQPASS/${RABBITMQ_PASS}/" > "${CONFIG_DIR}/${cf}"
                ;;
            run-galaxy.sh)
                extract_lines RUN_GALAXY_SH | sed -e "s/UID/${UID}/g" -e "s/GID/${GID}/g" > "${CONFIG_DIR}/${cf}"
                chmod +x "${CONFIG_DIR}/${cf}"
                ;;
            run-pulsar.sh)
                extract_lines RUN_PULSAR_SH | sed -e "s/UID/${UID}/g" -e "s/GID/${GID}/g" > "${CONFIG_DIR}/${cf}"
                chmod +x "${CONFIG_DIR}/${cf}"
                ;;
            app.yml)
                extract_lines APP_YML | sed -e "s/MQUSER/${RABBITMQ_USER}/" -e "s/MQPASS/${RABBITMQ_PASS}/" > "${CONFIG_DIR}/${cf}"
                ;;
            job_resource_params_conf.xml)
                extract_lines JOB_RESOURCE_PARAMS_CONF_XML > "${CONFIG_DIR}/${cf}"
                ;;
            rule.py)
                extract_lines DYNAMIC_RULE > "${CONFIG_DIR}/${cf}"
                ;;
        esac
    else
        log "${CONFIG_DIR}/${cf} already exists, remove to reinitialize"
    fi
done
if [ ! -d "${CONFIG_DIR}/venv-galaxy" ]; then
    # common_startup.sh doesn't upgrade pip or use python3's builtin venv; it probably should.
    log "Creating Galaxy .venv"
    docker run --rm -it --name galaxy-init -v "${CONFIG_DIR}:/galaxy" -w /galaxy --user "${UID}:${GID}" "$GALAXY_IMAGE" python3 -m venv .venv
    mv "${CONFIG_DIR}/.venv" "${CONFIG_DIR}/venv-galaxy"
    log "Upgrading pip"
    docker run --rm -it --name galaxy-init -v "${GALAXY_ROOT}:/galaxy" -v "${CONFIG_DIR}/venv-galaxy:/galaxy/.venv" -w /galaxy --user "${UID}:${GID}" "$GALAXY_IMAGE" ./.venv/bin/pip install --upgrade pip setuptools wheel
    log "Running common_startup.sh"
    docker run --rm -it --name galaxy-init -v "${GALAXY_ROOT}:/galaxy" -v "${CONFIG_DIR}/venv-galaxy:/galaxy/.venv" -v "${CONFIG_DIR}/galaxy.yml:/galaxy/config/galaxy.yml:ro" -v "${CONFIG_DIR}/job_conf.xml:/galaxy/config/job_conf.xml:ro" -e HOME=/tmp/galaxy -w /galaxy --user "${UID}:${GID}" "$GALAXY_IMAGE" bash ./scripts/common_startup.sh
else
    log "Skipping common_startup.sh, remove ${CONFIG_DIR}/venv-galaxy to reinitialize"
fi
if [ ! -d "${CONFIG_DIR}/venv-pulsar" ]; then
log "Creating Pulsar .venv"
docker run --rm -it --name pulsar-init -v "${CONFIG_DIR}:/pulsar" -w /pulsar --user "${UID}:${GID}" "$PULSAR_IMAGE" python3 -m venv .venv
mv "${CONFIG_DIR}/.venv" "${CONFIG_DIR}/venv-pulsar"
log "Upgrading pip"
docker run --rm -it --name pulsar-init -v "${PULSAR_ROOT}:/pulsar" -v "${CONFIG_DIR}/venv-pulsar:/pulsar/.venv" -w /pulsar --user "${UID}:${GID}" "$PULSAR_IMAGE" ./.venv/bin/pip install --upgrade pip setuptools wheel
log "Installing devmode Pulsar install"
docker run --rm -it --name pulsar-init -v "${PULSAR_ROOT}:/pulsar" -v "${CONFIG_DIR}/venv-pulsar:/pulsar/.venv" -w /pulsar --user "${UID}:${GID}" "$PULSAR_IMAGE" ./.venv/bin/pip install -e .
log "Installing Pulsar additional dependencies"
docker run --rm -it --name pulsar-init -v "${PULSAR_ROOT}:/pulsar" -v "${CONFIG_DIR}/venv-pulsar:/pulsar/.venv" -w /pulsar --user "${UID}:${GID}" "$PULSAR_IMAGE" ./.venv/bin/pip install kombu pycurl
fi
# Generate a fresh pulsar-galaxy-lib wheel and install it in Galaxy's venv
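# To skip this step entirely (e.g. when only iterating on configs), set SKIP_PULSAR_GALAXY_LIB=true in the environment
# before running this script.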
if ! $SKIP_PULSAR_GALAXY_LIB && [ -z "$(docker ps -q --filter name=galaxy-job)" ]; then
log "Building minty fresh pulsar-galaxy-lib from ${PULSAR_ROOT}"
rm -rf "${PULSAR_ROOT}/build" "${PULSAR_ROOT}/dist" "${PULSAR_ROOT}"/*.egg-info
docker run --rm -it --name pulsar-galaxy-lib -v "${PULSAR_ROOT}:/pulsar" -v "${CONFIG_DIR}/venv-galaxy:/galaxy/.venv" -w /pulsar --user "${UID}:${GID}" -e PULSAR_GALAXY_LIB=1 "$GALAXY_IMAGE" /galaxy/.venv/bin/python setup.py bdist_wheel
whl=$(basename "${PULSAR_ROOT}/dist"/pulsar_galaxy_lib-*.whl)
cp "${PULSAR_ROOT}/dist/${whl}" "${GALAXY_ROOT}"
rm -rf "${PULSAR_ROOT}/build" "${PULSAR_ROOT}/dist" "${PULSAR_ROOT}"/*.egg-info
log "Uninstalling pulsar-galaxy-lib from ${GALAXY_ROOT} venv"
docker run --rm -it --name pulsar-galaxy-lib -v "${GALAXY_ROOT}:/galaxy" -v "${CONFIG_DIR}/venv-galaxy:/galaxy/.venv" -w /galaxy --user "${UID}:${GID}" "$GALAXY_IMAGE" ./.venv/bin/pip uninstall -y pulsar-galaxy-lib
log "Installing pulsar-galaxy-lib from ${PULSAR_ROOT} in ${GALAXY_ROOT} venv"
docker run --rm -it --name pulsar-galaxy-lib -v "${GALAXY_ROOT}:/galaxy" -v "${CONFIG_DIR}/venv-galaxy:/galaxy/.venv" -w /galaxy --user "${UID}:${GID}" "$GALAXY_IMAGE" ./.venv/bin/pip install "/galaxy/${whl}"
log "Fixing devmode Pulsar after wheel build"
docker run --rm -it --name pulsar-init -v "${PULSAR_ROOT}:/pulsar" -v "${CONFIG_DIR}/venv-pulsar:/pulsar/.venv" -w /pulsar --user "${UID}:${GID}" "$PULSAR_IMAGE" ./.venv/bin/pip install -e .
elif ! $SKIP_PULSAR_GALAXY_LIB; then
log "Skipped rebuilding pulsar-galaxy-lib because 'galaxy-job' is running, kill that container to allow rebuilding"
else
log 'Skipped rebuilding pulsar-galaxy-lib because $SKIP_PULSAR_GALAXY_LIB is true'
fi
# Run PostgreSQL
POSTGRES_ID="$(docker ps -q --filter name=postgres)"
if [ -z "$POSTGRES_ID" ]; then
log "Starting PostgreSQL"
docker run --rm -d --name postgres -e "POSTGRES_USER=$POSTGRES_USER" -e "POSTGRES_PASSWORD=$POSTGRES_PASS" "$POSTGRES_IMAGE"
else
log "PostgreSQL already running in container id '$POSTGRES_ID'"
fi
POSTGRES_IP=$(docker inspect -f '{{ .NetworkSettings.IPAddress }}' postgres)
log "PostgreSQL IP is ${POSTGRES_IP}"
sed -i.old -e "s#PGHOST#${POSTGRES_IP}#" "${CONFIG_DIR}/galaxy.yml"
# Run RabbitMQ
RABBITMQ_ID="$(docker ps -q --filter name=rabbitmq)"
if [ -z "$RABBITMQ_ID" ]; then
log "Starting RabbitMQ"
docker run --rm -d --hostname rabbitmq --name rabbitmq "$RABBITMQ_IMAGE"
else
log "RabbitMQ already running in container id '$RABBITMQ_ID'"
fi
RABBITMQ_IP=$(docker inspect -f '{{ .NetworkSettings.IPAddress }}' rabbitmq)
log "RabbitMQ IP is ${RABBITMQ_IP}"
sed -i.old -e "s#MQHOST#${RABBITMQ_IP}#" "${CONFIG_DIR}/job_conf.xml"
sed -i.old -e "s#MQHOST#${RABBITMQ_IP}#" "${CONFIG_DIR}/app.yml"
# Run Galaxy Web
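# run-galaxy.sh (template below) waits for this docker-ip file to appear, then substitutes its contents for GXHOST in
# job_conf.xml so that the Pulsar side can call back to Galaxy (the galaxy_url param).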
rm -f "${GALAXY_ROOT}/docker-ip"
GALAXY_WEB_ID="$(docker ps -q --filter name=galaxy-web)"
if [ -z "$GALAXY_WEB_ID" ]; then
    # TODO: get output when it crashes
    log "Running Galaxy Web"
    #docker run --rm -it --name galaxy-web \
    docker run --rm -d --name galaxy-web \
        -p 8080:8080 \
        -v "${GALAXY_ROOT}:/galaxy" \
        -v "${CONFIG_DIR}/venv-galaxy:/galaxy/.venv:ro" \
        -v "${CONFIG_DIR}/job_conf.xml:/tmp/job_conf.xml.sed-in:ro" \
        -v "${CONFIG_DIR}/job_resource_params_conf.xml:/galaxy/config/job_resource_params_conf.xml:ro" \
        -v "${CONFIG_DIR}/galaxy.yml:/galaxy/config/galaxy.yml:ro" \
        -v "${CONFIG_DIR}/run-galaxy.sh:/galaxy/run-galaxy.sh:ro" \
        -v "${CONFIG_DIR}/rule.py:/galaxy/lib/galaxy/jobs/rules/rule.py:ro" \
        -w /galaxy "$GALAXY_IMAGE" ./run-galaxy.sh web
else
    log "Galaxy Web already running in container id '$GALAXY_WEB_ID'"
fi
GALAXY_WEB_IP=$(docker inspect -f '{{ .NetworkSettings.IPAddress }}' galaxy-web)
log "Galaxy Web IP is ${GALAXY_WEB_IP}"
echo "$GALAXY_WEB_IP" > "${GALAXY_ROOT}/docker-ip"
# Run Galaxy Job
GALAXY_JOB_ID="$(docker ps -q --filter name=galaxy-job)"
if [ -z "$GALAXY_JOB_ID" ]; then
log "Running Galaxy Job"
docker run --rm -d --name galaxy-job \
-v "${GALAXY_ROOT}:/galaxy" \
-v "${CONFIG_DIR}/venv-galaxy:/galaxy/.venv:ro" \
-v "${CONFIG_DIR}/job_conf.xml:/tmp/job_conf.xml.sed-in:ro" \
-v "${CONFIG_DIR}/job_resource_params_conf.xml:/galaxy/config/job_resource_params_conf.xml:ro" \
-v "${CONFIG_DIR}/galaxy.yml:/galaxy/config/galaxy.yml:ro" \
-v "${CONFIG_DIR}/run-galaxy.sh:/galaxy/run-galaxy.sh:ro" \
-v "${CONFIG_DIR}/rule.py:/galaxy/lib/galaxy/jobs/rules/rule.py:ro" \
-w /galaxy "$GALAXY_IMAGE" ./run-galaxy.sh job
else
log "Galaxy Job already running in container id '$GALAXY_JOB_ID'"
fi
GALAXY_JOB_IP=$(docker inspect -f '{{ .NetworkSettings.IPAddress }}' galaxy-job)
log "Galaxy Job IP is ${GALAXY_JOB_IP}"
# Run Pulsar
PULSAR_ID="$(docker ps -q --filter name=pulsar)"
if [ -z "$PULSAR_ID" ]; then
log "Running Pulsar"
#set -x; docker run --rm -it --name pulsar \
docker run --rm -d --name pulsar \
-v "${PULSAR_ROOT}:/pulsar" \
-v "${CONFIG_DIR}/venv-pulsar:/pulsar/.venv:ro" \
-v "${CONFIG_DIR}/app.yml:/pulsar/app.yml:ro" \
-v "${CONFIG_DIR}/run-pulsar.sh:/pulsar/run-pulsar.sh:ro" \
-w /pulsar "$PULSAR_IMAGE" ./run-pulsar.sh
else
log "Pulsar already running in container id '$PULSAR_ID'"
fi
PULSAR_IP=$(docker inspect -f '{{ .NetworkSettings.IPAddress }}' pulsar)
log "Pulsar IP is ${PULSAR_IP}"
exit 0
__BEGIN_GALAXY_YML__
uwsgi:
  http: :8080
  buffer-size: 16384
  processes: 1
  threads: 4
  offload-threads: 2
  static-map: /static/style=static/style/blue
  static-map: /static=static
  static-map: /favicon.ico=static/favicon.ico
  master: true
  virtualenv: .venv
  pythonpath: lib
  module: galaxy.webapps.galaxy.buildapp:uwsgi_app()
  thunder-lock: false
  die-on-term: true
  hook-master-start: unix_signal:2 gracefully_kill_them_all
  hook-master-start: unix_signal:15 gracefully_kill_them_all
  py-call-osafterfork: false
  enable-threads: true
galaxy:
  database_connection: postgresql://PGUSER:PGPASS@PGHOST/galaxy
  job_config_file: /tmp/job_conf.xml
  admin_users: admin@example.org
  cleanup_job: never
__END_GALAXY_YML__
__BEGIN_JOB_CONF_XML__
<?xml version="1.0"?>
<job_conf>
    <plugins>
        <plugin id="local" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner" workers="4"/>
        <plugin id="pulsar_mq" type="runner" load="galaxy.jobs.runners.pulsar:PulsarMQJobRunner" workers="4">
            <param id="amqp_url">amqp://MQUSER:MQPASS@MQHOST:5672//</param>
            <param id="galaxy_url">http://GXHOST:8080</param>
        </plugin>
    </plugins>
    <handlers assign_with="db-skip-locked"/>
    <destinations default="dynamic">
        <destination id="dynamic" runner="dynamic">
            <param id="function">rule</param>
        </destination>
        <destination id="local" runner="local"/>
        <destination id="pulsar_mq" runner="pulsar_mq">
            <param id="jobs_directory">/tmp/pulsar/staging</param>
        </destination>
    </destinations>
    <resources default="default">
        <group id="default">force_dest</group>
    </resources>
</job_conf>
__END_JOB_CONF_XML__
__BEGIN_JOB_RESOURCE_PARAMS_CONF_XML__
<parameters>
<param label="Force Destination" name="force_dest" type="select">
<option value="local">local</option>
<option value="pulsar_mq">pulsar_mq</option>
</param>
</parameters>
__END_JOB_RESOURCE_PARAMS_CONF_XML__
__BEGIN_DYNAMIC_RULE__
import logging
log = logging.getLogger(__name__)
def rule(tool_id, user_email, resource_params):
    destination_id = 'pulsar_mq'
    if tool_id in ('upload1',):
        destination_id = 'local'
    elif user_email == 'local@example.org':
        destination_id = 'local'
        log.debug(f'User {user_email} force mapped to {destination_id}')
    elif user_email == 'pulsar@example.org':
        destination_id = 'pulsar_mq'
        log.debug(f'User {user_email} force mapped to {destination_id}')
    override = resource_params.get('force_dest')
    if override:
        destination_id = override
        log.debug(f'Override dest selected: {destination_id}')
    else:
        log.debug(f'Default dest for tool selected: {destination_id}')
    return destination_id
__END_DYNAMIC_RULE__
__BEGIN_RUN_GALAXY_SH__
#!/bin/sh
groupadd -g GID galaxy
useradd -u UID -g GID -s /bin/bash -m -d /home/galaxy galaxy
while [ ! -f 'docker-ip' ]; do
    echo "Waiting for Galaxy Docker IP"
    sleep 1
done
DOCKER_IP=$(cat docker-ip)
echo "Galaxy Docker IP is ${DOCKER_IP}"
sed -e "s#GXHOST#${DOCKER_IP}#" /tmp/job_conf.xml.sed-in > /tmp/job_conf.xml
case $1 in
    web)
        su - galaxy -c '/bin/bash -c "cd /galaxy ; . ./.venv/bin/activate ; uwsgi --yaml config/galaxy.yml"'
        ;;
    job)
        su - galaxy -c '/bin/bash -c "cd /galaxy ; . ./.venv/bin/activate ; python3 ./scripts/galaxy-main --attach-to-pool=job-handlers"'
        ;;
esac
__END_RUN_GALAXY_SH__
__BEGIN_RUN_PULSAR_SH__
#!/bin/sh
groupadd -g GID pulsar
useradd -u UID -g GID -s /bin/bash -m -d /home/pulsar pulsar
su - pulsar -c '/bin/bash -c "cd /pulsar ; . ./.venv/bin/activate ; pulsar-main"'
__END_RUN_PULSAR_SH__
__BEGIN_APP_YML__
message_queue_url: amqp://MQUSER:MQPASS@MQHOST:5672//
persistence_directory: /tmp/pulsar/persistence
staging_directory: /tmp/pulsar/staging
dependency_resolution:
  resolvers:
    - prefix: /pulsar/_conda
      type: conda
      auto_init: true
      auto_install: true
__END_APP_YML__