#!/bin/bash
# gxadmin: Galaxy administration swiss army knife
# license: GPLv3
#
# This is the result of admins across the universe working together :)
#
# Thanks to everyone for their contributions
# https://github.com/galaxyproject/gxadmin/graphs/contributors
version() {
echo 19
}
GXADMIN_SITE_SPECIFIC=${GXADMIN_SITE_SPECIFIC:-~/.config/gxadmin-local.sh}
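# Admins can drop extra commands into the site-specific file above without
# touching this script; locate_cmds greps that file using the same
# "name() { ## <args>: help" convention as the built-in functions below.
# A minimal, purely illustrative sketch of such a local command:
#
#   local_hello() { ## <name>: say hello to someone
#       handle_help "$@" <<-EOF
#           Example local command living in gxadmin-local.sh
#       EOF
#       echo "Hello, $1"
#   }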
hexencodefield9=$(cat <<EOF
import binascii
import csv
import sys

csv.field_size_limit(sys.maxsize)
spamreader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
spamwriter = csv.writer(sys.stdout, delimiter=',', quotechar='"')
for row in spamreader:
    if row[9][0] != "\\\\":
        row[9] = "\\\\x" + binascii.hexlify(row[9].encode()).decode()
    spamwriter.writerow(row)
EOF
)
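# hexencodefield9 rewrites column 10 of a CSV stream into PostgreSQL bytea hex
# notation unless it already starts with a backslash; e.g. a (made up) row
#   1,2,3,4,5,6,7,8,9,hello
# becomes
#   1,2,3,4,5,6,7,8,9,\x68656c6c6f
# The consumer of this filter is not shown in this excerpt.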
identicon_script=$(cat <<EOF
import hashlib
import sys

def colored(fg, bg, value):
    return '\x1b[38;5;{0}m\x1b[48;5;{1}m{2}\x1b[0m'.format(fg, bg, value)

class Iden:
    def build(self, h):
        pixels = [[0 for _ in range(5)] for _ in range(5)]
        for i in range(0, 5):
            for x in range(0, 5):
                pixels[x][i] = self.showPixel(i, x, h)
        return pixels

    def showPixel(self, x, y, h):
        m = 6 + abs(2 - x) * 5 + y
        return int(h[m:m + 1], 16) % 2 == 0

    def getIcon(self, num, hash_string):
        for x in self.build(hash_string):
            yield ''.join([
                colored(num, (num + 15) % 255, '██' if q else '  ')
                for q in x
            ])

data = sys.stdin.read().strip()
m = hashlib.sha256()
m.update(data.encode('UTF-8'))
hash_string = m.hexdigest()
num = int(hash_string[0:2], 16)
for line in Iden().getIcon(num, hash_string):
    print(line)
EOF
)
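# identicon_script reads arbitrary text on stdin, hashes it with SHA-256 and
# prints a small, horizontally mirrored block identicon using 256-colour ANSI
# escapes. Illustrative invocation (the filter command wrapping this is not
# shown in this excerpt):
#   echo "hxr@example.org" | $GXADMIN_PYTHON -c "$identicon_script"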
ifmore() {
q="$(cat)"
lines=$(echo "$q" | wc -l)
# 2 more than the head command since we'll 'spend' two for the ... anyway.
if (( lines > 8 )); then
echo "$q" | head -n 6
echo " ..."
echo " run '$0 $1 help' for more"
else
echo "$q"
fi
}
colour_word() {
word=$1
color=$2
if [[ $color == "red" ]]; then
color_idx=1
elif [[ $color == "orange" ]]; then
color_idx=2
elif [[ $color == "green" ]]; then
color_idx=4
fi
cat | sed "s|${word}|$(tput setab $color_idx)${word}$(tput sgr0)|g"
}
filter_commands() {
cat | grep "^$1 " | sort -k2 | column -s: -t | sed 's/^/ /' | colour_word Deprecated orange | colour_word '(NEW)' green
}
locate_cmds() {
grep -s -h -o '^[a-z0-9_-]*()\s*{ ##?\? .*' "$0" "$GXADMIN_SITE_SPECIFIC" | grep -v grep | grep -v '| sed' | sort
}
locate_cmds_nolocal() {
grep -s -h -o '^[a-z0-9_-]*()\s*{ ##?\? .*' "$0" | grep -v grep | grep -v '| sed' | sort
}
correct_cmd() {
cat | sed 's/_/ /;s/()\s*{ ##?\?//;s/ :/:/'
}
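# correct_cmd turns a matched function definition into the user-facing command
# string, e.g. (illustrative input):
#   query_queue-detail() { ## [--all]: Show the queue
# becomes
#   query queue-detail [--all]: Show the queue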
fzf_autorun_cmd() {
read -p "Would you like to run '$0 $res'? [N|y]: " choice
if [[ "$choice" == "y" ]] || [[ "$choice" == "Y" ]]; then
$0 $res
fi
}
didyoumean() {
# Given a query
# Is this a known subcommand
known_command=0
for x in $registered_subcommands; do
if [[ "$1" == "$x" ]]; then
known_command=1
fi
done
# If it is, we check the second part.
if (( known_command == 1 )); then
if [[ "$2" == "" ]]; then
usage "$1"
exit 1;
fi
error "Unknown subcommand: $1 $2"
warning "Did you mean one of the following?"
echo
if ! command -v fzf &> /dev/null; then
echo "(Top hits by levenshtein distance)"
locate_cmds | correct_cmd | grep "^$1 " | \
cut -f 2 -d' ' | sed -s "s/://g;s/\$/ $2/g" | \
levenshtein_filter | sed 's/^/ - [/;s/\t/]: /g'
else
res=$(locate_cmds | correct_cmd | grep "^$1 " | fzf -q "$2" --header="Unknown subcommand: $1 $2, did you mean..." | sed 's/[:<\[].*//g')
fzf_autorun_cmd
fi
exit 1;
else
error "Unknown command: $1"
usage
fi
exit 1;
}
usage(){
cat <<-EOF
gxadmin usage:
EOF
if (( $# == 0 )); then
for x in $registered_subcommands; do
vn="_${x}_short_help"
printf " %10s: %s\n" "${x}" "${!vn}"
done
else
for x in $registered_subcommands; do
if [[ "$1" == "$x" ]]; then
vn="_${x}_short_help"
echo "${!vn}"
vn="_${x}_long_help"
echo "${!vn}"
fi
done
fi
if [[ -f "$GXADMIN_SITE_SPECIFIC" ]]; then
if (( $# == 0 )) || [[ "$1" == "local" ]]; then
printf " %10s: %s\n" "local" "(These can be configured in "$GXADMIN_SITE_SPECIFIC")"
fi
else
cat <<-EOF
Local-only commands can be configured in $GXADMIN_SITE_SPECIFIC
EOF
fi
if (( $# == 0 )) || [[ "$1" == "search" ]]; then
cat <<-EOF
search <term>: Search gxadmin for functions with specific terms
EOF
fi
if (( $# == 1 )); then
echo
locate_cmds | correct_cmd | filter_commands "$1"
fi
cat <<-EOF
All commands can be prefixed with "time" to print execution time to stderr
EOF
cat <<-EOF
help / -h / --help : this message. Invoke '--help' on any subcommand for help specific to that subcommand
Tip: Run "gxadmin meta whatsnew" to find out what's new in this release!
EOF
exit 0;
}
handle_help() {
if [[ ! -z "${GXADMIN_POPCON_ENABLE}" ]]; then
if [[ "${query_name}" != "user-info" ]]; then
echo "${mode:-mode} ${query_name:-sf}" >> ~/.gxadmin-popcon.log
fi
fi
for i in "$@"; do
if [[ "$i" = --help || "$i" = -h ]]; then
if [[ -n "${query_name}" ]]; then
key="${query_type}_${query_name}"
fi
invoke_desc=$(locate_cmds | grep "${key}()" | correct_cmd | sed "s/^/gxadmin /g")
short_desc=$(echo "$invoke_desc" | sed 's/.*://g')
short_parm=$(echo "$invoke_desc" | sed 's/:.*//g')
echo "${mode} ${query_name} - ${short_desc}"
echo
echo "**SYNOPSIS**"
echo
echo " $short_parm"
echo
manual="$(cat -)"
manual_wc="${#manual}"
if (( manual_wc > 3 )); then
echo "**NOTES**"
echo
echo "$manual"
echo
fi
# exit after printing the documentation!
exit 0;
fi
done
}
assert_restart_lock(){
if [ -f "$HOME/.restart-lock" ]; then
echo "A restart lock exists. This means someone is probably already restarting galaxy."
exit 3
fi
}
assert_set_env() {
env_var=$1
if [[ -z "${!env_var}" ]]; then
error Please set "\$${env_var}"
exit 1
fi
}
wait_for_url() {
url=$1; shift;
while [ "$(curl --connect-timeout 5 --silent "$url" | wc -c)" -eq "0" ]; do
sleep 5;
echo -n '.'
done
}
assert_count() {
if (( $1 != $2 )); then
error "$3"
exit 1
fi
}
assert_count_ge() {
if (( $1 < $2 )); then
error "$3"
exit 1
fi
}
assert_file() {
if [[ ! -f "$1" ]]; then
error "File $1 does not exist"
exit 1
fi
}
assert_file_warn() {
if [[ ! -f "$1" ]]; then
warning "File $1 does not exist"
fi
}
query_tbl() {
psql <<-EOF
$1
EOF
}
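# query_tbl and the other query_* helpers below pipe their SQL into psql, which
# takes its connection settings from the standard libpq environment variables;
# something along these lines is typically expected in the environment:
#   export PGDATABASE=galaxy PGHOST=localhost PGUSER=galaxy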
query_tsv() {
psql <<-EOF
COPY ($1) to STDOUT with CSV DELIMITER E'\t'
EOF
}
query_json() {
psql <<-EOF
COPY (
SELECT array_to_json(array_agg(row_to_json(t)))
FROM ($1) t
) to STDOUT with (FORMAT CSV, QUOTE ' ')
EOF
}
query_tsv_json() {
psql <<-EOF
COPY ($1) to STDOUT with (FORMAT CSV, QUOTE ' ')
EOF
}
query_csv() {
psql <<-EOF
COPY ($1) to STDOUT with CSV DELIMITER ','
EOF
}
query_exp() {
psql <<-EOF
EXPLAIN ANALYZE VERBOSE $1
EOF
}
query_expj() {
echo "$1"
echo
psql -qAt <<-EOF
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) $1
EOF
}
query_echo() {
echo "$1"
}
query_influx() {
local query="$1"
local rename="$2"
local fields="$3"
local tags="$4"
local timestamp="$5"
if [[ -z "$fields" ]]; then
exit 0;
fi
arr2py=$(cat <<EOF
import sys

query_name = sys.argv[1]
fields = {x.split('=')[0]: int(x.split('=')[1]) for x in sys.argv[2].split(';')}
tags = []
if len(sys.argv) > 3 and len(sys.argv[3]) > 0:
    tags = {x.split('=')[0]: int(x.split('=')[1]) for x in sys.argv[3].split(';')}
timestamp = None
if len(sys.argv) > 4 and sys.argv[4] != '':
    timestamp = int(sys.argv[4])

for line in sys.stdin.read().split('\n'):
    if len(line) == 0:
        continue
    parsed = line.split('\t')
    metric = query_name
    if len(tags):
        tag_data = ['%s=%s' % (k, parsed[v].replace(' ', '\\ ').replace(',', '\\,').replace('=', '\\=')) for (k, v) in tags.items()]
        metric += ',' + ','.join(tag_data)
    field_data = ['%s=%s' % (k, parsed[v]) for (k, v) in fields.items()]
    metric += ' ' + ','.join(field_data)
    if timestamp is not None:
        metric += ' ' + str(parsed[timestamp])
    print(metric)
EOF
)
psql -c "COPY ($query) to STDOUT with CSV DELIMITER E'\t'"| $GXADMIN_PYTHON -c "$arr2py" "$rename" "$fields" "$tags" "$timestamp"
}
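# query_influx turns the tab-separated psql output into InfluxDB line protocol.
# The fields/tags arguments map names to 0-based column indexes; e.g. with the
# illustrative values rename="queue-overview", tags="state=0", fields="count=1",
# a row "running<TAB>42" would come out roughly as:
#   queue-overview,state=running count=42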
gdpr_safe() {
local coalesce_to
if (( $# > 2 )); then
coalesce_to="$3"
else
coalesce_to="__UNKNOWN__"
fi
if [ -z "$GDPR_MODE" ]; then
echo "COALESCE($1::text, '$coalesce_to') as $2"
else
# Try and be privacy respecting while generating numbers that can allow
# linking data across tables if need be?
echo "substring(md5(COALESCE($1::text, '$coalesce_to') || now()::date || '$GDPR_MODE'), 0, 12) as ${2:-$1}"
fi
}
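# For illustration, "gdpr_safe job.user_id user_id" normally expands to
#   COALESCE(job.user_id::text, '__UNKNOWN__') as user_id
# while with GDPR_MODE set it becomes a salted, date-scoped hash such as
#   substring(md5(COALESCE(job.user_id::text, '__UNKNOWN__') || now()::date || '<GDPR_MODE value>'), 0, 12) as user_id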
# Borrowed from https://stackoverflow.com/questions/1527049/how-can-i-join-elements-of-an-array-in-bash
function join_by {
local d=$1; shift;
echo -n "$1";
shift;
printf "%s" "${@/#/$d}";
}
summary_statistics() {
local v=$1
local ishuman=$2
# TODO: there has got to be a less ugly way to do this
if (( ishuman == 1 )); then
human_size="pg_size_pretty("
human_after=")"
else
human_size=""
human_after=""
fi
cat <<-EOF
${human_size}min($v)${human_after} AS min,
${human_size}percentile_cont(0.25) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS quant_1st,
${human_size}percentile_cont(0.50) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS median,
${human_size}avg($v)${human_after} AS mean,
${human_size}percentile_cont(0.75) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS quant_3rd,
${human_size}percentile_cont(0.95) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS perc_95,
${human_size}percentile_cont(0.99) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS perc_99,
${human_size}max($v)${human_after} AS max,
${human_size}sum($v)${human_after} AS sum,
${human_size}stddev($v)${human_after} AS stddev
EOF
}
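# For example, "summary_statistics file_size 1" expands to a column list like
#   pg_size_pretty(min(file_size)) AS min,
#   pg_size_pretty(percentile_cont(0.25) WITHIN GROUP (ORDER BY file_size) ::bigint) AS quant_1st,
#   ...
# ready to drop into a SELECT; with ishuman=0 the pg_size_pretty() wrapper is omitted.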
get_user_filter(){
echo "(galaxy_user.email = '$1' or galaxy_user.username = '$1' or galaxy_user.id = CAST('$1' AS INTEGER))"
}
error() {
(>&2 echo "$(tput setab 1)$*$(tput sgr0)")
}
warning() {
(>&2 echo "$(tput setab 2)$*$(tput sgr0)")
}
success() {
echo "$(tput setaf 40)$*$(tput sgr0)"
}
# Find a python binary, hopefully.
if [[ -z "${GXADMIN_PYTHON}" ]]; then
if hash python3 2>/dev/null; then
export GXADMIN_PYTHON=$(command -v python3)
elif hash python2 2>/dev/null; then
export GXADMIN_PYTHON=$(command -v python2)
elif hash python 2>/dev/null; then
export GXADMIN_PYTHON=$(command -v python)
else
warning "Some features require python support, sorry. If you have python installed somewhere that is not on the path or under a weird name, you can set GXADMIN_PYTHON to the path."
fi
fi
CHANGELOG=$(cat <<EOF
# 20-pre
- Fixed:
- Broken things in preview anonymisation script
- Unified uwsgi-status command (thanks @gmauro)
- Unified galaxy user/email/id filtering and made it stricter in what it accepts ([#60](https://github.com/galaxyproject/gxadmin/issues/60)).
- Updated "query user-disk-usage" to add a "--use-precalc" flag which uses the value calculated by Galaxy.
- Enabled GXADMIN_PYTHON from the environment, by [@gmauro](https://github.com/gmauro).
- Updated "galaxy cleanup" to avoid to set update_time on updated objects, by [@gmauro](https://github.com/gmauro).
- fixed broken latest-users query
- fixed broken errored-jobs query, by [@gmauro](https://github.com/gmauro).
- several fixes to the galaxy ie-show command, by [@gmauro](https://github.com/gmauro).
- Generalized the "uwsgi lastlog" function to properly handle journalctl's output, by [@gmauro](https://github.com/gmauro).
- Added:
- query dump-users, for [@shiltemann](https://github.com/shiltemann)
- "did you mean" when a command isn't found.
- query job-metrics, by [@anuprulez](https://github.com/anuprulez) and [@simonbray](https://github.com/simonbray).
- uwsgi active-user count
- exports PGAPPNAME setting the application name for postgres connections. This becomes visible in pg_stat_activity.
- mutate set_quota_for_oidc_user, by [@gmauro](https://github.com/gmauro).
- iquery support for queue-detail query
- query tool-usage-over-time which can be used to make charts of specific tools over time.
- improved use of FZF for both the search and 'typo' functions, when it is available.
- query pulsar-gb-transferred: sums up data in/out (minus collections) of pulsar.
# 19
- Fixed:
- Fixed broken slurping from previous release (Thanks [@slugger70](https://github.com/Slugger70) for catching it.)
- Made changelog updates mandatory so I don't have to ask people for it. Finally.
- Added:
- encode/decode ID
- Benchmarking via ASV (preview)
- anonymise-db-for-release (preview)
- Add '--by_group' flag to several monthly queries (Thanks [@slugger70](https://github.com/Slugger70))
- query monthly-cpu-stats (Thanks [@gmauro](https://github.com/gmauro))
- workflow trace archive commands
- query queue has a new optional parameter for splitting queues by various attributes (Thanks @natefoo)
- query pg-rows-per-table to find out which tables have data.
- mutate fail-wfi
- mutate oidc-by-emails, mutate users affected by https://github.com/galaxyproject/galaxy/issues/9981
- mutate now supports echo/explain prefixes.
- very-unsafe flag to most mutate methods, avoiding the transaction.
- added the [wonderful argument parsing](https://github.com/hexylena/wap) for parsing function signatures into arguments automatically, making contribution easier.
# 18
- Fixed:
- All previous '--nice' flags are rewritten to '--human' (Thanks @lldelisle)
- Update 'user-disk-usage' to support iquery (Thanks @lldelisle)
- Update 'largest-histories' to support iquery (Thanks @lldelisle)
- uwsgi-memory now looks at workflow schedulers
- Exposed bash autocompletion and documented it
- Added:
- query workers, only works on Galaxy 20.01 or newer and retrieves the hostname and PID of Galaxy worker processes
- '--details' option to query errored-jobs to include the job_stderr column
- total number of jobs with exit states for galaxy instance (Thanks @bruggerk)
- query workflow-invocation-totals
- server subcommand exposing previously hidden functions
- Removed:
- filter hexdecodelines, this is now replaced by a built-in postgres function
- Changed:
- Refactored internal function search to unify local, query, mutate, with the rest.
# 17
Testing our new release message
- Fixed:
- Issue with py3 only machines
- Added:
- jobs ready to run
# 16
- Fixed:
- figured out how to decode hex blobs in postgres
# 15
- Fancy [new docs site](https://galaxyproject.github.io/gxadmin/#/)
- Added:
- search :(
- query server-groups-allocated-cpu, thanks @selten
- query server-groups-disk-usage, thanks @selten
- query user-disk-quota, thanks @selten
- query job-info, thanks @selten
- query queue-detail-by-handler
- mutate reassign-job-to-handler
- a LOT of query pg-... commands from [heroku's pg-extras](https://github.com/heroku/heroku-pg-extras/tree/master/commands)
- mutate drop-extraneous-workflow-step-output-associations
- mutate reassign-workflows-to-handler
- query data-origin-distribution
- query data-origin-distribution-summary
- query user-history-list
# 14
- Added:
- Job working directory cleaner
- meta slurp-upto
- meta slurp-day
- Imported jobs-queued-internal-by-handler and jobs-queued queries from main
- query users-with-oidc
- Three new queries for checking history execution time, thanks [@mmiladi](https://github.com/mmiladi)
- query history-runtime-system-by-tool
- query history-runtime-system
- query history-runtime-wallclock
- Three new queries for checking potentially broken tools, thanks [@jmchilton](https://github.com/jmchilton)
- query tool-new-errors
- query tool-errors
- query tool-likely-broken
- Import script from https://github.com/galaxyproject/grafana-dashboards
- query for datasets uploaded in last N hours
- mutate to approve users
- mutate assign-unassigned-workflow: workaround for [galaxyproject/galaxy#8209](https://github.com/galaxyproject/galaxy/issues/8209)
- mutate oidc-role-find-affected: workaround for [galaxyproject/galaxy#8244](https://github.com/galaxyproject/galaxy/issues/8244)
- mutate oidc-role-fix: workaround for [galaxyproject/galaxy#8244](https://github.com/galaxyproject/galaxy/issues/8244)
- query user-disk-usage, thanks @selten
- query group-cpu-seconds, thanks @selten
- Fixed:
- Correct bug in queue-detail and a couple other functions which did not
correctly include data from anonymous users.
# 13
- Added:
- local functions support querying when prefixed with "query-"
- "meta influx-post" and "meta influx-query" were added to get data into
and out of Influx
- "explainquery" is added as an alternative to csv/tsv/etc queries, which
does an "EXPLAIN ANALYZE" of the current SQL
- "explainjsonquery" added for use with http://tatiyants.com/pev/
- "time" prefix for all functions added to print execution time to stderr
- Function to fail a specific job ID
- "query user-recent-aggregate-jobs"
- "query history-contents"
- "query hdca-info"
- "query hdca-datasets"
- "mutate fail-history", when failing individual jobs is too slow
- "galaxy migrate-tool-install-from-sqlite"
- "query user-cpu-years"
- Shellchecking of entire script
- Fixed:
- Escaped commas in influx outputs, switched to tabs to further prevent
comma issues.
- Correct date filter in "query {server-groups,server-datasets}"
- Removed unnecessary function name duplication, e.g.
"query_filter() { ## query filter [id]: Does thing"
can now be written as
"query_filter() { ## [id]: Does thing"
# 12
- Added:
- local functions, users can add functions to a separate file which are
made available in gxadmin. @erasche resisted implementing these for a
long time for fears that they won't be contributed back and everyone
would keep their precious sql private. So they may nag you occasionally
to contribute them back.
- (semi) EU specific systemd handler/zergling management commands
- filter digest-color: command to colour some text
- filter identicon: command to generate an identicon
- Some basic testing of the overall script
- Fixed:
- Correct time zones in all queries to be client-side time zones rather
than UTC encoded timestamps as stored in db (Thanks [@slugger70](https://github.com/Slugger70))
- Renamed: "query monthly-users" → "query monthly-users-active"
- Removed:
- Removed highly EU specific handler and zerg functions
- Deprecated: "query active-users"
- Changed:
- Major internal reorganisation and split into parts for easier editing. A
bash script that needs a build system, truly horrifying.
- User info query now shows recent WF invocations and whether their largest
histories are deleted or purged or not.
- Separated out 'report' types that are markdown-only outputs and don't
support tsv/csv queries
# 11
- Added:
- filter hexdecode: to help decode hex blobs in postgres queries
- uwsgi stats\_influx: which fetches stats from uWSGI zerglings
- filter pg2md: Convert postgres tables into markdown compatible tables
- "GDPR_MODE" env var which blanks out usernames and emails
- errored-jobs: Lists jobs that errored in the last N hours
- workflow-connections: Exports workflow connections, input to output tools
- dump-config: Dump Galaxy configuration as JSON
- tool-popularity: Most popular tools per month
- Changed:
- user-details: now reports in markdown compatible output and with more
information about the user
- job-info: now reports in markdown compatible output, including job inputs
+ outputs
- queue-overview: now includes user ID by default ("GDPR_MODE=1" will set
the value to "0"), and allows using shorter tool IDs
- user-details: renamed to user-info
- user-info: includes largest histories
# 10
- Added:
- old-histories: Old histories query to find histories that haven't been
used for X weeks.
- user-details: query details about a specific user ID
- Fixed:
- Included date in cleanup logs
# 9
- Added:
- Influx queries for:
- active-users
- collection-usage
- disk-usage
- groups-list
- largest-collection
- queue
- queue-overview
- tool-usage
- ts-repos
- users-count
- users-total
- tool-metrics
- tool-available-metrics
- largest-collection
- Fixed:
- Improved "latest-users" to include any groups they're part of and their
registration status (ack'd email/not)
- Influx queries now functional (but not automatic)
- Collection usage
# 8
- Added:
- Help documentation in-tool
- Fixed:
- rewrote internals to use functions + case/esac
# 7
- Added:
- cleanup
- Fixed:
- removed all temporary tables for CTEs, allowing tsv/csv queries for
everything
- internal cleanup
# 6
- Added functions:
- active-users
- training-memberof
- training-remove-member
- Fixed:
- update script
- removed more EU paths for env vars
# 5
- Added functions:
- disk-usage
- users-count
- users-total
- groups-list
- collection-usage
- ts-repos
- Fixed job-inputs/job-outputs
- Fixed internal variable names
# 4
- Implemented update function
- removed all support for legacy "_" calls
- Fancy new automatically updated help function
# 3
- Added migration to sqlite for toolshed install database
- Removed some EU specific stuff
# 2
- Added training-queue and job-history functions
- replaced "_" with "-"
# 1 - Initial Release
Included functions for:
- validating configs
- zerg tasks
- handler tasks
- queries:
- latest-users
- tool-usage
- job-info
- job-outputs
- queue
- queue-detail
- recent-jobs
- jobs-per-user
- runtime-per-user
- training
- training-members
- queue-time
- datasets-created-daily
EOF
)
registered_subcommands="$registered_subcommands config"
_config_short_help="Access Galaxy configuration and other assorted things"
config_validate() { ## : validate config files
handle_help "$@" <<-EOF
Validate the configuration files
**Warning**:
!> - This requires you to have \`\$GALAXY_DIST\` set and to have config under \`\$GALAXY_DIST/config\`.
!> - This only validates that it is well formed XML, and does **not** validate against any schemas.
!>
!> $ gxadmin validate
!> OK: galaxy-dist/data_manager_conf.xml
!> ...
!> OK: galaxy-dist/config/tool_data_table_conf.xml
!> OK: galaxy-dist/config/tool_sheds_conf.xml
!> All XML files validated
EOF
assert_set_env GALAXY_CONFIG_DIR
assert_set_env GALAXY_MUTABLE_CONFIG_DIR
fail_count=0
for file in ${GALAXY_CONFIG_DIR}/*.xml; do
xmllint "$file" > /dev/null 2>/dev/null;
ec=$?
if (( ec > 0 )); then
fail_count=$(echo "$fail_count + 1" | bc)
error " FAIL: $file ($ec)";
else
success " OK: $file";
fi
done;
for file in ${GALAXY_MUTABLE_CONFIG_DIR}/*.xml; do
xmllint "$file" > /dev/null 2>/dev/null;
ec=$?
if (( ec > 0 )); then
fail_count=$(echo "$fail_count + 1" | bc)
error " FAIL: $file ($ec)";
else
success " OK: $file";
fi
done;
if (( fail_count == 0 )); then
success "All XML files validated"
else
error "XML validation failed, cancelling any actions."
exit 1
fi
}
config_dump() { ## : Dump Galaxy configuration as JSON
handle_help "$@" <<-EOF
This function was added with the intention of using it internally, but it may be useful in your workflows. It uses the Python code from the Galaxy codebase in order to properly load the configuration, which is then dumped as JSON.
(.venv)$ gxadmin dump-config | jq -S . | head
{
"activation_grace_period": 3,
"admin_users": "hxr@local.host",
"admin_users_list": [
"hxr@local.host"
],
"allow_library_path_paste": false,
"allow_path_paste": false,
"allow_user_creation": true,
"allow_user_dataset_purge": true,
EOF
assert_set_env GALAXY_ROOT
assert_set_env GALAXY_CONFIG_FILE
dump_config_python=$(cat <<EOF
import argparse
import json
import os
import sys
sys.path.insert(1, '$GALAXY_ROOT/lib')
import galaxy.config
from galaxy.util.script import app_properties_from_args, populate_config_args
parser = argparse.ArgumentParser()
populate_config_args(parser)
args = parser.parse_args()
args.config_file = '$GALAXY_CONFIG_FILE'
app_properties = app_properties_from_args(args)
config = galaxy.config.Configuration(**app_properties)
sys.stdout.write(json.dumps(config.__dict__, default=lambda o: '<fail>'))
EOF
)
echo "$dump_config_python" | $GXADMIN_PYTHON
}
registered_subcommands="$registered_subcommands mutate"
_mutate_short_help="DB Mutations, CSV/TSV queries are NOT available"
txn_prefix() {
if [[ "$1" == "--very-unsafe" ]]; then
printf "";
else
printf "BEGIN TRANSACTION;\n"
fi
}
txn_postfix() {
if [[ "$1" == "--commit" ]] || [[ "$1" == "1" ]]; then
printf "\nCOMMIT;\n"
elif [[ "$1" == "--very-unsafe" ]]; then
printf "";
else
printf "\nROLLBACK;\n"
fi
}
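# Every mutate_* command below builds $QUERY and wraps it with txn_prefix /
# txn_postfix, so by default the statement runs inside a transaction that is
# rolled back (a dry run). Roughly:
#   (no flag)      BEGIN TRANSACTION; <query>; ROLLBACK;
#   --commit       BEGIN TRANSACTION; <query>; COMMIT;
#   --very-unsafe  <query>;   (no transaction at all)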
mutate_fail-terminal-datasets() { ## [--commit]: Causes the output datasets of jobs which were manually failed, to be marked as failed
handle_help "$@" <<-EOF
Whenever an admin marks a job as failed manually (e.g. by updating the
state in the database), the output datasets are not accordingly updated
by default. And this causes users to mistakenly think their jobs are
still running when they have long since failed.
This command provides a way to select those jobs in error states
(deleted, deleted_new, error, error_manually_dropped,
new_manually_dropped), find their associated output datasets, and fail
them with a blurb mentioning that they should contact the admin in
case of any questions.
Running without any arguments will execute the command within a
transaction and then roll it back, allowing you to see counts of rows
and giving you an idea if it is doing the right thing.
**WARNINGS**
!> This does NOT currently work on collections
**EXAMPLES**
The process is to first query how many datasets will be failed, if this looks correct you're ready to go.
$ gxadmin mutate fail-terminal-datasets
BEGIN
SELECT 1
jobs_per_month_to_be_failed | count
-----------------------------+-------
2019-02-01 00:00:00 | 1
(1 row)
UPDATE 1
UPDATE 1
ROLLBACK
Then to run with the --commit flag to commit the changes
$ gxadmin mutate fail-terminal-datasets --commit
BEGIN
SELECT 1
jobs_per_month_to_be_failed | count
-----------------------------+-------
2019-02-01 00:00:00 | 1
(1 row)
UPDATE 1
UPDATE 1
COMMIT
EOF
# TODO(hxr): support collections
read -r -d '' QUERY <<-EOF
CREATE TEMP TABLE terminal_jobs_temp AS
SELECT
dataset.id as ds_id,
history_dataset_association.id as hda_id,
dataset.create_time AT TIME ZONE 'UTC' as ds_create
FROM
dataset,
history_dataset_association,
job_to_output_dataset,
job
WHERE
dataset.id = history_dataset_association.dataset_id
AND history_dataset_association.id = job_to_output_dataset.dataset_id
AND job.id = job_to_output_dataset.job_id
AND dataset.state IN ('queued', 'running', 'new')
AND job.state
IN ('deleted', 'deleted_new', 'error', 'error_manually_dropped', 'new_manually_dropped');
SELECT
date_trunc('month', ds_create) as jobs_per_month_to_be_failed, count(*)
FROM terminal_jobs_temp
GROUP BY jobs_per_month_to_be_failed
ORDER BY date_trunc('month', ds_create) desc;
UPDATE dataset
SET
state = 'error'
WHERE id in (select ds_id from terminal_jobs_temp);
UPDATE history_dataset_association
SET
blurb = 'execution error',
info = 'This dataset''s job failed and has been manually addressed by a Galaxy administrator. Please use the bug icon to report this if you need assistance.'
WHERE id in (select hda_id from terminal_jobs_temp)
EOF
txn_pre=$(txn_prefix "$1")
txn_pos=$(txn_postfix "$1")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_fail-job() { ## <job_id> [--commit]: Sets a job state to error
handle_help "$@" <<-EOF
Sets a job's state to "error"
EOF
assert_count_ge $# 1 "Must supply a job ID"
id=$1
read -r -d '' QUERY <<-EOF
UPDATE
job
SET
state = 'error'
WHERE
id = '$id'
EOF
txn_pre=$(txn_prefix "$2")
txn_pos=$(txn_postfix "$2")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_fail-history() { ## <history_id> [--commit]: Mark all jobs within a history to state error
handle_help "$@" <<-EOF
Set all jobs within a history to error
EOF
assert_count_ge $# 1 "Must supply a history ID"
id=$1
read -r -d '' QUERY <<-EOF
SELECT
id, state
FROM
job
WHERE
id
IN (
SELECT
job_id
FROM
job_to_output_dataset
WHERE
dataset_id
IN (
SELECT
id
FROM
history_dataset_association
WHERE
history_id = $1
)
)
AND state NOT IN ('ok', 'error')
EOF
txn_pre=$(txn_prefix "$2")
txn_pos=$(txn_postfix "$2")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_delete-group-role() { ## <group_name> [--commit]: Remove the group, role, and any user-group + user-role associations
handle_help "$@" <<-EOF
Wipe out a group+role, and user associations.
EOF
assert_count_ge $# 1 "Must supply a group name"
id=$1
read -r -d '' QUERY <<-EOF
DELETE FROM group_role_association
WHERE group_id = (SELECT id FROM galaxy_group WHERE name = '$1');
DELETE FROM user_group_association
WHERE group_id = (SELECT id FROM galaxy_group WHERE name = '$1');
DELETE FROM user_role_association
WHERE role_id = (SELECT role_id FROM group_role_association WHERE group_id= (SELECT id FROM galaxy_group WHERE name = '$1'));
DELETE FROM role
WHERE id = (SELECT role_id FROM group_role_association WHERE group_id = (SELECT id FROM galaxy_group WHERE name = '$1'));
DELETE FROM galaxy_group
WHERE name = '$1'
EOF
txn_pre=$(txn_prefix "$2")
txn_pos=$(txn_postfix "$2")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_assign-unassigned-workflows() { ## <handler_prefix> <handler_count> [--commit]: Randomly assigns unassigned workflows to handlers. Workaround for galaxyproject/galaxy#8209
handle_help "$@" <<-EOF
Workaround for https://github.com/galaxyproject/galaxy/issues/8209
Handler names should have a number as a suffix, e.g. "some_string_##". In
this case handler_prefix is "some_string_" and count is however many
handlers you want to schedule workflows across.
EOF
assert_count_ge $# 1 "Must supply a handler_prefix"
assert_count_ge $# 2 "Must supply a count"
prefix=$1
count=$2
read -r -d '' QUERY <<-EOF
UPDATE workflow_invocation
SET handler = '$prefix' || (random() * $count)::integer
WHERE state = 'new' and handler = '_default_'
RETURNING workflow_invocation.id
EOF
txn_pre=$(txn_prefix "$3")
txn_pos=$(txn_postfix "$3")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_reassign-workflows-to-handler() { ## <handler_from> <handler_to> [--commit]: Reassign workflows in 'new' state to a different handler.
handle_help "$@" <<-EOF
Another workaround for https://github.com/galaxyproject/galaxy/issues/8209
Need to use the full handler names e.g. handler_main_0
EOF
assert_count_ge $# 1 "Must supply a handler_from"
assert_count_ge $# 2 "Must supply a handler_to"
read -r -d '' QUERY <<-EOF
UPDATE workflow_invocation
SET handler = '$2'
WHERE state = 'new' and handler = '$1'
RETURNING workflow_invocation.id
EOF
txn_pre=$(txn_prefix "$3")
txn_pos=$(txn_postfix "$3")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_approve-user() { ## <username|email|user_id>: Approve a user in the database
handle_help "$@" <<-EOF
There is no --commit flag on this because it is relatively safe
EOF
assert_count_ge $# 1 "Must supply a username/email/user-id"
user_filter=$(get_user_filter "$1")
read -r -d '' QUERY <<-EOF
UPDATE galaxy_user
SET active = true
WHERE $user_filter
EOF
}
mutate_oidc-role-find-affected() { ## : Find users affected by galaxyproject/galaxy#8244
handle_help "$@" <<-EOF
Workaround for https://github.com/galaxyproject/galaxy/issues/8244
This finds all of the OIDC authenticated users which do not have any
roles associated to them. (Should be sufficient?)
EOF
read -r -d '' QUERY <<-EOF
SELECT
user_id
FROM
oidc_user_authnz_tokens AS ouat
WHERE
ouat.user_id NOT IN (SELECT user_id FROM user_role_association)
EOF
# Not proud of this.
FLAVOR='tsv'
}
mutate_oidc-role-fix() { ## <username|email|user_id>: Fix permissions for users logged in via OIDC. Workaround for galaxyproject/galaxy#8244
handle_help "$@" <<-EOF
Workaround for https://github.com/galaxyproject/galaxy/issues/8244
EOF
user_filter=$(get_user_filter "$1")
# Coerce to user ID
read -r -d '' qstr <<-EOF
SELECT id, email
FROM galaxy_user
WHERE $user_filter
EOF
echo "QUERY: $qstr"
results="$(query_tsv "$qstr")"
echo "RESULTS: $results"
user_id=$(echo "$results" | awk '{print $1}')
email=$(echo "$results" | awk '{print $2}')
# check if there is an existing role.
read -r -d '' qstr <<-EOF
select count(*) from user_role_association left join role on user_role_association.role_id = role.id where user_id = $user_id and role.type = 'private'
EOF
echo "QUERY: $qstr"
results="$(query_tsv "$qstr")"
echo "RESULTS: $results"
# Some (mild) sanity checking. Should probably wrap this up in something nicer.
if (( results != 0 )); then
error "A default private role already exists for this account."
exit 1
fi
# Create the role since it does not exist
read -r -d '' qstr <<-EOF
INSERT INTO role (create_time, update_time, name, description, type, deleted)
VALUES (now(), now(), '$email', 'Private Role for $email', 'private', false)
RETURNING id
EOF
echo "QUERY: $qstr"
role_id="$(query_tsv "$qstr")"
echo "RESULTS: $results"
# Associate with the user
read -r -d '' qstr <<-EOF
INSERT INTO user_role_association (user_id, role_id, create_time, update_time)
VALUES ($user_id, $role_id, now(), now())
EOF
echo "QUERY: $qstr"
query_tbl "$qstr"
# Insert into their personal default user permissions
read -r -d '' qstr <<-EOF
INSERT INTO default_user_permissions (user_id, action, role_id)
VALUES ($user_id, 'manage permissions', $role_id), ($user_id, 'access', $role_id)
EOF
echo "QUERY: $qstr"
results="$(query_tbl "$qstr")"
echo "RESULTS: $results"
# Fix dataset_permissions. Manage rows are created but have a null entry in their role_id
read -r -d '' qstr <<-EOF
UPDATE dataset_permissions
SET role_id = $role_id
WHERE
id
IN (
SELECT
id
FROM
dataset_permissions
WHERE
dataset_id
IN (
SELECT
ds.id
FROM
history AS h
LEFT JOIN history_dataset_association AS hda ON
h.id = hda.history_id
LEFT JOIN dataset AS ds ON hda.dataset_id = ds.id
WHERE
h.user_id = $user_id
)
AND role_id IS NULL
AND action = 'manage permissions'
)
EOF
echo "QUERY: $qstr"
results="$(query_tbl "$qstr")"
echo "RESULTS: $results"
# We could update history permissions but this would make those histories
# private and the current behaviour is that they're public, if the user has
# tried to share, then we'd be changing permissions and going against user
# expectations.
read -r -d '' QUERY <<-EOF
select 'done'
EOF
}
mutate_reassign-job-to-handler() { ## <job_id> <handler_id> [--commit]: Reassign a job to a different handler
handle_help "$@" <<-EOF
EOF
assert_count_ge $# 2 "Must supply a job and handler ID"
job_id=$1
handler_id=$2
read -r -d '' QUERY <<-EOF
UPDATE
job
SET
handler = '$handler_id'
WHERE
job.id = $job_id
EOF
txn_pre=$(txn_prefix "$3")
txn_pos=$(txn_postfix "$3")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_drop-extraneous-workflow-step-output-associations() { ## [--commit]: #8418, drop extraneous connection
handle_help "$@" <<-EOF
Per https://github.com/galaxyproject/galaxy/pull/8418, this drops the
workflow step output associations that are not necessary.
This only needs to be run once, on servers which have run Galaxy<=19.05
to remove duplicate entries in the following tables:
- workflow_invocation_step_output_dataset_association
- workflow_invocation_step_output_dataset_collection_association
EOF
read -r -d '' QUERY <<-EOF
WITH exclude_list AS (
SELECT max(w.id) as id
FROM workflow_invocation_step_output_dataset_association as w
GROUP BY workflow_invocation_step_id, dataset_id, output_name
)
DELETE
FROM workflow_invocation_step_output_dataset_association wisoda
WHERE NOT EXISTS (SELECT 1 FROM exclude_list WHERE wisoda.id = exclude_list.id);
WITH exclude_list AS (
SELECT max(w.id) as id
FROM workflow_invocation_step_output_dataset_collection_association as w
GROUP BY workflow_invocation_step_id, dataset_collection_id, output_name
)
DELETE
FROM workflow_invocation_step_output_dataset_collection_association wisodca
WHERE NOT EXISTS (SELECT 1 FROM exclude_list WHERE wisodca.id = exclude_list.id)
EOF
txn_pre=$(txn_prefix "$1")
txn_pos=$(txn_postfix "$1")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_restart-jobs() { ## [--commit] <-|job_id [job_id [...]]> : Restart some jobs
handle_help "$@" <<-EOF
Restart jobs
EOF
commit_flag=""
if [[ $1 == "--commit" ]]; then
commit_flag="$1"
shift;
fi
if [[ "$1" == "-" ]]; then
# read jobs from stdin
job_ids=$(cat | paste -s -d' ')
else
# read from $@
job_ids=$@;
fi
job_ids_string=$(join_by ',' ${job_ids[@]})
read -r -d '' QUERY <<-EOF
UPDATE job
SET state = 'new'
WHERE job.id in ($job_ids_string)
EOF
txn_pre=$(txn_prefix "$commit_flag")
txn_pos=$(txn_postfix "$commit_flag")
QUERY="$txn_pre $QUERY; $txn_pos"
}
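# Usage sketch (job IDs below are made up): pass IDs directly,
#   gxadmin mutate restart-jobs --commit 123 456
# or pipe a newline-separated list in via '-':
#   printf '123\n456\n' | gxadmin mutate restart-jobs --commit -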
mutate_generate-unset-api-keys() { ## [--commit]: Generate API keys for users which do not have one set.
handle_help "$@" <<-EOF
For some use cases (IEs), it is preferable that everyone has an API
key set for them, if they don't choose to set one themselves. So we set
a base64'd key to be a bit extra secure just in case. These work just
fine like hex keys.
EOF
commit_flag=""
if [[ $1 == "--commit" ]]; then
commit_flag="$1"
shift;
fi
read -r -d '' QUERY <<-EOF
INSERT INTO api_keys (create_time, user_id, key)
(
SELECT
now(),
galaxy_user.id,
substring(regexp_replace(encode(random_bytea(64), 'base64')::TEXT, '[^a-z0-9A-Z]', '', 'g'), 0, 32)
FROM
galaxy_user LEFT JOIN api_keys ON galaxy_user.id = api_keys.user_id
WHERE
api_keys.key IS NULL
)
EOF
txn_pre=$(txn_prefix "$commit_flag")
txn_pos=$(txn_postfix "$commit_flag")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_anonymise-db-for-release() { ## [--commit|--very-unsafe]: This will attempt to make a database completely safe to release publicly.
handle_help "$@" <<-EOF
THIS WILL DESTROY YOUR DATABASE.
--commit will do it and wrap it in a transaction
--very-unsafe will just run it without the transaction
EOF
commit_flag=""
if [[ "$1" == "--commit" ]] || [[ "$1" == "--very-unsafe" ]]; then
commit_flag="$1"
shift;
fi
read -r -d '' QUERY <<-EOF
--DONE
CREATE OR REPLACE FUNCTION gxadmin_digest_value(input text)
RETURNS float
AS \$\$
SELECT
('0.' || abs(('x' || substr(md5(current_date ||input::text),1,16))::bit(64)::bigint))::float
\$\$ LANGUAGE SQL STABLE;
--DONE
CREATE OR REPLACE FUNCTION gxadmin_random_seed(input text, size integer)
RETURNS table(value float)
AS \$\$
SELECT gxadmin_digest_value(input::text || generate_series) as value
FROM generate_series(1, size)
\$\$ LANGUAGE SQL STABLE;
-- DONE
CREATE OR REPLACE FUNCTION gxadmin_random_number(input text, size integer)
RETURNS integer
AS \$\$
SELECT (power(10, size) * gxadmin_digest_value(input))::integer
\$\$ LANGUAGE SQL IMMUTABLE;
-- DONE
CREATE OR REPLACE FUNCTION gxadmin_random_word(input text, size integer)
RETURNS text
AS \$\$
SELECT array_to_string(
ARRAY(
SELECT chr((97 + round(value * 25)) :: integer)
FROM gxadmin_random_seed(input, size)
),
''
);
\$\$ LANGUAGE SQL STABLE;
-- DONE
CREATE OR REPLACE FUNCTION gxadmin_random_slug(input text, size integer)
RETURNS text
AS \$\$
SELECT
substr(
gxadmin_random_word(input || 'a' || size, 5 + (gxadmin_digest_value(input || 'a' || size) * size)::integer)
|| '-' ||
gxadmin_random_word(input || 'b' || size, 3 + (gxadmin_digest_value(input || 'b' || size) * size)::integer)
|| '-' ||
gxadmin_random_word(input || 'c' || size, 4 + (gxadmin_digest_value(input || 'c' || size) * size)::integer)
, 1, size)
\$\$ LANGUAGE SQL STABLE;
-- DONEish
CREATE OR REPLACE FUNCTION gxadmin_random_email(input text)
RETURNS text
AS \$\$
with tmp as (
select gxadmin_digest_value(input) as a
)
SELECT
CASE
WHEN a < 0.3 THEN gxadmin_random_number(input, 7) || '@example.com'
WHEN a < 0.8 THEN gxadmin_random_slug(input, 6) || '@example.com'
ELSE gxadmin_random_slug(input, 20) || '@example.com'
END
from tmp
\$\$ LANGUAGE SQL STABLE;
-- DONE
CREATE OR REPLACE FUNCTION gxadmin_random_ip(input text)
RETURNS text
AS \$\$
-- Uses documentation range IPs
with tmp as (
select gxadmin_digest_value(input::text) as a
)
SELECT
CASE
WHEN a < 0.7 THEN '192.0.2.' || (gxadmin_digest_value(input) * 255)::integer
ELSE '2001:0db8:85a3:8d3:1319:8a2e:0370:' || to_hex((gxadmin_digest_value(input) * 65536)::integer)
END
from tmp
\$\$ LANGUAGE SQL STABLE;
-- DONE
CREATE OR REPLACE FUNCTION gxadmin_random_string(input text, size integer, start integer, width integer)
RETURNS text
AS \$\$
SELECT array_to_string(
ARRAY(
SELECT chr((start + round(value * width)) :: integer)
FROM gxadmin_random_seed(input, size)
),
''
);
\$\$ LANGUAGE SQL STABLE;
-- DONE
CREATE OR REPLACE FUNCTION gxadmin_random_tag(input text)
RETURNS text
AS \$\$
with tmp as (
select gxadmin_digest_value(input) as a
)
SELECT
CASE
WHEN a < 0.1 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 65, 26)
WHEN a < 0.2 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 192, 200)
WHEN a < 0.3 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 1025, 100)
WHEN a < 0.4 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 5121, 200)
WHEN a < 0.5 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 9728, 400)
WHEN a < 0.6 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 14300, 100)
WHEN a < 0.8 THEN gxadmin_random_slug(input, 3 + gxadmin_random_number(input, 1))
ELSE gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 48, 10)
END
from tmp
\$\$ LANGUAGE SQL;
-- DONE
CREATE OR REPLACE FUNCTION gxadmin_random_pw(input text, size integer)
RETURNS text
AS \$\$
SELECT gxadmin_random_string(input, size, 48, 59);
\$\$ LANGUAGE SQL STABLE;
-- DONE
CREATE OR REPLACE FUNCTION gxadmin_random_text(input text, size integer)
RETURNS text
AS \$\$
SELECT array_to_string(
ARRAY(
SELECT gxadmin_random_tag(input || generate_series)
FROM generate_series(1, size)
),
' '
);
\$\$ LANGUAGE SQL;
-- DONE
-- https://stackoverflow.com/a/48901446
CREATE OR REPLACE FUNCTION gxadmin_round_sf(n numeric, digits integer)
RETURNS numeric
AS \$\$
SELECT floor(n / (10 ^ floor(log(n) - digits + 1))) * (10 ^ floor(log(n) - digits + 1))
\$\$ LANGUAGE SQL IMMUTABLE STRICT;
-- api_keys
COPY (SELECT 'api_keys') to STDOUT;
update api_keys set key = 'random-key-' || user_id;
-- cleanup_event is OK
-- cleanup_event_dataset_association is OK
-- cleanup_event_hda_association is OK
-- cleanup_event_history_association is OK
-- cleanup_event_icda_association is OK
-- cleanup_event_ldda_association is EMPTY on AU
-- cleanup_event_library_association is EMPTY on AU
-- cleanup_event_library_dataset_association is EMPTY on AU
-- cleanup_event_library_folder_association is EMPTY on AU
-- cleanup_event_metadata_file_association is OK
-- cleanup_event_user_association is EMPTY on AU, EU
-- cloudauthz is EMPTY on AU, EU
-- custos_authnz_token is EMPTY on AU, EU
-- data_manager_history_association is OK (user_id, history_id)
-- data_manager_job_association is OK (job_id, DM id)
-- dataset
COPY (SELECT 'dataset') to STDOUT;
-- TODO: Do external_filename, and _extra_files_path need to be cleaned?
-- TODO: this is imperfect, we do something better in GRT where we take 2 SDs
-- https://stackoverflow.com/questions/48900936/postgresql-rounding-to-significant-figures
-- MAYBE use synthetic data here?
update dataset set
file_size = round(file_size, -3),
total_size = round(total_size, -3);
-- dataset_collection is OK (type, count, etc.)
-- dataset_collection_element
COPY (SELECT 'dataset_collection_element') to STDOUT;
update dataset_collection_element set
element_identifier = gxadmin_random_slug(id::text, 10);
-- dataset_hash is EMPTY on AU, EU
-- dataset_permissions is OK (role, dataset)
-- dataset_source
COPY (SELECT 'dataset_source') to STDOUT;
update dataset_source set
source_uri = 'https://example.org/test.dat';
-- dataset_source_hash is EMPTY on AU, EU
-- dataset_tag_association is EMPTY on AU, EU
-- default_history_permissions is OK (hist_id, action, role_id)
-- default_quota_association is OK
-- default_user_permissions is OK (user, action, role)
-- deferred_job is EMPTY on AU, EU
-- dynamic_tool is EMPTY on AU, EU
-- event is EMPTY on AU, EU
-- extended_metadata is EMPTY on AU, EU
-- extended_metadata_index is EMPTY on AU, EU
-- external_service is EMPTY on AU, EU
-- form_definition is EMPTY on AU, EU
-- form_definition_current is EMPTY on AU, EU
-- form_values is EMPTY on AU, EU
-- galaxy_group
COPY (SELECT 'galaxy_group') to STDOUT;
update galaxy_group set
name = 'group-' || id;
-- galaxy_session
COPY (SELECT 'galaxy_session') to STDOUT;
update galaxy_session set
remote_host = gxadmin_random_ip(remote_host)
where remote_host is not null;
update galaxy_session set
remote_addr = gxadmin_random_ip(remote_addr)
where remote_addr is not null;
update galaxy_session set
referer = 'https://example.org'
where referer is not null;
update galaxy_session set
session_key = gxadmin_random_pw(id::text, 16), prev_session_id = null;
-- galaxy_session_to_history is OK (time, session_id, hist_id)
-- galaxy_user
COPY (SELECT 'galaxy_user') to STDOUT;
-- TODO: better email length/content distribution
-- TODO: better username length/content distribution. UNICODE.
-- TODO: rounding to SDs.
update galaxy_user set
email = 'user-' || id || '@example.org',
password = 'x',
username = 'user-' || id,
form_values_id = null,
activation_token = '',
disk_usage = round(disk_usage, -5);
-- galaxy_user_openid
COPY (SELECT 'galaxy_user_openid') to STDOUT;
update galaxy_user_openid set
openid = 'https://example.org/identity/' || user_id;
-- genome_index_tool_data is EMPTY on AU, EU
-- group_quota_association is OK (group_id, quota id)
-- group_role_association is OK (group_id, role id)
-- history
COPY (SELECT 'history') to STDOUT;
-- TODO: better name distribution. UNICODE. (I see greek, chinese, etc on EU)
update history set
name = gxadmin_random_tag(id::text)
where name != 'Unnamed history';
update history set
slug = gxadmin_random_slug(id::text, 30)
where slug != '' or slug is not null;
-- history_annotation_association
COPY (SELECT 'history_annotation_association') to STDOUT;
-- TODO: better distribution. UNICODE.
update history_annotation_association set
annotation = gxadmin_random_text(id::text, 30)
where annotation is not null;
-- history_dataset_association
COPY (SELECT 'history_dataset_association') to STDOUT;
-- TODO: SIGNIFICANT validation needed.
update history_dataset_association set
name = gxadmin_random_tag(id::text) || '.' || extension;
update history_dataset_association set
peek = 'redacted' where peek is not null;
update history_dataset_association set
designation = 'hda-' || id::text
where designation is not null;
update history_dataset_association set
info = 'redacted'
where info is not null and info != 'Job output deleted by user before job completed';
update history_dataset_association set
metadata = null;
-- history_dataset_association_annotation_association
COPY (SELECT 'history_dataset_association_annotation_association') to STDOUT;
update history_dataset_association_annotation_association set
annotation = gxadmin_random_text(id::text, 30)
where annotation is not null;
-- history_dataset_association_display_at_authorization is OK (user_id, hda, site=ucsc_main)
-- history_dataset_association_history
COPY (SELECT 'history_dataset_association_history') to STDOUT;
-- TODO: distribution. Consistency with HDA?
update history_dataset_association_history set
name = gxadmin_random_tag(history_dataset_association_id::text) || '.' || extension;
update history_dataset_association_history set
metadata = null;
-- history_dataset_association_rating_association is EMPTY on AU, EU
-- history_dataset_association_subset is OK (hda id, hdas id, location)
-- history_dataset_association_tag_association
COPY (SELECT 'history_dataset_association_tag_association') to STDOUT;
-- user_tname == 'group' and 'name' are special.
update history_dataset_association_tag_association set
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname in ('group', 'name');
-- Sometimes people use a different prefix
update history_dataset_association_tag_association set
user_tname = gxadmin_random_tag(id::text || 'tname'),
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname not in ('group', 'name') and user_tname is not null;
-- Otherwise just set some data
update history_dataset_association_tag_association set
user_tname = gxadmin_random_tag(id::text)
where user_tname is null;
-- history_dataset_collection_annotation_association is EMPTY on AU, EU
-- history_dataset_collection_association
COPY (SELECT 'history_dataset_collection_association') to STDOUT;
update history_dataset_collection_association set
name = 'hdca-' || id;
-- history_dataset_collection_rating_association is EMPTY on AU
-- history_dataset_collection_tag_association
COPY (SELECT 'history_dataset_collection_tag_association') to STDOUT;
-- user_tname == 'group' and 'name' are special.
update history_dataset_collection_tag_association set
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname in ('group', 'name');
-- Sometimes people use a different prefix
update history_dataset_collection_tag_association set
user_tname = gxadmin_random_tag(id::text || 'tname'),
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname not in ('group', 'name') and user_tname is not null;
-- Otherwise just set some data
update history_dataset_collection_tag_association set
user_tname = gxadmin_random_tag(id::text)
where user_tname is null;
-- history_rating_association is OK
-- history_tag_association
COPY (SELECT 'history_tag_association') to STDOUT;
-- user_tname == 'group' and 'name' are special.
update history_tag_association set
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname in ('group', 'name');
-- Sometimes people use a different prefix
update history_tag_association set
user_tname = gxadmin_random_tag(id::text || 'tname'),
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname not in ('group', 'name') and user_tname is not null;
-- Otherwise just set some data
update history_tag_association set
user_tname = gxadmin_random_tag(id::text)
where user_tname is null;
-- history_user_share_association is MAYBE OK (hist_id, user_id) You can determine networks, but same with groups.
-- implicit_collection_jobs is OK (id, pop state)
-- implicit_collection_jobs_job_association is OK
-- implicitly_converted_dataset_association is OK
-- implicitly_created_dataset_collection_inputs is OK
-- interactivetool_entry_point is EMPTY on AU
-- job
COPY (SELECT 'job') to STDOUT;
update job set
command_line = 'redacted',
destination_params = null,
dependencies = null;
update job set
tool_stdout = 'redacted'
where tool_stdout is not null;
update job set
tool_stderr = 'redacted'
where tool_stderr is not null;
update job set
traceback = 'redacted'
where traceback is not null;
update job set
params = 'redacted'
where params is not null;
update job set
job_messages = 'redacted'
where job_messages is not null;
update job set
job_stdout = 'redacted'
where job_stdout is not null;
update job set
job_stderr = 'redacted'
where job_stderr is not null;
-- job_container_association
COPY (SELECT 'job_container_association') to STDOUT;
update job_container_association set
container_name = '',
container_info = '';
-- job_export_history_archive
COPY (SELECT 'job_export_history_archive') to STDOUT;
update job_export_history_archive set
history_attrs_filename = '/tmp/tmp' || gxadmin_random_pw(id::text || 'h', 6),
datasets_attrs_filename = '/tmp/tmp' || gxadmin_random_pw(id::text || 'd', 6),
jobs_attrs_filename = '/tmp/tmp' || gxadmin_random_pw(id::text || 'j', 6);
-- job_external_output_metadata
COPY (SELECT 'job_external_output_metadata') to STDOUT;
update job_external_output_metadata set
filename_in = '/tmp/job_working_directory/' || job_id || '/metadata_in_' || gxadmin_random_pw(id::text || 'i', 6),
filename_out = '/tmp/job_working_directory/' || job_id || '/metadata_out_' || gxadmin_random_pw(id::text || 'o', 6),
filename_results_code = '/tmp/job_working_directory/' || job_id || '/metadata_results_' || gxadmin_random_pw(id::text || 'm', 6),
filename_kwds = '/tmp/job_working_directory/' || job_id || '/metadata_kwds_' || gxadmin_random_pw(id::text || 'k', 6),
filename_override_metadata = '/tmp/job_working_directory/' || job_id || '/metadata_override_' || gxadmin_random_pw(id::text || 'o', 6);
-- job_import_history_archive
COPY (SELECT 'job_import_history_archive') to STDOUT;
update job_import_history_archive set
archive_dir = '/tmp/tmp' || gxadmin_random_pw(id::text, 6);
-- job_metric_numeric is OK
-- job_metric_text
COPY (SELECT 'job_metric_text') to STDOUT;
truncate job_metric_text;
-- job_parameter
COPY (SELECT 'job_parameter') to STDOUT;
-- TODO: length distribution? Name distribution?
update job_parameter set
name = 'param-name',
value = 'param-value';
-- job_state_history is OK
-- job_to_implicit_output_dataset_collection is MAYBE OK (param name?)
-- job_to_input_dataset is MAYBE OK (param name?)
-- job_to_input_dataset_collection is MAYBE OK (param name?)
-- job_to_input_library_dataset is EMPTY on AU
-- job_to_output_dataset is MAYBE OK (param name?)
-- job_to_output_dataset_collection is MAYBE OK (param name?)
-- job_to_output_library_dataset is OK
-- kombu_message
COPY (SELECT 'kombu_message') to STDOUT;
truncate kombu_message;
-- kombu_queue is OK (queue name only)
-- library
COPY (SELECT 'library') to STDOUT;
update library set
name = gxadmin_random_tag(id::text),
description = gxadmin_random_tag(id::text || 'desc'),
synopsis = gxadmin_random_tag(id::text || 'synopsis');
-- library_dataset
COPY (SELECT 'library_dataset') to STDOUT;
update library_dataset set
name = 'lda-' || id,
info = '';
-- library_dataset_collection_annotation_association is EMPTY on AU, EU
-- library_dataset_collection_association is EMPTY on AU, EU
-- library_dataset_collection_rating_association is EMPTY on AU, EU
-- library_dataset_collection_tag_association is EMPTY on AU, EU
-- library_dataset_dataset_association
COPY (SELECT 'library_dataset_dataset_association') to STDOUT;
-- TODO: SIGNIFICANT validation needed.
update library_dataset_dataset_association set
name = gxadmin_random_tag(id::text) || '.' || extension;
update library_dataset_dataset_association set
peek = 'redacted' where peek is not null;
update library_dataset_dataset_association set
designation = 'hda-' || id::text
where designation is not null;
update library_dataset_dataset_association set
info = 'redacted' where info is not null;
update library_dataset_dataset_association set
message = 'redacted' where message is not null;
update library_dataset_dataset_association set
metadata = null;
-- library_dataset_dataset_association_permissions is OK (permissions, ldda id, role id)
-- library_dataset_dataset_association_tag_association is EMPTY on AU, EU
-- library_dataset_dataset_info_association is EMPTY on AU, EU
-- library_dataset_permissions is OK (permissions, ld id, role id)
-- library_folder
COPY (SELECT 'library_folder') to STDOUT;
update library_folder set
name = gxadmin_random_tag(id::text),
description = gxadmin_random_text(id::text || 'desc', 10);
-- library_folder_info_association is EMPTY on AU, EU
-- library_folder_permissions is OK (permissions, lf id, role id)
-- library_info_association is EMPTY on AU
-- library_permissions is OK (permissions, lib id, role id)
-- metadata_file is OK
-- migrate_tools is EMPTY on AU, EU
-- migrate_version is OK
-- oidc_user_authnz_tokens
COPY (SELECT 'oidc_user_authnz_tokens') to STDOUT;
update oidc_user_authnz_tokens set
uid = 'redacted',
extra_data = null;
-- page
COPY (SELECT 'page') to STDOUT;
update page set
title = gxadmin_random_tag(id::text),
slug = gxadmin_random_slug(id::text, 10);
-- page_annotation_association
COPY (SELECT 'page_annotation_association') to STDOUT;
update page_annotation_association set
annotation = gxadmin_random_tag(id::text);
-- page_rating_association is EMPTY on AU
-- page_revision
COPY (SELECT 'page_revision') to STDOUT;
update page_revision set
content = '<p>Some <b>content</b></p>';
-- page_tag_association is EMPTY on AU
-- user_tname == 'group' and 'name' are special.
update page_tag_association set
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname in ('group', 'name');
-- Sometimes people use a different prefix
update page_tag_association set
user_tname = gxadmin_random_tag(id::text || 'tname'),
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname not in ('group', 'name') and user_tname is not null;
-- Otherwise just set some data
update page_tag_association set
user_tname = gxadmin_random_tag(id::text)
where user_tname is null;
-- page_user_share_association is EMPTY on AU, fine though
-- password_reset_token
COPY (SELECT 'password_reset_token') to STDOUT;
update password_reset_token set
token = md5('x' || random());
-- post_job_action
COPY (SELECT 'post_job_action') to STDOUT;
update post_job_action set
action_arguments = convert_to('{}', 'UTF8')
where action_type != 'RenameDatasetAction';
update post_job_action set
action_arguments = convert_to('{"newname": "Control check"}', 'UTF8')
where action_type = 'RenameDatasetAction';
-- post_job_action_association is OK
-- psa_association
COPY (SELECT 'psa_association') to STDOUT;
update psa_association set
handle = 'redacted',
assoc_type = 'redacted';
-- psa_code is EMPTY on AU, EU
-- psa_nonce is EMPTY on AU, EU
-- psa_partial is EMPTY on AU, EU
-- quota
COPY (SELECT 'quota') to STDOUT;
update quota set
name = pg_size_pretty(bytes),
description = '';
-- repository_dependency is EMPTY on AU, OK on EU (ts repo id)
-- repository_repository_dependency_association is EMPTY on AU, OK on EU
-- request is EMPTY on AU, EU
-- request_event is EMPTY on AU, EU
-- request_type is EMPTY on AU, EU
-- request_type_external_service_association is EMPTY on AU, EU
-- request_type_permissions is EMPTY on AU, EU
-- request_type_run_association is EMPTY on AU, EU
-- role
COPY (SELECT 'role') to STDOUT;
update role set
name = gxadmin_random_email(id::text),
description = 'Private role for ' || gxadmin_random_email(id::text)
where type = 'private';
update role set
name = gxadmin_random_text(id::text, 3),
description = 'System role'
where type = 'system';
update role set
name = gxadmin_random_text(id::text, 5),
description = 'Sharing role'
where type = 'sharing';
update role set
name = gxadmin_random_text(id::text, 2),
description = ''
where type not in ('private', 'system', 'sharing');
-- run is EMPTY on AU, EU
-- sample is EMPTY on AU, EU
-- sample_dataset is EMPTY on AU, EU
-- sample_event is EMPTY on AU, EU
-- sample_run_association is EMPTY on AU, EU
-- sample_state is EMPTY on AU, EU
-- stored_workflow
COPY (SELECT 'stored_workflow') to STDOUT;
update stored_workflow set
name = gxadmin_random_tag(id::text),
slug = gxadmin_random_slug(id::text, 10);
-- stored_workflow_annotation_association
COPY (SELECT 'stored_workflow_annotation_association') to STDOUT;
update stored_workflow_annotation_association set
annotation = gxadmin_random_text(id::text, 10)
where annotation is not null;
-- stored_workflow_menu_entry is OK
-- stored_workflow_rating_association is OK
-- stored_workflow_tag_association
COPY (SELECT 'stored_workflow_tag_association') to STDOUT;
-- user_tname == 'group' and 'name' are special.
update stored_workflow_tag_association set
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname in ('group', 'name');
-- Sometimes people use a different prefix
update stored_workflow_tag_association set
user_tname = gxadmin_random_tag(tag_id::text),
value = lower(gxadmin_random_tag(id::text)),
user_value = gxadmin_random_tag(id::text)
where user_tname not in ('group', 'name') and user_tname is not null;
-- Otherwise just set some data
update stored_workflow_tag_association set
user_tname = gxadmin_random_tag(tag_id::text)
where user_tname is null;
-- stored_workflow_user_share_connection is OK
-- tag
COPY (SELECT 'tag') to STDOUT;
update tag set
name = gxadmin_random_tag(id::text)
where name not in ('name', 'group');
-- task is EMPTY on AU, not on EU
COPY (SELECT 'task') to STDOUT;
update task set
command_line = '',
param_filename = '',
runner_name = '',
tool_stdout = '',
tool_stderr = '',
task_runner_name = '',
task_runner_external_id = '',
prepare_input_files_cmd = '',
working_directory = '',
info = '',
job_messages = '',
job_stdout = '',
job_stderr = '';
-- task_metric_numeric is EMPTY on AU, EU
-- task_metric_text is EMPTY on AU, EU
-- tool_dependency is EMPTY on AU, OK on EU
-- tool_shed_repository is EMPTY on AU, OK on EU
-- tool_tag_association is EMPTY on AU, EU
-- tool_version is EMPTY on AU, OK on EU
-- tool_version_association is EMPTY on AU, EU
-- transfer_job is EMPTY on AU, OK on EU
-- user_action is EMPTY on AU, EU
-- user_address
COPY (SELECT 'user_address') to STDOUT;
update user_address set
"desc" = gxadmin_random_text(id::text, 1),
name = gxadmin_random_text(id::text || 'name', 1),
institution = gxadmin_random_text(id::text || 'inst', 1),
address = gxadmin_random_text(id::text || 'addr', 1),
city = gxadmin_random_text(id::text || 'city', 1),
state = gxadmin_random_text(id::text || 'stat', 1),
postal_code = gxadmin_random_number(id::text, 5),
country = 'Australia',
phone = gxadmin_random_number(id::text, 10);
-- user_group_association is OK
-- user_preference
COPY (SELECT 'user_preference') to STDOUT;
-- TODO: make this better? I just don't feel safe given genomespace tokens, etc.
truncate user_preference;
-- user_quota_association is OK
-- user_role_association is OK
-- validation_error is EMPTY on AU
-- visualization
COPY (SELECT 'visualization') to STDOUT;
update visualization set
title = gxadmin_random_text(id::text, 3),
slug = gxadmin_random_slug(id::text, 20);
-- visualization_annotation_association is EMPTY on AU
update visualization_annotation_association set
annotation = gxadmin_random_text(id::text, 10);
-- visualization_rating_association is EMPTY on AU
-- visualization_revision is MAYBE OK
-- visualization_tag_association is EMPTY on AU
-- visualization_user_share_association is EMPTY on AU
-- worker_process is OK
-- workflow
COPY (SELECT 'workflow') to STDOUT;
update workflow set
name = gxadmin_random_text(id::text, 3);
update workflow set
reports_config = convert_to('{}', 'UTF8')
where reports_config is not null;
-- workflow_invocation is OK
-- workflow_invocation_output_dataset_association is OK
-- workflow_invocation_output_dataset_collection_association is OK
-- workflow_invocation_output_value is EMPTY on AU
-- workflow_invocation_step is OK
-- workflow_invocation_step_output_dataset_association is MAYBE OK (output_name?)
-- workflow_invocation_step_output_dataset_collection_association is MAYBE OK (output_name?)
-- workflow_invocation_to_subworkflow_invocation_association is OK
-- workflow_output is MAYBE OK (output_name)
-- workflow_request_input_parameters is OK
-- workflow_request_input_step_parameter
COPY (SELECT 'workflow_request_input_step_parameter') to STDOUT;
update workflow_request_input_step_parameter set
parameter_value = convert_to('asdf', 'UTF8');
-- workflow_request_step_states
COPY (SELECT 'workflow_request_step_states') to STDOUT;
update workflow_request_step_states set
value = convert_to('{}', 'UTF8');
-- workflow_request_to_input_collection_dataset is OK
-- workflow_request_to_input_dataset is OK
-- workflow_step
COPY (SELECT 'workflow_step') to STDOUT;
update workflow_step set
tool_inputs = convert_to('{}', 'UTF8');
update workflow_step set
label = gxadmin_random_slug(id::text, 10)
where label is not null;
-- workflow_step_annotation_association
COPY (SELECT 'workflow_step_annotation_association') to STDOUT;
update workflow_step_annotation_association set
annotation = gxadmin_random_text(id::text, 10);
-- workflow_step_connection is OK
-- workflow_step_input is OK
-- workflow_step_tag_association is EMPTY on AU, EU
-- workflow_tag_association is EMPTY on AU, EU
EOF
txn_pre=$(txn_prefix "$commit_flag")
txn_pos=$(txn_postfix "$commit_flag")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_fail-wfi() { ## <wf-invocation-id> [--commit]: Sets a workflow invocation state to failed
handle_help "$@" <<-EOF
Sets a workflow invocation's state to "failed"
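Example (the invocation id here is illustrative):
$ gxadmin mutate fail-wfi 12345 --commit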
EOF
assert_count_ge $# 1 "Must supply a wf-invocation-id"
id=$1
read -r -d '' QUERY <<-EOF
UPDATE
workflow_invocation
SET
state = 'failed'
WHERE
id = '$id'
EOF
txn_pre=$(txn_prefix "$2")
txn_pos=$(txn_postfix "$2")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_oidc-by-emails() { ## <email_from> <email_to> [--commit]: Reassign OIDC account between users.
handle_help "$@" <<-EOF
Workaround for users who accidentally create a new account by clicking the OIDC login button when their existing account's email address differs only in case.
Please note that this function is case-sensitive. Fixes https://github.com/galaxyproject/galaxy/issues/9981.
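Example (addresses are illustrative); the OIDC token is moved from the first account to the second:
$ gxadmin mutate oidc-by-emails Alice@example.org alice@example.org --commit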
EOF
assert_count_ge $# 2 "Must supply an email_from and an email_to";
read -r -d '' QUERY <<-EOF
UPDATE oidc_user_authnz_tokens
SET user_id=correctuser.id
FROM (
SELECT id FROM galaxy_user WHERE email='$2'
) AS correctuser
WHERE user_id = (SELECT id FROM galaxy_user WHERE email='$1')
EOF
txn_pre=$(txn_prefix "$3")
txn_pos=$(txn_postfix "$3")
QUERY="$txn_pre $QUERY; $txn_pos"
}
mutate_set-quota-for-oidc-user() { ##? <provider_name> <quota_name> [--commit]: Set quota for OIDC users.
handle_help "$@" <<-EOF
Set quota for OIDC users.
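Example (the provider and quota names here are illustrative):
$ gxadmin mutate set-quota-for-oidc-user elixir default-oidc-quota --commit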
EOF
read -r -d '' QUERY <<-EOF
WITH qid AS (
SELECT id FROM quota WHERE name='$arg_quota_name'
)
DELETE FROM user_quota_association WHERE quota_id = ( SELECT id FROM qid );
WITH qid AS (
SELECT id FROM quota WHERE name='$arg_quota_name'
), t AS (
SELECT user_id, ( SELECT id FROM qid ), now(), now() FROM oidc_user_authnz_tokens WHERE provider='$arg_provider_name'
)
INSERT INTO user_quota_association (user_id, quota_id, create_time, update_time)
SELECT * FROM t
EOF
txn_pre=$(txn_prefix "$arg_commit")
txn_pos=$(txn_postfix "$arg_commit")
QUERY="$txn_pre $QUERY; $txn_pos"
}
registered_subcommands="$registered_subcommands query"
_query_short_help="DB Queries"
_query_long_help="
'query' can be exchanged with 'tsvquery' or 'csvquery' for tab- and comma-separated variants.
In some cases 'iquery' is supported for InfluxDB compatible output.
In all cases 'explainquery' will show you the query plan, in case you need to optimise or index data. 'explainjsonquery' is useful with PEV: http://tatiyants.com/pev/
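For example, 'gxadmin query latest-users' can also be run as 'gxadmin tsvquery latest-users',
'gxadmin csvquery latest-users', or 'gxadmin explainquery latest-users'.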
"
query_latest-users() { ## : 40 recently registered users
handle_help "$@" <<-EOF
Returns 40 most recently registered users
$ gxadmin query latest-users
id | create_time | disk_usage | username | email | groups | active
----+-------------------------------+------------+----------+----------------+-----------------------------------+--------
3 | 2019-03-07 13:06:37.945403+00 | | beverly | b@example.com | | t
2 | 2019-03-07 13:06:23.369201+00 | 826 bytes | alice | a@example.com | | t
1 | 2018-11-19 14:54:30.969713+00 | 869 MB | helena | hxr@local.host | training-asdf training-hogeschool | t
(3 rows)
EOF
username=$(gdpr_safe galaxy_user.username username)
email=$(gdpr_safe galaxy_user.email email)
read -r -d '' QUERY <<-EOF
SELECT
id,
create_time AT TIME ZONE 'UTC' as create_time,
pg_size_pretty(disk_usage) as disk_usage,
$username,
$email,
array_to_string(ARRAY(
select galaxy_group.name from galaxy_group where id in (
select group_id from user_group_association where user_group_association.user_id = galaxy_user.id
)
), ' ') as groups,
active
FROM galaxy_user
ORDER BY create_time desc
LIMIT 40
EOF
}
query_tool-usage() { ##? [weeks]: Counts of tool runs in the past weeks (default = all)
handle_help "$@" <<-EOF
$ gxadmin tool-usage
tool_id | count
------------------------------------------------------------------------+--------
toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/1.1.0 | 958154
Grouping1 | 638890
toolshed.g2.bx.psu.edu/repos/devteam/intersect/gops_intersect_1/1.0.0 | 326959
toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0 | 320236
addValue | 313470
toolshed.g2.bx.psu.edu/repos/devteam/join/gops_join_1/1.0.0 | 312735
upload1 | 103595
toolshed.g2.bx.psu.edu/repos/rnateam/graphclust_nspdk/nspdk_sparse/9.2 | 52861
Filter1 | 43253
EOF
where=
if (( arg_weeks > 0 )); then
where="WHERE j.create_time > (now() - '${arg_weeks} weeks'::interval)"
fi
fields="count=1"
tags="tool_id=0"
read -r -d '' QUERY <<-EOF
SELECT
j.tool_id, count(*) AS count
FROM job j
$where
GROUP BY j.tool_id
ORDER BY count DESC
EOF
}
query_tool-usage-over-time() { ##? [searchterm]: Counts of tool runs by month, filtered by a tool id search
handle_help "$@" <<-EOF
Output is grouped by month; the example below is abbreviated and omits the month column.
$ gxadmin tool-usage-over-time
tool_id | count
------------------------------------------------------------------------+--------
toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/1.1.0 | 958154
Grouping1 | 638890
toolshed.g2.bx.psu.edu/repos/devteam/intersect/gops_intersect_1/1.0.0 | 326959
toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0 | 320236
addValue | 313470
toolshed.g2.bx.psu.edu/repos/devteam/join/gops_join_1/1.0.0 | 312735
upload1 | 103595
toolshed.g2.bx.psu.edu/repos/rnateam/graphclust_nspdk/nspdk_sparse/9.2 | 52861
Filter1 | 43253
EOF
where=
if [[ "$arg_searchterm" != "" ]]; then
where="WHERE tool_id like '%$arg_searchterm%'"
fi
read -r -d '' QUERY <<-EOF
WITH
cte
AS (
SELECT
date_trunc('month', create_time),
tool_id
FROM
job
$where
)
SELECT
date_trunc, tool_id, count(*)
FROM
cte
GROUP BY
date_trunc, tool_id
ORDER BY
date_trunc ASC, count DESC
EOF
}
query_tool-popularity() { ##? [months|24]: Most run tools by month (tool_predictions)
handle_help "$@" <<-EOF
See most popular tools by month
$ ./gxadmin query tool-popularity 1
tool_id | month | count
---------------------------+------------+-------
circos | 2019-02-01 | 20
upload1 | 2019-02-01 | 12
require_format | 2019-02-01 | 9
circos_gc_skew | 2019-02-01 | 7
circos_wiggle_to_scatter | 2019-02-01 | 3
test_history_sanitization | 2019-02-01 | 2
circos_interval_to_tile | 2019-02-01 | 1
__SET_METADATA__ | 2019-02-01 | 1
(8 rows)
EOF
fields="count=2"
tags="tool_id=0;month=1"
read -r -d '' QUERY <<-EOF
SELECT
tool_id,
date_trunc('month', create_time AT TIME ZONE 'UTC')::date as month,
count(*)
FROM job
WHERE create_time > (now() AT TIME ZONE 'UTC' - '$arg_months months'::interval)
GROUP BY tool_id, month
ORDER BY month desc, count desc
EOF
}
query_workflow-connections() { ##? [--all]: The connections of tools, from output to input, in the latest (or all) versions of user workflows (tool_predictions)
handle_help "$@" <<-EOF
This is used by the usegalaxy.eu tool prediction workflow, allowing for building models out of tool connections in workflows.
$ gxadmin query workflow-connections
wf_id | wf_updated | in_id | in_tool | in_tool_v | out_id | out_tool | out_tool_v | published | deleted | has_errors
-------+---------------------+-------+-------------------+-----------+--------+-------------------+----------------------------------------------
3 | 2013-02-07 16:48:00 | 5 | Grep1 | 1.0.1 | 12 | | | f | f | f
3 | 2013-02-07 16:48:00 | 6 | Cut1 | 1.0.1 | 7 | Remove beginning1 | 1.0.0 | f | f | f
3 | 2013-02-07 16:48:00 | 7 | Remove beginning1 | 1.0.0 | 5 | Grep1 | 1.0.1 | f | f | f
3 | 2013-02-07 16:48:00 | 8 | addValue | 1.0.0 | 6 | Cut1 | 1.0.1 | t | f | f
3 | 2013-02-07 16:48:00 | 9 | Cut1 | 1.0.1 | 7 | Remove beginning1 | 1.0.0 | f | f | f
3 | 2013-02-07 16:48:00 | 10 | addValue | 1.0.0 | 11 | Paste1 | 1.0.0 | t | f | f
3 | 2013-02-07 16:48:00 | 11 | Paste1 | 1.0.0 | 9 | Cut1 | 1.0.1 | f | f | f
3 | 2013-02-07 16:48:00 | 11 | Paste1 | 1.0.0 | 8 | addValue | 1.0.0 | t | t | f
4 | 2013-02-07 16:48:00 | 13 | cat1 | 1.0.0 | 18 | addValue | 1.0.0 | t | f | f
4 | 2013-02-07 16:48:00 | 13 | cat1 | 1.0.0 | 20 | Count1 | 1.0.0 | t | t | f
EOF
read -r -d '' wf_filter <<-EOF
WHERE
workflow.id in (
SELECT
workflow.id
FROM
stored_workflow
LEFT JOIN
workflow on stored_workflow.latest_workflow_id = workflow.id
)
EOF
if [[ -n "$arg_all" ]]; then
wf_filter=""
fi
read -r -d '' QUERY <<-EOF
SELECT
workflow.id as wf_id,
workflow.update_time::DATE as wf_updated,
ws_in.id as in_id,
ws_in.tool_id as in_tool,
ws_in.tool_version as in_tool_v,
ws_out.id as out_id,
ws_out.tool_id as out_tool,
ws_out.tool_version as out_tool_v,
sw.published as published,
sw.deleted as deleted,
workflow.has_errors as has_errors
FROM workflow_step_connection wfc
LEFT JOIN workflow_step ws_in ON ws_in.id = wfc.output_step_id
LEFT JOIN workflow_step_input wsi ON wfc.input_step_input_id = wsi.id
LEFT JOIN workflow_step ws_out ON ws_out.id = wsi.workflow_step_id
LEFT JOIN workflow_output as wo ON wsi.workflow_step_id = wfc.output_step_id
LEFT JOIN workflow on ws_in.workflow_id = workflow.id
LEFT JOIN stored_workflow as sw on sw.latest_workflow_id = workflow.id
$wf_filter
EOF
}
query_history-connections() { ## : The connections of tools, from output to input, in histories (tool_predictions)
handle_help "$@" <<-EOF
This is used by the usegalaxy.eu tool prediction workflow, allowing for building models out of tool connections.
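For example, to export the connections as a TSV for model building:
$ gxadmin tsvquery history-connections > history-connections.tsv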
EOF
read -r -d '' QUERY <<-EOF
SELECT
h.id AS h_id,
h.update_time::DATE AS h_update,
jtod.job_id AS in_id,
j.tool_id AS in_tool,
j.tool_version AS in_tool_v,
jtid.job_id AS out_id,
j2.tool_id AS out_tool,
j2.tool_version AS out_ver
FROM
job AS j
LEFT JOIN history AS h ON j.history_id = h.id
LEFT JOIN job_to_output_dataset AS jtod ON j.id = jtod.job_id
LEFT JOIN job_to_input_dataset AS jtid ON jtod.dataset_id = jtid.dataset_id
LEFT JOIN job AS j2 ON jtid.job_id = j2.id
WHERE
jtid.job_id IS NOT NULL
EOF
}
query_datasets-created-daily() { ## [--human]: The min/max/average/p95/p99 of total size of datasets created in a single day.
handle_help "$@" <<-EOF
$ gxadmin query datasets-created-daily
min | quant_1st | median | mean | quant_3rd | perc_95 | perc_99 | max | sum | stddev
-----+-----------+---------+-----------------------+-----------+-----------+-----------+-----------+------------+---------------
2 | 303814 | 6812862 | 39653071.914285714286 | 30215616 | 177509882 | 415786146 | 533643009 | 1387857517 | 96920615.1745
(1 row)
or more readably:
$ gxadmin query datasets-created-daily --human
min | quant_1st | median | mean | quant_3rd | perc_95 | perc_99 | max | sum | stddev
---------+-----------+---------+-------+-----------+---------+---------+--------+---------+--------
2 bytes | 297 kB | 6653 kB | 38 MB | 29 MB | 169 MB | 397 MB | 509 MB | 1324 MB | 92 MB
(1 row)
EOF
if [[ $1 == "--human" ]]; then
summary="$(summary_statistics sum 1)"
else
summary="$(summary_statistics sum)"
fi
read -r -d '' QUERY <<-EOF
WITH temp_queue_times AS
(select
date_trunc('day', create_time AT TIME ZONE 'UTC'),
sum(coalesce(total_size, file_size))
from dataset
group by date_trunc
order by date_trunc desc)
select
$summary
from temp_queue_times
EOF
}
query_largest-collection() { ## : Returns the size of the single largest collection
handle_help "$@" <<-EOF
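Returns the number of elements in the single largest dataset collection, e.g.
$ gxadmin query largest-collection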
EOF
fields="count=0"
tags=""
read -r -d '' QUERY <<-EOF
WITH temp_table_collection_count AS (
SELECT count(*)
FROM dataset_collection_element
GROUP BY dataset_collection_id
ORDER BY count desc
)
select max(count) as count from temp_table_collection_count
EOF
}
query_queue-time() { ##? <tool_id>: The min/95%/99%/max time a specific tool spends in the queued state.
handle_help "$@" <<-EOF
$ gxadmin query queue-time toolshed.g2.bx.psu.edu/repos/nilesh/rseqc/rseqc_geneBody_coverage/2.6.4.3
min | perc_95 | perc_99 | max
-----------------+-----------------+-----------------+-----------------
00:00:15.421457 | 00:00:55.022874 | 00:00:59.974171 | 00:01:01.211995
EOF
read -r -d '' QUERY <<-EOF
WITH temp_queue_times AS
(select
min(a.create_time - b.create_time) as queue_time
from
job_state_history as a
inner join
job_state_history as b
on
(a.job_id = b.job_id)
where
a.job_id in (select id from job where tool_id like '%${arg_tool_id}%' and state = 'ok' and create_time > (now() AT TIME ZONE 'UTC' - '3 months'::interval))
and a.state = 'running'
and b.state = 'queued'
group by
a.job_id
order by
queue_time desc
)
select
min(queue_time),
percentile_cont(0.95) WITHIN GROUP (ORDER BY queue_time) as perc_95,
percentile_cont(0.99) WITHIN GROUP (ORDER BY queue_time) as perc_99,
max(queue_time)
from temp_queue_times
EOF
}
query_queue() { ## [--by (tool|destination|user)]: Brief overview of currently running jobs grouped by tool (default) or other columns
handle_help "$@" <<-EOF
$ gxadmin query queue
tool_id | state | count
-------------------------------------------------------------------+---------+-------
toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0 | queued | 9
toolshed.g2.bx.psu.edu/repos/iuc/dexseq/dexseq_count/1.24.0.0 | running | 7
toolshed.g2.bx.psu.edu/repos/nml/spades/spades/1.2 | queued | 6
ebi_sra_main | running | 6
toolshed.g2.bx.psu.edu/repos/iuc/trinity/trinity/2.8.3 | queued | 5
toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.2 | running | 5
toolshed.g2.bx.psu.edu/repos/nml/spades/spades/3.11.1+galaxy1 | queued | 4
toolshed.g2.bx.psu.edu/repos/iuc/mothur_venn/mothur_venn/1.36.1.0 | running | 2
toolshed.g2.bx.psu.edu/repos/nml/metaspades/metaspades/3.9.0 | running | 2
upload1 | running | 2
$ gxadmin query queue --by destination
destination_id | state | job_count
----------------+---------+-----------
normal | running | 128
multicore | running | 64
multicore | queued | 16
$ gxadmin iquery queue --by destination
queue-summary-by-destination,state=running,destination_id=normal count=128
queue-summary-by-destination,state=running,destination_id=multicore count=64
queue-summary-by-destination,state=queued,destination_id=multicore count=16
EOF
fields="count=2"
tags="tool_id=0;state=1"
column="tool_id"
title="tool"
if [[ "$1" == "--by" ]]; then
if [[ "$2" == "user" ]]; then
tags="user_id=0;state=1"
column="user_id"
title="user"
query_name="queue_by_user"
elif [[ "$2" == "destination" ]]; then
tags="destination_id=0;state=1"
column="destination_id"
title="destination"
query_name="queue_by_destination"
elif [[ "$2" == "tool" ]]; then
query_name="queue_by_tool"
# nothing else needed
else
error "Unknown attribute"
exit 1
fi
fi
read -r -d '' QUERY <<-EOF
SELECT
${column}, state, count(${column}) as ${title}_count
FROM
job
WHERE
state in ('queued', 'running')
GROUP BY
${column}, state
ORDER BY
${title}_count desc
EOF
}
query_queue-overview() { ##? [--short-tool-id]: View used mostly for monitoring
handle_help "$@" <<-EOF
Primarily for monitoring of the queue. Optimally used with 'iquery' and passed to Telegraf.
$ gxadmin iquery queue-overview
queue-overview,tool_id=upload1,tool_version=0.0.1,state=running,handler=main.web.1,destination_id=condor,job_runner_name=condor,user=1 count=1
EOF
# Use full tool id by default
tool_id="tool_id"
if [[ -n "$arg_short_tool_id" ]]; then
tool_id="regexp_replace(tool_id, '.*toolshed.*/repos/', '')"
fi
# Include by default
if [ -z "$GDPR_MODE" ]; then
user_id='user_id'
else
user_id="'0'"
fi
fields="count=6"
tags="tool_id=0;tool_version=1;destination_id=2;handler=3;state=4;job_runner_name=5;user_id=7"
read -r -d '' QUERY <<-EOF
WITH queue AS (
SELECT
regexp_replace($tool_id, '/[0-9.a-z+-]+$', '')::TEXT AS tool_id,
tool_version::TEXT,
COALESCE(destination_id, 'unknown')::TEXT AS destination_id,
COALESCE(handler, 'unknown')::TEXT AS handler,
state::TEXT,
COALESCE(job_runner_name, 'unknown')::TEXT AS job_runner_name,
count(*) AS count,
$user_id::TEXT AS user_id
FROM
job
WHERE
state = 'running' OR state = 'queued' OR state = 'new'
GROUP BY
tool_id, tool_version, destination_id, handler, state, job_runner_name, user_id
)
SELECT
tool_id, tool_version, destination_id, handler, state, job_runner_name, sum(count), user_id
FROM
queue
GROUP BY
tool_id, tool_version, destination_id, handler, state, job_runner_name, user_id
EOF
}
query_queue-details() {
query_queue-detail "$@"
}
query_queue-detail() { ##? [--all] [--seconds] [--since-update]: Detailed overview of running and queued jobs
handle_help "$@" <<-EOF
$ gxadmin query queue-detail
state | id | extid | tool_id | username | time_since_creation
---------+---------+---------+---------------------------------------------------------------------------+----------+---------------------
running | 4360629 | 229333 | toolshed.g2.bx.psu.edu/repos/bgruening/infernal/infernal_cmsearch/1.1.2.0 | xxxx | 5 days 11:00:00
running | 4362676 | 230237 | toolshed.g2.bx.psu.edu/repos/iuc/mothur_venn/mothur_venn/1.36.1.0 | xxxx | 4 days 18:00:00
running | 4364499 | 231055 | toolshed.g2.bx.psu.edu/repos/iuc/mothur_venn/mothur_venn/1.36.1.0 | xxxx | 4 days 05:00:00
running | 4366604 | 5183013 | toolshed.g2.bx.psu.edu/repos/iuc/dexseq/dexseq_count/1.24.0.0 | xxxx | 3 days 20:00:00
running | 4366605 | 5183016 | toolshed.g2.bx.psu.edu/repos/iuc/dexseq/dexseq_count/1.24.0.0 | xxxx | 3 days 20:00:00
queued | 4350274 | 225743 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0 | xxxx | 9 days 05:00:00
queued | 4353435 | 227038 | toolshed.g2.bx.psu.edu/repos/iuc/trinity/trinity/2.8.3 | xxxx | 8 days 08:00:00
queued | 4361914 | 229712 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0 | xxxx | 5 days -01:00:00
queued | 4361812 | 229696 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0 | xxxx | 5 days -01:00:00
queued | 4361939 | 229728 | toolshed.g2.bx.psu.edu/repos/nml/spades/spades/1.2 | xxxx | 4 days 21:00:00
queued | 4361941 | 229731 | toolshed.g2.bx.psu.edu/repos/nml/spades/spades/1.2 | xxxx | 4 days 21:00:00
EOF
fields="count=9"
tags="state=0;id=1;extid=2;tool_id=3;username=4;handler=6;job_runner_name=7;destination_id=8"
d=""
nonpretty="("
time_column="job.create_time"
time_column_name="time_since_creation"
if [[ -n "$arg_all" ]]; then
d=", 'new'"
fi
if [[ -n "$arg_seconds" ]]; then
fields="$fields;time_since_creation=5"
nonpretty="EXTRACT(EPOCH FROM "
fi
if [[ -n "$arg_since_update" ]]; then
time_column="job.update_time"
time_column_name="time_since_update"
fi
username=$(gdpr_safe galaxy_user.username username "Anonymous User")
read -r -d '' QUERY <<-EOF
SELECT
job.state,
job.id,
job.job_runner_external_id as extid,
job.tool_id,
$username,
$nonpretty now() AT TIME ZONE 'UTC' - $time_column) as $time_column_name,
job.handler,
job.job_runner_name,
COALESCE(job.destination_id, 'none') as destination_id,
1 as count
FROM job
FULL OUTER JOIN galaxy_user ON job.user_id = galaxy_user.id
WHERE
state in ('running', 'queued'$d)
ORDER BY
state desc,
$time_column_name desc
EOF
}
query_runtime-per-user() { ##? <email>: computation time of user (by email)
handle_help "$@" <<-EOF
$ gxadmin query runtime-per-user hxr@informatik.uni-freiburg.de
sum
----------
14:07:39
EOF
read -r -d '' QUERY <<-EOF
SELECT sum((metric_value || ' second')::interval)
FROM job_metric_numeric
WHERE job_id in (
SELECT id
FROM job
WHERE user_id in (
SELECT id
FROM galaxy_user
where email = '$arg_email'
)
) AND metric_name = 'runtime_seconds'
EOF
}
query_jobs-nonterminal() { ## [--states=new,queued,running] [--update-time] [--older-than=<interval>] [username|id|email]: Job info of nonterminal jobs separated by user
handle_help "$@" <<-EOF
You can request the user information by username, id, and user email
$ gxadmin query jobs-nonterminal helena-rasche
id | tool_id | state | create_time | runner | ext_id | handler | user_id
---------+---------------------+---------+----------------------------+--------+--------+-----------------+---------
4760549 | featurecounts/1.6.3 | running | 2019-01-18 14:05:14.871711 | condor | 197549 | handler_main_7 | 599
4760552 | featurecounts/1.6.3 | running | 2019-01-18 14:05:16.205867 | condor | 197552 | handler_main_7 | 599
4760554 | featurecounts/1.6.3 | running | 2019-01-18 14:05:17.170157 | condor | 197580 | handler_main_8 | 599
4760557 | featurecounts/1.6.3 | running | 2019-01-18 14:05:18.25044 | condor | 197545 | handler_main_10 | 599
4760573 | featurecounts/1.6.3 | running | 2019-01-18 14:05:47.20392 | condor | 197553 | handler_main_2 | 599
4760984 | deseq2/2.11.40.4 | new | 2019-01-18 14:56:37.700714 | | | handler_main_1 | 599
4766092 | deseq2/2.11.40.4 | new | 2019-01-21 07:24:16.232376 | | | handler_main_5 | 599
4811598 | cuffnorm/2.2.1.2 | running | 2019-02-01 13:08:30.400016 | condor | 248432 | handler_main_0 | 599
(8 rows)
You can also query all non-terminal jobs by all users
$ gxadmin query jobs-nonterminal | head
id | tool_id | state | create_time | runner | ext_id | handler | user_id
---------+---------------------+---------+----------------------------+--------+--------+-----------------+---------
4760549 | featurecounts/1.6.3 | running | 2019-01-18 14:05:14.871711 | condor | 197549 | handler_main_7 | 599
4760552 | featurecounts/1.6.3 | running | 2019-01-18 14:05:16.205867 | condor | 197552 | handler_main_7 | 599
4760554 | featurecounts/1.6.3 | running | 2019-01-18 14:05:17.170157 | condor | 197580 | handler_main_8 | 599
4760557 | featurecounts/1.6.3 | running | 2019-01-18 14:05:18.25044 | condor | 197545 | handler_main_10 | 599
4760573 | featurecounts/1.6.3 | running | 2019-01-18 14:05:47.20392 | condor | 197553 | handler_main_2 | 599
4760588 | featurecounts/1.6.3 | new | 2019-01-18 14:11:03.766558 | | | handler_main_9 | 11
4760589 | featurecounts/1.6.3 | new | 2019-01-18 14:11:05.895232 | | | handler_main_1 | 11
4760590 | featurecounts/1.6.3 | new | 2019-01-18 14:11:07.328533 | | | handler_main_2 | 11
By default jobs in the states 'new', 'queued', and 'running' are considered non-terminal, but this can
be controlled by passing a comma-separated list to the '--states=' parameter. In addition, by default,
all non-terminal jobs are displayed, but you can limit this to only jobs created or updated before a
certain time with '--older-than='. This option takes a value in the PostgreSQL date/time interval
format, see documentation: https://www.postgresql.org/docs/current/functions-datetime.html
Be sure to quote intervals containing spaces. Finally, by default, the column returned (and filtered
on, in the case of '--older-than=') is 'job.create_time', but this can be changed to 'job.update_time'
with '--update-time'. So to return all queued and running jobs that have not been updated in the past 2
days:
$ gxadmin query jobs-nonterminal --states=queued,running --older-than='2 days' --update-time | head -5
id | tool_id | state | update_time | runner | ext_id | handler | user_id
--------+----------------------+---------+---------------------+--------------+--------+------------------+---------
335897 | trinity/2.9.1 | queued | 2021-03-10 10:44:09 | bridges | 335897 | main_w3_handler2 | 599
338554 | repeatmasker/4.0.9 | running | 2021-03-09 10:41:30 | jetstream_iu | 338554 | main_w4_handler2 | 11
338699 | hisat2/2.1.0+galaxy7 | queued | 2021-03-10 05:36:26 | jetstream_iu | 338699 | main_w3_handler2 | 42
EOF
states='new,queued,running'
interval=
user_filter='true'
time_column='create_time'
if (( $# > 0 )); then
for args in "$@"; do
if [ "$args" = '--update-time' ]; then
time_column='update_time'
elif [ "${args:0:9}" = '--states=' ]; then
states="${args:9}"
elif [ "${args:0:13}" = '--older-than=' ]; then
interval="${args:13}"
elif [ "${args:0:2}" != '--' ]; then
user_filter=$(get_user_filter "$args")
fi
done
fi
states="'$(echo "$states" | sed "s/,/', '/g")'"
if [ -n "$interval" ]; then
interval="AND job.$time_column < NOW() - interval '$interval'"
fi
user_id=$(gdpr_safe job.user_id user_id "anon")
read -r -d '' QUERY <<-EOF
SELECT
job.id, job.tool_id, job.state, job.$time_column AT TIME ZONE 'UTC' AS $time_column, job.job_runner_name, job.job_runner_external_id, job.handler, $user_id
FROM
job
LEFT OUTER JOIN
galaxy_user ON job.user_id = galaxy_user.id
WHERE
$user_filter AND job.state IN ($states) $interval
ORDER BY job.id ASC
EOF
}
query_jobs-per-user() { ##? <user>: Number of jobs run by a specific user
handle_help "$@" <<-EOF
$ gxadmin query jobs-per-user helena
count | user_id
-------+---------
999 | 1
(1 row)
EOF
user_filter=$(get_user_filter "$arg_user")
read -r -d '' QUERY <<-EOF
SELECT count(*), user_id
FROM job
WHERE user_id in (
SELECT id
FROM galaxy_user
WHERE $user_filter
)
GROUP BY user_id
EOF
}
query_recent-jobs() { ##? <hours>: Jobs run in the past <hours> (in any state)
handle_help "$@" <<-EOF
$ gxadmin query recent-jobs 2.1
id | create_time | tool_id | state | username
---------+---------------------+-----------------------+-------+-----------------
4383997 | 2018-10-05 16:07:00 | Filter1 | ok |
4383994 | 2018-10-05 16:04:00 | echo_main_condor | ok |
4383993 | 2018-10-05 16:04:00 | echo_main_drmaa | error |
4383992 | 2018-10-05 16:04:00 | echo_main_handler11 | ok |
4383983 | 2018-10-05 16:04:00 | echo_main_handler2 | ok |
4383982 | 2018-10-05 16:04:00 | echo_main_handler1 | ok |
4383981 | 2018-10-05 16:04:00 | echo_main_handler0 | ok |
EOF
username=$(gdpr_safe galaxy_user.username username)
read -r -d '' QUERY <<-EOF
SELECT
job.id,
job.create_time AT TIME ZONE 'UTC' as create_time,
job.tool_id,
job.state, $username
FROM job, galaxy_user
WHERE job.create_time > (now() AT TIME ZONE 'UTC' - '$arg_hours hours'::interval) AND job.user_id = galaxy_user.id
ORDER BY id desc
EOF
}
query_training-list() { ##? [--all]: List known trainings
handle_help "$@" <<-EOF
This module is specific to EU's implementation of Training Infrastructure as a Service, but it simply lists all groups whose name starts with the prefix 'training-'.
$ gxadmin query training-list
name | created
------------+------------
hogeschool | 2020-01-22
asdf | 2019-08-28
(2 rows)
EOF
d1=""
d2="AND deleted = false"
if [[ -n "$arg_all" ]]; then
d1=", deleted"
d2=""
fi
read -r -d '' QUERY <<-EOF
SELECT
substring(name from 10) as name,
date_trunc('day', create_time AT TIME ZONE 'UTC')::date as created
$d1
FROM galaxy_group
WHERE name like 'training-%' $d2
ORDER BY create_time DESC
EOF
}
query_training-members() { ##? <tr_id>: List users in a specific training
handle_help "$@" <<-EOF
$ gxadmin query training-members hts2018
username | joined
--------------------+---------------------
helena-rasche | 2018-09-21 21:42:01
EOF
# Remove training- if they used it.
ww=$(echo "$arg_tr_id" | sed 's/^training-//g')
username=$(gdpr_safe galaxy_user.username username)
read -r -d '' QUERY <<-EOF
SELECT DISTINCT ON ($username)
$username,
date_trunc('second', user_group_association.create_time AT TIME ZONE 'UTC') as joined
FROM galaxy_user, user_group_association, galaxy_group
WHERE galaxy_group.name = 'training-$ww'
AND galaxy_group.id = user_group_association.group_id
AND user_group_association.user_id = galaxy_user.id
EOF
}
query_training-members-remove() { ##? <training> <username> [--yesdoit]: Remove a user from a training
handle_help "$@" <<-EOF
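Example (add --yesdoit to actually remove the membership):
$ gxadmin query training-members-remove hts2018 helena-rasche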
EOF
# TODO: Move to mutate
# Remove training- if they used it.
ww=$(echo "$arg_training" | sed 's/^training-//g')
# Build the lookup query first so it is available to both branches below.
read -r -d '' qstr <<-EOF
SELECT
user_group_association.id,
galaxy_user.username,
galaxy_group.name
FROM
user_group_association
LEFT JOIN galaxy_user ON user_group_association.user_id = galaxy_user.id
LEFT JOIN galaxy_group ON galaxy_group.id = user_group_association.group_id
WHERE
galaxy_group.name = 'training-$ww'
AND galaxy_user.username = '$arg_username'
EOF
if [[ -n $arg_yesdoit ]]; then
results="$(query_tsv "$qstr")"
uga_id=$(echo "$results" | awk -F'\t' '{print $1}')
if (( uga_id > -1 )); then
qstr="delete from user_group_association where id = $uga_id"
fi
echo "$qstr"
QUERY="$qstr"
else
QUERY="$qstr"
fi
}
query_largest-histories() { ##? [--human]: Largest histories in Galaxy
handle_help "$@" <<-EOF
Finds all histories and prints them by decreasing size
$ gxadmin query largest-histories
total_size | id | substring | username
------------+----+------------+----------
17215831 | 6 | Unnamed hi | helena
45433 | 8 | Unnamed hi | helena
42846 | 9 | Unnamed hi | helena
1508 | 10 | Circos | helena
365 | 2 | Tag Testin | helena
158 | 44 | test | helena
16 | 45 | Unnamed hi | alice
Or you can supply the --human flag, but this should not be used with iquery/InfluxDB
$ gxadmin query largest-histories --human
total_size | id | substring | userna
------------+----+------------+-------
16 MB | 6 | Unnamed hi | helena
44 kB | 8 | Unnamed hi | helena
42 kB | 9 | Unnamed hi | helena
1508 bytes | 10 | Circos | helena
365 bytes | 2 | Tag Testin | helena
158 bytes | 44 | test | helena
16 bytes | 45 | Unnamed hi | alice
EOF
username=$(gdpr_safe galaxy_user.username username)
fields="size=0"
tags="id=1;name=2;username=3"
total_size="sum(coalesce(dataset.total_size, dataset.file_size, 0)) as total_size"
if [[ -n "$arg_human" ]]; then
total_size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0))) as total_size"
fi
read -r -d '' QUERY <<-EOF
SELECT
$total_size,
history.id,
substring(history.name, 1, 10),
$username
FROM
dataset
JOIN history_dataset_association on dataset.id = history_dataset_association.dataset_id
JOIN history on history_dataset_association.history_id = history.id
JOIN galaxy_user on history.user_id = galaxy_user.id
GROUP BY history.id, history.name, history.user_id, galaxy_user.username
ORDER BY sum(coalesce(dataset.total_size, dataset.file_size, 0)) DESC
EOF
}
query_training-queue() { ##? <training_id>: Jobs currently being run by people in a given training
handle_help "$@" <<-EOF
Finds all recent jobs by people in that training (including jobs they are running that are not part of the training)
$ gxadmin query training-queue hts2018
state | id | extid | tool_id | username | created
--------+---------+--------+---------+---------------+---------------------
queued | 4350274 | 225743 | upload1 | | 2018-09-26 10:00:00
EOF
# Remove training- if they used it.
ww=$(echo "$arg_training_id" | sed 's/^training-//g')
username=$(gdpr_safe galaxy_user.username username)
read -r -d '' QUERY <<-EOF
SELECT
job.state,
job.id,
job.job_runner_external_id AS extid,
job.tool_id,
$username,
job.create_time AT TIME ZONE 'UTC' AS created
FROM
job, galaxy_user
WHERE
job.user_id = galaxy_user.id
AND job.create_time > (now() AT TIME ZONE 'UTC' - '3 hours'::interval)
AND galaxy_user.id
IN (
SELECT
galaxy_user.id
FROM
galaxy_user, user_group_association, galaxy_group
WHERE
galaxy_group.name = 'training-$ww'
AND galaxy_group.id = user_group_association.group_id
AND user_group_association.user_id = galaxy_user.id
)
ORDER BY
job.create_time ASC
EOF
}
query_disk-usage() { ##? [--human]: Disk usage per object store.
handle_help "$@" <<-EOF
Query the different object stores referenced in your Galaxy database
$ gxadmin query disk-usage
object_store_id | sum
-----------------+------------
| 1387857517
(1 row)
Or you can supply the --human flag, but this should not be used with iquery/InfluxDB
$ gxadmin query disk-usage --human
object_store_id | sum
-----------------+------------
| 1324 MB
(1 row)
EOF
fields="count=1"
tags="object_store_id=0"
size="sum(coalesce(dataset.total_size, dataset.file_size, 0))"
if [[ -n "$arg_human" ]]; then
size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0))) as sum"
fi
read -r -d '' QUERY <<-EOF
SELECT
object_store_id, $size
FROM dataset
WHERE NOT purged
GROUP BY object_store_id
ORDER BY sum(coalesce(dataset.total_size, dataset.file_size, 0)) DESC
EOF
}
query_users-count() { ## : Shows sums of active/external/deleted/purged accounts
handle_help "$@" <<-EOF
active | external | deleted | purged | count
--------+----------+---------+--------+-------
f | f | f | f | 182
t | f | t | t | 2
t | f | t | f | 6
t | f | f | f | 2350
f | f | t | t | 36
EOF
fields="count=4"
tags="active=0;external=1;deleted=2;purged=3"
read -r -d '' QUERY <<-EOF
SELECT
active, external, deleted, purged, count(*) as count
FROM
galaxy_user
GROUP BY
active, external, deleted, purged
EOF
}
query_tool-last-used-date() { ## : When was the most recent invocation of every tool
handle_help "$@" <<-EOF
Example invocation:
$ gxadmin query tool-last-used-date
max | tool_id
---------------------+---------------------------
2019-02-01 00:00:00 | test_history_sanitization
2018-12-01 00:00:00 | require_format
2018-11-01 00:00:00 | upload1
(3 rows)
**WARNING**
!> It is not truly every tool, there is no easy way to find the tools which have never been run.
EOF
read -r -d '' QUERY <<-EOF
select max(date_trunc('month', create_time AT TIME ZONE 'UTC')), tool_id from job group by tool_id order by max desc
EOF
}
query_users-total() { ## : Total number of Galaxy users (incl deleted, purged, inactive)
handle_help "$@" <<-EOF
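Simply counts every row in galaxy_user, e.g.
$ gxadmin query users-total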
EOF
fields="count=0"
tags=""
read -r -d '' QUERY <<-EOF
SELECT count(*) FROM galaxy_user
EOF
}
query_groups-list() { ## : List all groups known to Galaxy
handle_help "$@" <<-EOF
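Lists each Galaxy group along with its number of members, e.g.
$ gxadmin query groups-list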
EOF
fields="count=1"
tags="group_name=0"
read -r -d '' QUERY <<-EOF
SELECT
galaxy_group.name, count(*)
FROM
galaxy_group, user_group_association
WHERE
user_group_association.group_id = galaxy_group.id
GROUP BY name
EOF
}
query_collection-usage() { ## : Information about how many collections of various types are used
handle_help "$@" <<-EOF
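Counts history dataset collections grouped by collection type, e.g.
$ gxadmin query collection-usage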
EOF
fields="count=1"
tags="group_name=0"
read -r -d '' QUERY <<-EOF
SELECT
dc.collection_type, count(*)
FROM
history_dataset_collection_association as hdca
INNER JOIN
dataset_collection as dc
ON hdca.collection_id = dc.id
GROUP BY
dc.collection_type
EOF
}
query_ts-repos() { ## : Counts of toolshed repositories by toolshed and owner.
handle_help "$@" <<-EOF
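Counts installed toolshed repositories grouped by tool shed and owner, e.g.
$ gxadmin query ts-repos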
EOF
fields="count=2"
tags="tool_shed=0;owner=1"
read -r -d '' QUERY <<-EOF
SELECT
tool_shed, owner, count(*)
FROM
tool_shed_repository
GROUP BY
tool_shed, owner
EOF
}
query_tool-metrics() { ##? <tool_id> <metric_id> [--like]: See values of a specific metric
handle_help "$@" <<-EOF
A good way to use this is to fetch the memory usage of a tool and then
do some aggregations. The following requires [data_hacks](https://github.com/bitly/data_hacks)
$ gxadmin tsvquery tool-metrics %rgrnastar/rna_star% memory.max_usage_in_bytes --like | \\
awk '{print \$1 / 1024 / 1024 / 1024}' | \\
histogram.py --percentage
# NumSamples = 441; Min = 2.83; Max = 105.88
# Mean = 45.735302; Variance = 422.952289; SD = 20.565804; Median 51.090900
# each ∎ represents a count of 1
2.8277 - 13.1324 [ 15]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (3.40%)
13.1324 - 23.4372 [ 78]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (17.69%)
23.4372 - 33.7419 [ 47]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (10.66%)
33.7419 - 44.0466 [ 31]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (7.03%)
44.0466 - 54.3514 [ 98]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (22.22%)
54.3514 - 64.6561 [ 102]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (23.13%)
64.6561 - 74.9608 [ 55]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (12.47%)
74.9608 - 85.2655 [ 11]: ∎∎∎∎∎∎∎∎∎∎∎ (2.49%)
85.2655 - 95.5703 [ 3]: ∎∎∎ (0.68%)
95.5703 - 105.8750 [ 1]: ∎ (0.23%)
EOF
tool_subquery="SELECT id FROM job WHERE tool_id = '$arg_tool_id'"
if [[ -n "$arg_like" ]]; then
tool_subquery="SELECT id FROM job WHERE tool_id like '$arg_tool_id'"
fi
read -r -d '' QUERY <<-EOF
SELECT
metric_value
FROM job_metric_numeric
WHERE
metric_name = '$arg_metric_id'
and
job_id in (
$tool_subquery
)
EOF
}
query_tool-available-metrics() { ##? <tool_id>: list all available metrics for a given tool
handle_help "$@" <<-EOF
Gives a list of available metrics, which can then be used to query.
$ gxadmin query tool-available-metrics upload1
metric_name
-------------------------------------
memory.stat.total_rss
memory.stat.total_swap
memory.stat.total_unevictable
memory.use_hierarchy
...
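Any of these metric names can then be passed to 'gxadmin query tool-metrics', e.g.
$ gxadmin query tool-metrics upload1 memory.stat.total_rss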
EOF
read -r -d '' QUERY <<-EOF
SELECT
distinct metric_name
FROM job_metric_numeric
WHERE job_id in (
SELECT id FROM job WHERE tool_id = '$arg_tool_id'
)
ORDER BY metric_name asc
EOF
}
query_tool-memory-per-inputs() { ##? <tool_id> [--like]: See memory usage and input size data
handle_help "$@" <<-EOF
Display details about tool input counts and sizes along with memory usage and the relation between them,
to aid in determining appropriate memory allocations for tools.
$ gxadmin query tool-memory-per-inputs %/unicycler/% --like
id | tool_id | input_count | total_input_size_mb | mean_input_size_mb | median_input_size_mb | memory_used_mb | memory_used_per_input_mb | memory_mean_input_ratio | memory_median_input_ratio
----------+--------------------------------------------------------------+-------------+---------------------+--------------------+----------------------+----------------+--------------------------+-------------------------+---------------------------
34663027 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 2 | 245 | 122 | 122 | 4645 | 19 | 38 | 38
34657045 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 2 | 51 | 25 | 25 | 1739 | 34 | 68 | 68
34655863 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 2 | 1829 | 915 | 915 | 20635 | 11 | 23 | 23
34650581 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 3 | 235 | 78 | 112 | 30550 | 130 | 391 | 274
34629187 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 | 2 | 2411 | 1206 | 1206 | 50018 | 21 | 41 | 41
A good way to use this is to fetch the data and then do some aggregations. The following requires
[data_hacks](https://github.com/bitly/data_hacks):
$ gxadmin tsvquery tool-memory-per-inputs %/unicycler/% --like | \\
awk '{print \$10}' | histogram.py --percentage --max=256
# NumSamples = 870; Min = 4.00; Max = 256.00
# 29 values outside of min/max
# Mean = 67.804598; Variance = 15461.789404; SD = 124.345444; Median 37.000000
# each ∎ represents a count of 4
4.0000 - 29.2000 [ 368]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (42.30%)
29.2000 - 54.4000 [ 226]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (25.98%)
54.4000 - 79.6000 [ 133]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (15.29%)
79.6000 - 104.8000 [ 45]: ∎∎∎∎∎∎∎∎∎∎∎ (5.17%)
104.8000 - 130.0000 [ 28]: ∎∎∎∎∎∎∎ (3.22%)
130.0000 - 155.2000 [ 12]: ∎∎∎ (1.38%)
155.2000 - 180.4000 [ 9]: ∎∎ (1.03%)
180.4000 - 205.6000 [ 6]: ∎ (0.69%)
205.6000 - 230.8000 [ 10]: ∎∎ (1.15%)
230.8000 - 256.0000 [ 4]: ∎ (0.46%)
EOF
tool_clause="j.tool_id = '$arg_tool_id'"
if [[ -n "$arg_like" ]]; then
tool_clause="j.tool_id like '$arg_tool_id'"
fi
read -r -d '' QUERY <<-EOF
WITH job_cte AS (
SELECT
j.id,
j.tool_id
FROM
job j
WHERE
$tool_clause
AND
j.state = 'ok'
),
mem_cte AS (
SELECT
j.id,
jmn.metric_value AS memory_used
FROM
job_cte j
JOIN
job_metric_numeric jmn ON j.id = jmn.job_id
WHERE
jmn.plugin = 'cgroup'
AND
jmn.metric_name = 'memory.memsw.max_usage_in_bytes'
),
data_cte AS (
SELECT
j.id,
count(jtid.id) AS input_count,
sum(d.total_size) AS total_input_size,
avg(d.total_size) AS mean_input_size,
percentile_cont(0.5) WITHIN GROUP (ORDER BY d.total_size) AS median_input_size
FROM
job_cte j
JOIN
job_to_input_dataset jtid ON j.id = jtid.job_id
JOIN
history_dataset_association hda ON jtid.dataset_id = hda.id
JOIN
dataset d ON hda.dataset_id = d.id
GROUP BY
j.id
)
SELECT
j.*,
d.input_count,
(d.total_input_size/1024/1024)::bigint AS total_input_size_mb,
(d.mean_input_size/1024/1024)::bigint AS mean_input_size_mb,
(d.median_input_size/1024/1024)::bigint AS median_input_size_mb,
(m.memory_used/1024/1024)::bigint AS memory_used_mb,
(m.memory_used/d.total_input_size)::bigint AS memory_used_per_input_mb,
(m.memory_used/d.mean_input_size)::bigint AS memory_mean_input_ratio,
(m.memory_used/d.median_input_size)::bigint AS memory_median_input_ratio
FROM
job_cte j
JOIN
mem_cte m on j.id = m.id
JOIN
data_cte d on j.id = d.id
ORDER BY
j.id DESC
EOF
}
query_monthly-cpu-stats() { ##? [year] : CPU years/hours allocated to tools by month
handle_help "$@" <<-EOF
This uses the galaxy_slots and runtime_seconds metrics in order to
calculate allocated CPU years/hours. This will not be the value of what is
actually consumed by your jobs; use cgroups for that.
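Per job, cpu_hours is computed as galaxy_slots * runtime_seconds / 3600 (and
cpu_years as that value divided by 24 * 365); e.g. a 4-slot job that ran for
7200 seconds contributes 8 cpu_hours.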
$ gxadmin query monthly-cpu-stats
month | cpu_years | cpu_hours
------------+-----------+-----------
2020-05-01 | 53.55 | 469088.02
2020-04-01 | 59.55 | 521642.60
2020-03-01 | 57.04 | 499658.86
2020-02-01 | 53.93 | 472390.31
2020-01-01 | 56.49 | 494887.37
...
EOF
filter_by_year=""
if [ -n "$arg_year" ] && date -d "$arg_year" >/dev/null
then
filter_by_year="AND date_trunc('year', job.create_time AT TIME ZONE 'UTC') = '$arg_year-01-01'::date"
fi
read -r -d '' QUERY <<-EOF
SELECT
date_trunc('month', job.create_time AT TIME ZONE 'UTC')::date as month,
round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365 ), 2) as cpu_years,
round(sum((a.metric_value * b.metric_value) / 3600 ), 2) as cpu_hours
FROM
job_metric_numeric a,
job_metric_numeric b,
job
WHERE
b.job_id = a.job_id
AND a.job_id = job.id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'galaxy_slots'
$filter_by_year
GROUP BY month
ORDER BY month DESC
EOF
}
query_monthly-cpu-years() { ## : CPU years allocated to tools by month
handle_help "$@" <<-EOF
This uses the galaxy_slots and runtime_seconds metrics in order to
calculate allocated CPU years. This will not be the value of what is
actually consumed by your jobs; use cgroups for that.
$ gxadmin query monthly-cpu-years
month | cpu_years
------------+-----------
2019-04-01 | 2.95
2019-03-01 | 12.38
2019-02-01 | 11.47
2019-01-01 | 8.27
2018-12-01 | 11.42
2018-11-01 | 16.99
2018-10-01 | 12.09
2018-09-01 | 6.27
2018-08-01 | 9.06
2018-07-01 | 6.17
2018-06-01 | 5.73
2018-05-01 | 7.36
2018-04-01 | 10.21
2018-03-01 | 5.20
2018-02-01 | 4.53
2018-01-01 | 4.05
2017-12-01 | 2.44
EOF
read -r -d '' QUERY <<-EOF
SELECT
date_trunc('month', job.create_time)::date as month,
round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) as cpu_years
FROM
job_metric_numeric a,
job_metric_numeric b,
job
WHERE
b.job_id = a.job_id
AND a.job_id = job.id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'galaxy_slots'
GROUP BY date_trunc('month', job.create_time)
ORDER BY date_trunc('month', job.create_time) DESC
EOF
}
query_monthly-data(){ ##? [year] [--human]: Amount of data ingested or created by Galaxy per month
handle_help "$@" <<-EOF
Find out how much data was ingested or created by Galaxy during the past months.
$ gxadmin query monthly-data 2018
month | pg_size_pretty
------------+----------------
2018-12-01 | 62 TB
2018-11-01 | 50 TB
2018-10-01 | 59 TB
2018-09-01 | 32 TB
2018-08-01 | 26 TB
2018-07-01 | 42 TB
2018-06-01 | 34 TB
2018-05-01 | 33 TB
2018-04-01 | 27 TB
2018-03-01 | 32 TB
2018-02-01 | 18 TB
2018-01-01 | 16 TB
EOF
size="sum(coalesce(dataset.total_size, dataset.file_size, 0))"
if [[ -n $arg_human ]]; then
size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0)))"
fi
if [[ -n $arg_year ]]; then
where="WHERE date_trunc('year', dataset.create_time AT TIME ZONE 'UTC') = '$arg_year-01-01'::date"
fi
read -r -d '' QUERY <<-EOF
SELECT
date_trunc('month', dataset.create_time AT TIME ZONE 'UTC')::date AS month,
$size
FROM
dataset
$where
GROUP BY
month
ORDER BY
month DESC
EOF
}
query_monthly-gpu-years() { ## : GPU years allocated to tools by month
handle_help "$@" <<-EOF
This uses the CUDA_VISIBLE_DEVICES and runtime_seconds metrics in order to
calculate allocated GPU years. This will not be the value of what is
actually consumed by your jobs; use cgroups for that. Only works if the
environment variable 'CUDA_VISIBLE_DEVICES' is recorded as a job metric by Galaxy.
Requires Nvidia GPUs.
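The GPU count per job is taken from CUDA_VISIBLE_DEVICES as the number of characters
left after removing commas, e.g. a value of '0,1' counts as 2 GPUs (multi-digit
device ids would therefore be over-counted).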
$ gxadmin query monthly-gpu-years
month | gpu_years
------------+-----------
2019-04-01 | 2.95
2019-03-01 | 12.38
2019-02-01 | 11.47
2019-01-01 | 8.27
2018-12-01 | 11.42
2018-11-01 | 16.99
2018-10-01 | 12.09
2018-09-01 | 6.27
2018-08-01 | 9.06
2018-07-01 | 6.17
2018-06-01 | 5.73
2018-05-01 | 7.36
2018-04-01 | 10.21
2018-03-01 | 5.20
2018-02-01 | 4.53
2018-01-01 | 4.05
2017-12-01 | 2.44
EOF
read -r -d '' QUERY <<-EOF
SELECT
date_trunc('month', job.create_time)::date as month,
round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) as gpu_years
FROM
job_metric_numeric a,
job_metric_text b,
job
WHERE
b.job_id = a.job_id
AND a.job_id = job.id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
GROUP BY date_trunc('month', job.create_time)
ORDER BY date_trunc('month', job.create_time) DESC
EOF
}
query_user-cpu-years() { ## : CPU years allocated to tools by user
handle_help "$@" <<-EOF
This uses the galaxy_slots and runtime_seconds metrics in order to
calculate allocated CPU years. This will not be the value of what is
actually consumed by your jobs; use cgroups for that.
rank | user_id | username | cpu_years
----- | ------- | ----------- | ----------
1 | | 123f911b5f1 | 20.35
2 | | cb0fabc0002 | 14.93
3 | | 7e9e9b00b89 | 14.24
4 | | 42f211e5e87 | 14.06
5 | | 26cdba62c93 | 12.97
6 | | fa87cddfcae | 7.01
7 | | 44d2a648aac | 6.70
8 | | 66c57b41194 | 6.43
9 | | 6b1467ac118 | 5.45
10 | | d755361b59a | 5.19
EOF
username=$(gdpr_safe galaxy_user.username username 'Anonymous')
read -r -d '' QUERY <<-EOF
SELECT
row_number() OVER (ORDER BY round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) DESC) as rank,
job.user_id,
$username,
round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) as cpu_years
FROM
job_metric_numeric a,
job_metric_numeric b,
job
FULL OUTER JOIN galaxy_user ON job.user_id = galaxy_user.id
WHERE
b.job_id = a.job_id
AND a.job_id = job.id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'galaxy_slots'
GROUP BY job.user_id, galaxy_user.username
ORDER BY round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) DESC
LIMIT 50
EOF
}
query_user-gpu-years() { ## : GPU years allocated to tools by user
handle_help "$@" <<-EOF
This uses the CUDA_VISIBLE_DEVICES and runtime_seconds metrics in order to
calculate allocated GPU years. This will not be the value of what is
actually consumed by your jobs; use cgroups for that. Only works if the
environment variable 'CUDA_VISIBLE_DEVICES' is recorded as a job metric by Galaxy.
Requires Nvidia GPUs.
rank | user_id | username | gpu_years
----- | ------- | ----------- | ----------
1 | | 123f911b5f1 | 20.35
2 | | cb0fabc0002 | 14.93
3 | | 7e9e9b00b89 | 14.24
4 | | 42f211e5e87 | 14.06
5 | | 26cdba62c93 | 12.97
6 | | fa87cddfcae | 7.01
7 | | 44d2a648aac | 6.70
8 | | 66c57b41194 | 6.43
9 | | 6b1467ac118 | 5.45
10 | | d755361b59a | 5.19
EOF
username=$(gdpr_safe galaxy_user.username username 'Anonymous')
read -r -d '' QUERY <<-EOF
SELECT
row_number() OVER (ORDER BY round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) DESC) as rank,
job.user_id,
$username,
round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) as gpu_years
FROM
job_metric_numeric a,
job_metric_text b,
job
FULL OUTER JOIN galaxy_user ON job.user_id = galaxy_user.id
WHERE
b.job_id = a.job_id
AND a.job_id = job.id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
GROUP BY job.user_id, galaxy_user.username
ORDER BY round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) DESC
LIMIT 50
EOF
}
query_user-disk-usage() { ##? [--human] [--use-precalc]: Retrieve an approximation of the disk usage for users
handle_help "$@" <<-EOF
This uses the dataset size and the history association in order to
calculate total disk usage for a user. This is currently limited
to the 50 users with the highest storage usage.
By default it prints the storage usage in bytes but you can use --human:
rank | user id | username | email | storage usage
----- | -------- | ----------- | ----------- | -------------
1 | 5 | 123f911b5f1 | 123@911.5f1 | 20.35 TB
2 | 6 | cb0fabc0002 | cb0@abc.002 | 14.93 TB
3 | 9 | 7e9e9b00b89 | 7e9@9b0.b89 | 14.24 TB
4 | 11 | 42f211e5e87 | 42f@11e.e87 | 14.06 GB
5 | 2 | 26cdba62c93 | 26c@ba6.c93 | 12.97 GB
6 | 1005 | fa87cddfcae | fa8@cdd.cae | 7.01 GB
7 | 2009 | 44d2a648aac | 44d@a64.aac | 6.70 GB
8 | 432 | 66c57b41194 | 66c@7b4.194 | 6.43 GB
9 | 58945 | 6b1467ac118 | 6b1@67a.118 | 5.45 MB
10 | 10 | d755361b59a | d75@361.59a | 5.19 KB
A flag, --use-precalc, is provided which reads the disk_usage column of the galaxy_user table, using the values precisely as displayed to users in Galaxy.
EOF
username=$(gdpr_safe galaxy_user.username user_name 'Anonymous')
useremail=$(gdpr_safe galaxy_user.email user_email 'Anonymous')
fields="size=4"
tags="userid=1;username=2"
if [[ -n $arg_use_precalc ]]; then
size="disk_usage as \"storage usage\""
if [[ -n $arg_human ]]; then
size="pg_size_pretty(disk_usage) as \"storage usage\""
fi
read -r -d '' QUERY <<-EOF
SELECT
row_number() OVER (ORDER BY galaxy_user.disk_usage DESC) as rank,
galaxy_user.id as "user id",
$username,
$useremail,
$size
FROM
galaxy_user
GROUP BY galaxy_user.id
ORDER BY 1
LIMIT 50
EOF
else
size="sum(coalesce(dataset.total_size, dataset.file_size, 0)) as \"storage usage\""
if [[ -n $arg_human ]]; then
size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0))) as \"storage usage\""
fi
read -r -d '' QUERY <<-EOF
SELECT
row_number() OVER (ORDER BY sum(coalesce(dataset.total_size, dataset.file_size, 0)) DESC) as rank,
galaxy_user.id as "user id",
$username,
$useremail,
$size
FROM
dataset,
galaxy_user,
history_dataset_association,
history
WHERE
NOT dataset.purged
AND dataset.id = history_dataset_association.dataset_id
AND history_dataset_association.history_id = history.id
AND history.user_id = galaxy_user.id
GROUP BY galaxy_user.id
ORDER BY 1
LIMIT 50
EOF
fi
}
query_user-disk-quota() { ## : Retrieves the 50 users with the largest quotas
handle_help "$@" <<-EOF
This calculates the total disk quota assigned to users.
It only displays the top 50 quotas.
rank | user_id | username | quota
----- | -------- | ------------ | ------------
1 | | 123f911b5f1 | 20.35
2 | | cb0fabc0002 | 14.93
3 | | 7e9e9b00b89 | 14.24
4 | | 42f211e5e87 | 14.06
5 | | 26cdba62c93 | 12.97
6 | | fa87cddfcae | 7.01
7 | | 44d2a648aac | 6.70
8 | | 66c57b41194 | 6.43
9 | | 6b1467ac118 | 5.45
10 | | d755361b59a | 5.19
EOF
username=$(gdpr_safe galaxy_user.username username 'Anonymous')
read -r -d '' QUERY <<-EOF
WITH user_basequota_list AS (
SELECT galaxy_user.id as "user_id",
basequota.bytes as "quota"
FROM galaxy_user,
quota basequota,
user_quota_association
WHERE galaxy_user.id = user_quota_association.user_id
AND basequota.id = user_quota_association.quota_id
AND basequota.operation = '='
AND NOT basequota.deleted
GROUP BY galaxy_user.id, basequota.bytes
),
user_basequota AS (
SELECT user_basequota_list.user_id,
MAX(user_basequota_list.quota) as "quota"
FROM user_basequota_list
GROUP BY user_basequota_list.user_id
),
user_addquota_list AS (
SELECT galaxy_user.id as "user_id",
addquota.bytes as "quota"
FROM galaxy_user,
quota addquota,
user_quota_association
WHERE galaxy_user.id = user_quota_association.user_id
AND addquota.id = user_quota_association.quota_id
AND addquota.operation = '+'
AND NOT addquota.deleted
GROUP BY galaxy_user.id, addquota.bytes
),
user_addquota AS (
SELECT user_addquota_list.user_id,
sum(user_addquota_list.quota) AS "quota"
FROM user_addquota_list
GROUP BY user_addquota_list.user_id
),
user_minquota_list AS (
SELECT galaxy_user.id as "user_id",
minquota.bytes as "quota"
FROM galaxy_user,
quota minquota,
user_quota_association
WHERE galaxy_user.id = user_quota_association.user_id
AND minquota.id = user_quota_association.quota_id
AND minquota.operation = '-'
AND NOT minquota.deleted
GROUP BY galaxy_user.id, minquota.bytes
),
user_minquota AS (
SELECT user_minquota_list.user_id,
sum(user_minquota_list.quota) AS "quota"
FROM user_minquota_list
GROUP BY user_minquota_list.user_id
),
group_basequota_list AS (
SELECT galaxy_user.id as "user_id",
galaxy_group.id as "group_id",
basequota.bytes as "quota"
FROM galaxy_user,
galaxy_group,
quota basequota,
group_quota_association,
user_group_association
WHERE galaxy_user.id = user_group_association.user_id
AND galaxy_group.id = user_group_association.group_id
AND basequota.id = group_quota_association.quota_id
AND galaxy_group.id = group_quota_association.group_id
AND basequota.operation = '='
AND NOT basequota.deleted
GROUP BY galaxy_user.id, galaxy_group.id, basequota.bytes
),
group_basequota AS (
SELECT group_basequota_list.user_id,
group_basequota_list.group_id,
MAX(group_basequota_list.quota) as "quota"
FROM group_basequota_list
GROUP BY group_basequota_list.user_id, group_basequota_list.group_id
),
group_addquota_list AS (
SELECT galaxy_user.id as "user_id",
addquota.bytes as "quota"
FROM galaxy_user,
galaxy_group,
quota addquota,
group_quota_association,
user_group_association
WHERE galaxy_user.id = user_group_association.user_id
AND galaxy_group.id = user_group_association.group_id
AND addquota.id = group_quota_association.quota_id
AND galaxy_group.id = group_quota_association.group_id
AND addquota.operation = '+'
AND NOT addquota.deleted
GROUP BY galaxy_user.id, addquota.bytes
),
group_addquota AS (
SELECT group_addquota_list.user_id,
sum(group_addquota_list.quota) AS "quota"
FROM group_addquota_list
GROUP BY group_addquota_list.user_id
),
group_minquota_list AS (
SELECT galaxy_user.id as "user_id",
minquota.bytes as "quota"
FROM galaxy_user,
galaxy_group,
quota minquota,
group_quota_association,
user_group_association
WHERE galaxy_user.id = user_group_association.user_id
AND galaxy_group.id = user_group_association.group_id
AND minquota.id = group_quota_association.quota_id
AND galaxy_group.id = group_quota_association.group_id
AND minquota.operation = '-'
AND NOT minquota.deleted
GROUP BY galaxy_user.id, galaxy_group.id, galaxy_group.name, minquota.bytes
),
group_minquota AS (
SELECT group_minquota_list.user_id,
sum(group_minquota_list.quota) AS "quota"
FROM group_minquota_list
GROUP BY group_minquota_list.user_id
),
all_user_default_quota AS (
SELECT galaxy_user.id as "user_id",
quota.bytes
FROM galaxy_user,
quota
WHERE quota.id = (SELECT quota_id FROM default_quota_association)
),
quotas AS (
SELECT all_user_default_quota.user_id as "aud_uid",
all_user_default_quota.bytes as "aud_quota",
user_basequota.user_id as "ubq_uid",
user_basequota.quota as "ubq_quota",
user_addquota.user_id as "uaq_uid",
user_addquota.quota as "uaq_quota",
user_minquota.user_id as "umq_uid",
user_minquota.quota as "umq_quota",
group_basequota.user_id as "gbq_uid",
group_basequota.quota as "gbq_quota",
group_addquota.user_id as "gaq_uid",
group_addquota.quota as "gaq_quota",
group_minquota.user_id as "gmq_uid",
group_minquota.quota as "gmq_quota"
FROM all_user_default_quota
FULL OUTER JOIN user_basequota ON all_user_default_quota.user_id = user_basequota.user_id
FULL OUTER JOIN user_addquota ON all_user_default_quota.user_id = user_addquota.user_id
FULL OUTER JOIN user_minquota ON all_user_default_quota.user_id = user_minquota.user_id
FULL OUTER JOIN group_basequota ON all_user_default_quota.user_id = group_basequota.user_id
FULL OUTER JOIN group_addquota ON all_user_default_quota.user_id = group_addquota.user_id
FULL OUTER JOIN group_minquota ON all_user_default_quota.user_id = group_minquota.user_id
),
computed_quotas AS (
SELECT aud_uid as "user_id",
COALESCE(GREATEST(ubq_quota, gbq_quota), aud_quota) as "base_quota",
(COALESCE(uaq_quota, 0) + COALESCE(gaq_quota, 0)) as "add_quota",
(COALESCE(umq_quota, 0) + COALESCE(gmq_quota, 0)) as "min_quota"
FROM quotas
)
SELECT row_number() OVER (ORDER BY (computed_quotas.base_quota + computed_quotas.add_quota - computed_quotas.min_quota) DESC) as rank,
galaxy_user.id as "user_id",
$username,
pg_size_pretty(computed_quotas.base_quota + computed_quotas.add_quota - computed_quotas.min_quota) as "quota"
FROM computed_quotas,
galaxy_user
WHERE computed_quotas.user_id = galaxy_user.id
GROUP BY galaxy_user.id, galaxy_user.username, computed_quotas.base_quota, computed_quotas.add_quota, computed_quotas.min_quota
ORDER BY (computed_quotas.base_quota + computed_quotas.add_quota - computed_quotas.min_quota) DESC
LIMIT 50
EOF
}
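# The query above resolves each user's effective quota as: GREATEST of the user and
# group '=' quotas (falling back to the instance default), plus all '+' quotas, minus
# all '-' quotas. Illustrative invocation:
#   gxadmin query user-disk-quota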
query_group-cpu-seconds() { ##? [group]: Retrieve an approximation of the CPU time in seconds for group(s)
handle_help "$@" <<-EOF
This uses the galaxy_slots and runtime_seconds metrics in order to
		calculate allocated CPU time in seconds. This is not the CPU time actually
		consumed by the group's jobs; use cgroup metrics for that.
rank | group_id | group_name | cpu_seconds
----- | -------- | ------------ | ------------
1 | | 123f911b5f1 | 20.35
2 | | cb0fabc0002 | 14.93
3 | | 7e9e9b00b89 | 14.24
4 | | 42f211e5e87 | 14.06
5 | | 26cdba62c93 | 12.97
6 | | fa87cddfcae | 7.01
7 | | 44d2a648aac | 6.70
8 | | 66c57b41194 | 6.43
9 | | 6b1467ac118 | 5.45
10 | | d755361b59a | 5.19
EOF
where=""
if [[ -n $arg_group ]]; then
where="AND galaxy_group.name = '$arg_group'"
fi
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')
read -r -d '' QUERY <<-EOF
WITH jobs_info AS (
SELECT job.user_id,
round(sum(a.metric_value * b.metric_value), 2) AS cpu_seconds
FROM job_metric_numeric AS a,
job_metric_numeric AS b,
job
WHERE job.id = a.job_id
AND job.id = b.job_id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'galaxy_slots'
GROUP BY job.user_id
), user_job_info AS (
SELECT user_id,
sum(cpu_seconds) AS cpu_seconds
FROM jobs_info
GROUP BY user_id
)
SELECT row_number() OVER (ORDER BY round(sum(user_job_info.cpu_seconds), 0) DESC) as rank,
galaxy_group.id as group_id,
$groupname,
round(sum(user_job_info.cpu_seconds), 0) as cpu_seconds
FROM user_job_info,
galaxy_group,
user_group_association
WHERE user_job_info.user_id = user_group_association.user_id
AND user_group_association.group_id = galaxy_group.id
$where
GROUP BY galaxy_group.id, galaxy_group.name
ORDER BY round(sum(user_job_info.cpu_seconds), 0) DESC
LIMIT 50
EOF
}
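# Illustrative: restrict the report to a single group (the group name below is hypothetical):
#   gxadmin query group-cpu-seconds my-lab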
query_group-gpu-time() { ##? [group]: Retrieve an approximation of the GPU time for group(s)
handle_help "$@" <<-EOF
		This uses the CUDA_VISIBLE_DEVICES and runtime_seconds metrics in order to
		calculate allocated GPU time. This is not the GPU time actually consumed by
		the group's jobs; use cgroup metrics for that.
		Only works if the environment variable 'CUDA_VISIBLE_DEVICES' is
		recorded as a job metric by Galaxy. Requires NVIDIA GPUs.
rank | group_id | group_name | gpu_seconds
----- | -------- | ----------- | -----------
1 | | 123f911b5f1 | 20.35
2 | | cb0fabc0002 | 14.93
3 | | 7e9e9b00b89 | 14.24
4 | | 42f211e5e87 | 14.06
5 | | 26cdba62c93 | 12.97
6 | | fa87cddfcae | 7.01
7 | | 44d2a648aac | 6.70
8 | | 66c57b41194 | 6.43
9 | | 6b1467ac118 | 5.45
10 | | d755361b59a | 5.19
EOF
where=""
if [[ -n $arg_group ]]; then
where="AND galaxy_group.name = '$arg_group'"
fi
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')
read -r -d '' QUERY <<-EOF
WITH jobs_info AS (
SELECT job.user_id,
round(sum(a.metric_value * length(replace(b.metric_value, ',', ''))), 2) AS gpu_seconds
FROM job_metric_numeric AS a,
job_metric_text AS b,
job
WHERE job.id = a.job_id
AND job.id = b.job_id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
GROUP BY job.user_id
), user_job_info AS (
SELECT user_id,
sum(gpu_seconds) AS gpu_seconds
FROM jobs_info
GROUP BY user_id
)
SELECT row_number() OVER (ORDER BY round(sum(user_job_info.gpu_seconds), 0) DESC) as rank,
galaxy_group.id as group_id,
$groupname,
round(sum(user_job_info.gpu_seconds), 0) as gpu_seconds
FROM user_job_info,
galaxy_group,
user_group_association
WHERE user_job_info.user_id = user_group_association.user_id
AND user_group_association.group_id = galaxy_group.id
$where
GROUP BY galaxy_group.id, galaxy_group.name
ORDER BY round(sum(user_job_info.gpu_seconds), 0) DESC
LIMIT 50
EOF
}
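# Illustrative (group name hypothetical); note gpu_seconds is runtime multiplied by the
# number of GPUs listed in CUDA_VISIBLE_DEVICES, i.e. an allocation, not utilisation:
#   gxadmin query group-gpu-time my-lab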
query_monthly-users-registered(){ ## [year] [--by_group]: Number of users registered each month
handle_help "$@" <<-EOF
		Number of users that registered each month. **NOTE**: does not include anonymous users;
		with --by_group, users that are not in any group are also excluded.
Parameters:
--by_group: Will separate out registrations by galaxy user group as well
year: Will return monthly user registrations from the start of [year] till now
		    $ gxadmin query monthly-users-registered 2020 --by_group
month | Group name | count
------------+------------+-------
2020-08-01 | Group_1 | 1
2020-08-01 | Group_2 | 1
2020-08-01 | Group_3 | 1
2020-08-01 | Group_4 | 3
2020-07-01 | Group_1 | 1
2020-07-01 | Group_2 | 6
2020-07-01 | Group_3 | 2
2020-07-01 | Group_4 | 6
2020-07-01 | Group_5 | 2
2020-07-01 | Group_6 | 1
...
EOF
if (( $# > 0 )); then
for args in "$@"; do
if [ "$args" = "--by_group" ]; then
where_g="galaxy_user.id = user_group_association.user_id and galaxy_group.id = user_group_association.group_id"
select="galaxy_group.name,"
from="galaxy_group, user_group_association,"
group=", galaxy_group.name"
else
where_y="date_trunc('year', galaxy_user.create_time AT TIME ZONE 'UTC') = '$args-01-01'::date"
fi
done
if (( $# > 1 )); then
where="WHERE $where_y and $where_g"
else
where="WHERE $where_y $where_g"
fi
fi
read -r -d '' QUERY <<-EOF
SELECT
date_trunc('month', galaxy_user.create_time)::DATE AS month,
$select
count(*)
FROM
$from
galaxy_user
$where
GROUP BY
month
$group
ORDER BY
month DESC
EOF
}
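# Illustrative invocations:
#   gxadmin query monthly-users-registered 2021              # registrations per month since 2021-01-01
#   gxadmin query monthly-users-registered 2021 --by_group   # additionally split by user group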
query_monthly-users-active(){ ## [year] [--by_group]: Number of active users per month, running jobs
handle_help "$@" <<-EOF
Number of unique users each month who ran jobs. **NOTE**: does not include anonymous users.
Parameters:
--by_group: Separate out active users by galaxy user group
year: Will return monthly active users from the start of [year] till now
$ gxadmin query monthly-users-active 2018
month | active_users
------------+--------------
2018-12-01 | 811
2018-11-01 | 658
2018-10-01 | 583
2018-09-01 | 444
2018-08-01 | 342
2018-07-01 | 379
2018-06-01 | 370
2018-05-01 | 330
2018-04-01 | 274
2018-03-01 | 186
2018-02-01 | 168
2018-01-01 | 122
EOF
if (( $# > 0 )); then
for args in "$@"; do
if [ "$args" = "--by_group" ]; then
where_g="job.user_id = user_group_association.user_id and user_group_association.group_id = galaxy_group.id"
select="galaxy_group.name,"
from=", user_group_association, galaxy_group"
group=", galaxy_group.name"
else
where_y="date_trunc('year', job.create_time AT TIME ZONE 'UTC') = '$args-01-01'::date"
fi
done
if (( $# > 1 )); then
where="WHERE $where_y and $where_g"
else
where="WHERE $where_y $where_g"
fi
fi
read -r -d '' QUERY <<-EOF
SELECT
date_trunc('month', job.create_time AT TIME ZONE 'UTC')::date as month,
$select
count(distinct job.user_id) as active_users
FROM
job
$from
$where
GROUP BY
month
$group
ORDER BY
month DESC
EOF
}
query_monthly-jobs(){ ## [year] [--by_group]: Number of jobs run each month
handle_help "$@" <<-EOF
Count jobs run each month
Parameters:
--by_group: Will separate out job counts for each month by galaxy user group
year: Will return number of monthly jobs run from the start of [year] till now
$ gxadmin query monthly-jobs 2018
month | count
------------+--------
2018-12-01 | 96941
2018-11-01 | 94625
2018-10-01 | 156940
2018-09-01 | 103331
2018-08-01 | 128658
2018-07-01 | 90852
2018-06-01 | 230470
2018-05-01 | 182331
2018-04-01 | 109032
2018-03-01 | 197125
2018-02-01 | 260931
2018-01-01 | 25378
EOF
if (( $# > 0 )); then
for args in "$@"; do
if [ "$args" = "--by_group" ]; then
where_g="job.user_id = user_group_association.user_id and galaxy_group.id = user_group_association.group_id"
select="galaxy_group.name,"
from="galaxy_group, user_group_association,"
group=", galaxy_group.name"
else
where_y="date_trunc('year', job.create_time AT TIME ZONE 'UTC') = '$args-01-01'::date"
fi
done
if (( $# > 1 )); then
where="WHERE $where_y and $where_g"
else
where="WHERE $where_y $where_g"
fi
fi
read -r -d '' QUERY <<-EOF
SELECT
date_trunc('month', job.create_time AT TIME ZONE 'UTC')::DATE AS month,
$select
count(*)
FROM
$from
job
$where
GROUP BY
month
$group
ORDER BY
month DESC
EOF
}
query_total-jobs(){ ## : Total number of jobs run by galaxy instance
handle_help "$@" <<-EOF
Count total number of jobs
$ gxadmin query total-jobs
state | count
---------+-------
deleted | 21
error | 197
ok | 798
(3 rows)
EOF
fields="count=1"
tags="state=0"
read -r -d '' QUERY <<-EOF
SELECT
state, count(*)
FROM
job
GROUP BY
state
ORDER BY
state
EOF
}
query_job-history() { ##? <id>: Job state history for a specific job
handle_help "$@" <<-EOF
$ gxadmin query job-history 1
time | state
-------------------------------+--------
2018-11-20 17:15:09.297907+00 | error
2018-11-20 17:15:08.911972+00 | queued
2018-11-20 17:15:08.243363+00 | new
2018-11-20 17:15:08.198301+00 | upload
2018-11-20 17:15:08.19655+00 | new
(5 rows)
EOF
read -r -d '' QUERY <<-EOF
SELECT
create_time AT TIME ZONE 'UTC' as time,
state
FROM job_state_history
		WHERE job_id = $arg_id
		ORDER BY create_time DESC
EOF
}
query_job-inputs() { ##? <id>: Input datasets to a specific job
handle_help "$@" <<-EOF
EOF
read -r -d '' QUERY <<-EOF
SELECT
hda.id AS hda_id,
hda.state AS hda_state,
hda.deleted AS hda_deleted,
hda.purged AS hda_purged,
d.id AS d_id,
d.state AS d_state,
d.deleted AS d_deleted,
d.purged AS d_purged,
d.object_store_id AS object_store_id
FROM job j
JOIN job_to_input_dataset jtid
ON j.id = jtid.job_id
JOIN history_dataset_association hda
ON hda.id = jtid.dataset_id
JOIN dataset d
ON hda.dataset_id = d.id
WHERE j.id = $arg_id
EOF
}
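# Illustrative (the job ID is hypothetical): show the input HDAs/datasets of job 12345,
# useful when checking whether a failed job was fed deleted or purged data:
#   gxadmin query job-inputs 12345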
query_job-outputs() { ##? <id>: Output datasets from a specific job
handle_help "$@" <<-EOF
EOF
read -r -d '' QUERY <<-EOF
SELECT
hda.id AS hda_id,
hda.state AS hda_state,
hda.deleted AS hda_deleted,
hda.purged AS hda_purged,
d.id AS d_id,
d.state AS d_state,
d.deleted AS d_deleted,
d.purged AS d_purged,
d.object_store_id AS object_store_id
FROM job j
JOIN job_to_output_dataset jtod
ON j.id = jtod.job_id
JOIN history_dataset_association hda
ON hda.id = jtod.dataset_id
JOIN dataset d
ON hda.dataset_id = d.id
WHERE j.id = $arg_id
EOF
}
query_job-info() { ## <-|job_id [job_id [...]]> : Retrieve information about jobs given some job IDs
handle_help "$@" <<-EOF
Retrieves information on a job, like the host it ran on,
how long it ran for and the total memory.
id | create_time | update_time | tool_id | hostname | handler | runtime_seconds | memtotal
----- | ------------ | ----------- | ------------ | ------------ | -------- | --------------- | --------
1 | | | 123f911b5f1 | 123f911b5f1 | handler0 | 20.35 | 20.35 GB
2 | | | cb0fabc0002 | cb0fabc0002 | handler1 | 14.93 | 5.96 GB
3 | | | 7e9e9b00b89 | 7e9e9b00b89 | handler1 | 14.24 | 3.53 GB
4 | | | 42f211e5e87 | 42f211e5e87 | handler4 | 14.06 | 1.79 GB
5 | | | 26cdba62c93 | 26cdba62c93 | handler0 | 12.97 | 1.21 GB
6 | | | fa87cddfcae | fa87cddfcae | handler1 | 7.01 | 987 MB
7 | | | 44d2a648aac | 44d2a648aac | handler3 | 6.70 | 900 MB
8 | | | 66c57b41194 | 66c57b41194 | handler1 | 6.43 | 500 MB
9 | | | 6b1467ac118 | 6b1467ac118 | handler0 | 5.45 | 250 MB
10 | | | d755361b59a | d755361b59a | handler2 | 5.19 | 100 MB
EOF
assert_count_ge $# 1 "Missing job IDs"
if [[ "$1" == "-" ]]; then
# read jobs from stdin
job_ids=$(cat | paste -s -d' ')
else
# read from $@
job_ids=$@;
fi
job_ids_string=$(join_by ',' ${job_ids[@]})
read -r -d '' QUERY <<-EOF
WITH hostname_query AS (
SELECT job_id,
metric_value as hostname
FROM job_metric_text
WHERE job_id IN ($job_ids_string)
AND metric_name='HOSTNAME'
),
metric_num_query AS (
SELECT job_id,
SUM(CASE WHEN metric_name='runtime_seconds' THEN metric_value END) runtime_seconds,
pg_size_pretty(SUM(CASE WHEN metric_name='memtotal' THEN metric_value END)) memtotal
FROM job_metric_numeric
WHERE job_id IN ($job_ids_string)
AND metric_name IN ('runtime_seconds', 'memtotal')
GROUP BY job_id
)
SELECT job.id,
job.create_time,
job.update_time,
job.tool_id,
			hostname_query.hostname,
			job.handler,
metric_num_query.runtime_seconds,
metric_num_query.memtotal
FROM job
FULL OUTER JOIN hostname_query ON hostname_query.job_id = job.id
FULL OUTER JOIN metric_num_query ON metric_num_query.job_id = job.id
WHERE job.id IN ($job_ids_string)
EOF
}
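# Illustrative: job IDs may be passed as arguments or piped in on stdin
# (the IDs and file name below are hypothetical):
#   gxadmin query job-info 12345 67890
#   cat interesting-job-ids.txt | gxadmin query job-info -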
query_old-histories(){ ##? [weeks|15]: Lists histories that haven't been updated (used) for <weeks>
handle_help "$@" <<-EOF
Histories and their users who haven't been updated for a specified number of weeks. Default number of weeks is 15.
		    $ gxadmin query old-histories 52
id | update_time | user_id | email | name | published | deleted | purged | hid_counter
-------+----------------------------+---------+---------+--------------------+-----------+---------+--------+-------------
39903 | 2017-06-13 12:35:07.174749 | 834 | xxx@xxx | Unnamed history | f | f | f | 23
1674 | 2017-06-13 14:08:30.017574 | 9 | xxx@xxx | SAHA project | f | f | f | 47
40088 | 2017-06-15 04:10:48.879122 | 986 | xxx@xxx | Unnamed history | f | f | f | 3
39023 | 2017-06-15 09:33:12.007002 | 849 | xxx@xxx | prac 4 new final | f | f | f | 297
35437 | 2017-06-16 04:41:13.15785 | 731 | xxx@xxx | Unnamed history | f | f | f | 98
40123 | 2017-06-16 13:43:24.948344 | 987 | xxx@xxx | Unnamed history | f | f | f | 22
40050 | 2017-06-19 00:46:29.032462 | 193 | xxx@xxx | Telmatactis | f | f | f | 74
12212 | 2017-06-20 14:41:03.762881 | 169 | xxx@xxx | Unnamed history | f | f | f | 24
39523 | 2017-06-21 01:34:52.226653 | 9 | xxx@xxx | OSCC Cell Lines | f | f | f | 139
EOF
email=$(gdpr_safe galaxy_user.email 'email')
read -r -d '' QUERY <<-EOF
SELECT
history.id,
history.update_time AT TIME ZONE 'UTC' as update_time,
history.user_id,
$email,
history.name,
history.published,
history.deleted,
history.purged,
history.hid_counter
FROM
history,
galaxy_user
WHERE
history.update_time < (now() AT TIME ZONE 'UTC' - '$arg_weeks weeks'::interval) AND
history.user_id = galaxy_user.id AND
history.deleted = FALSE AND
history.published = FALSE
ORDER BY
history.update_time desc
EOF
}
# TODO(hxr): generic summation by metric? Leave math to consumer?
query_jobs-max-by-cpu-hours() { ## : Top 10 jobs by CPU hours consumed (requires CGroups metrics)
handle_help "$@" <<-EOF
EOF
read -r -d '' QUERY <<-EOF
SELECT
job.id,
job.tool_id,
job.create_time,
			metric_value/1000000000/3600 as cpu_hours -- cpuacct.usage is reported in nanoseconds
FROM job, job_metric_numeric
WHERE
job.id = job_metric_numeric.job_id
AND metric_name = 'cpuacct.usage'
		ORDER BY cpu_hours desc
		LIMIT 10
EOF
}
query_errored-jobs(){ ##? [hours|24] [--details]: Lists jobs that errored in the last N hours.
handle_help "$@" <<-EOF
Lists details of jobs that have status = 'error' for the specified number of hours. Default = 24 hours
$ gxadmin query errored-jobs 2
id | create_time | tool_id | tool_version | handler | destination_id | job_runner_external_id | email
----+-------------+---------+--------------+----------+----------------+------------------------+------------------
1 | | upload1 | 1.1.0 | handler2 | slurm_normal | 42 | user@example.org
2 | | cut1 | 1.1.1 | handler1 | slurm_normal | 43 | user@example.org
3 | | bwa | 0.7.17.1 | handler0 | slurm_multi | 44 | map@example.org
4 | | trinity | 2.9.1 | handler1 | pulsar_bigmem | 4 | rna@example.org
EOF
email=$(gdpr_safe galaxy_user.email 'email')
details=
if [[ -n "$arg_details" ]]; then
details="job.job_stderr,"
fi
read -r -d '' QUERY <<-EOF
SELECT
job.id,
job.create_time AT TIME ZONE 'UTC' as create_time,
job.tool_id,
job.tool_version,
job.handler,
job.destination_id,
job.job_runner_external_id,
$details
$email
FROM
job,
galaxy_user
WHERE
job.create_time >= (now() AT TIME ZONE 'UTC' - '$arg_hours hours'::interval) AND
job.state = 'error' AND
job.user_id = galaxy_user.id
ORDER BY
job.id
EOF
}
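# Illustrative: jobs that errored in the last 2 hours, including their stderr:
#   gxadmin query errored-jobs 2 --details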
query_workflow-invocation-status() { ## : Report on how many workflows are in new state by handler
handle_help "$@" <<-EOF
Really only intended to be used in influx queries.
EOF
fields="count=3"
tags="scheduler=0;handler=1;state=2"
read -r -d '' QUERY <<-EOF
SELECT
COALESCE(scheduler, 'none'),
COALESCE(handler, 'none'),
state,
count(*)
FROM
workflow_invocation
WHERE state in ('new', 'ready')
GROUP BY handler, scheduler, state
EOF
}
query_workflow-invocation-totals() { ## : Report on overall workflow counts, to ensure throughput
handle_help "$@" <<-EOF
Really only intended to be used in influx queries.
EOF
fields="count=1"
tags="state=0"
read -r -d '' QUERY <<-EOF
SELECT
COALESCE(state, 'unknown'), count(*)
FROM
workflow_invocation
GROUP BY state
EOF
}
query_tool-new-errors() { ##? [weeks|4] [--short-tool-id]: Summarize percent of tool runs in error over the past weeks for "new tools"
handle_help "$@" <<-EOF
See jobs-in-error summary for recent tools (tools whose first execution is in recent weeks).
		    $ gxadmin query tool-new-errors --short-tool-id 1
tool_id | tool_runs | percent_errored | percent_failed | count_errored | count_failed | handler
-----------------------------------+-----------+-------------------+----------------+---------------+--------------+-----------------
rnateam/graphclust_align_cluster/ | 55 | 0.145454545454545 | 0 | 8 | 0 | handler_main_10
iuc/rgrnastar/rna_star/2.6.0b-2 | 46 | 0.347826086956522 | 0 | 16 | 0 | handler_main_3
iuc/rgrnastar/rna_star/2.6.0b-2 | 43 | 0.186046511627907 | 0 | 8 | 0 | handler_main_0
iuc/rgrnastar/rna_star/2.6.0b-2 | 41 | 0.390243902439024 | 0 | 16 | 0 | handler_main_4
iuc/rgrnastar/rna_star/2.6.0b-2 | 40 | 0.325 | 0 | 13 | 0 | handler_main_6
Filter1 | 40 | 0.125 | 0 | 5 | 0 | handler_main_0
devteam/bowtie2/bowtie2/2.3.4.3 | 40 | 0.125 | 0 | 5 | 0 | handler_main_7
iuc/rgrnastar/rna_star/2.6.0b-2 | 40 | 0.3 | 0 | 12 | 0 | handler_main_2
EOF
tool_id="j.tool_id"
if [[ -n $arg_short_tool_id ]]; then
tool_id="regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id"
fi
fields="tool_runs=1;percent_errored=2;percent_failed=3;count_errored=4;count_failed=5"
tags="tool_id=0;handler=6"
read -r -d '' QUERY <<-EOF
SELECT
$tool_id,
count(*) AS tool_runs,
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END)::float / count(*) AS percent_errored,
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END)::float / count(*) AS percent_failed,
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END) AS count_errored,
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END) AS count_failed,
j.handler
FROM job AS j
WHERE
j.tool_id
			IN (
				SELECT tool_id
				FROM job
				GROUP BY tool_id
				HAVING min(job.create_time) > (now() - '$arg_weeks weeks'::INTERVAL)
			)
GROUP BY j.tool_id, j.handler
		ORDER BY (sum(CASE WHEN j.state IN ('error', 'failed') THEN 1 ELSE 0 END)::float / count(*)) DESC
EOF
}
query_tool-errors() { ##? [--short-tool-id] [weeks|4]: Summarize percent of tool runs in error over the past weeks for all tools that have failed (most popular tools first)
handle_help "$@" <<-EOF
See jobs-in-error summary for recently executed tools that have failed at least 10% of the time.
$ gxadmin query tool-errors --short-tool-id 1
tool_id | tool_runs | percent_errored | percent_failed | count_errored | count_failed | handler
-----------------------------------+-----------+-------------------+----------------+---------------+--------------+-----------------
rnateam/graphclust_align_cluster/ | 55 | 0.145454545454545 | 0 | 8 | 0 | handler_main_10
iuc/rgrnastar/rna_star/2.6.0b-2 | 46 | 0.347826086956522 | 0 | 16 | 0 | handler_main_3
iuc/rgrnastar/rna_star/2.6.0b-2 | 43 | 0.186046511627907 | 0 | 8 | 0 | handler_main_0
iuc/rgrnastar/rna_star/2.6.0b-2 | 41 | 0.390243902439024 | 0 | 16 | 0 | handler_main_4
iuc/rgrnastar/rna_star/2.6.0b-2 | 40 | 0.325 | 0 | 13 | 0 | handler_main_6
Filter1 | 40 | 0.125 | 0 | 5 | 0 | handler_main_0
devteam/bowtie2/bowtie2/2.3.4.3 | 40 | 0.125 | 0 | 5 | 0 | handler_main_7
iuc/rgrnastar/rna_star/2.6.0b-2 | 40 | 0.3 | 0 | 12 | 0 | handler_main_2
EOF
# TODO: Fix this nonsense for proper args
tool_id="j.tool_id"
if [[ -n $arg_short_tool_id ]]; then
tool_id="regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id"
fi
fields="tool_runs=1;percent_errored=2;percent_failed=3;count_errored=4;count_failed=5"
tags="tool_id=0;handler=6"
read -r -d '' QUERY <<-EOF
SELECT
$tool_id,
count(*) AS tool_runs,
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END)::float / count(*) AS percent_errored,
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END)::float / count(*) AS percent_failed,
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END) AS count_errored,
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END) AS count_failed,
j.handler
FROM
job AS j
WHERE
j.create_time > (now() - '$arg_weeks weeks'::INTERVAL)
GROUP BY
j.tool_id, j.handler
HAVING
sum(CASE WHEN j.state IN ('error', 'failed') THEN 1 ELSE 0 END) * 100.0 / count(*) > 10.0
ORDER BY
tool_runs DESC
EOF
}
query_tool-likely-broken() { ##? [--short-tool-id] [weeks|4]: Find tools that have been executed in recent weeks that are (or were due to job running) likely substantially broken
handle_help "$@" <<-EOF
This runs an identical query to tool-errors, except filtering for tools
which were run more than 4 times, and have a failure rate over 95%.
tool_id | tool_runs | percent_errored | percent_failed | count_errored | count_failed | handler
-------------------------------------------------------+-----------+-----------------+----------------+---------------+--------------+-----------------
simon-gladman/velvetoptimiser/velvetoptimiser/2.2.6 | 14 | 1 | 0 | 14 | 0 | handler_main_7
bgruening/hicexplorer_hicplottads/hicexplorer_hicplott| 9 | 1 | 0 | 9 | 0 | handler_main_0
bgruening/text_processing/tp_replace_in_column/1.1.3 | 8 | 1 | 0 | 8 | 0 | handler_main_3
bgruening/text_processing/tp_awk_tool/1.1.1 | 7 | 1 | 0 | 7 | 0 | handler_main_5
rnateam/dorina/dorina_search/1.0.0 | 7 | 1 | 0 | 7 | 0 | handler_main_2
bgruening/text_processing/tp_replace_in_column/1.1.3 | 6 | 1 | 0 | 6 | 0 | handler_main_9
rnateam/dorina/dorina_search/1.0.0 | 6 | 1 | 0 | 6 | 0 | handler_main_11
rnateam/dorina/dorina_search/1.0.0 | 6 | 1 | 0 | 6 | 0 | handler_main_8
EOF
# TODO: Fix this nonsense for proper args
tool_id="j.tool_id"
if [[ -n $arg_short_tool_id ]]; then
tool_id="regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id"
fi
fields="tool_runs=1;percent_errored=2;percent_failed=3;count_errored=4;count_failed=5"
tags="tool_id=0;handler=6"
read -r -d '' QUERY <<-EOF
SELECT
$tool_id,
count(*) AS tool_runs,
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END)::float / count(*) AS percent_errored,
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END)::float / count(*) AS percent_failed,
sum(CASE WHEN j.state = 'error' THEN 1 ELSE 0 END) AS count_errored,
sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END) AS count_failed,
j.handler
FROM
job AS j
WHERE
j.create_time > (now() - '$arg_weeks weeks'::INTERVAL)
GROUP BY
j.tool_id, j.handler
HAVING
sum(CASE WHEN j.state IN ('error', 'failed') THEN 1 ELSE 0 END) * 100.0 / count(*) > 95.0
AND count(*) > 4
ORDER BY
tool_runs DESC
EOF
}
query_user-recent-aggregate-jobs() { ##? <user> [days|7]: Show aggregate information for jobs in past N days for user (by email/id/username)
handle_help "$@" <<-EOF
Obtain an overview of tools that a user has run in the past N days
EOF
# args
user_filter=$(get_user_filter "$arg_user")
read -r -d '' QUERY <<-EOF
SELECT
date_trunc('day', create_time), tool_id, state, count(*)
FROM
job
JOIN
galaxy_user on galaxy_user.id = job.user_id
WHERE
$user_filter AND create_time > (now() - '$arg_days days'::INTERVAL)
GROUP BY
date_trunc, tool_id, state
ORDER BY
date_trunc DESC
EOF
}
query_user-history-list() { ##? <user> [--size]: List a user's (by email/id/username) histories.
handle_help "$@" <<-EOF
Obtain an overview of histories of a user. By default orders the histories by date.
When using '--size' it overrides the order to size.
$ gxadmin query user-history-list <username|id|email>
ID | Name | Last Updated | Size
-------+--------------------------------------+----------------------------+-----------
52 | Unnamed history | 2019-08-08 15:15:32.284678 | 293 MB
30906 | Unnamed history | 2019-07-23 16:25:36.084019 | 13 kB
EOF
# args
user_filter=$(get_user_filter "$arg_user")
order_col="uh.update_time"
if [[ -n "$arg_size" ]]; then
order_col="hs.hist_size"
fi
read -r -d '' QUERY <<-EOF
WITH user_histories AS (
SELECT id,
name,
update_time
FROM history
WHERE user_id IN (
SELECT id
FROM galaxy_user
WHERE $user_filter
) AND NOT purged
), history_sizes AS (
SELECT history_id,
sum(coalesce(dataset.total_size, dataset.file_size, 0)) as "hist_size"
FROM history_dataset_association,
dataset
WHERE history_id IN (
SELECT id
FROM user_histories
) AND history_dataset_association.dataset_id = dataset.id
GROUP BY history_id
)
SELECT uh.id as "ID",
uh.name as "Name",
uh.update_time as "Last Updated",
pg_size_pretty(hs.hist_size) as "Size"
FROM user_histories uh,
history_sizes hs
WHERE uh.id = hs.history_id
ORDER BY $order_col DESC
EOF
}
query_history-contents() { ## <history_id> [--dataset|--collection]: List datasets and/or collections in a history
handle_help "$@" <<-EOF
		List the datasets and/or dataset collections contained in the given history
EOF
local dsq clq q
dsq="select dataset_id, name, hid, visible, deleted, copied_from_history_dataset_association_id as copied_from from history_dataset_association where history_id = $1"
clq="select collection_id, name, hid, visible, deleted, copied_from_history_dataset_collection_association_id as copied_from from history_dataset_collection_association where history_id = $1;"
if [[ $2 == "--dataset" ]] || [[ $2 == "--datasets" ]]; then
q="$dsq"
elif [[ $2 == "--collection" ]] || [[ $2 == "--collections" ]]; then
q="$clq"
else
q="$dsq;$clq"
fi
read -r -d '' QUERY <<-EOF
$q
EOF
}
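# Illustrative (history ID hypothetical): list only the dataset collections of history 42:
#   gxadmin query history-contents 42 --collections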
query_hdca-info() { ##? <hdca_id>: Information on a dataset collection
handle_help "$@" <<-EOF
EOF
read -r -d '' QUERY <<-EOF
SELECT *
FROM dataset_collection
WHERE id = $arg_hdca_id
EOF
}
query_hdca-datasets() { ##? <hdca_id>: List of files in a dataset collection
handle_help "$@" <<-EOF
EOF
read -r -d '' QUERY <<-EOF
SELECT element_index, hda_id, ldda_id, child_collection_id, element_identifier
FROM dataset_collection_element
WHERE dataset_collection_id = $arg_hdca_id
ORDER by element_index asc
EOF
}
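# Illustrative (collection ID hypothetical): drill into a collection; rows with a
# child_collection_id can be followed up with another hdca-datasets call:
#   gxadmin query hdca-datasets 1234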
query_jobs-queued-internal-by-handler() { ## : How many queued jobs do not have external IDs, by handler
handle_help "$@" <<-EOF
Identify which handlers have a backlog of jobs which should be
receiving external cluster IDs but have not yet.
handler | count
---------------- + ------
handler_main_0 | 14
handler_main_1 | 4
handler_main_10 | 13
handler_main_2 | 11
handler_main_3 | 14
handler_main_4 | 12
handler_main_5 | 9
handler_main_6 | 7
handler_main_7 | 13
handler_main_8 | 9
handler_main_9 | 14
EOF
fields="count=1"
tags="handler=0"
read -r -d '' QUERY <<-EOF
SELECT
handler,
count(handler)
FROM
job
WHERE
state = 'queued'
AND job_runner_external_id IS null
GROUP BY
handler
EOF
}
query_jobs-queued() { ## : How many queued jobs have external cluster IDs
handle_help "$@" <<-EOF
Shows the distribution of jobs in queued state, whether or not they have received an external ID.
n | count
------------ | ------
unprocessed | 118
processed | 37
EOF
fields="count=1"
tags="group=0"
read -r -d '' QUERY <<-EOF
SELECT
CASE WHEN job_runner_external_id IS NOT null THEN 'processed' ELSE 'unprocessed' END as n,
count(*)
FROM
job
WHERE
state = 'queued'
GROUP BY n
EOF
}
query_users-with-oidc() { ## : How many users logged in with OIDC
handle_help "$@" <<-EOF
provider | count
-------- | ------
elixir | 5
EOF
fields="count=1"
tags="provider=0"
read -r -d '' QUERY <<-EOF
SELECT provider, count(distinct user_id) FROM oidc_user_authnz_tokens GROUP BY provider
EOF
}
query_history-runtime-system() { ##? <history_id>: Sum of runtimes by all jobs in a history
handle_help "$@" <<-EOF
EOF
read -r -d '' QUERY <<-EOF
SELECT
(sum(job_metric_numeric.metric_value)::INT8 || 'seconds')::INTERVAL
FROM
job LEFT JOIN job_metric_numeric ON job.id = job_metric_numeric.job_id
WHERE
job.history_id = $arg_history_id AND metric_name = 'runtime_seconds'
EOF
}
query_history-runtime-wallclock() { ##? <history_id>: Time as elapsed by a clock on the wall
handle_help "$@" <<-EOF
EOF
read -r -d '' QUERY <<-EOF
SELECT
max(job.update_time) - min(job.create_time)
FROM
job
WHERE
job.history_id = $arg_history_id
EOF
}
query_history-runtime-system-by-tool() { ##? <history_id>: Sum of runtimes by all jobs in a history, split by tool
handle_help "$@" <<-EOF
EOF
read -r -d '' QUERY <<-EOF
SELECT
job.tool_id,
(sum(job_metric_numeric.metric_value)::INT || 'seconds')::INTERVAL
FROM
job LEFT JOIN job_metric_numeric ON job.id = job_metric_numeric.job_id
WHERE
job.history_id = $arg_history_id AND metric_name = 'runtime_seconds'
GROUP BY
job.tool_id
ORDER BY
"interval" DESC
EOF
}
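# Illustrative (history ID hypothetical): the three history-runtime-* queries are meant
# to be compared -- total core time, wallclock span, and the per-tool breakdown:
#   gxadmin query history-runtime-system 123
#   gxadmin query history-runtime-wallclock 123
#   gxadmin query history-runtime-system-by-tool 123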
query_upload-gb-in-past-hour() { ##? [hours|1]: Sum in bytes of files uploaded in the past hour
handle_help "$@" <<-EOF
		Quick output, mostly useful for graphing, to produce a nice graph of how heavily people are currently uploading.
EOF
fields="count=0"
tags="hours=1"
read -r -d '' QUERY <<-EOF
SELECT
coalesce(sum(coalesce(dataset.total_size, coalesce(dataset.file_size, 0))), 0),
$arg_hours as hours
FROM
job
LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id
LEFT JOIN history_dataset_association ON
job_to_output_dataset.dataset_id = history_dataset_association.id
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
WHERE
job.tool_id = 'upload1'
AND job.create_time AT TIME ZONE 'UTC' > (now() - '$arg_hours hours'::INTERVAL)
EOF
}
query_queue-detail-by-handler() { ##? <handler_id>: List jobs for a specific handler
handle_help "$@" <<-EOF
List the jobs currently being processed by a specific handler
EOF
read -r -d '' QUERY <<-EOF
SELECT
id,
create_time,
state,
regexp_replace(tool_id, '.*toolshed.*/repos/', ''),
job_runner_name,
job_runner_external_id,
destination_id
FROM
job
WHERE
handler = '$arg_handler_id' AND state IN ('new', 'queued', 'running')
EOF
}
query_pg-cache-hit() { ## : Check postgres in-memory cache hit ratio
handle_help "$@" <<-EOF
Query from: https://www.citusdata.com/blog/2019/03/29/health-checks-for-your-postgres-database/
Tells you about the cache hit ratio, is Postgres managing to store
commonly requested objects in memory or are they being loaded every
time?
		 heap_read | heap_hit |         ratio
		-----------+----------+------------------------
		        29 |    64445 | 0.99955020628470391165
EOF
fields="read=0;hit=1;ratio=2"
tags=""
read -r -d '' QUERY <<-EOF
SELECT
sum(heap_blks_read) as heap_read,
sum(heap_blks_hit) as heap_hit,
sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio
FROM
pg_statio_user_tables
EOF
}
query_pg-table-bloat() { ##? [--human]: show table and index bloat in your database ordered by most wasteful
handle_help "$@" <<-EOF
Query from: https://www.citusdata.com/blog/2019/03/29/health-checks-for-your-postgres-database/
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
EOF
if [[ -n "$arg_human" ]]; then
waste_query="pg_size_pretty(raw_waste)"
else
waste_query="raw_waste"
fi
fields="bloat=3;ratio=4"
tags="type=0;schema=1;object=2"
read -r -d '' QUERY <<-EOF
WITH constants AS (
SELECT current_setting('block_size')::numeric AS bs, 23 AS hdr, 4 AS ma
), bloat_info AS (
SELECT
ma,bs,schemaname,tablename,
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
SELECT
schemaname, tablename, hdr, ma, bs,
SUM((1-null_frac)*avg_width) AS datawidth,
MAX(null_frac) AS maxfracsum,
hdr+(
SELECT 1+count(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
) AS nullhdr
FROM pg_stats s, constants
GROUP BY 1,2,3,4,5
) AS foo
), table_bloat AS (
SELECT
schemaname, tablename, cc.relpages, bs,
CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta
FROM bloat_info
JOIN pg_class cc ON cc.relname = bloat_info.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema'
), index_bloat AS (
SELECT
schemaname, tablename, bs,
coalesce(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, c2.relpages,0 AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
FROM bloat_info
JOIN pg_class cc ON cc.relname = bloat_info.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema'
JOIN pg_index i ON indrelid = cc.oid
JOIN pg_class c2 ON c2.oid = i.indexrelid
)
SELECT
type, schemaname, object_name, bloat, $waste_query as waste
FROM
(SELECT
'table' as type,
schemaname,
tablename as object_name,
ROUND(CASE WHEN otta=0 THEN 0.0 ELSE table_bloat.relpages/otta::numeric END,1) AS bloat,
CASE WHEN relpages < otta THEN '0' ELSE (bs*(table_bloat.relpages-otta)::bigint)::bigint END AS raw_waste
FROM
table_bloat
UNION
SELECT
'index' as type,
schemaname,
tablename || '::' || iname as object_name,
ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) AS bloat,
CASE WHEN ipages < iotta THEN '0' ELSE (bs*(ipages-iotta))::bigint END AS raw_waste
FROM
index_bloat) bloat_summary
ORDER BY raw_waste DESC, bloat DESC
EOF
}
query_pg-mandelbrot() { ## : show the Mandelbrot set
handle_help "$@" <<-EOF
Copied from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
EOF
read -r -d '' QUERY <<-EOF
WITH RECURSIVE Z(IX, IY, CX, CY, X, Y, I) AS (
SELECT IX, IY, X::float, Y::float, X::float, Y::float, 0
FROM (select -2.2 + 0.031 * i, i from generate_series(0,101) as i) as xgen(x,ix),
(select -1.5 + 0.031 * i, i from generate_series(0,101) as i) as ygen(y,iy)
UNION ALL
SELECT IX, IY, CX, CY, X * X - Y * Y + CX AS X, Y * X * 2 + CY, I + 1
FROM Z
WHERE X * X + Y * Y < 16::float
AND I < 100
)
SELECT array_to_string(array_agg(SUBSTRING(' .,,,-----++++%%%%@@@@#### ', LEAST(GREATEST(I,1),27), 1)),'')
FROM (
SELECT IX, IY, MAX(I) AS I
FROM Z
GROUP BY IY, IX
ORDER BY IY, IX
) AS ZT
GROUP BY IY
ORDER BY IY
EOF
}
query_pg-index-usage() { ## : calculates your index hit rate (effective databases are at 99% and up)
handle_help "$@" <<-EOF
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
-1 means "Insufficient Data", this was changed to a numeric value to be acceptable to InfluxDB
EOF
fields="index_usage=1;rows=2"
tags="relname=0"
read -r -d '' QUERY <<-EOF
SELECT relname,
CASE COALESCE(idx_scan, 0)
WHEN 0 THEN -1
ELSE (100 * idx_scan / (seq_scan + idx_scan))
END percent_of_times_index_used,
n_live_tup rows_in_table
FROM
pg_stat_user_tables
ORDER BY
n_live_tup DESC
EOF
}
query_pg-index-size() { ##? [--human]: show the size of each index in the database, descending by size
handle_help "$@" <<-EOF
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
EOF
if [[ -n "$arg_human" ]]; then
human_size="pg_size_pretty(sum(c.relpages::bigint*8192)::bigint)"
else
human_size="sum(c.relpages::bigint*8192)::bigint"
fi
fields="size=1"
tags="relname=0"
read -r -d '' QUERY <<-EOF
SELECT
c.relname AS name,
$human_size AS size
FROM pg_class c
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
AND n.nspname !~ '^pg_toast'
AND c.relkind='i'
GROUP BY c.relname
ORDER BY sum(c.relpages) DESC
EOF
}
query_pg-long-running-queries() { ## : show all queries longer than five minutes by descending duration
handle_help "$@" <<-EOF
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
EOF
read -r -d '' QUERY <<-EOF
SELECT
pid,
now() - pg_stat_activity.query_start AS duration,
query AS query
FROM
pg_stat_activity
WHERE
pg_stat_activity.query <> ''::text
AND state <> 'idle'
AND now() - pg_stat_activity.query_start > interval '5 minutes'
ORDER BY
now() - pg_stat_activity.query_start DESC
EOF
}
query_pg-table-size() { ##? [--human]: show the size of the tables (excluding indexes), descending by size
handle_help "$@" <<-EOF
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
EOF
if [[ -n "$arg_human" ]]; then
# TODO: there has got to be a less ugly way to do this
human_size="pg_size_pretty("
human_after=")"
else
human_size=""
human_after=""
fi
fields="table_size=1;index_size=2"
tags="relname=0"
read -r -d '' QUERY <<-EOF
SELECT
c.relname AS name,
${human_size}pg_table_size(c.oid)${human_after} AS size,
${human_size}pg_indexes_size(c.oid)${human_after} AS index_size
FROM pg_class c
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
AND n.nspname !~ '^pg_toast'
AND c.relkind='r'
ORDER BY pg_table_size(c.oid) DESC
EOF
}
query_pg-unused-indexes() { ##? [--human]: show unused and almost unused indexes
handle_help "$@" <<-EOF
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
From their documentation:
> "Ordered by their size relative to the number of index scans.
> Exclude indexes of very small tables (less than 5 pages),
> where the planner will almost invariably select a sequential scan,
> but may not in the future as the table grows"
EOF
if [[ -n "$arg_human" ]]; then
# TODO: there has got to be a less ugly way to do this
human_size="pg_size_pretty("
human_after=")"
else
human_size=""
human_after=""
fi
fields="index_size=2;index_scans=3"
tags="table=0;index=1"
read -r -d '' QUERY <<-EOF
SELECT
schemaname || '.' || relname AS table,
indexrelname AS index,
${human_size}pg_relation_size(i.indexrelid)${human_after} AS index_size,
COALESCE(idx_scan, 0) as index_scans
FROM pg_stat_user_indexes ui
JOIN pg_index i ON ui.indexrelid = i.indexrelid
WHERE NOT indisunique AND idx_scan < 50 AND pg_relation_size(relid) > 5 * 8192
ORDER BY
pg_relation_size(i.indexrelid) / nullif(idx_scan, 0) DESC NULLS FIRST,
pg_relation_size(i.indexrelid) DESC
EOF
}
query_pg-vacuum-stats() { ## : show dead rows and whether an automatic vacuum is expected to be triggered
handle_help "$@" <<-EOF
Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
EOF
read -r -d '' QUERY <<-EOF
WITH table_opts AS (
SELECT
pg_class.oid, relname, nspname, array_to_string(reloptions, '') AS relopts
FROM
pg_class INNER JOIN pg_namespace ns ON relnamespace = ns.oid
), vacuum_settings AS (
SELECT
oid, relname, nspname,
CASE
WHEN relopts LIKE '%autovacuum_vacuum_threshold%'
THEN substring(relopts, '.*autovacuum_vacuum_threshold=([0-9.]+).*')::integer
ELSE current_setting('autovacuum_vacuum_threshold')::integer
END AS autovacuum_vacuum_threshold,
CASE
WHEN relopts LIKE '%autovacuum_vacuum_scale_factor%'
THEN substring(relopts, '.*autovacuum_vacuum_scale_factor=([0-9.]+).*')::real
ELSE current_setting('autovacuum_vacuum_scale_factor')::real
END AS autovacuum_vacuum_scale_factor
FROM
table_opts
)
SELECT
vacuum_settings.nspname AS schema,
vacuum_settings.relname AS table,
to_char(psut.last_vacuum, 'YYYY-MM-DD HH24:MI') AS last_vacuum,
to_char(psut.last_autovacuum, 'YYYY-MM-DD HH24:MI') AS last_autovacuum,
to_char(pg_class.reltuples, '9G999G999G999') AS rowcount,
to_char(psut.n_dead_tup, '9G999G999G999') AS dead_rowcount,
to_char(autovacuum_vacuum_threshold
+ (autovacuum_vacuum_scale_factor::numeric * pg_class.reltuples), '9G999G999G999') AS autovacuum_threshold,
CASE
WHEN autovacuum_vacuum_threshold + (autovacuum_vacuum_scale_factor::numeric * pg_class.reltuples) < psut.n_dead_tup
THEN 'yes'
END AS expect_autovacuum
FROM
pg_stat_user_tables psut INNER JOIN pg_class ON psut.relid = pg_class.oid
INNER JOIN vacuum_settings ON pg_class.oid = vacuum_settings.oid
ORDER BY 1
EOF
}
query_pg-stat-bgwriter() { ## : Stats about the behaviour of the bgwriter, checkpoints, buffers, etc.
handle_help "$@" <<-EOF
EOF
fields="checkpoints_timed=0;checkpoints_req=1;checkpoint_write_time=2;checkpoint_sync_time=3;buffers_checkpoint=4;buffers_clean=5;maxwritten_clean=6;buffers_backend=7;buffers_backend_fsync=8;buffers_alloc=9"
read -r -d '' QUERY <<-EOF
SELECT
checkpoints_timed,
checkpoints_req,
checkpoint_write_time,
checkpoint_sync_time,
buffers_checkpoint,
buffers_clean,
maxwritten_clean,
buffers_backend,
buffers_backend_fsync,
buffers_alloc
FROM
pg_stat_bgwriter
EOF
}
query_pg-stat-user-tables() { ## : stats about tables (tuples, index scans, vacuums, analyzes)
handle_help "$@" <<-EOF
EOF
tags="schemaname=0;relname=1"
fields="seq_scan=2;seq_tup_read=3;idx_scan=4;idx_tup_fetch=5;n_tup_ins=6;n_tup_upd=7;n_tup_del=8;n_tup_hot_upd=9;n_live_tup=10;n_dead_tup=11;vacuum_count=12;autovacuum_count=13;analyze_count=14;autoanalyze_count=15"
read -r -d '' QUERY <<-EOF
SELECT
schemaname,
relname,
seq_scan,
seq_tup_read,
COALESCE(idx_scan, 0),
COALESCE(idx_tup_fetch, 0),
n_tup_ins,
n_tup_upd,
n_tup_del,
n_tup_hot_upd,
n_live_tup,
n_dead_tup,
vacuum_count,
autovacuum_count,
analyze_count,
autoanalyze_count
FROM
pg_stat_user_tables
EOF
}
query_data-origin-distribution-merged() {
summary="$(summary_statistics data $human)"
username=$(gdpr_safe job.user_id galaxy_user)
read -r -d '' QUERY <<-EOF
WITH asdf AS (
SELECT
'total' as origin,
sum(coalesce(dataset.total_size, dataset.file_size, 0)) AS data,
date_trunc('month', dataset.create_time) as created,
$username
FROM job
LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id
LEFT JOIN history_dataset_association ON job_to_output_dataset.dataset_id = history_dataset_association.id
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
GROUP BY
origin, job.user_id, created, galaxy_user
)
SELECT
origin,
round(data, 2 - length(data::text)),
created,
galaxy_user
FROM asdf
ORDER BY galaxy_user desc
EOF
}
query_data-origin-distribution() { ## : data sources (uploaded vs derived)
handle_help "$@" <<-EOF
Break down the source of data in the server, uploaded data vs derived (created as output from a tool)
Recommendation is to run with GDPR_MODE so you can safely share this information:
GDPR_MODE=\$(openssl rand -hex 24 2>/dev/null) gxadmin tsvquery data-origin-distribution | gzip > data-origin.tsv.gz
Output looks like:
derived 130000000000 2019-07-01 00:00:00 fff4f423d06
derived 61000000000 2019-08-01 00:00:00 fff4f423d06
created 340000000 2019-08-01 00:00:00 fff4f423d06
created 19000000000 2019-07-01 00:00:00 fff4f423d06
derived 180000000000 2019-04-01 00:00:00 ffd28c0cf8c
created 21000000000 2019-04-01 00:00:00 ffd28c0cf8c
derived 1700000000 2019-06-01 00:00:00 ffd28c0cf8c
derived 120000000 2019-06-01 00:00:00 ffcb567a837
created 62000000 2019-05-01 00:00:00 ffcb567a837
created 52000000 2019-06-01 00:00:00 ffcb567a837
derived 34000000 2019-07-01 00:00:00 ffcb567a837
EOF
username=$(gdpr_safe job.user_id galaxy_user)
read -r -d '' QUERY <<-EOF
WITH asdf AS (
SELECT
case when job.tool_id = 'upload1' then 'created' else 'derived' end AS origin,
sum(coalesce(dataset.total_size, dataset.file_size, 0)) AS data,
date_trunc('month', dataset.create_time) as created,
$username
FROM job
LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id
LEFT JOIN history_dataset_association ON job_to_output_dataset.dataset_id = history_dataset_association.id
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
GROUP BY
origin, job.user_id, created, galaxy_user
)
SELECT
origin,
round(data, 2 - length(data::text)),
created,
galaxy_user
FROM asdf
ORDER BY galaxy_user, created desc
EOF
}
query_data-origin-distribution-summary() { ##? [--human]: breakdown of data sources (uploaded vs derived)
handle_help "$@" <<-EOF
Break down the source of data in the server, uploaded data vs derived (created as output from a tool)
		This query builds a table with the volume of derived and uploaded data per user, and then summarizes it:
origin | min | quant_1st | median | mean | quant_3rd | perc_95 | perc_99 | max | stddev
------- | ------- | --------- | ------- | ------ | --------- | ------- | ------- | ----- | --------
created | 0 bytes | 17 MB | 458 MB | 36 GB | 11 GB | 130 GB | 568 GB | 11 TB | 257 GB
derived | 0 bytes | 39 MB | 1751 MB | 200 GB | 28 GB | 478 GB | 2699 GB | 90 TB | 2279 GB
EOF
tags="dataorigin=0"
fields="min=1;q1=2;median=3;mean=4;q3=5;p95=6;p99=7;max=8;sum=9;stddev=10"
summary="$(summary_statistics data $arg_human)"
read -r -d '' QUERY <<-EOF
WITH user_job_data AS (
SELECT
case when job.tool_id = 'upload1' then 'created' else 'derived' end AS origin,
sum(coalesce(dataset.total_size, dataset.file_size, 0)) AS data,
job.user_id
FROM job
LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id
LEFT JOIN history_dataset_association ON job_to_output_dataset.dataset_id = history_dataset_association.id
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
GROUP BY
origin, job.user_id
)
SELECT
origin,
$summary
FROM user_job_data
GROUP BY origin
EOF
}
query_aq() { ## <table> <column> <-|job_id [job_id [...]]>: Given a list of IDs from a table (e.g. 'job'), access a specific column from that table
handle_help "$@" <<-EOF
EOF
table=$1; shift
column=$1; shift
if [[ "$1" == "-" ]]; then
# read jobs from stdin
ids=$(cat | paste -s -d' ')
else
# read from $@
ids=$@;
fi
ids_string=$(join_by ',' ${ids[@]})
read -r -d '' QUERY <<-EOF
SELECT
$column
FROM $table
WHERE id in ($ids_string)
EOF
}
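# Illustrative: pull one column for a set of row IDs, either inline or from stdin
# (the IDs and file name are hypothetical):
#   gxadmin query aq job tool_id 12345 67890
#   cat job-ids.txt | gxadmin query aq job destination_id -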
query_q() { ## <query>: Passes a raw SQL query directly through to the database
handle_help "$@" <<-EOF
EOF
QUERY="$@"
}
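# Illustrative: pass an ad-hoc statement straight to the database (the query text is
# just an example; anything psql accepts will work):
#   gxadmin query q "SELECT state, count(*) FROM job GROUP BY state"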
query_good-for-pulsar() { ## : Look for jobs EU would like to send to pulsar
handle_help "$@" <<-EOF
This selects all jobs and finds two things:
- sum of input sizes
- runtime
and then returns a simple /score/ of (input/runtime) and sorts on that
hopefully identifying things with small inputs and long runtimes.
EOF
read -r -d '' QUERY <<-EOF
WITH job_data AS (
SELECT
regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id,
SUM(d.total_size) AS size,
MIN(jmn.metric_value) AS runtime,
SUM(d.total_size) / min(jmn.metric_value) AS score
FROM job j
LEFT JOIN job_to_input_dataset jtid ON j.id = jtid.job_id
LEFT JOIN history_dataset_association hda ON jtid.dataset_id = hda.id
LEFT JOIN dataset d ON hda.dataset_id = d.id
LEFT JOIN job_metric_numeric jmn ON j.id = jmn.job_id
WHERE jmn.metric_name = 'runtime_seconds'
AND d.total_size IS NOT NULL
GROUP BY j.id
)
SELECT
tool_id,
percentile_cont(0.50) WITHIN GROUP (ORDER BY score) ::bigint AS median_score,
percentile_cont(0.50) WITHIN GROUP (ORDER BY runtime) ::bigint AS median_runtime,
pg_size_pretty(percentile_cont(0.50) WITHIN GROUP (ORDER BY size) ::bigint) AS median_size,
count(*)
FROM job_data
GROUP BY tool_id
ORDER BY median_score ASC
EOF
}
query_jobs-ready-to-run() { ## : Find jobs ready to run (Mostly a performance test)
handle_help "$@" <<-EOF
Mostly a performance test
EOF
read -r -d '' QUERY <<-EOF
SELECT
EXISTS(
SELECT
history_dataset_association.id,
history_dataset_association.history_id,
history_dataset_association.dataset_id,
history_dataset_association.create_time,
history_dataset_association.update_time,
history_dataset_association.state,
history_dataset_association.copied_from_history_dataset_association_id,
history_dataset_association.copied_from_library_dataset_dataset_association_id,
history_dataset_association.name,
history_dataset_association.info,
history_dataset_association.blurb,
history_dataset_association.peek,
history_dataset_association.tool_version,
history_dataset_association.extension,
history_dataset_association.metadata,
history_dataset_association.parent_id,
history_dataset_association.designation,
history_dataset_association.deleted,
history_dataset_association.visible,
history_dataset_association.extended_metadata_id,
history_dataset_association.version,
history_dataset_association.hid,
history_dataset_association.purged,
history_dataset_association.hidden_beneath_collection_instance_id
FROM
history_dataset_association,
job_to_output_dataset
WHERE
job.id = job_to_output_dataset.job_id
AND history_dataset_association.id
= job_to_output_dataset.dataset_id
AND history_dataset_association.deleted = true
)
AS anon_1,
EXISTS(
SELECT
history_dataset_collection_association.id
FROM
history_dataset_collection_association,
job_to_output_dataset_collection
WHERE
job.id = job_to_output_dataset_collection.job_id
AND history_dataset_collection_association.id
= job_to_output_dataset_collection.dataset_collection_id
AND history_dataset_collection_association.deleted
= true
)
AS anon_2,
job.id AS job_id,
job.create_time AS job_create_time,
job.update_time AS job_update_time,
job.history_id AS job_history_id,
job.library_folder_id AS job_library_folder_id,
job.tool_id AS job_tool_id,
job.tool_version AS job_tool_version,
job.state AS job_state,
job.info AS job_info,
job.copied_from_job_id AS job_copied_from_job_id,
job.command_line AS job_command_line,
job.dependencies AS job_dependencies,
job.param_filename AS job_param_filename,
job.runner_name AS job_runner_name_1,
job.stdout AS job_stdout,
job.stderr AS job_stderr,
job.exit_code AS job_exit_code,
job.traceback AS job_traceback,
job.session_id AS job_session_id,
job.user_id AS job_user_id,
job.job_runner_name AS job_job_runner_name,
job.job_runner_external_id
AS job_job_runner_external_id,
job.destination_id AS job_destination_id,
job.destination_params AS job_destination_params,
job.object_store_id AS job_object_store_id,
job.imported AS job_imported,
job.params AS job_params,
job.handler AS job_handler
FROM
job
WHERE
job.state = 'new'
AND job.handler IS NULL
AND job.handler = 'handler0'
EOF
}
query_workers() { ## : Retrieve a list of Galaxy worker processes
handle_help "$@" <<-EOF
This retrieves a list of Galaxy worker processes.
This functionality is only available on Galaxy
20.01 or later.
server_name | hostname | pid
------------------- | -------- | ---
main.web.1 | server1 | 123
main.job-handlers.1 | server2 | 456
EOF
read -r -d '' QUERY <<-EOF
SELECT
server_name,
hostname,
pid
FROM
worker_process
WHERE
pid IS NOT NULL
EOF
}
query_pg-rows-per-table() { ## : Print rows per table
handle_help "$@" <<-EOF
		This retrieves a list of tables in the database and their estimated row counts
EOF
read -r -d '' QUERY <<-EOF
SELECT
n.nspname AS table_schema,
c.relname AS table_name,
c.reltuples AS rows
FROM
pg_class AS c
JOIN pg_namespace AS n ON
n.oid = c.relnamespace
WHERE
c.relkind = 'r'
AND n.nspname
NOT IN (
'information_schema',
'pg_catalog'
)
ORDER BY
c.reltuples DESC
EOF
}
query_dump-users() { ##? [--apikey] [--email] : Dump the list of users and their emails
handle_help "$@" <<-EOF
This retrieves a list of all users
EOF
if [[ -n "$arg_email" ]]; then
email=",$(gdpr_safe email email)"
else
email=""
fi
if [[ -n "$arg_apikey" ]]; then
apikey="apikey"
apikeyjoin="left join api_keys "
else
email=""
fi
read -r -d '' QUERY <<-EOF
		SELECT
			username
			$email
			$apikey
		FROM
			galaxy_user
		$apikeyjoin
		ORDER BY
			galaxy_user.id desc
EOF
}
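# Illustrative: dump usernames plus pseudonymised emails as TSV, suitable for sharing
# (the output file name is hypothetical):
#   GDPR_MODE=$(openssl rand -hex 24 2>/dev/null) gxadmin tsvquery dump-users --email > users.tsv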
query_job-metrics() { ## : Retrieves input size, runtime, memory for all executed jobs
handle_help "$@" <<-EOF
Dump runtime stats for ALL jobs:
$ gxadmin query job-metrics
job_id | tool_id | state | total_filesize | num_files | runtime_seconds | slots | memory_bytes | create_time
--------+--------------------------------------+---------+----------------+-----------+-----------------+-----------+--------------+----------------------------
19 | require_format | ok | 5098 | 1 | 4.0000000 | 1.0000000 | | 2018-12-04 17:17:02.148239
48 | __SET_METADATA__ | ok | | 0 | 4.0000000 | 1.0000000 | | 2019-02-05 22:46:33.848141
49 | upload1 | ok | | | 6.0000000 | 1.0000000 | | 2019-02-05 22:58:41.610146
50 | upload1 | ok | | | 5.0000000 | 1.0000000 | | 2019-02-07 21:30:11.645826
51 | upload1 | ok | | | 5.0000000 | 1.0000000 | | 2019-02-07 21:30:12.18259
52 | upload1 | ok | | | 7.0000000 | 1.0000000 | | 2019-02-07 21:31:15.304868
54 | upload1 | ok | | | 5.0000000 | 1.0000000 | | 2019-02-07 21:31:16.116164
53 | upload1 | ok | | | 7.0000000 | 1.0000000 | | 2019-02-07 21:31:15.665948
...
989 | circos | error | 671523 | 12 | 14.0000000 | 1.0000000 | | 2020-04-30 10:13:33.872872
990 | circos | error | 671523 | 12 | 10.0000000 | 1.0000000 | | 2020-04-30 10:19:36.72646
991 | circos | error | 671523 | 12 | 10.0000000 | 1.0000000 | | 2020-04-30 10:21:00.460471
992 | circos | ok | 671523 | 12 | 21.0000000 | 1.0000000 | | 2020-04-30 10:31:35.366913
993 | circos | error | 588747 | 6 | 8.0000000 | 1.0000000 | | 2020-04-30 11:12:17.340591
994 | circos | error | 588747 | 6 | 9.0000000 | 1.0000000 | | 2020-04-30 11:15:27.076502
995 | circos | error | 588747 | 6 | 42.0000000 | 1.0000000 | | 2020-04-30 11:16:41.19449
996 | circos | ok | 588747 | 6 | 48.0000000 | 1.0000000 | | 2020-04-30 11:21:51.49684
997 | circos | ok | 588747 | 6 | 46.0000000 | 1.0000000 | | 2020-04-30 11:23:52.455536
**WARNING**
!> This can be very slow on large databases and there is no tool filtering; every job and dataset table record is scanned.
EOF
read -r -d '' QUERY <<-EOF
WITH dataset_filesizes AS (
SELECT
job_to_input_dataset.job_id, sum(file_size) AS total_filesize,
count(file_size) AS num_files FROM dataset
LEFT JOIN job_to_input_dataset ON dataset.id = job_to_input_dataset.dataset_id
GROUP BY job_to_input_dataset.job_id
)
SELECT
job.id AS job_id,
job.tool_id,
job.state,
dataset_filesizes.total_filesize,
dataset_filesizes.num_files,
jmn1.metric_value AS runtime_seconds,
jmn2.metric_value AS slots,
jmn3.metric_value AS memory_bytes,
job.create_time AS create_time
FROM job
LEFT JOIN dataset_filesizes ON job.id = dataset_filesizes.job_id
LEFT JOIN (SELECT * FROM job_metric_numeric WHERE job_metric_numeric.metric_name = 'runtime_seconds') jmn1 ON jmn1.job_id = job.id
LEFT JOIN (SELECT * FROM job_metric_numeric WHERE job_metric_numeric.metric_name = 'galaxy_slots') jmn2 ON jmn2.job_id = job.id
LEFT JOIN (SELECT * FROM job_metric_numeric WHERE job_metric_numeric.metric_name = 'memory.memsw.max_usage_in_bytes') jmn3 ON jmn3.job_id = job.id
EOF
}
query_history-core-hours() { ##? [history-name-ilike]: Produces the median core hour count for histories matching a name filter
handle_help "$@" <<-EOF
EOF
read -r -d '' QUERY <<-EOF
WITH
toolavg
AS (
SELECT
tool_id, history_id, round(sum(a.metric_value * b.metric_value / 3600), 2) AS cpu_hours
FROM
job_metric_numeric AS a, job_metric_numeric AS b, job
WHERE
b.job_id = a.job_id
AND a.job_id = job.id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'galaxy_slots'
AND history_id in (select id from history where name ilike '%$arg_history_name%')
GROUP BY
tool_id, history_id
),
toolmedian
AS (
SELECT
toolavg.tool_id, percentile_cont(0.5) WITHIN GROUP (ORDER BY cpu_hours)
FROM
toolavg
GROUP BY
toolavg.tool_id
)
SELECT
sum(toolmedian.percentile_cont)
FROM
toolmedian
EOF
}
query_pulsar-gb-transferred() { ##? [--bymonth] [--byrunner] [--human]: Counts up datasets transferred and output file size produced by jobs running on destinations like pulsar_*
handle_help "$@" <<-EOF
EOF
orderby=""
declare -a ordering
if [[ -n "$arg_bymonth" ]]; then
orderby="ORDER BY sent.month ASC"
ordering+=("sent.month")
fi
if [[ -n "$arg_byrunner" ]]; then
if [[ ! -n "$arg_bymonth" ]]; then
orderby="ORDER BY sent.runner ASC"
fi
ordering+=("sent.runner")
fi
if [[ -n "$arg_human" ]]; then
pg_size_pretty_a="pg_size_pretty("
pg_size_pretty_b=")"
fi
groupby=""
data_string="${ordering[*]}"
csvcols="${data_string//${IFS:0:1}/,}"
if (( ${#ordering[@]} > 0 )); then
groupby="GROUP BY $csvcols"
csvcols="$csvcols,"
fi
read -r -d '' QUERY <<-EOF
WITH
sent
AS (
SELECT
job.id AS job,
date_trunc('month', job.create_time)::DATE AS month,
job.job_runner_name AS runner,
ds_in.total_size AS size
FROM
job
LEFT JOIN job_to_input_dataset AS jtid ON job.id = jtid.job_id
LEFT JOIN history_dataset_association AS hda_in ON jtid.dataset_id = hda_in.id
LEFT JOIN dataset AS ds_in ON hda_in.dataset_id = ds_in.id
WHERE
job_runner_name LIKE 'pulsar%'
ORDER BY
job.id DESC
),
recv
AS (
SELECT
job.id AS job,
date_trunc('month', job.create_time)::DATE AS month,
job.job_runner_name AS runner,
ds_out.total_size AS size
FROM
job
LEFT JOIN job_to_output_dataset AS jtid ON job.id = jtid.job_id
LEFT JOIN history_dataset_association AS hda_out ON jtid.dataset_id = hda_out.id
LEFT JOIN dataset AS ds_out ON hda_out.dataset_id = ds_out.id
WHERE
job_runner_name LIKE 'pulsar%'
ORDER BY
job.id DESC
)
SELECT
$csvcols ${pg_size_pretty_a}sum(sent.size)${pg_size_pretty_b} AS sent, ${pg_size_pretty_a}sum(recv.size)${pg_size_pretty_b} AS recv, count(sent.size) as job_count
FROM
sent FULL JOIN recv ON sent.job = recv.job
$groupby
$orderby
EOF
}
registered_subcommands="$registered_subcommands filter"
_filter_short_help="Some text filtering and processing commands"
filter_pg2md() { ## : Convert postgres table format outputs to something that can be pasted as markdown
handle_help "$@" <<-EOF
Imagine doing something like:
$ gxadmin query active-users 2018 | gxadmin filter pg2md
unique_users | month
------------- | --------------------
811 | 2018-12-01 00:00:00
658 | 2018-11-01 00:00:00
583 | 2018-10-01 00:00:00
444 | 2018-09-01 00:00:00
342 | 2018-08-01 00:00:00
379 | 2018-07-01 00:00:00
370 | 2018-06-01 00:00:00
330 | 2018-05-01 00:00:00
274 | 2018-04-01 00:00:00
186 | 2018-03-01 00:00:00
168 | 2018-02-01 00:00:00
122 | 2018-01-01 00:00:00
and it should produce a nicely formatted table
EOF
cat | sed 's/--+--/- | -/g;s/^\(\s\+\)\([^|]\+\) |/\2 \1|/g' | head -n -2
}
filter_identicon(){ ## : Convert an input data stream into an identicon (e.g. with hostname)
handle_help "$@" <<-EOF
Given an input data stream, digest it, and colour it using the same logic as digest-color
$ echo test | ./gxadmin filter identicon
██████
██ ██
██ ██ ██
██████
██ ██ ██
(Imagine that it is a nice pink/blue colour scheme)
EOF
cat | $GXADMIN_PYTHON -c "$identicon_script"
}
filter_digest-color() { ## : Color an input stream based on the contents (e.g. hostname)
handle_help "$@" <<-EOF
Colors entire input stream based on digest of entire input's contents.
Mostly useful for colouring a hostname or some similar value.
$ echo test | ./gxadmin filter digest-color
test
(Imagine that it is light blue text on a pink background)
**NOTE** If the output isn't coloured properly, try:
export TERM=screen-256color
EOF
data="$(cat)"
fg_color=$((16#$(echo "$data" | perl -pe "chomp if eof" | sha256sum | cut -c1-2)))
bg_color=$((fg_color + 15))
echo "$(tput setaf $fg_color)$(tput setab $bg_color)${data}$(tput sgr0)"
}
registered_subcommands="$registered_subcommands uwsgi"
_uwsgi_short_help="Galaxy Handler/Zergling management (SystemD only)"
uwsgi_stats-influx(){ ## <addr>: InfluxDB formatted output for the current stats
handle_help "$@" <<-EOF
Contacts a specific uWSGI stats address (requires the uwsgi binary on the PATH),
requests the current stats and formats them for InfluxDB. For some
reason it has trouble with localhost vs IP address, so it is recommended that
you use the IP.
$ gxadmin uwsgi stats-influx 127.0.0.1:9191
uwsgi.locks,addr=127.0.0.1:9191,group=user_0 count=0
uwsgi.locks,addr=127.0.0.1:9191,group=signal count=0
uwsgi.locks,addr=127.0.0.1:9191,group=filemon count=0
uwsgi.locks,addr=127.0.0.1:9191,group=timer count=0
uwsgi.locks,addr=127.0.0.1:9191,group=rbtimer count=0
uwsgi.locks,addr=127.0.0.1:9191,group=cron count=0
uwsgi.locks,addr=127.0.0.1:9191,group=thunder count=2006859
uwsgi.locks,addr=127.0.0.1:9191,group=rpc count=0
uwsgi.locks,addr=127.0.0.1:9191,group=snmp count=0
uwsgi.general,addr=127.0.0.1:9191 listen_queue=0,listen_queue_errors=0,load=0,signal_queue=0
uwsgi.sockets,addr=127.0.0.1:9191,name=127.0.0.1:4001,proto=uwsgi queue=0,max_queue=100,shared=0,can_offload=0
uwsgi.worker,addr=127.0.0.1:9191,id=1 accepting=1,requests=65312,exceptions=526,harakiri_count=26,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=17433008661,respawn_count=27,tx=15850829410,avg_rt=71724
uwsgi.worker,addr=127.0.0.1:9191,id=2 accepting=1,requests=67495,exceptions=472,harakiri_count=51,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=15467746010,respawn_count=52,tx=15830867066,avg_rt=65380
uwsgi.worker,addr=127.0.0.1:9191,id=3 accepting=1,requests=67270,exceptions=520,harakiri_count=35,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14162158015,respawn_count=36,tx=15799661545,avg_rt=73366
uwsgi.worker,addr=127.0.0.1:9191,id=4 accepting=1,requests=66434,exceptions=540,harakiri_count=34,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=15740205807,respawn_count=35,tx=16231969649,avg_rt=75468
uwsgi.worker,addr=127.0.0.1:9191,id=5 accepting=1,requests=67021,exceptions=534,harakiri_count=38,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14573155758,respawn_count=39,tx=16517287963,avg_rt=140855
uwsgi.worker,addr=127.0.0.1:9191,id=6 accepting=1,requests=66810,exceptions=483,harakiri_count=24,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=19107513635,respawn_count=25,tx=15945313469,avg_rt=64032
uwsgi.worker,addr=127.0.0.1:9191,id=7 accepting=1,requests=66544,exceptions=460,harakiri_count=35,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14240478391,respawn_count=36,tx=15499531841,avg_rt=114981
uwsgi.worker,addr=127.0.0.1:9191,id=8 accepting=1,requests=67577,exceptions=517,harakiri_count=35,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14767971195,respawn_count=36,tx=15780639229,avg_rt=201275
For multiple zerglings you can run this for each of them and silence errors with 2>/dev/null:
PATH=/opt/galaxy/venv/bin:/sbin:/bin:/usr/sbin:/usr/bin gxadmin uwsgi stats-influx 127.0.0.1:9190 2>/dev/null
PATH=/opt/galaxy/venv/bin:/sbin:/bin:/usr/sbin:/usr/bin gxadmin uwsgi stats-influx 127.0.0.1:9191 2>/dev/null
exit 0
And it will fetch only data for responding uwsgis.
EOF
address="$1"; shift
# fetch data
uwsgi=$(which uwsgi)
data="$($uwsgi --connect-and-read "$address" 2>&1)"
echo "$data" | \
jq -r '.locks[] | to_entries[] | "uwsgi.locks,addr='"$address"',group=\(.key) count=\(.value)"' | \
sed 's/group=user 0/group=user_0/g'
echo "$data" | \
jq -r '. | "uwsgi.general,addr='"$address"' listen_queue=\(.listen_queue),listen_queue_errors=\(.listen_queue_errors),load=\(.load),signal_queue=\(.signal_queue)"'
echo "$data" | \
jq -r '.sockets[] | "uwsgi.sockets,addr='"$address"',name=\(.name),proto=\(.proto) queue=\(.queue),max_queue=\(.max_queue),shared=\(.shared),can_offload=\(.can_offload)"'
echo "$data" | \
jq -r '.workers[] | "uwsgi.worker,addr='"$address"',id=\(.id) accepting=\(.accepting),requests=\(.requests),exceptions=\(.exceptions),harakiri_count=\(.harakiri_count),signals=\(.signals),signal_queue=\(.signal_queue),status=\"\(.status)\",rss=\(.rss),vsz=\(.vsz),running_time=\(.running_time),respawn_count=\(.respawn_count),tx=\(.tx),avg_rt=\(.avg_rt)"' | \
sed 's/"busy"/1/g;s/"idle"/0/g;'
}
uwsgi_status() { ## : Current system status
handle_help "$@" <<-EOF
Current status of all uwsgi processes
EOF
echo "galaxy-zergpool: $(systemctl status galaxy-zergpool | grep Active:)"
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dzergling.slice/ -mindepth 1 -type d -name 'galaxy-zergling*service'); do
service=$(basename "$folder")
echo "$service $(systemctl status $service | grep Active:)"
done
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dhandler.slice/ -mindepth 1 -type d -name 'galaxy-handler*service'); do
service=$(basename "$folder")
echo "$service $(systemctl status $service | grep Active:)"
done
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dworkflow\\x2dscheduler.slice/ -mindepth 1 -type d -name 'galaxy-workflow*service'); do
service=$(basename "$folder")
echo "$service $(systemctl status $service | grep Active:)"
done
}
uwsgi_memory() { ## : Current system memory usage
handle_help "$@" <<-EOF
Obtain memory usage of the various Galaxy processes
Also consider using systemd-cgtop
EOF
echo "galaxy-zergpool.service $(cat /sys/fs/cgroup/memory/system.slice/galaxy-zergpool.service/memory.memsw.usage_in_bytes)"
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dzergling.slice/ -mindepth 1 -type d -name 'galaxy-zergling*service'); do
service=$(basename "$folder")
echo "$service $(cat "$folder/memory.memsw.usage_in_bytes")"
done
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dhandler.slice/ -mindepth 1 -type d -name 'galaxy-handler*service'); do
service=$(basename "$folder")
echo "$service $(cat "$folder/memory.memsw.usage_in_bytes")"
done
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dworkflow\\x2dscheduler.slice/ -mindepth 1 -type d -name 'galaxy-workflow*service'); do
service=$(basename "$folder")
echo "$service $(cat "$folder/memory.memsw.usage_in_bytes")"
done
}
uwsgi_pids() { ## : Galaxy process PIDs
handle_help "$@" <<-EOF
Obtain the PIDs of the various Galaxy processes
EOF
echo "galaxy-zergpool: $(systemctl status galaxy-zergpool | grep PID: | cut -d' ' -f 4)"
for i in {0..3}; do
echo "galaxy-zergling@$i: $(systemctl status galaxy-zergling@$i | grep PID: | cut -d' ' -f 4)"
done
for i in {0..11}; do
echo "galaxy-handler@$i: $(systemctl status galaxy-handler@$i | grep PID: | cut -d' ' -f 4)"
done
}
uwsgi_stats() { ## : uwsgi stats
handle_help "$@" <<-EOF
EOF
uwsgi_stats-influx 127.0.0.1:4010 2>/dev/null || true
uwsgi_stats-influx 127.0.0.1:4011 2>/dev/null || true
uwsgi_stats-influx 127.0.0.1:4012 2>/dev/null || true
uwsgi_stats-influx 127.0.0.1:4013 2>/dev/null || true
echo "systemd service=galaxy-zergpool memory=$(cat /sys/fs/cgroup/memory/system.slice/galaxy-zergpool.service/memory.memsw.usage_in_bytes)"
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dzergling.slice/ -mindepth 1 -type d -name 'galaxy-zergling*service'); do
service=$(basename "$folder")
echo "systemd service=$service memory=$(cat "$folder/memory.memsw.usage_in_bytes")"
done
for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dhandler.slice/ -mindepth 1 -type d -name 'galaxy-handler*service'); do
service=$(basename "$folder")
echo "systemd service=$service memory=$(cat "$folder/memory.memsw.usage_in_bytes")"
done
}
uwsgi_zerg-swap() { ## : Swap zerglings in order (unintelligent version)
handle_help "$@" <<-EOF
This is the "dumb" version which loops across the zerglings and restarts them in series
EOF
active="$(uwsgi_status | grep -v inactive | grep zergling | cut -d: -f1 | wc -l)"
if (( active == 1 )); then
error "Won't restart when only a single zergling is active as this will trigger downtime"
exit 1
fi
# Obtain current running ones
for zergling in $(uwsgi_status | grep -v inactive | grep zergling | cut -d: -f1); do
echo "Restarting $zergling"
systemctl restart "$zergling"
number=$(echo "$zergling" | sed 's/.*@//g')
stats="127.0.0.1:401$number"
wait_for_url "$stats"
success "$zergling is alive"
done
}
uwsgi_zerg-scale-up() { ## : Add another zergling to deal with high load
handle_help "$@" <<-EOF
EOF
# Obtain current running ones
an_inactive=$(uwsgi_status | grep inactive | grep zergling | head -n1 | cut -d: -f1)
echo "$an_inactive is inactive, starting"
systemctl start "$an_inactive"
}
#uwsgi_zerg-scale-down() { ## : Remove an extraneous zergling
#handle_help "$@" <<-EOF
#EOF
## Obtain current running ones
#active="$(status | grep -v inactive | grep zergling | cut -d: -f1)"
#num_active=$(echo "$active" | wc -l)
#if (( num_active <= 2 )); then
#error "We do not permit scaling below 2 zerglings"
#exit 1
#fi
## Kill highest numbered one
#warning "$to_kill will be stopped"
#systemctl stop "$to_kill"
#}
uwsgi_zerg-strace() { ## [number]: Strace a zergling
handle_help "$@" <<-EOF
EOF
if (( $# > 0 )); then
number=$1;
fi
procs=$(uwsgi_pids | grep "zergling@${number}" | cut -d':' -f2 | sed 's/\s*//g;' | tr '\n' ' ' | sed 's/^\s*//;s/\s*$//g;s/ / -p /g')
strace -e open,openat -p "$procs"
}
uwsgi_handler-strace() { ## [number]: Strace a handler
handle_help "$@" <<-EOF
EOF
if (( $# > 0 )); then
number=$1;
fi
procs=$(uwsgi_pids | grep "handler@${number}" | cut -d':' -f2 | sed 's/\s*//g;' | tr '\n' ' ' | sed 's/^\s*//;s/\s*$//g;s/ / -p /g')
strace -e open,openat -p "$procs"
}
uwsgi_handler-restart() { ## : Restart all handlers
handle_help "$@" <<-EOF
EOF
for i in {0..11}; do
systemctl restart galaxy-handler@$i &
done
}
uwsgi_lastlog(){ ## : Fetch the number of seconds since the last log message was written
handle_help "$@" <<-EOF
Lets you know if any of your workers or handlers have maybe stopped processing jobs.
$ gxadmin uwsgi lastlog
journalctl.lastlog,service=galaxy-handler@0 seconds=8
journalctl.lastlog,service=galaxy-handler@1 seconds=2
journalctl.lastlog,service=galaxy-handler@2 seconds=186
journalctl.lastlog,service=galaxy-handler@3 seconds=19
journalctl.lastlog,service=galaxy-handler@4 seconds=6
journalctl.lastlog,service=galaxy-handler@5 seconds=80
journalctl.lastlog,service=galaxy-handler@6 seconds=52
journalctl.lastlog,service=galaxy-handler@7 seconds=1
journalctl.lastlog,service=galaxy-handler@8 seconds=79
journalctl.lastlog,service=galaxy-handler@9 seconds=40
journalctl.lastlog,service=galaxy-handler@10 seconds=123
journalctl.lastlog,service=galaxy-handler@11 seconds=13
journalctl.lastlog,service=galaxy-zergling@0 seconds=0
journalctl.lastlog,service=galaxy-zergling@1 seconds=0
journalctl.lastlog,service=galaxy-zergling@2 seconds=2866
EOF
NOW=$(date +%s)
for i in {0..11}; do
lines=$(journalctl -u galaxy-handler@$i -n 1 --no-pager)
if [[ ! $lines == *"No entries"* ]]; then
timestamp=$(journalctl -u galaxy-handler@$i -n 1 --no-pager | grep -v 'Logs begin' | awk '{print $1" "$2" "$3}');
unix=$(date -d "$timestamp" +%s)
date_diff=$((NOW - unix));
echo "journalctl.lastlog,service=galaxy-handler@$i seconds=$date_diff";
fi
done
for i in {0..4}; do
lines=$(journalctl -u galaxy-zergling@$i -n 1 --no-pager)
if [[ ! $lines == *"No entries"* ]]; then
timestamp=$(journalctl -u galaxy-zergling@$i -n 1 --no-pager | grep -v 'Logs begin' | awk '{print $1" "$2" "$3}');
unix=$(date -d "$timestamp" +%s)
date_diff=$((NOW - unix));
echo "journalctl.lastlog,service=galaxy-zergling@$i seconds=$date_diff";
fi
done
}
uwsgi_active-users() { ## : Count active users
handle_help "$@" <<-EOF
Count active users and return an influx compatible measurement
EOF
users=$(journalctl -u galaxy-zerg* --since '10 minutes ago' | grep pid: | awk '{print $10}' | sort -u | wc -l)
echo "journalctl.activeusers,service=galaxy count=${users}"
}
registered_subcommands="$registered_subcommands galaxy"
_galaxy_short_help="Galaxy Administration"
galaxy_cleanup() { ## [days]: Cleanup histories/hdas/etc for past N days (default=30)
handle_help "$@" <<-EOF
Cleanup histories/hdas/etc for past N days using the python objects-based method
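Example (paths are illustrative; GALAXY_ROOT and GALAXY_LOG_DIR must be set):
$ GALAXY_ROOT=/srv/galaxy/server GALAXY_LOG_DIR=/var/log/galaxy gxadmin galaxy cleanup 60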
EOF
days=30
if (( $# > 0 )); then
days=$1
fi
assert_set_env GALAXY_ROOT
assert_set_env GALAXY_LOG_DIR
mkdir -p $GALAXY_LOG_DIR
run_date=$(date --rfc-3339=seconds)
for action in {delete_userless_histories,delete_exported_histories,purge_deleted_users,purge_deleted_histories,purge_deleted_hdas,purge_historyless_hdas,purge_hdas_of_purged_histories,delete_datasets,purge_datasets}; do
start_time=$(date +%s)
cd "$GALAXY_ROOT";
$GXADMIN_PYTHON \
"$GALAXY_ROOT/scripts/cleanup_datasets/pgcleanup.py" \
-U \
-o "$days" \
-l "$GALAXY_LOG_DIR" \
-s $action \
-w 128MB \
>> "$GALAXY_LOG_DIR/cleanup-${run_date}-${action}.log" \
2>> "$GALAXY_LOG_DIR/cleanup-${run_date}-${action}.err";
ec=$?
finish_time=$(date +%s)
runtime=$(( finish_time - start_time ))
# Something that telegraf can consume
if (( ec == 0 )); then
echo "cleanup_datasets,group=$action success=1,runtime=$runtime"
else
echo "cleanup_datasets,group=$action success=0,runtime=$runtime"
fi
done
}
galaxy_encode() { ## <id>: Encode an unencoded database ID
handle_help "$@" <<-EOF
Encode an unencoded (raw database) ID
$ GALAXY_CONFIG_FILE=/srv/galaxy/config/galaxy.yml GALAXY_ROOT=/srv/galaxy/server gxadmin galaxy encode 123
6fe4eea8c591a9c4
EOF
assert_set_env VIRTUAL_ENV
assert_set_env GALAXY_ROOT
assert_set_env GALAXY_CONFIG_FILE
assert_count $# 1 "Missing ID"
python "$GALAXY_ROOT/scripts/secret_decoder_ring.py" -c "$GALAXY_CONFIG_FILE" encode "$1"
}
galaxy_decode() { ## <encoded-id>: Decode an encoded ID
handle_help "$@" <<-EOF
Decode an encoded ID
$ GALAXY_CONFIG_FILE=/srv/galaxy/config/galaxy.yml GALAXY_ROOT=/srv/galaxy/server gxadmin galaxy decode 6fe4eea8c591a9c4
123
EOF
assert_set_env VIRTUAL_ENV
assert_set_env GALAXY_ROOT
assert_set_env GALAXY_CONFIG_FILE
assert_count $# 1 "Missing ID"
python "$GALAXY_ROOT/scripts/secret_decoder_ring.py" -c "$GALAXY_CONFIG_FILE" decode "$1"
}
galaxy_migrate-tool-install-to-sqlite() { ## : Converts normal postgres toolshed repository tables into the SQLite version
handle_help "$@" <<-EOF
$ gxadmin galaxy migrate-tool-install-to-sqlite
Creating new sqlite database: galaxy_install.sqlite
Migrating tables
export: tool_shed_repository
import: tool_shed_repository
...
export: repository_repository_dependency_association
import: repository_repository_dependency_association
Complete
EOF
# Export tables
if [[ -f galaxy_install.sqlite ]]; then
error "galaxy_install.sqlite exists, not overwriting"
exit 1
fi
success "Creating new sqlite database: galaxy_install.sqlite"
empty_schema=$(mktemp)
echo "
PRAGMA foreign_keys=OFF;
BEGIN TRANSACTION;
CREATE TABLE migrate_version (
repository_id VARCHAR(250) NOT NULL,
repository_path TEXT,
version INTEGER,
PRIMARY KEY (repository_id)
);
CREATE TABLE tool_shed_repository (
id INTEGER NOT NULL,
create_time DATETIME,
update_time DATETIME,
tool_shed VARCHAR(255),
name VARCHAR(255),
description TEXT,
owner VARCHAR(255),
changeset_revision VARCHAR(255),
deleted BOOLEAN,
metadata BLOB,
includes_datatypes BOOLEAN,
installed_changeset_revision VARCHAR(255),
uninstalled BOOLEAN,
dist_to_shed BOOLEAN,
ctx_rev VARCHAR(10),
status VARCHAR(255),
error_message TEXT,
tool_shed_status BLOB,
PRIMARY KEY (id),
CHECK (deleted IN (0, 1))
);
CREATE TABLE tool_version (
id INTEGER NOT NULL,
create_time DATETIME,
update_time DATETIME,
tool_id VARCHAR(255),
tool_shed_repository_id INTEGER,
PRIMARY KEY (id),
FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id)
);
CREATE TABLE tool_version_association (
id INTEGER NOT NULL,
tool_id INTEGER NOT NULL,
parent_id INTEGER NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY(tool_id) REFERENCES tool_version (id),
FOREIGN KEY(parent_id) REFERENCES tool_version (id)
);
CREATE TABLE migrate_tools (
repository_id VARCHAR(255),
repository_path TEXT,
version INTEGER
);
CREATE TABLE tool_dependency (
id INTEGER NOT NULL,
create_time DATETIME,
update_time DATETIME,
tool_shed_repository_id INTEGER NOT NULL,
name VARCHAR(255),
version VARCHAR(40),
type VARCHAR(40),
status VARCHAR(255),
error_message TEXT,
PRIMARY KEY (id),
FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id)
);
CREATE TABLE repository_dependency (
id INTEGER NOT NULL,
create_time DATETIME,
update_time DATETIME,
tool_shed_repository_id INTEGER NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id)
);
CREATE TABLE repository_repository_dependency_association (
id INTEGER NOT NULL,
create_time DATETIME,
update_time DATETIME,
tool_shed_repository_id INTEGER,
repository_dependency_id INTEGER,
PRIMARY KEY (id),
FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id),
FOREIGN KEY(repository_dependency_id) REFERENCES repository_dependency (id)
);
CREATE INDEX ix_tool_shed_repository_name ON tool_shed_repository (name);
CREATE INDEX ix_tool_shed_repository_deleted ON tool_shed_repository (deleted);
CREATE INDEX ix_tool_shed_repository_tool_shed ON tool_shed_repository (tool_shed);
CREATE INDEX ix_tool_shed_repository_changeset_revision ON tool_shed_repository (changeset_revision);
CREATE INDEX ix_tool_shed_repository_owner ON tool_shed_repository (owner);
CREATE INDEX ix_tool_shed_repository_includes_datatypes ON tool_shed_repository (includes_datatypes);
CREATE INDEX ix_tool_version_tool_shed_repository_id ON tool_version (tool_shed_repository_id);
CREATE INDEX ix_tool_version_association_tool_id ON tool_version_association (tool_id);
CREATE INDEX ix_tool_version_association_parent_id ON tool_version_association (parent_id);
CREATE INDEX ix_tool_dependency_tool_shed_repository_id ON tool_dependency (tool_shed_repository_id);
CREATE INDEX ix_repository_dependency_tool_shed_repository_id ON repository_dependency (tool_shed_repository_id);
CREATE INDEX ix_repository_repository_dependency_association_tool_shed_repository_id ON repository_repository_dependency_association (tool_shed_repository_id);
CREATE INDEX ix_repository_repository_dependency_association_repository_dependency_id ON repository_repository_dependency_association (repository_dependency_id);
COMMIT;
" > "${empty_schema}"
sqlite3 galaxy_install.sqlite < "${empty_schema}"
rm "${empty_schema}"
success "Migrating tables"
# tool_shed_repository is special :(
table=tool_shed_repository
success " export: ${table}"
export_csv=$(mktemp /tmp/tmp.gxadmin.${table}.XXXXXXXXXXX)
psql -c "COPY (select
id, create_time, update_time, tool_shed, name, description, owner, changeset_revision, case when deleted then 1 else 0 end, metadata, includes_datatypes, installed_changeset_revision, uninstalled, dist_to_shed, ctx_rev, status, error_message, tool_shed_status from $table) to STDOUT with CSV" > "$export_csv";
success " import: ${table}"
echo ".mode csv
.import ${export_csv} ${table}" | sqlite3 galaxy_install.sqlite
ec=$?
if (( ec == 0 )); then
rm "${export_csv}";
else
error " sql: ${export_csv}"
fi
sqlite3 galaxy_install.sqlite "insert into migrate_version values ('ToolShedInstall', 'lib/galaxy/model/tool_shed_install/migrate', 17)"
# the rest are sane!
for table in {tool_version,tool_version_association,migrate_tools,tool_dependency,repository_dependency,repository_repository_dependency_association}; do
success " export: ${table}"
export_csv=$(mktemp /tmp/tmp.gxadmin.${table}.XXXXXXXXXXX)
psql -c "COPY (select * from $table) to STDOUT with CSV" > "$export_csv";
success " import: ${table}"
echo ".mode csv
.import ${export_csv} ${table}" | sqlite3 galaxy_install.sqlite
ec=$?
if (( ec == 0 )); then
rm "${export_csv}"
else
error " sql: ${export_csv}"
fi
done
success "Complete"
}
galaxy_migrate-tool-install-from-sqlite() { ## [sqlite-db]: Converts the SQLite version back into normal postgres toolshed repository tables
handle_help "$@" <<-EOF
$ gxadmin galaxy migrate-tool-install-from-sqlite db.sqlite
Migrating tables
export: tool_shed_repository
import: tool_shed_repository
...
export: repository_repository_dependency_association
import: repository_repository_dependency_association
Complete
EOF
assert_count $# 1 "Must provide database"
success "Migrating tables"
# Truncate first, since need to be removed in a specific ordering (probably
# cascade would work but cascade is SCARY)
psql -c "TRUNCATE TABLE repository_repository_dependency_association, repository_dependency, tool_dependency, migrate_tools, tool_version_association, tool_version, tool_shed_repository"
ec1=$?
# If you truncate this one, then it also removes galaxy codebase version,
# breaking everything.
psql -c "delete from migrate_version where repository_id = 'ToolShedInstall'"
ec2=$?
if (( ec1 == 0 && ec2 == 0 )); then
success " Cleaned"
else
error " Failed to clean"
fi
# Then load data in same 'safe' order as in sqlite version
for table in {migrate_version,tool_shed_repository,tool_version,tool_version_association,migrate_tools,tool_dependency,repository_dependency,repository_repository_dependency_association}; do
success " export: ${table}"
export_csv=$(mktemp /tmp/tmp.gxadmin.${table}.XXXXXXXXXXX)
if [[ "$table" == "tool_shed_repository" ]]; then
# Might have json instead of hexencoded json
sqlite3 -csv "$1" "select * from $table" | python -c "$hexencodefield9" > "$export_csv";
elif [[ "$table" == "tool_version" ]]; then
# Might have null values quoted as empty string
sqlite3 -csv "$1" "select * from $table" | sed 's/""$//' > "$export_csv";
else
sqlite3 -csv "$1" "select * from $table" > "$export_csv";
fi
psql -c "COPY $table FROM STDIN with CSV" < "$export_csv";
ec=$?
if (( ec == 0 )); then
rm "${export_csv}"
success " import: ${table}"
else
error " csv: ${export_csv}"
break
fi
done
# Update sequences
success "Updating sequences"
for table in {tool_shed_repository,tool_version,tool_version_association,tool_dependency,repository_dependency,repository_repository_dependency_association}; do
psql -c "SELECT setval('${table}_id_seq', (SELECT MAX(id) FROM ${table}));"
done
success "Comparing table counts"
for table in {migrate_version,tool_shed_repository,tool_version,tool_version_association,migrate_tools,tool_dependency,repository_dependency,repository_repository_dependency_association}; do
postgres=$(psql -c "COPY (select count(*) from $table) to STDOUT with CSV")
sqlite=$(sqlite3 -csv "$1" "select count(*) from $table")
if (( postgres == sqlite )); then
success " $table: $postgres == $sqlite"
else
error " $table: $postgres != $sqlite"
fi
done
success "Complete"
}
galaxy_amqp-test() { ## <amqp_url>: Test a given AMQP URL for connectivity
handle_help "$@" <<-EOF
**Note**: must be run in Galaxy Virtualenv
Simple script to test an AMQP URL. If the connection works, it will
immediately exit with a python object:
$ gxadmin galaxy amqp-test pyamqp://user:pass@host:port/vhost
<kombu.transport.pyamqp.Channel object at 0x7fe56a836290>
$ gxadmin galaxy amqp-test pyamqp://user:pass@host:port/vhost?ssl=1
<kombu.transport.pyamqp.Channel object at 0x7fe56a836290>
Some errors look weird:
*wrong password*:
$ gxadmin galaxy amqp-test ...
Traceback
...
amqp.exceptions.AccessRefused: (0, 0): (403) ACCESS_REFUSED - Login was refused using authentication mechanism AMQPLAIN. For details see the broker logfile.
*wrong host*, *inaccessible host*, basically any other problem:
$ gxadmin galaxy amqp-test ...
[hangs forever]
Basically any error results in a hang forever. It is recommended you run it with a timeout:
$ timeout 1 gxadmin galaxy amqp-test
$
EOF
assert_count $# 1 "Must provide URL"
URL="$1"
script=$(cat <<EOF
from kombu import Connection
from kombu import Exchange
with Connection('$URL') as conn:
print(conn.default_channel)
EOF
)
python -c "$script"
}
galaxy_cleanup-jwd() { ## <working_dir> [months=1]: (NEW) Cleanup job working directories older than N months
handle_help "$@" <<-EOF
Scans through a provided job working directory, e.g.
job_working_directory/ (the top level, not a 005-style subdir), to find all
folders which have not been modified in the last N months.
Then it takes the first 1000 entries and cleans them up. This was more
of a hack to handle the fact that the list produced by find is really
long, and the for loop hangs until it's done generating the list.
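Example (path and age are illustrative):
$ gxadmin galaxy cleanup-jwd /data/galaxy/job_working_directory/ 3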
EOF
assert_count_ge $# 1 "Must supply at least working dir"
jwd=$1
months=${2:-1}
# scan a given directory for jwds.
for possible_dir in $(find "$jwd" -maxdepth 3 -mindepth 3 -not -newermt "$months month ago" | grep -v _cleared_contents | head -n 1000); do
job_id=$(basename "$possible_dir")
if [[ "$job_id" =~ ^[0-9]{3,}$ ]]; then
state=$(psql -c "COPY (select state from job where id = $job_id) to STDOUT with CSV")
if [[ "$state" == "error" ]] || [[ "$state" == "ok" ]] || [[ "$state" == "deleted" ]] || [[ "$state" == "paused" ]] || [[ "$state" == "new_manually_dropped" ]]; then
echo "- $possible_dir $job_id $state"
rm -rf "$possible_dir"
else
echo "? $possible_dir $job_id $state"
fi
fi
done
}
galaxy_fix-conda-env() { ## <conda_dir/envs/>: Fix broken conda environments
handle_help "$@" <<-EOF
Fixes any broken conda environments which are missing the activate scripts in their correct locations.
MUST end in /envs/
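Example (path is illustrative):
$ gxadmin galaxy fix-conda-env /srv/galaxy/var/dependencies/_conda/envs/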
EOF
conda_dir=$(dirname "$1")
for i in $(find "${conda_dir}/envs/" -mindepth 1 -maxdepth 1 -type d);
do
if [ ! -f "$i/bin/conda" ]; then
"${conda_dir}/bin/conda" ..checkenv bash "$i"
fi
done
}
galaxy_fav_tools() { ## : Favourite tools in Galaxy DB
handle_help "$@" <<-EOF
What are people's fav tools
EOF
psql -qAt -c "select value::json->'tools' from user_preference where name = 'favorites'" | jq -s add | jq '. | @tsv' -r | sed 's/\t/\n/g' | sort | uniq -c | sort -n
}
galaxy_ie-list() { ## : List GIEs
handle_help "$@" <<-EOF
List running IEs (based on output of queue-detail)
$ gxadmin local ie-list
running 6134209 jupyter_notebook helena-rasche
EOF
gxadmin query queue-detail | grep interactive | awk -F'|' '{print $1" "$2" "$4" "$5}' | sed 's/interactive_tool_//g' | column -t
}
galaxy_ie-show() { ## [gie-galaxy-job-id]: Report on a GIE [HTCondor Only!]
handle_help "$@" <<-EOF
The new versions of IEs are IMPOSSIBLE to track down, so here's a handy
function for you to make you hate life a lil bit less.
$ gxadmin local ie-show 6134209
Galaxy ID: 6134209
Condor ID: 1489026
Container: jupyter_notebook
Running on: cn032.bi.uni-freiburg.de
Job script: /data/dnb01/galaxy_db/pbs/galaxy_6134209.sh
Working dir: /data/dnb02/galaxy_db/job_working_directory/006/134/6134209
Runtime:
{
8888: {
host: 132.230.68.65,
port: 1035,
protocol: tcp,
tool_port: 8888
}
}
Mapping
Key: aaaabbbbccccdddd
KeyType: interactivetoolentrypoint
Token: 00001111222233334444555566667777
URL: https://aaaabbbbccccdddd-00001111222233334444555566667777.interactivetoolentrypoint.interactivetool.usegalaxy.eu
It's probably only going to work for us? Sorry :)
EOF
assert_set_env GALAXY_CONFIG_FILE
id=$1
cluster_id=$(gxadmin jsonquery queue-detail | jq ".[] | select(.id == $1) | .extid" -r)
cluster_id_c=$(echo -n "$cluster_id" | wc -c)
echo "Galaxy ID: $id"
echo "Condor ID: $cluster_id"
if (( cluster_id_c == 0 )); then
exit 1
fi
container=$(condor_q $cluster_id -autoformat JobDescription | sed 's/interactive_tool_//g')
echo "Container: $container"
running_on=$(condor_q $cluster_id -autoformat RemoteHost | sed 's/.*@//g')
echo "Running on: $running_on"
job_script=$(condor_q $cluster_id -autoformat Cmd)
echo "Job script: $job_script"
working_dir=$(grep ^cd $job_script | grep working_directory | sed 's/^cd //g')
echo "Working dir: $working_dir"
echo "Runtime:"
jq -S < "$working_dir/container_runtime.json"
port=$(jq -r '.[].port' < "$working_dir/container_runtime.json")
host=$(jq -r '.[].host' < "$working_dir/container_runtime.json")
echo
echo "Mapping"
token=$(gxadmin tsvquery q "select token from interactivetool_entry_point where job_id = $id")
data_dir=$(cat "$GALAXY_CONFIG_FILE" | grep interactivetools_map |cut -f2 -d'='| tr -d " ")
key=$(sqlite3 $data_dir 'select key from gxitproxy where token="'$token'"')
key_type=$(sqlite3 $data_dir 'select key_type from gxitproxy where token="'$token'"')
token=$(sqlite3 $data_dir 'select token from gxitproxy where token="'$token'"')
echo "Key: $key"
echo "KeyType: $key_type"
echo "Token: $token"
echo "URL: https://${key}-${token}.${key_type}.interactivetool.usegalaxy.eu"
}
registered_subcommands="$registered_subcommands report"
_report_short_help="Various (rich) reports. Consider https://github.com/ttscoff/mdless for easier reading in the terminal! Or | pandoc | lynx -stdin"
align_cols() {
#cat | sed 's/\t/\t | /g' | column -t -s' '
cat | sed 's/\t/ | /g'
}
report_user-info(){ ## <user_id|username|email>: Quick overview of a Galaxy user in your system
handle_help "$@" <<-EOF
This command lets you quickly find out information about a user. The output is formatted as markdown by default.
Consider [mdless](https://github.com/ttscoff/mdless) for easier reading in the terminal!
$ gxadmin report user-info helena-rasche
# Galaxy User 580
Property | Value
---------- | -----
ID | helena-rasche (id=580) hxr@informatik.uni-freiburg.de
Created | 2017-07-26 14:47:37.575484
Properties | ext=f deleted=f purged=f active=t
Disk Usage | 137 GB
## Groups/Roles
Groups: training-freiburg-rnaseq-2018, training-emc2018
Roles: admin, Backofen
## Recent Jobs
Tool ID | Status | Created | Exit Code | Runtime
---- | ---- | ---- | --- | ----
Grep1 | ok | 2019-01-21 07:27:24.472706 | 0 | 00:01:19
CONVERTER_fasta_to_tabular | ok | 2019-01-21 07:27:24.339862 | 0 | 00:03:34
secure_hash_message_digest | ok | 2019-01-18 16:43:44.262265 | 0 | 00:00:08
CONVERTER_gz_to_uncompressed | ok | 2019-01-18 10:18:23.99171 | 0 | 00:10:02
upload1 | ok | 2019-01-18 08:44:24.955622 | 0 | 01:11:07
echo_main_env | ok | 2019-01-17 16:45:04.019233 | 0 | 00:00:29
secure_hash_message_digest | ok | 2019-01-17 16:03:21.33665 | 0 | 00:00:07
secure_hash_message_digest | ok | 2019-01-17 16:03:20.937433 | 0 | 00:00:09
## Largest Histories
History ID | Name | Size
---- | ---- | ----
20467 | imported: RNASEQ | 52 GB
94748 | imported: ChIP-seq | 49 GB
94761 | reduced history-export problem | 49 GB
102448 | Wheat Genome | 42 GB
38923 | imported: Zooplankton | 29 GB
64974 | imported: 65991-A | 17 GB
20488 | Unnamed history | 15 GB
19414 | Unnamed history | 12 GB
92407 | Testing | 11 GB
60522 | example1/wf3-shed-tools.ga | 5923 MB
EOF
# Metadata
read -r -d '' qstr <<-EOF
SELECT
username, id, email, create_time AT TIME ZONE 'UTC' as create_time, external, deleted, purged, active, pg_size_pretty(disk_usage)
FROM
galaxy_user
WHERE
(galaxy_user.email = '$1' or galaxy_user.username = '$1' or galaxy_user.id = CAST(REGEXP_REPLACE(COALESCE('$1','0'), '[^0-9]+', '0', 'g') AS INTEGER))
EOF
results=$(query_tsv "$qstr")
user_id=$(echo "$results" | awk '{print $2}')
if [[ -z "$user_id" ]]; then
error "Unknown user"
exit 1
fi
# Groups
read -r -d '' qstr <<-EOF
select string_agg(galaxy_group.name, ', ') from user_group_association, galaxy_group where user_id = $user_id and user_group_association.group_id = galaxy_group.id
EOF
group_membership=$(query_tsv "$qstr")
read -r -d '' qstr <<-EOF
select string_agg(role.name, ', ') from user_role_association, role where user_id = $user_id and user_role_association.role_id = role.id and role.type not in ('private', 'sharing')
EOF
role_membership=$(query_tsv "$qstr")
# Recent jobs
read -r -d '' qstr <<-EOF
SELECT
job.id, tool_id, state, create_time AT TIME ZONE 'UTC' as create_time, exit_code, metric_value::text::interval
FROM
job, job_metric_numeric
WHERE
job.user_id = $user_id and job.id = job_metric_numeric.job_id and metric_name = 'runtime_seconds' order by job.id desc limit 10
EOF
recent_jobs=$(query_tsv "$qstr")
recent_jobs2=$(printf "ID\tTool ID\tStatus\tCreated\tExit Code\tRuntime\n----\t----\t----\t----\t---\t----\n%s" "$recent_jobs" | align_cols)
# running jobs
read -r -d '' qstr <<-EOF
SELECT
id, tool_id, tool_version, handler, destination_id, state, create_time AT TIME ZONE 'UTC' as create_time, now() AT TIME ZONE 'UTC' - create_time AT TIME ZONE 'UTC' as runtime
FROM
job
WHERE
job.user_id = $user_id and job.state in ('running', 'queued', 'new') order by job.id desc
EOF
running_jobs=$(query_tsv "$qstr")
running_jobs2=$(printf "Tool ID\tTool Version\tHandler\tDestination\tState\tCreated\tRuntime\n----\t----\t----\t----\t----\t---\t----\n%s" "$running_jobs" | align_cols)
# Recent workflows
read -r -d '' qstr <<-EOF
SELECT
workflow_invocation.id,
workflow_invocation.create_time,
state,
scheduler,
handler,
substring(workflow.name, 0, 20) AS workflow,
substring(history.name, 0, 20) AS history
FROM
workflow_invocation
JOIN workflow ON workflow_invocation.workflow_id = workflow.id
JOIN history ON workflow_invocation.history_id = history.id
WHERE
history.user_id = $user_id
ORDER BY
create_time DESC
LIMIT
25
EOF
recent_wf=$(query_tsv "$qstr")
recent_wf=$(printf "ID\tCreated\tState\tScheduler\tHandler\tWorkflow\tHistory\n----\t----\t----\t----\t----\t----\t----\n%s" "$recent_wf" | align_cols)
# Largest Histories
read -r -d '' qstr <<-EOF
SELECT
history.id, history.name, pg_size_pretty(sum(COALESCE(dataset.total_size, 0))) AS size, history.deleted, history.purged
FROM
history
LEFT JOIN history_dataset_association ON history.id = history_dataset_association.history_id
LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
WHERE
history.user_id = $user_id
GROUP BY
history.id, history.name
ORDER BY
sum(COALESCE(dataset.total_size, 0)) DESC
LIMIT
10
EOF
largest_histories=$(query_tsv "$qstr")
largest_histories=$(printf "History ID\tName\tSize\tDeleted\tPurged\n----\t----\t----\t----\t----\n%s" "$largest_histories" | align_cols)
read -r -d '' template <<EOF
# Galaxy User $user_id
Property | Value
---------- | -----
ID | %s (id=%s) %s
Created | %s %s
Properties | ext=%s deleted=%s purged=%s active=%s
Disk Usage | %s %s
## Groups/Roles
Groups: %s
Roles: %s
## Recent Jobs
%s
## Running Jobs
%s
## Recent Workflow Invocations
%s
## Largest Histories
%s
\n
EOF
# shellcheck disable=SC2086
# shellcheck disable=SC2059
printf "$template" $results "$group_membership" "$role_membership" "$recent_jobs2" "$running_jobs2" "$recent_wf" "$largest_histories"
}
report_job-info(){ ## <id>: Information about a specific job
handle_help "$@" <<-EOF
$ gxadmin report job-info 1
tool_id | state | username | create_time | job_runner_name | job_runner_external_id
---------+-------+----------+----------------------------+-----------------+------------------------
upload1 | ok | admin | 2012-12-06 16:34:27.492711 | local:/// | 9347
EOF
assert_count $# 1 "Missing Job ID"
username=$(gdpr_safe galaxy_user.username username)
job_id=$1
### ###
# JOB INFO #
### ###
read -r -d '' qstr <<-EOF
SELECT
job.tool_id,
job.state,
job.handler,
job.create_time AT TIME ZONE 'UTC' as create_time,
COALESCE(job.job_runner_name, '?'),
COALESCE(job.job_runner_external_id, '?')
FROM job
WHERE job.id = $job_id
EOF
results=$(query_tsv "$qstr")
read -r -d '' template <<EOF
# Galaxy Job $job_id
Property | Value
------------- | -----
Tool | %s
State | %s
Handler | %s
Created | %s %s
Job Runner/ID | %s / %s
EOF
# shellcheck disable=SC2059
# shellcheck disable=SC2086
printf "$template" $results
### ###
# USER INFO #
### ###
read -r -d '' qstr <<-EOF
SELECT
$username,
job.user_id
FROM job, galaxy_user
WHERE job.id = $job_id AND job.user_id = galaxy_user.id
EOF
job_owner=$(query_tsv "$qstr")
# shellcheck disable=SC2183
# shellcheck disable=SC2086
printf "\n Owner | %s (id=%s)\n\n" $job_owner
### ###
# DEST PARAMS #
### ###
read -r -d '' qstr <<-EOF
SELECT convert_from(destination_params::bytea, 'UTF8')
FROM job
WHERE job.id = $job_id
EOF
results=$(query_tsv_json "$qstr")
printf "## Destination Parameters\n\n"
# shellcheck disable=SC2016
tbl=$(echo "$results" | jq -S '. | to_entries[] | [.key, .value] | @tsv' -r | sed 's/\t/\t`/g;s/$/`/g')
# shellcheck disable=SC2059
printf "Key\tValue\n---\t---\n$tbl" | align_cols
### ###
# DEPENDENCIES #
### ###
read -r -d '' qstr <<-EOF
SELECT convert_from(dependencies::bytea, 'UTF8')
FROM job
WHERE job.id = $job_id
EOF
results=$(query_tsv_json "$qstr")
printf "\n## Dependencies\n\n"
tbl=$(echo "$results" | jq -S '.[] | [.name, .version, .dependency_type, .cacheable, .exact, .environment_path, .model_class] | @tsv' -r)
# shellcheck disable=SC2059
printf "Name\tVersion\tDependency Type\tCacheable\tExact\tEnvironment Path\tModel Class\n----\t-------\t---------------\t---------\t-----\t----------------\t-----------\n$tbl\n" | align_cols
### ###
# JOB PARAMS #
### ###
read -r -d '' qstr <<-EOF
SELECT j.name, j.value
FROM job_parameter j
WHERE j.job_id = $job_id
EOF
results=$(query_tsv "$qstr")
printf "\n## Tool Parameters\n\n"
# shellcheck disable=SC2059
printf "Name\tSettings\n-----\t------------\n$results\n\n" | sed 's/[\"\`]//g' | align_cols
### ###
# INPUTS #
### ###
read -r -d '' qstr <<-EOF
SELECT
jtod.job_id,
hda.name,
hda.extension,
hda.id,
hda.state,
hda.deleted,
hda.purged,
ds.id,
ds.state,
ds.deleted,
ds.purged,
pg_size_pretty(ds.total_size)
FROM
job_to_input_dataset AS jtid,
history_dataset_association AS hda,
dataset AS ds,
job_to_output_dataset as jtod
WHERE
jtid.job_id = $1
AND jtid.dataset_id = hda.id
AND hda.dataset_id = ds.id
AND jtod.dataset_id = ds.id
EOF
input_ds=$(query_tsv "$qstr")
input_ds_tbl=$(printf "\nJob ID\tName\tExtension\thda-id\thda-state\thda-deleted\thda-purged\tds-id\tds-state\tds-deleted\tds-purged\tSize\n----\t----\t----\t----\t----\t----\t----\t----\t----\t----\t----\t----\n%s" "$input_ds" | align_cols)
### ###
# OUTPUTS #
### ###
read -r -d '' qstr <<-EOF
SELECT
hda.name,
hda.extension,
hda.id,
hda.state,
hda.deleted,
hda.purged,
ds.id,
ds.state,
ds.deleted,
ds.purged,
pg_size_pretty(ds.total_size)
FROM
job_to_output_dataset AS jtod,
history_dataset_association AS hda,
dataset AS ds
WHERE
jtod.job_id = $1
AND jtod.dataset_id = hda.id
AND hda.dataset_id = ds.id
EOF
output_ds=$(query_tsv "$qstr")
output_ds_tbl=$(printf "Name\tExtension\thda-id\thda-state\thda-deleted\thda-purged\tds-id\tds-state\tds-deleted\tds-purged\tSize\n----\t----\t----\t----\t----\t----\t----\t----\t----\t----\t----\n%s" "$output_ds" | align_cols)
read -r -d '' template3 <<EOF
## Inputs
%s
## Outputs
%s
\n
EOF
# shellcheck disable=SC2059
printf "$template3" "$input_ds_tbl" "$output_ds_tbl"
}
report_assigned-to-handler(){ ## <handler>: Report what items are assigned to a handler currently.
handle_help "$@" <<-EOF
EOF
assert_count $# 1 "Missing Handler ID"
printf "# Handler %s\n\n" "$1"
# Jerbs
printf "## Jobs\n\n"
read -r -d '' qstr <<-EOF
SELECT
id, create_time, tool_id, state, object_store_id, user_id
FROM job
WHERE handler = '$1' and state in ('new', 'queued', 'running')
ORDER BY create_time DESC
EOF
output_ds=$(query_tsv "$qstr")
printf "ID\tCreate Time\tTool ID\tState\tObject Store\tUser ID\n----\t----\t----\t----\t----\t----\n%s" "$output_ds" | align_cols
# Workflows
printf "## Workflows\n\n"
read -r -d '' qstr <<-EOF
SELECT
id, create_time, workflow_id, history_id, state, scheduler, uuid
FROM workflow_invocation
WHERE handler = '$1' and state = 'new'
EOF
output_ds=$(query_tsv "$qstr")
printf "ID\tCreate Time\tWorkflow ID\tHistory ID\tState\tScheduler\tUUID\n----\t----\t----\t----\t----\t----\t----\n%s" "$output_ds" | align_cols
}
# Some unlisted functions that are probably not useful as things people might
# want to query (and thus have cluttering their query menu), but should be
# shared because that's the purpose of gxadmin, sharing all of these weird
# queries that we have :)
query_memory-and-cpu-on-same-node() {
handle_help "$@" <<-EOF
Helena needed to check the reported values of memory/cpu over a series
of jobs on a specific host, to debug what went wrong with the cgroup
metrics.
TODO(hxr): find some way to note "unlisted" functions and let people do the help for them?
EOF
assert_count $# 1 "Missing host name"
host="$1"
read -r -d '' QUERY <<-EOF
SELECT
jmn_a.metric_value AS "memory.memsw.max_usage_in_bytes",
jmn_b.metric_value AS "cpuacct.usage",
job.update_time
FROM
job_metric_numeric AS jmn_a, job_metric_numeric AS jmn_b, job
WHERE
jmn_a.job_id
IN (
SELECT
job_id
FROM
job_metric_text
WHERE
metric_value = '$host'
ORDER BY
job_id DESC
LIMIT
200
)
AND jmn_a.metric_name = 'memory.memsw.max_usage_in_bytes'
AND jmn_b.job_id
IN (
SELECT
job_id
FROM
job_metric_text
WHERE
metric_value = '$host'
ORDER BY
job_id DESC
LIMIT
200
)
AND jmn_b.metric_name = 'cpuacct.usage'
AND jmn_a.job_id = job.id
AND jmn_b.job_id = job.id
ORDER BY
job.create_time DESC
EOF
}
registered_subcommands="$registered_subcommands cluster"
_cluster_short_help="WIP"
cluster_slurm-stats() { ## : Collect stats on slurm
handle_help "$@" <<-EOF
EOF
error "Not yet implemented"
}
registered_subcommands="$registered_subcommands server"
_server_short_help="Various overall statistics"
_server_long_help="
'query' can be exchanged with 'tsvquery' or 'csvquery' for tab- and comma-separated variants.
In some cases 'iquery' is supported for InfluxDB compatible output.
In all cases 'explainquery' will show you the query plan, in case you need to optimise or index data. 'explainjsonquery' is useful with PEV: http://tatiyants.com/pev/
"
server_users() { ## : Count of different classifications of users
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="count=4"
tags="active=0;external=1;deleted=2;purged=3"
read -r -d '' QUERY <<-EOF
SELECT
active, external, deleted, purged, count(*) as count
FROM
galaxy_user
$date_filter
GROUP BY
active, external, deleted, purged
EOF
}
server_groups() { ## : Counts of group memberships
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="AND galaxy_group.create_time AT TIME ZONE 'UTC' <= '$1'::date AND date_trunc('day', user_group_association.create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="count=1"
tags="name=0"
read -r -d '' QUERY <<-EOF
SELECT
galaxy_group.name, count(*)
FROM
galaxy_group, user_group_association
WHERE
user_group_association.group_id = galaxy_group.id
$date_filter
GROUP BY name
EOF
}
server_datasets() { ## : Counts of datasets
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="count=4;size=5"
tags="state=0;deleted=1;purged=2;object_store_id=3"
read -r -d '' QUERY <<-EOF
SELECT
COALESCE(state, '__unknown__'),
deleted,
purged,
COALESCE(object_store_id, 'none'),
count(*),
sum(coalesce(dataset.total_size, dataset.file_size, 0))
FROM
dataset
$date_filter
GROUP BY
state, deleted, purged, object_store_id
EOF
}
server_hda() { ## : Counts of HDAs
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="AND date_trunc('day', history_dataset_association.create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="sum=2;avg=3;min=4;max=5;count=6"
tags="extension=0;deleted=1"
read -r -d '' QUERY <<-EOF
SELECT
COALESCE(history_dataset_association.extension, '__unknown__'),
history_dataset_association.deleted,
COALESCE(sum(dataset.file_size), 0),
COALESCE(avg(dataset.file_size), 0)::bigint,
COALESCE(min(dataset.file_size), 0),
COALESCE(max(dataset.file_size), 0),
count(*)
FROM
history_dataset_association, dataset
WHERE
history_dataset_association.dataset_id = dataset.id
${date_filter}
GROUP BY
history_dataset_association.extension, history_dataset_association.deleted
EOF
}
server_ts-repos() { ## : Counts of TS repos
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="count=2"
tags="tool_shed=0;owner=1"
read -r -d '' QUERY <<-EOF
SELECT
tool_shed, owner, count(*)
FROM
tool_shed_repository
$date_filter
GROUP BY
tool_shed, owner
ORDER BY count desc
EOF
}
server_histories() { ## : Counts of histories and sharing
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="count=6"
tags="deleted=0;purged=1;published=2;importable=3;importing=4;genome_build=5"
read -r -d '' QUERY <<-EOF
SELECT
deleted, purged, published, importable, importing, COALESCE(genome_build, '__unknown__'), count(*)
FROM history
${date_filter}
GROUP BY
deleted, purged, published, importable, importing, genome_build
EOF
}
server_jobs() { ## : Counts of jobs
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="count=3"
tags="state=0;job_runner_name=1;destination_id=2"
read -r -d '' QUERY <<-EOF
SELECT
COALESCE(state, '__unknown__'),
COALESCE(job_runner_name, '__unknown__'),
COALESCE(destination_id, '__unknown__'), count(*)
FROM
job
${date_filter}
GROUP BY
state, job_runner_name, destination_id
EOF
}
server_allocated-cpu() { ## : CPU time per job runner
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
# TODO: here we select the hostname, don't do that.
# Hack for EU's test vs main separation, both submitting job stats.
# Solve by either running IN telegraf or doing it in the meta functions
fields="cpu_seconds=1"
tags="job_runner_name=0;host=2"
read -r -d '' QUERY <<-EOF
SELECT
job.job_runner_name,
round(sum(a.metric_value * b.metric_value), 2) AS cpu_seconds,
'$HOSTNAME' as host
FROM
job_metric_numeric AS a,
job_metric_numeric AS b,
job
WHERE
b.job_id = a.job_id
AND a.job_id = job.id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'galaxy_slots'
${date_filter}
GROUP BY
job.job_runner_name
EOF
}
server_allocated-gpu() { ## : GPU time per job runner
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="gpu_seconds=1"
tags="job_runner_name=0"
read -r -d '' QUERY <<-EOF
SELECT
job.job_runner_name,
round(sum(a.metric_value * length(replace(b.metric_value, ',', ''))), 2) AS gpu_seconds
FROM
job_metric_numeric AS a,
job_metric_text AS b,
job
WHERE
b.job_id = a.job_id
AND a.job_id = job.id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
${date_filter}
GROUP BY
job.job_runner_name
EOF
}
server_workflows() { ## : Counts of workflows
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="count=3"
tags="deleted=0;importable=1;published=2"
read -r -d '' QUERY <<-EOF
SELECT
deleted, importable, published, count(*)
FROM
stored_workflow
${date_filter}
GROUP BY
deleted, importable, published
EOF
}
server_workflow-invocations() { ## : Counts of workflow invocations
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
fields="count=2"
tags="scheduler=0;handler=1"
read -r -d '' QUERY <<-EOF
SELECT
COALESCE(scheduler, '__unknown__'),
COALESCE(handler, '__unknown__'),
count(*)
FROM
workflow_invocation
${date_filter}
GROUP BY
scheduler, handler
EOF
}
server_groups-disk-usage() { ## [YYYY-MM-DD] [=, <=, >= operators]: Retrieve an approximation of the disk usage for groups
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="AND date_trunc('day', dataset.create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')
fields="storage_usage=1"
tags="group_name=0"
read -r -d '' QUERY <<-EOF
SELECT $groupname,
sum(coalesce(dataset.total_size, dataset.file_size, 0)) as "storage_usage"
FROM dataset,
galaxy_group,
user_group_association,
history_dataset_association,
history
WHERE NOT dataset.purged
AND dataset.id = history_dataset_association.dataset_id
AND history_dataset_association.history_id = history.id
AND history.user_id = user_group_association.user_id
AND user_group_association.group_id = galaxy_group.id
$date_filter
GROUP BY galaxy_group.name
EOF
}
server_groups-allocated-cpu() { ## [YYYY-MM-DD] [=, <=, >= operators]: Retrieve an approximation of the CPU allocation for groups
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')
fields="cpu_seconds=1"
tags="group_name=0"
read -r -d '' QUERY <<-EOF
WITH jobs_info AS (
SELECT job.user_id,
round(sum(a.metric_value * b.metric_value), 2) AS cpu_seconds
FROM job_metric_numeric AS a,
job_metric_numeric AS b,
job
WHERE job.id = a.job_id
AND job.id = b.job_id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'galaxy_slots'
$date_filter
GROUP BY job.user_id
), user_job_info AS (
SELECT user_id,
sum(cpu_seconds) AS cpu_seconds
FROM jobs_info
GROUP BY user_id
)
SELECT $groupname,
round(sum(user_job_info.cpu_seconds), 2) as cpu_seconds
FROM user_job_info,
galaxy_group,
user_group_association
WHERE user_job_info.user_id = user_group_association.user_id
AND user_group_association.group_id = galaxy_group.id
GROUP BY galaxy_group.name
EOF
}
server_groups-allocated-gpu() { ## [YYYY-MM-DD] [=, <=, >= operators]: Retrieve an approximation of the GPU allocation for groups
handle_help "$@" <<-EOF
EOF
op="="
if (( $# > 1 )); then
op="$2"
fi
date_filter=""
if (( $# > 0 )); then
date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date"
fi
groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')
fields="gpu_seconds=1"
tags="group_name=0"
read -r -d '' QUERY <<-EOF
WITH jobs_info AS (
SELECT job.user_id,
round(sum(a.metric_value * length(replace(b.metric_value, ',', ''))), 2) AS gpu_seconds
FROM job_metric_numeric AS a,
job_metric_text AS b,
job
WHERE job.id = a.job_id
AND job.id = b.job_id
AND a.metric_name = 'runtime_seconds'
AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
$date_filter
GROUP BY job.user_id
), user_job_info AS (
SELECT user_id,
sum(gpu_seconds) AS gpu_seconds
FROM jobs_info
GROUP BY user_id
)
SELECT $groupname,
round(sum(user_job_info.gpu_seconds), 2) as gpu_seconds
FROM user_job_info,
galaxy_group,
user_group_association
WHERE user_job_info.user_id = user_group_association.user_id
AND user_group_association.group_id = galaxy_group.id
GROUP BY galaxy_group.name
EOF
}
server_workflow-trace-archive-metrics() { ## : [Workflow Trace Archive] Export numeric metrics
handle_help "$@" <<-EOF
Helper for WTA
EOF
read -r -d '' QUERY <<-EOF
SELECT * FROM job_metric_numeric
EOF
}
server_workflow-trace-archive-jobs() { ## : [Workflow Trace Archive] Export jobs
handle_help "$@" <<-EOF
Helper for WTA
EOF
read -r -d '' QUERY <<-EOF
SELECT
id,
create_time,
update_time,
tool_id,
tool_version,
state,
runner_name,
job_runner_name,
imported,
object_store_id,
handler,
exit_code,
destination_id,
copied_from_job_id,
dynamic_tool_id,
galaxy_version
FROM
job
EOF
}
server_workflow-trace-archive-workflows() { ## : [Workflow Trace Archive] Export workflows
handle_help "$@" <<-EOF
Helper for WTA
EOF
read -r -d '' QUERY <<-EOF
SELECT
id,
create_time,
update_time,
stored_workflow_id,
has_cycles,
has_errors,
parent_workflow_id,
uuid
FROM workflow
EOF
}
server_workflow-trace-archive-workflow-invocations() { ## : [Workflow Trace Archive] Export workflow invocations
handle_help "$@" <<-EOF
Helper for WTA
EOF
read -r -d '' QUERY <<-EOF
SELECT
id,
create_time,
update_time,
workflow_id,
state,
scheduler,
handler
FROM workflow_invocation
EOF
}
server_workflow-trace-archive-workflow-steps() { ## : [Workflow Trace Archive] Export workflow steps
handle_help "$@" <<-EOF
Helper for WTA
EOF
read -r -d '' QUERY <<-EOF
SELECT id,
create_time,
update_time,
workflow_id,
type,
tool_id,
tool_version,
order_index,
subworkflow_id,
dynamic_tool_id
FROM workflow_step
EOF
}
server_workflow-trace-archive-workflow-invocation-steps() { ## : [Workflow Trace Archive] Export workflow invocation steps
handle_help "$@" <<-EOF
Helper for WTA
EOF
read -r -d '' QUERY <<-EOF
SELECT
id, create_time, update_time, workflow_invocation_id,
workflow_step_id, job_id, state
FROM workflow_invocation_step
EOF
}
server_workflow-trace-archive-workflow-connections() { ## : [Workflow Trace Archive] Export workflow connections
handle_help "$@" <<-EOF
Helper for WTA
EOF
read -r -d '' QUERY <<-EOF
SELECT
id, output_step_id, input_step_input_id, output_name, input_subworkflow_step_id
FROM workflow_step_connection
EOF
}
server_workflow-trace-archive-workflow-step-input() { ## : [Workflow Trace Archive] Export workflow step-input
handle_help "$@" <<-EOF
Helper for WTA
EOF
read -r -d '' QUERY <<-EOF
SELECT
id, workflow_step_id, name
FROM workflow_step_input
EOF
}
registered_subcommands="$registered_subcommands meta"
_meta_short_help="Miscellaneous"
meta_find_slurpable() {
grep -s -h -o '^server_[a-z-]*' "$0" "$GXADMIN_SITE_SPECIFIC" | grep -v 'workflow-trace-archive' | sort | sed 's/server_//g'
}
meta_update() { ## : Update the script
handle_help "$@" <<-EOF
Fetches the latest gxadmin from GitHub and overwrites the current script
in place.
EOF
tmp=$(mktemp);
curl https://raw.githubusercontent.com/galaxyproject/gxadmin/main/gxadmin > "$tmp";
chmod ugo+rx "$tmp";
mv "$tmp" "$0";
exit 0;
}
meta_cmdlist() {
handle_help "$@" <<-EOF
Regenerates the per-section command listings and documentation under
docs/README.<section>.md.
EOF
IFS=$'\n'
# TOC
for section in $(locate_cmds_nolocal | correct_cmd | awk '{print $1}' | sort -u); do
# Progress
echo $section
# contents
echo "# $section" > "docs/README.${section}.md"
echo >> "docs/README.${section}.md"
echo "Command | Description" >> "docs/README.${section}.md"
echo "------- | -----------" >> "docs/README.${section}.md"
for command in $(locate_cmds_nolocal | correct_cmd | grep "^$section"); do
cmd_part="$(echo "$command" | sed 's/:.*//g;s/\s*<.*//g;s/\s*\[.*//')"
desc_part="$(echo "$command" | sed 's/^[^:]*:\s*//g')"
key_part="$(echo "$cmd_part" | sed 's/ /-/g')"
if [[ "$command" != *"Deprecated"* ]]; then
# Main ToC
echo "[\`${cmd_part}\`](#${key_part}) | $desc_part" >> "docs/README.${section}.md"
else
echo "\`${cmd_part}\` | $desc_part" >> "docs/README.${section}.md"
fi
done
for command in $(locate_cmds_nolocal | correct_cmd | grep "^$section"); do
cmd_part="$(echo "$command" | sed 's/:.*//g;s/\s*<.*//g;s/\s*\[.*//;s/\s*$//')"
desc_part="$(echo "$command" | sed 's/^[^:]*:\s*//g;s/\s*$//')"
key_part="$(echo "$cmd_part" | sed 's/ /-/g')"
url_part="$(echo "$cmd_part" | sed 's/ /_/g')"
if [[ "$command" != *"Deprecated"* ]]; then
# Subsec documentation
echo >> "docs/README.${section}.md"
echo "## $cmd_part" >> "docs/README.${section}.md"
echo >> "docs/README.${section}.md"
echo "([*source*](https://github.com/galaxyproject/gxadmin/search?q=${url_part}&type=Code))" >> "docs/README.${section}.md"
bash -c "$0 $cmd_part --help" >> "docs/README.${section}.md"
fi
done
done
}
meta_cmdlist2() {
handle_help "$@" <<-EOF
Lists every known command, one per line, without descriptions.
EOF
IFS=$'\n'
# TOC
for section in $(locate_cmds_nolocal | correct_cmd | awk '{print $1}' | sort -u); do
for command in $(locate_cmds_nolocal | correct_cmd | grep "^$section"); do
echo "$command" | sed 's/:.*//g;s/\s*<.*//g;s/\s*\[.*//;s/\s*$//'
done
done
}
meta_slurp-current() { ## [--date] [slurp-name [2nd-slurp-name [...]]]: Executes what used to be "Galaxy Slurp"
handle_help "$@" <<-EOF
Obtain influx compatible metrics regarding the current state of the
server. UseGalaxy.EU uses this to display things like "Current user
count" and "Current dataset size/distribution/etc."
It is expected that you are passing this straight to telegraf; no
non-influx output format is currently supported.
Supplying --date will append the current timestamp to each line, making
the output compatible with "gxadmin meta influx-post" usage.
You can add your own functions which are included in this output, using
the \$GXADMIN_SITE_SPECIFIC file. They must start with the prefix
"server_", e.g. "server_mymetric".
$ gxadmin meta slurp-current
server-allocated-cpu,job_runner_name=condor cpu_years=102.00
server-allocated-cpu,job_runner_name=local cpu_years=1346.00
server-datasets,deleted=f,object_store_id=,state=error,purged=f count=37,size=29895528
server-datasets,deleted=f,object_store_id=,state=ok,purged=f count=72,size=76739510
server-datasets,deleted=f,object_store_id=,state=discarded,purged=f count=2,size=0
server-hda,deleted=f,extension=gff3 max=2682565,sum=2682565,avg=2682565,min=2682565
server-hda,deleted=t,extension=tabular max=468549,sum=597843,avg=22142,min=2
server-hda,deleted=f,extension=fastqsanger max=3,sum=3,avg=3,min=3
server-hda,deleted=f,extension=tabular max=2819293,sum=3270268,avg=155727,min=3
server-hda,deleted=f,extension=png max=800459,sum=7047695,avg=503407,min=142863
server-hda,deleted=t,extension=auto max=9571,sum=9571,avg=9571,min=9571
server-hda,deleted=t,extension=data max=2,sum=2,avg=2,min=2
server-hda,deleted=t,extension=txt max=5023,sum=5039,avg=2520,min=16
server-hda,deleted=t,extension=bigwig max=2972569,sum=5857063,avg=1464266,min=0
server-hda,deleted=t,extension=tar.gz max=209034,sum=1318690,avg=188384,min=91580
server-hda,deleted=f,extension=vcf_bgzip max=3965889,sum=3965889,avg=3965889,min=3965889
server-hda,deleted=t,extension=png max=2969711,sum=6584812,avg=1097469,min=183
server-hda,deleted=f,extension=txt max=9584828,sum=132124407,avg=4556014,min=0
server-hda,deleted=f,extension=bigwig max=14722,sum=17407,avg=8704,min=2685
server-hda,deleted=f,extension=tar.gz max=209025,sum=390421,avg=195211,min=181396
server-histories,importable=f,deleted=f,genome_build=?,published=f,importing=f,purged=f count=14
server-jobs,state=ok,destination_id=slurm_singularity,job_runner_name=local count=26
server-jobs,state=error,destination_id=condor_resub,job_runner_name=condor count=1
server-jobs,state=deleted,destination_id=local,job_runner_name=local count=1
server-jobs,state=error,destination_id=condor,job_runner_name=condor count=8
server-jobs,state=ok,destination_id=local,job_runner_name=local count=13
server-jobs,state=ok,destination_id=condor,job_runner_name=condor count=2
server-jobs,state=error,destination_id=local,job_runner_name=local count=3
server-users,active=t,deleted=f,external=f,purged=f count=3
server-workflows,deleted=f,importable=f,published=f count=3
EOF
append=""
if [[ $1 == "--date" ]]; then
append=" "$(date +%s%N)
shift;
fi
specific_slurp=($@)
# shellcheck disable=SC2013
for func in $(meta_find_slurpable); do
# To allow only slurping the one that was requested, if this was done.
if (( ${#specific_slurp[@]} > 0 )); then
if [[ ! "${specific_slurp[*]}" =~ "${func}" ]]; then
continue
fi
fi
obtain_func "server" "$func"
$wrapper query_influx "$QUERY" "server-$query_name" "$fields" "$tags" | sed "s/$/$append/"
done
}
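# A minimal sketch of a site-specific slurpable metric, with a hypothetical
# name and query. Any function named server_<something> defined in
# $GXADMIN_SITE_SPECIFIC is discovered by meta_find_slurpable; it only needs
# to set QUERY, fields and tags (as the built-in server_* functions above do)
# and slurp-current/-day/-upto will render it through query_influx:
#
#     server_mymetric() { ## : Histories by deleted flag
#         fields="count=1"
#         tags="deleted=0"
#         read -r -d '' QUERY <<-EOF
#             SELECT deleted, count(*) FROM history GROUP BY deleted
#         EOF
#     }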
meta_slurp-upto() { ## <yyyy-mm-dd> [slurp-name [2nd-slurp-name [...]]]: Slurps data up to a specific date.
handle_help "$@" <<-EOF
Obtain influx compatible metrics regarding the summed state of the
server up to a specific date. UseGalaxy.EU uses this to display things
like charts of "how many users were registered as of date X".
This calls all of the same functions as 'gxadmin meta slurp-current',
but with date filters for the entries' creation times.
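A hypothetical example, limiting the slurp to the users metric:
$ gxadmin meta slurp-upto 2019-01-01 users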
EOF
date=$1; shift
specific_slurp=($@)
# shellcheck disable=SC2013
for func in $(meta_find_slurpable); do
# To allow only slurping the one that was requested, if this was done.
if (( ${#specific_slurp[@]} > 0 )); then
if [[ ! "${specific_slurp[*]}" =~ "${func}" ]]; then
continue
fi
fi
obtain_func server "$func" "$date" "<="
$wrapper query_influx "$QUERY" "server-$query_name.upto" "$fields" "$tags" | \
sed "s/$/ $(date -d "$date" +%s%N)/"
done
}
meta_slurp-day() { ## <yyyy-mm-dd> [slurp-name [2nd-slurp-name [...]]]: Slurps data on a specific date.
handle_help "$@" <<-EOF
Obtain influx compatible metrics regarding the state of the
server on a specific date. UseGalaxy.EU uses this to display things
like charts of "how many users were registered as of date X". You can
backfill data from your server by running a for loop like:
#!/bin/bash
for i in {1..365}; do
gxadmin meta slurp-day \$(date -d "\$i days ago" "+%Y-%m-%d") | <get into influx somehow>
done
It is expected that you are passing this straight to telegraf; no
non-influx output format is currently supported.
This calls all of the same functions as 'gxadmin meta slurp-current',
but with date filters for the entries' creation times.
You can add your own functions which are included in this output, using
the \$GXADMIN_SITE_SPECIFIC file. They must start with the prefix
"server_", e.g. "server_mymetric". They should include a date filter as
well, or the metrics reported here will be less useful.
$ gxadmin meta slurp-day 2019-01-01
server-allocated-cpu.daily,job_runner_name=condor cpu_years=102.00
server-datasets.daily,deleted=f,object_store_id=,state=error,purged=f count=37,size=29895528
server-datasets.daily,deleted=f,object_store_id=,state=ok,purged=f count=72,size=76739510
server-datasets.daily,deleted=f,object_store_id=,state=discarded,purged=f count=2,size=0
server-hda.daily,deleted=t,extension=data max=2,sum=2,avg=2,min=2
server-hda.daily,deleted=t,extension=txt max=5023,sum=5039,avg=2520,min=16
server-hda.daily,deleted=f,extension=fastqsanger max=3,sum=3,avg=3,min=3
server-hda.daily,deleted=f,extension=tabular max=3,sum=51,avg=3,min=3
server-hda.daily,deleted=t,extension=tabular max=468549,sum=552788,avg=21261,min=2
server-histories.daily,importable=f,deleted=f,genome_build=?,published=f,importing=f,purged=f count=5
server-jobs.daily,state=error,destination_id=condor,job_runner_name=condor count=8
server-jobs.daily,state=error,destination_id=condor_resub,job_runner_name=condor count=1
server-jobs.daily,state=error,destination_id=condor_a,job_runner_name=condor count=11
server-jobs.daily,state=ok,destination_id=condor,job_runner_name=condor count=2
server-jobs.daily,state=ok,destination_id=condor_a,job_runner_name=condor count=23
server-users.daily,active=t,deleted=f,external=f,purged=f count=1
server-workflows.daily,deleted=f,importable=f,published=f count=1
EOF
date=$1; shift;
specific_slurp=($@)
# shellcheck disable=SC2013
for func in $(meta_find_slurpable); do
# To allow only slurping the one that was requested, if this was done.
if (( ${#specific_slurp[@]} > 0 )); then
if [[ ! "${specific_slurp[*]}" =~ "${func}" ]]; then
continue
fi
fi
obtain_func server "$func" "$date"
$wrapper query_influx "$QUERY" "server-$query_name.daily" "$fields" "$tags" | \
sed "s/$/ $(date -d "$date" +%s%N)/"
done
}
meta_slurp-initial() { ## <yyyy-mm-dd> <yyyy-mm-dd> [slurp-name [2nd-slurp-name [...]]]: Slurps data starting at the first date until the second date.
handle_help "$@" <<-EOF
Obtains influx compatible metrics between dates and posts this to Influx.
This function calls 'gxadmin meta slurp-upto' and 'gxadmin meta slurp-day'.
It requires a start and an end date, and optionally a list of specific slurp queries to run.
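The INFLUX_DB, INFLUX_URL, INFLUX_USER and INFLUX_PASS environment
variables must be set. For example (hypothetical dates and slurp names):
$ gxadmin meta slurp-initial 2019-01-01 2019-06-30 users datasets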
EOF
# Variables
begindate=$1; shift
enddate=$1; shift
specific_slurp=($@)
specific_slurp_string=""
if (( ${#specific_slurp[@]} > 0 )); then
for specificslurppiece in "${specific_slurp[@]}"; do
if (( ${#specific_slurp_string} == 0 )); then
specific_slurp_string="$specificslurppiece"
else
specific_slurp_string="$specific_slurp_string $specificslurppiece"
fi
done
fi
tmpfile=/tmp/gxadmin-meta-slurp-initial-$(date +%s)
# Create temporary file
echo "" > $tmpfile
# Validate environment
assert_set_env INFLUX_DB
assert_set_env INFLUX_URL
assert_set_env INFLUX_PASS
assert_set_env INFLUX_USER
# Loop for the data
d=$begindate
while [ "$d" != $enddate ]; do
echo "Slurping for $d"
# Slurp the data
meta_slurp-upto $d $specific_slurp_string > $tmpfile
meta_slurp-day $d $specific_slurp_string >> $tmpfile
# Post to influxdb
meta_influx-post $INFLUX_DB $tmpfile
d=$(date -I -d "$d + 1 day")
done
rm $tmpfile
}
meta_error() {
error "$@"
}
meta_warning() {
warning "$@"
}
meta_success() {
success "$@"
}
meta_influx-post() { ## <db> <file>: Post contents of file (in influx line protocol) to influx
handle_help "$@" <<-EOF
Post data to InfluxDB. Must be [influx line protocol formatted](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
Posting data from a file:
$ gxadmin meta influx-post galaxy file.inflx
Posting data from the output of a command
$ gxadmin meta influx-post galaxy <(echo "weather,location=us-midwest temperature=\$RANDOM \$(date +%s%N)")
Posting data from the output of a gxadmin command
$ gxadmin meta influx-post galaxy <(gxadmin meta slurp-current --date)
**WARNING**
!> If you are sending a LOT of data points, consider splitting
!> them. Influx recommends 5-10k lines:
!>
!> $ split --lines=5000 data.iflx PREFIX
!> $ for file in PREFIX*; do gxadmin meta influx-post galaxy $file; done
EOF
assert_set_env INFLUX_URL
assert_set_env INFLUX_PASS
assert_set_env INFLUX_USER
assert_count_ge $# 2 "Must supply DB and then data file path"
DB="$1"
FILE="$2"
# If the input comes from process substitution it will be a transient
# /proc file descriptor and may fail the regular-file check, so skip the
# assertion in that case.
if [[ "$FILE" != "/proc/"* ]]; then
assert_file "$FILE"
fi
curl --silent -XPOST "${INFLUX_URL}/write?db=${DB}&u=${INFLUX_USER}&p=${INFLUX_PASS}" --data-binary @"${FILE}"
}
meta_influx-query() { ## <db> "<query>": Query an influx DB
handle_help "$@" <<-EOF
Query an InfluxDB
Query percentage of memory used over last hour.
$ gxadmin meta influx-query galaxy "select mean(used_percent) from mem where host='X' AND time > now() - 1h group by time(10m)" | \\
jq '.results[0].series[0].values[] | @tsv' -r
2019-04-26T09:30:00Z 64.83119975586277
2019-04-26T09:40:00Z 64.58284600472675
2019-04-26T09:50:00Z 64.62714491344244
2019-04-26T10:00:00Z 64.62339148181154
2019-04-26T10:10:00Z 63.95268353798708
2019-04-26T10:20:00Z 64.66849537282599
2019-04-26T10:30:00Z 65.06069941790024
EOF
assert_set_env INFLUX_URL
assert_set_env INFLUX_PASS
assert_set_env INFLUX_USER
assert_count_ge $# 2 "Must supply DB and then query"
DB="$1"
QUERY="$2"
curl --silent "${INFLUX_URL}/query?db=${DB}&u=${INFLUX_USER}&p=${INFLUX_PASS}" --data-urlencode "q=${QUERY}"
}
meta_iquery-grt-export() { ## : Export data from a GRT database for sending to influx
handle_help "$@" <<-EOF
**WARNING**:
!> GRT database specific query, will not work with a galaxy database!
EOF
fields="count=4"
timestamp="3"
tags="tool_id=0;tool_version=1;instance=2"
read -r -d '' QUERY <<-EOF
SELECT
api_job.tool_id,
api_job.tool_version,
api_galaxyinstance.title,
extract(epoch from date_trunc('week', api_job.create_time)) || '000000000' as date,
count(*)
FROM
api_job, api_galaxyinstance
WHERE
api_job.instance_id = api_galaxyinstance.id
GROUP BY
api_job.tool_id,
api_job.tool_version,
api_galaxyinstance.title,
date
EOF
}
meta_whatsnew() { ## : What's new in this version of gxadmin
handle_help "$@" <<-EOF
Informs users of what's new in the changelog since their version
EOF
current_version=$(version)
prev_version=$(( current_version - 1 ))
#sed -n '1,/^# 12/d;/^# 11/q;p'
echo "$CHANGELOG" | sed -n "/^# ${prev_version}/q;p"
}
meta_export-grafana-dashboards() { ## [grafana_db|/var/lib/grafana/grafana.db]: Export all dashboards from a Grafana database to CWD and commit them to git.
handle_help "$@" <<-EOF
Given a grafana database, use sqlite3 to extract all of the dashboards within, write them out to the current working directory as JSON, then commit them and update a README.
This script forms the basis of https://github.com/usegalaxy-eu/grafana-dashboards
**WARNING**
!> This script will silently remove all json files from CWD
!> as a first step. Additionally it will commit and push at the end, so it
!> should be run in a directory that has a git repo initialised, where you
are not concerned about accidentally pushing to the wrong remote.
EOF
db_path=${1:-/var/lib/grafana/grafana.db}
rm -f *.json
sqlite3 --csv -separator "$(printf '\t')" $db_path \
'select title,data from dashboard;' | \
awk -F'\t' '{gsub("\"", "", $1); print $2 > $1".json" }'
for i in *.json; do
q=$(mktemp)
cat "$i" | sed 's/^"//;s/"$//g;s/""/"/g' | jq -S . > "$q"
mv "$q" "$i";
lines=$(wc -l "$i" | awk '{print $1}')
if (( lines < 10 )); then
rm "$i"
fi
done;
cat > README.md <<-EOF
# Grafana Dashboards
Name | Tags | Version | Live | JSON
--- | --- | --- | --- | ---
EOF
sqlite3 --csv -separator "$(printf '\t')" $db_path \
'SELECT title,uid,title,version,GROUP_CONCAT(dashboard_tag.term) FROM dashboard left outer join dashboard_tag on dashboard.id = dashboard_tag.dashboard_id WHERE dashboard.is_folder = 0 GROUP BY title, uid, title order by title asc' | \
awk -F'\t' '{gsub("\"", "", $1); gsub("\"", "", $3); gsub(" ", "%20", $3); print $1" | "$5" | "$4" | [Live](https://stats.galaxyproject.eu/d/"$2") | [File](./"$3".json)"}' \
>> README.md
git add *.json README.md
git commit -a -m "Automated commit for $(date)"
git push --quiet
}
meta_wta-report() { ## : Export all workflow trace archive queries
handle_help "$@" <<-EOF
Run through several WTA commands and export those to CSV
EOF
tmpdir=$(mktemp -d)
obtain_func "server" "workflow-trace-archive-metrics"
query_csv "$QUERY" > "$tmpdir/job_metrics_numeric.csv"
obtain_func "server" "workflow-trace-archive-jobs"
query_csv "$QUERY" > "$tmpdir/jobs.csv"
obtain_func "server" "workflow-trace-archive-workflows"
query_csv "$QUERY" > "$tmpdir/workflows.csv"
obtain_func "server" "workflow-trace-archive-workflow-invocations"
query_csv "$QUERY" > "$tmpdir/workflow-invocations.csv"
obtain_func "server" "workflow-trace-archive-workflow-steps"
query_csv "$QUERY" > "$tmpdir/workflow-steps.csv"
obtain_func "server" "workflow-trace-archive-workflow-invocation-steps"
query_csv "$QUERY" > "$tmpdir/workflow-invoke-steps.csv"
obtain_func "server" "workflow-trace-archive-workflow-connections"
query_csv "$QUERY" > "$tmpdir/workflow-connections.csv"
obtain_func "server" "workflow-trace-archive-workflow-step-input"
query_csv "$QUERY" > "$tmpdir/workflow-step-input.csv"
tar cfz $tmpdir.tar.gz $tmpdir/*
echo $tmpdir.tar.gz
}
meta_complete-bash() { ## : export bash autocompletion
handle_help "$@" <<-EOF
Produces the bash autocompletion for gxadmin
Write the output of this somewhere useful, and source it in your bash profile.
$ gxadmin meta complete-bash > ~/.cache/gxadmin-autocomplete.sh
$ . ~/.cache/gxadmin-autocomplete.sh
$ gxadmin<TAB>
EOF
IFS=$'\n'
commands=$(locate_cmds_nolocal | correct_cmd | sed 's/://g')
leading=$(echo "$commands" | awk '{print $1}' | sort -u | paste -s -d' ')
subcommands=""
for cmd in $(echo "$commands" | awk '{print $1}' | sort -u); do
subcmds=$(echo "$commands" | awk '($1 == "'"$cmd"'"){ print $2 }' | sort -u | paste -s -d' ')
if (( ${#subcmds} > 0 )); then
subcommands="${subcommands}\n\n\t\t$cmd)\n\t\t\tCOMPREPLY=( \$(compgen -W \"${subcmds}\" \${cur}) )\n\t\t\treturn 0\n\t\t;;"
fi
done;
# Fix newlines
subcommands="$(echo -e "$subcommands")"
# Template
BASH_COMPLETION_TEMPLATE=$(cat <<EOF
_gxadmin() {
local cur prev opts
COMPREPLY=()
cur="\${COMP_WORDS[COMP_CWORD]}"
prev="\${COMP_WORDS[COMP_CWORD-1]}"
local -r cmds="%s"
case "\${prev}" in
%s
esac
COMPREPLY=( \$(compgen -W "\${cmds}" -- \${cur}) )
return 0
}
complete -F _gxadmin gxadmin
EOF
)
# shellcheck disable=SC2059
printf "${BASH_COMPLETION_TEMPLATE}\n" "$leading" "$subcommands"
}
obtain_func() {
category="$1"; shift
query_name="$1"; shift
if [[ "$category" != "" ]]; then
category="${category}_"
fi
fn="${category}${query_name}"
LC_ALL=C type "$fn" 2> /dev/null | grep -q 'function'
ec=$?
# ##? enables fancy argument parsing.
grep -s -h -o "${fn}()\s*{ ##?\? .*" "$0" | fgrep --quiet -o '##?'
# Nothing needs to be done, just let that function handle and auto-parse
# and auto-export
enable_wap=$?
if (( enable_wap == 0 )); then
wonderful_argument_parser "$fn" "$@"
fi
if (( ec == 0 )); then
$fn "$@";
else
export QUERY="ERROR"
fi
}
wonderful_argument_parser() {
# from the top, the function to look for
fn=$1;
# make it drop, so we can parse the rest
shift;
declare -a parsed_keys
declare -a parsed_vals
declare -a positional_args
declare -a optional_flag_args
declare -a optional_args
declare -a args
positional_index=0
optional_index=0
# shellcheck disable=SC2207
read -r -a signature <<< $(grep -s -h -o "${fn}()\s*{ ##?\? .*" "$0" | sed 's/.*##?//g;s/: .*//g;')
signature+=('[--help]') # This is always available
# shellcheck disable=SC2068
for x in $@; do
args+=("$x");
# If the help flag is in there, we can short-circuit
if [[ "$x" == "--help" ]] || [[ "$x" == "-h" ]]; then
return
fi
shift;
done
for arg in "${signature[@]}"; do
if [[ "$arg" == '<'* ]]; then
# This is a required argument
positional_args+=("$(echo "$arg" | sed 's/^<//;s/>$//')")
elif [[ "$arg" == '[--'* ]]; then
# This is an optional argument
optional_flag_args+=("$arg")
elif [[ "$arg" == '['* ]]; then
# This is an optional argument
optional_args+=("$(echo "$arg" | sed 's/^\[//;s/\]$//')")
else
# This is an error
echo "ERROR!!! Bad argument specification: $arg"
fi
done
# Size of the arrays
positional_count=${#positional_args[@]}
optional_count=${#optional_args[@]}
offset=0
while true; do
# Get the first bit of content from the arguments
a_cur=${args[$offset]}
if [[ "$a_cur" == "" ]]; then
break
fi
if [[ "$a_cur" == "--"* ]]; then
# This is a flag. So find the matching flag definition.
for arg in "${optional_flag_args[@]}"; do
# Two types of args: with, without values
if [[ "$arg" == "[--"* ]]; then
if [[ "$arg" == *'|'* ]]; then
# This has another argument.
# So we need to pop something else.
# And increment offset so we don't re-process it
if [[ "$arg" == '['$a_cur'|'* ]]; then
val="${args[$offset+1]}"
offset=$(( offset + 1 ))
k="$(echo "$a_cur" | sed 's/^--//;s/-/_/g')"
parsed_keys+=("${k}")
parsed_vals+=("$val")
fi
else
# This is just a flag
if [[ "$arg" == '['$a_cur']' ]]; then
k="$(echo "$a_cur" | sed 's/^--//;s/-/_/g')"
parsed_keys+=("${k}")
parsed_vals+=(1)
fi
fi
fi
done
else
# This is a non-flag, so maybe a positional, maybe an optional argument.
# So we need to find the Nth positional (via positional_index)
if (( (positional_index + optional_index) >= (positional_count + optional_count) )); then
echo "Error: more positional arguments than should be possible"
exit 1;
fi
if (( positional_index < positional_count )); then
parsed_keys+=("${positional_args[$positional_index]}")
parsed_vals+=("${a_cur}")
positional_index=$((positional_index + 1))
else
# We're past the positional and into the optional
k="${optional_args[$optional_index]}"
parsed_keys+=("$(echo "$k" | sed 's/|.*//g')")
parsed_vals+=("${a_cur}")
optional_index=$(( optional_index + 1 ))
fi
fi
offset=$(( offset + 1 ))
done
# Set all default optional args, if they aren't set
if (( optional_index < optional_count )); then
for i in $(seq $optional_index $((optional_count - 1)) ); do
if [[ "${optional_args[$i]}" == *'|'* ]]; then
k="${optional_args[$i]}"
parsed_keys+=("$(echo "$k" | sed 's/|.*//g')")
parsed_vals+=("$(echo "$k" | sed 's/.*|//g')")
fi
done
fi
if (( positional_index < positional_count )); then
for i in $(seq $positional_index $(( positional_count - 1 )) ); do
error "Required argument <${positional_args[$i]}> is missing"
done
exit 1;
fi
size=${#parsed_keys[@]}
for i in $(seq 0 $((size - 1))); do
#printf "\t%10s=%-10s\n" "${parsed_keys[$i]}" "${parsed_vals[$i]}"
export "arg_${parsed_keys[$i]}=${parsed_vals[$i]}"
done
}
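# A worked example of the ##? signature parsing above, using a hypothetical
# definition:
#     query_example() { ##? <user_id> [limit|10] [--all]: ...
# Calling 'gxadmin query example 42 --all' exports arg_user_id=42 and
# arg_all=1, and since no value was supplied for the optional argument its
# default is used, exporting arg_limit=10.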
look_for() {
query_type="$1"; shift
group_name="$1"; shift
query_name="$1"; shift
# query_type group_name query_name
# query tsvquery users-total
# galaxy none somethingelse
#echo "HIII $query_type $group_name $query_name"
if [[ $query_type == "local" ]]; then
if [[ ! -f ${GXADMIN_SITE_SPECIFIC} ]]; then
error "No local functions are defined in ${GXADMIN_SITE_SPECIFIC}"
exit 1
else
# Load functions
# shellcheck disable=SC1090
. "${GXADMIN_SITE_SPECIFIC}"
fi
fi
# Check that FN exists
fn="${query_type}_${query_name}"
LC_ALL=C type "$fn" 2> /dev/null | grep -q 'function'
ec=$?
if (( ec != 0 )); then
didyoumean "${query_type}" "${query_name}"
fi
# Set the application name to something useful. (Makes it easier to figure
# out which gxadmin command is hogging resources, etc.)
export PGAPPNAME=gxadmin.${query_type}.${query_name}
if [[ $query_type == "query" ]]; then
obtain_func "$query_type" "$query_name" "$@"
# If query in error, exit.
if [[ "$QUERY" == "ERROR" ]]; then
error "Error"
usage query
fi
# Run the queries
case "$group_name" in
tsvquery ) query_tsv "$QUERY";;
csvquery ) query_csv "$QUERY";;
query ) query_tbl "$QUERY";;
jsonquery ) query_json "$QUERY";;
iquery ) query_influx "$QUERY" "$query_name" "$fields" "$tags" "$timestamp";;
explainquery ) query_exp "$QUERY";;
explainjsonquery ) query_expj "$QUERY";;
echoquery ) query_echo "$QUERY";;
# default
* ) usage "Error";;
esac
elif [[ $query_type == "server" ]]; then
obtain_func "$query_type" "$query_name" "$@"
# If query in error, exit.
if [[ "$QUERY" == "ERROR" ]]; then
error "Error"
usage server
fi
# Run the queries
case "$group_name" in
tsvserver ) query_tsv "$QUERY";;
csvserver ) query_csv "$QUERY";;
server ) query_tbl "$QUERY";;
jsonserver ) query_json "$QUERY";;
iserver ) query_influx "$QUERY" "$query_name" "$fields" "$tags" "$timestamp";;
explainserver ) query_exp "$QUERY";;
explainjsonserver ) query_expj "$QUERY";;
echoserver ) query_echo "$QUERY";;
# default
* ) usage "Error";;
esac
elif [[ $query_type == "mutate" ]]; then
obtain_func "$query_type" "$query_name" "$@"
# If query in error, exit.
if [[ "$QUERY" == "ERROR" ]]; then
error "Error"
usage mutate
fi
# Run the queries
case "$group_name" in
mutate ) query_tbl "$QUERY";;
explainmutate ) query_exp "$QUERY";;
explainjsonmutate ) query_expj "$QUERY";;
echomutate ) query_echo "$QUERY";;
# default
* ) usage "Error";;
esac
elif [[ $query_type == "local" ]]; then
if [[ -z "${GXADMIN_BUGGER_OFF}" ]] && (( (RANDOM % 25) == 0 )); then
warning "Hey! It's great that you're using gxadmin! You should contribute these functions back :) Other people might find these useful, or could learn something from the code you've written, even if you think it is too specific to your site."
fi
$fn "$@";
if [[ "${query_name}" == "query"* ]]; then
query_tbl "$QUERY"
fi
else
$fn "$@";
if [[ "${query_name}" == "query"* ]]; then
query_tbl "$QUERY"
elif [[ "${query_name}" == "iquery"* ]]; then
query_influx "$QUERY" "$query_name" "$fields" "$tags" "$timestamp"
fi
fi
}
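# Example dispatch: 'gxadmin tsvquery users-total' reaches look_for with
# query_type=query, group_name=tsvquery and query_name=users-total;
# obtain_func resolves the function query_users-total and the resulting
# $QUERY is rendered with query_tsv.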
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance
levenshtein='
function levenshtein(str1, str2, cost_ins, cost_rep, cost_del, str1_len, str2_len, matrix, is_match, i, j, x, y, z) {
if(cost_ins == "") cost_ins = 1
if(cost_rep == "") cost_rep = 1
if(cost_del == "") cost_del = 1
str1_len = length(str1)
str2_len = length(str2)
if(str1_len == 0) return str2_len * cost_ins
if(str2_len == 0) return str1_len * cost_del
matrix[0, 0] = 0
for(i = 1; i <= str1_len; i++) {
matrix[i, 0] = i * cost_del
for(j = 1; j <= str2_len; j++) {
matrix[0, j] = j * cost_ins
x = matrix[i - 1, j] + cost_del
y = matrix[i, j - 1] + cost_ins
z = matrix[i - 1, j - 1] + (substr(str1, i, 1) == substr(str2, j, 1) ? 0 : cost_rep)
x = x < y ? x : y
matrix[i, j] = x < z ? x : z
}
}
return matrix[str1_len, str2_len]
}
{
print levenshtein($1, $2)"\t"$1;
}'
levenshtein_filter(){
cat | awk -f <(echo "$levenshtein") | sort -nk1 | head -n 10
}
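# levenshtein_filter expects lines of "candidate query" pairs on stdin and
# prints "distance<TAB>candidate" for the ten closest matches; didyoumean
# uses it to rank known subcommands against whatever the user typed.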
wrap_time() {
local TIMEFORMAT="Time to execute $1: %R seconds"
time "$@"
}
search() { # <term>: Search for a specific command
if (( $# > 0 )); then
if command -v fzf &> /dev/null; then
res=$(locate_cmds | correct_cmd | fzf -q "$1" | sed 's/[:<\[].*//g')
fzf_autorun_cmd
else
locate_cmds | correct_cmd | grep "$1" | colour_word "$1" orange
fi
else
if command -v fzf &> /dev/null; then
res=$(locate_cmds | correct_cmd | fzf | sed 's/[:<\[].*//g')
fzf_autorun_cmd
else
locate_cmds | correct_cmd
fi
fi
}
if (( $# == 0 )); then
usage
fi
mode="$1"; shift
wrapper=
if [[ "$mode" == time* ]]; then
wrapper=wrap_time
mode=${mode:4}
fi
case "$mode" in
#*query ) $wrapper query "$mode" "$@" ;;
*query ) $wrapper look_for "query" "$mode" "$@" ;;
*server) $wrapper look_for "server" "$mode" "$@" ;;
*mutate) $wrapper look_for "mutate" "$mode" "$@" ;;
config ) $wrapper look_for "$mode" "none" "$@" ;;
cluster) $wrapper look_for "$mode" "none" "$@" ;;
filter ) $wrapper look_for "$mode" "none" "$@" ;;
galaxy ) $wrapper look_for "$mode" "none" "$@" ;;
meta ) $wrapper look_for "$mode" "none" "$@" ;;
report ) $wrapper look_for "$mode" "none" "$@" ;;
uwsgi ) $wrapper look_for "$mode" "none" "$@" ;;
local ) $wrapper look_for "$mode" "none" "$@" ;;
s ) search "$@" ;;
find ) search "$@" ;;
search ) search "$@" ;;
# version commands
version ) version ;;
-v ) version ;;
--version ) version ;;
# help
help ) usage;;
-h ) usage;;
--help ) usage;;
# anything else
* ) didyoumean "$mode" "$@";;
esac