ksuderman/gxadmin

## gxadmin
#!/bin/bash
# gxadmin: Galaxy administration swiss army knife
# license: GPLv3
#
# This is the result of admins across the universe working together :)
#
# Thanks to everyone for their contributions
# https://github.com/galaxyproject/gxadmin/graphs/contributors
version() {
	echo 19
}
GXADMIN_SITE_SPECIFIC=${GXADMIN_SITE_SPECIFIC:-~/.config/gxadmin-local.sh}

hexencodefield9=$(cat <<EOF
import csv
import binascii
import sys
csv.field_size_limit(sys.maxsize)


spamreader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
spwamwrite = csv.writer(sys.stdout, delimiter=',', quotechar='"')

for row in spamreader:
	if row[9][0] != "\\\\":
		row[9] = "\\\\x" + binascii.hexlify(row[9])
	spwamwrite.writerow(row)
EOF
)

identicon_script=$(cat <<EOF
import sys
import hashlib

def colored(fg, bg, value):
	return '\x1b[38;5;{0}m\x1b[48;5;{1}m{2}\x1b[0m'.format(fg, bg, value)

class Iden:
	def build(self, h):
		pixels = [[0 for _ in range(5)] for _ in range(5)]
		for i in range(0, 5):
			for x in range(0, 5):
				pixels[x][i] = self.showPixel(i, x, h)
		return pixels

	def showPixel(self, x, y, h):
		m = 6 + abs(2-x) * 5 + y
		return int(h[m:m+1], 16) % 2 == 0

	def getIcon(self, num, hash_string):
		for x in self.build(hash_string):
			yield ''.join([
				colored(num, num + 15 % 255, '██' if q else '  ')
				for q in x
			])

data = sys.stdin.read().strip()
m = hashlib.sha256()
m.update(data.encode('UTF-8'))
hash_string = m.hexdigest()
num = int(hash_string[0:2], 16)
for line in Iden().getIcon(num, hash_string):
	print(line)
EOF
)
ifmore() {
	q="$(cat)"
	lines=$(echo "$q" | wc -l)
	# 2 more than the head command since we'll 'spend' two for the ... anyway.
	if (( lines > 8 )); then
		echo "$q" | head -n 6
		echo "    ..."
		echo "    run '$0 $1 help' for more"
	else
		echo "$q"
	fi
}

colour_word() {
	word=$1
	color=$2
	if [[ $color == "red" ]]; then
		color_idx=1
	elif [[ $color == "orange" ]]; then
		color_idx=2
	elif [[ $color == "green" ]]; then
		color_idx=4
	fi

	cat | sed "s|${word}|$(tput setab $color_idx)${word}$(tput sgr0)|g"
}

filter_commands() {
	cat | grep "^$1 " | sort -k2 | column -s: -t | sed 's/^/    /' | colour_word Deprecated orange | colour_word '(NEW)' green
}

locate_cmds() {
	grep -s -h -o '^[a-z0-9_-]*()\s*{ ##?\? .*' "$0" "$GXADMIN_SITE_SPECIFIC" | grep -v grep | grep -v '| sed' | sort
}

locate_cmds_nolocal() {
	grep -s -h -o '^[a-z0-9_-]*()\s*{ ##?\? .*' "$0" | grep -v grep | grep -v '| sed' | sort
}

correct_cmd() {
	cat | sed 's/_/ /;s/()\s*{ ##?\?//;s/ :/:/'
}

fzf_autorun_cmd() {
	read -p "Would you like to run '$0 $res'? [N|y]: " choice
	if [[ "$choice" == "y" ]] || [[ "$choice" == "Y" ]]; then
		$0 $res
	fi
}


didyoumean() {
	# Given a query

	# Is this a known subcommand
	known_command=0
	for x in $registered_subcommands; do
		if [[ "$1" == "$x" ]]; then
			known_command=1
		fi
	done

	# If it is, we check the second part.
	if (( known_command == 1 )); then
		if [[ "$2" == "" ]]; then
			usage "$1"
			exit 1;
		fi
		error "Unknown subcommand: $1 $2"
		warning "Did you mean one of the following?"
		echo

		if ! command -v fzf &> /dev/null; then
			echo "(Top hits by levenshtein distance)"
			locate_cmds | correct_cmd | grep "^$1 " | \
				cut -f 2 -d' ' | sed -s "s/://g;s/\$/ $2/g" | \
				levenshtein_filter | sed 's/^/ - [/;s/\t/]: /g'
		else
			res=$(locate_cmds | correct_cmd | grep "^$1 " | fzf -q "$2" --header="Unknown subcommand: $1 $2, did you mean..." | sed 's/[:<\[].*//g')
			fzf_autorun_cmd
		fi
		exit 1;


	else
		error "Unknown command: $1"
		usage
	fi

	exit 1;
}

usage(){
	cat <<-EOF
		gxadmin usage:

	EOF

	if (( $# == 0 )); then
		for x in $registered_subcommands; do
			vn="_${x}_short_help"
			printf "  %10s: %s\n" "${x}" "${!vn}"
		done
	else
		for x in $registered_subcommands; do
			if [[ "$1" == "$x" ]]; then
				vn="_${x}_short_help"
				echo "${!vn}"
				vn="_${x}_long_help"
				echo "${!vn}"
			fi
		done
	fi

	if [[ -f "$GXADMIN_SITE_SPECIFIC" ]]; then
		if (( $# == 0  )) || [[ "$1" == "local" ]]; then
			printf "  %10s: %s\n" "local" "(These can be configured in "$GXADMIN_SITE_SPECIFIC")"
		fi
	else
		cat <<-EOF
			Local-only commands can be configured in $GXADMIN_SITE_SPECIFIC

		EOF
	fi

	if (( $# == 0  )) || [[ "$1" == "search" ]]; then
		cat <<-EOF

			search <term>: Search gxadmin for functions with specific terms
		EOF
	fi

	if (( $# == 1 )); then
		echo
		locate_cmds | correct_cmd | filter_commands "$1"
	fi

	cat <<-EOF

		All commands can be prefixed with "time" to print execution time to stderr

	EOF


	cat <<-EOF
		help / -h / --help : this message. Invoke '--help' on any subcommand for help specific to that subcommand
		Tip: Run "gxadmin meta whatsnew" to find out what's new in this release!
	EOF

	exit 0;
}

handle_help() {
	if [[ ! -z "${GXADMIN_POPCON_ENABLE}" ]]; then
		if [[ "${query_name}" != "user-info" ]]; then
			echo "${mode:-mode} ${query_name:-sf}" >> ~/.gxadmin-popcon.log
		fi
	fi

	for i in "$@"; do
		if [[ "$i" = --help || "$i" = -h ]]; then

			if [[ -n "${query_name}" ]]; then
				key="${query_type}_${query_name}"
			fi

			invoke_desc=$(locate_cmds | grep "${key}()" | correct_cmd | sed "s/^/gxadmin /g")
			short_desc=$(echo "$invoke_desc" | sed 's/.*://g')
			short_parm=$(echo "$invoke_desc" | sed 's/:.*//g')
			echo "${mode} ${query_name} - ${short_desc}"
			echo
			echo "**SYNOPSIS**"
			echo
			echo "    $short_parm"
			echo
			manual="$(cat -)"
			manual_wc="${#manual}"
			if (( manual_wc > 3 )); then
				echo "**NOTES**"
				echo
				echo "$manual"
				echo
			fi
			# exit after printing the documentation!
			exit 0;
		fi
	done
}
assert_restart_lock(){
	if [ -f "$HOME/.restart-lock" ]; then
		echo "A restart lock exists. This means someone is probably already restarting galaxy."
		exit 3
	fi
}

assert_set_env() {
	env_var=$1
	if [[ -z "${!env_var}" ]]; then
		error Please set "\$${env_var}"
		exit 1
	fi
}

wait_for_url() {
	url=$1; shift;

	while [ "$(curl --connect-timeout 5 --silent "$url" | wc -c)" -eq "0" ]; do
		sleep 5;
		echo -n '.'
	done
}

assert_count() {
	if (( $1 != $2 )); then
		error "$3"
		exit 1
	fi
}

assert_count_ge() {
	if (( $1 < $2 )); then
		error "$3"
		exit 1
	fi
}

assert_file() {
	if [[ ! -f "$1" ]]; then
		error "File $1 does not exist"
		exit 1
	fi
}

assert_file_warn() {
	if [[ ! -f "$1" ]]; then
		warning "File $1 does not exist"
	fi
}
query_tbl() {
	psql <<-EOF
	$1
	EOF
}

query_tsv() {
	psql <<-EOF
	COPY ($1) to STDOUT with CSV DELIMITER E'\t'
	EOF
}

query_json() {
	psql <<-EOF
	COPY (
		SELECT array_to_json(array_agg(row_to_json(t)))
		FROM ($1) t
	) to STDOUT with (FORMAT CSV, QUOTE ' ')
	EOF
}

query_tsv_json() {
	psql <<-EOF
	COPY ($1) to STDOUT with (FORMAT CSV, QUOTE ' ')
	EOF
}

query_csv() {
	psql <<-EOF
	COPY ($1) to STDOUT with CSV DELIMITER ','
	EOF
}

query_exp() {
	psql <<-EOF
	EXPLAIN ANALYZE VERBOSE $1
	EOF
}

query_expj() {
	echo "$1"
	echo
	psql -qAt <<-EOF
	EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) $1
	EOF
}

query_echo() {
	echo "$1"
}

query_influx() {
	local query="$1"
	local rename="$2"
	local fields="$3"
	local tags="$4"
	local timestamp="$5"

	if [[ -z "$fields" ]]; then
		exit 0;
	fi

	arr2py=$(cat <<EOF
import sys
query_name = sys.argv[1]
fields = {x.split('=')[0]: int(x.split('=')[1]) for x in sys.argv[2].split(';')}
tags = []
if len(sys.argv) > 3 and len(sys.argv[3]) > 0:
	tags = {x.split('=')[0]: int(x.split('=')[1]) for x in sys.argv[3].split(';')}

timestamp = None
if len(sys.argv) > 4 and sys.argv[4] != '':
	timestamp = int(sys.argv[4])

for line in sys.stdin.read().split('\n'):
	if len(line) == 0:
		continue
	parsed = line.split('\t')
	metric = query_name
	if len(tags):
		tag_data = ['%s=%s' % (k, parsed[v].replace(' ', '\\ ').replace(',', '\\,').replace('=', '\\='))  for (k, v) in tags.items()]
		metric += ',' + ','.join(tag_data)
	field_data = ['%s=%s' % (k, parsed[v])  for (k, v) in fields.items()]
	metric += ' ' + ','.join(field_data)

	if timestamp is not None:
		metric += ' ' + str(parsed[timestamp])
	print(metric)
EOF
)

	psql -c "COPY ($query) to STDOUT with CSV DELIMITER E'\t'"| $GXADMIN_PYTHON -c "$arr2py" "$rename" "$fields" "$tags" "$timestamp"
}

gdpr_safe() {
	local coalesce_to
	if (( $# > 2 )); then
		coalesce_to="$3"
	else
		coalesce_to="__UNKNOWN__"
	fi

	if [ -z "$GDPR_MODE" ]; then
		echo "COALESCE($1::text, '$coalesce_to') as $2"
	else
		# Try and be privacy respecting while generating numbers that can allow
		# linking data across tables if need be?
		echo "substring(md5(COALESCE($1::text, '$coalesce_to') || now()::date || '$GDPR_MODE'), 0, 12) as ${2:-$1}"
	fi
}

# Borrowed from https://stackoverflow.com/questions/1527049/how-can-i-join-elements-of-an-array-in-bash
function join_by {
	local d=$1; shift;
	echo -n "$1";
	shift;
	printf "%s" "${@/#/$d}";
}

summary_statistics() {
	local v=$1
	local ishuman=$2

	# TODO: there has got to be a less ugly way to do this
	if (( ishuman == 1 )); then
		human_size="pg_size_pretty("
		human_after=")"
	else
		human_size=""
		human_after=""
	fi

	cat <<-EOF
		${human_size}min($v)${human_after} AS min,
		${human_size}percentile_cont(0.25) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS quant_1st,
		${human_size}percentile_cont(0.50) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS median,
		${human_size}avg($v)${human_after} AS mean,
		${human_size}percentile_cont(0.75) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS quant_3rd,
		${human_size}percentile_cont(0.95) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS perc_95,
		${human_size}percentile_cont(0.99) WITHIN GROUP (ORDER BY $v) ::bigint${human_after} AS perc_99,
		${human_size}max($v)${human_after} AS max,
		${human_size}sum($v)${human_after} AS sum,
		${human_size}stddev($v)${human_after} AS stddev
	EOF
}

get_user_filter(){
	echo "(galaxy_user.email = '$1' or galaxy_user.username = '$1' or galaxy_user.id = CAST('$1' AS INTEGER))"
}
error() {
	(>&2 echo "$(tput setab 1)$*$(tput sgr0)")
}

warning() {
	(>&2 echo "$(tput setab 2)$*$(tput sgr0)")
}

success() {
	echo "$(tput setaf 40)$*$(tput sgr0)"
}
# Find a python binary, hopefully.
if [[ -z "${GXADMIN_PYTHON}" ]]; then
	if hash python3 2>/dev/null; then
		export GXADMIN_PYTHON=$(command -v python3)
	elif hash python2 >/dev/null; then
		export GXADMIN_PYTHON=$(command -v python2)
	elif hash python >/dev/null; then
		export GXADMIN_PYTHON=$(command -v python)
	else
		warning "Some features require python support, sorry. If you have python installed somewhere that is not on the path or under a weird name, you can set GXADMIN_PYTHON to the path."
	fi
fi
CHANGELOG=$(cat <<EOF
# 20-pre

- Fixed:
	- Broken things in preview anonymisation script
	- Unified uwsgi-status command (thanks @gmauro)
	- Unified galaxy user/email/id filtering and made it stricter in what it accepts ([#60](https://github.com/galaxyproject/gxadmin/issues/60)).
	- Updated "query user-disk-usage" to add a "--use-precalc" flag which uses the value calculated by Galaxy.
	- Enabled GXADMIN_PYTHON from the environment, by [@gmauro](https://github.com/gmauro).
	- Updated "galaxy cleanup" to avoid to set update_time on updated objects, by [@gmauro](https://github.com/gmauro).
	- fixed broken latest-users query
	- fixed broken errored-jobs query, by [@gmauro](https://github.com/gmauro).
	- several fixes in galaxy ie-show command, [@gmauro](https://github.com/gmauro).
	- Generalized the "uwsgi lastlog" function to proper handle journalctl's output, by [@gmauro](https://github.com/gmauro).
- Added:
	- query dump-users, for [@shiltemann](https://github.com/shiltemann)
	- "did you mean" when a command isn't found.
	- query job-metrics, by [@anuprulez](https://github.com/anuprulez) and [@simonbray](https://github.com/simonbray).
	- uwsgi active-user count
	- exports PGAPPNAME setting the application name for postgres connections. This becomes visible in pg_stat_activity.
	- mutate set_quota_for_oidc_user, by [@gmauro](https://github.com/gmauro).
	- iquery support for queue-detail query
	- query tool-usage-over-time which can be used to make charts of specific tools over time.
	- improved use of FZF for both the search and 'typo' functions, when it is available.
	- query pulsar-gb-transferred: sums up data in/out (minus collections) of pulsar.

# 19

- Fixed:
	- Fixed broken slurping from previous release (Thanks [@slugger70](https://github.com/Slugger70) for catching it.)
	- Made changelog updates mandatory so I don't have to ask people for it. Finally.
- Added:
	- encode/decode ID
	- Benchmarking via ASV (preview)
	- anonymise-db-for-release (preview)
	- Add '--by_group' flag to several monthly queries (Thanks [@slugger70](https://github.com/Slugger70))
	- query monthly-cpu-stats (Thanks [@gmauro](https://github.com/gmauro))
	- workflow trace archive commands
	- query queue has a new optional parameter for splitting queues by various attributes (Thanks @natefoo)
	- query pg-rows-per-table to find out which tables have data.
	- mutate fail-wfi
	- mutate oidc-by-emails, mutate users affected by https://github.com/galaxyproject/galaxy/issues/9981
	- mutate now supports echo/explain prefixes.
	- very-unsafe flag to most mutate methods, avoiding the transaction.
	- added the [wonderful argument parsing](https://github.com/hexylena/wap) for parsing function signatures into arguments automatically, making contribution easier.

# 18

- Fixed:
	- All previous '--nice' flags are rewritten to '--human' (Thanks @lldelisle)
	- Update 'user-disk-usage' to support iquery (Thanks @lldelisle)
	- Update 'largest-histories' to support iquery (Thanks @lldelisle)
	- uwsgi-memory now looks at workflow schedulers
	- Exposed bash autocompletion and documented it
- Added:
	- query workers, only works on Galaxy 20.01 or newer and retrieves the hostname and PID of Galaxy worker processes
	- '--details' option to query errored-jobs to include the job_stderr column
	- total number of jobs with exit states for galaxxy instance (Thanks @bruggerk)
	- query workflow-invocation-totals
	- server subcommand exposing previously hidden functions
- Removed:
	- filter hexdecodelines, this is now replaced by a built-in postgres function
- Changed:
	- Refactored internal function search to unify local, query, mutate, with the rest.

# 17

Testing our new release message

- Fixed:
	- Issue with py3 only machines
- Added:
	- jobs ready to run

# 16

- Fixed:
	- figured out how to decode hex blobs in postgres

# 15

- Fancy [new docs site](https://galaxyproject.github.io/gxadmin/#/)
- Added:
	- search :(
	- query server-groups-allocated-cpu, thanks @selten
	- query server-groups-disk-usage, thanks @selten
	- query user-disk-quota, thanks @selten
	- query job-info, thanks @selten
	- query queue-detail-by-handler
	- mutate reassign-job-to-handler
	- a LOT of query pg-... commands from [heroku's pg-extras](https://github.com/heroku/heroku-pg-extras/tree/master/commands)
	- mutate drop-extraneous-workflow-step-output-associations
	- mutate reassign-workflows-to-handler
	- query data-origin-distribution
	- query data-origin-distribution-summary
	- query user-history-list

# 14

- Added:
	- Job working directory cleaner
	- meta slurp-upto
	- meta slurp-day
	- Imported jobs-queued-internal-by-handler and jobs-queued queries from main
	- query users-with-oidc
	- Three new queries for checking history execution time, thanks [@mmiladi](https://github.com/mmiladi)
		- query history-runtime-system-by-tool
		- query history-runtime-system
		- query history-runtime-wallclock
	- Three new queries for checking potentially broken tools, thanks [@jmchilton](https://github.com/jmchilton)
		- query tool-new-errors
		- query tool-errors
		- query tool-likely-broken
	- Import script from https://github.com/galaxyproject/grafana-dashboards
	- query for datasets uploaded in last N hours
	- mutate to approve users
	- mutate assign-unassigned-workflow: workaround for [galaxyproject/galaxy#8209](https://github.com/galaxyproject/galaxy#8209)
	- mutate oidc-role-find-affected: workaround for [galaxyproject/galaxy#8244](https://github.com/galaxyproject/galaxy#8244)
	- mutate oidc-role-fix: workaround for [galaxyproject/galaxy#8244](https://github.com/galaxyproject/galaxy#8244)
	- query user-disk-usage, thanks @selten
	- query group-cpu-seconds, thanks @selten
- Fixed:
	- Correct bug in queue-detail and a couple other functions which did not
	  correctly include data from anonymous users.

# 13

- Added:
	- local functions support querying when prefixed with "query-"
	- "meta influx-post" and "meta influx-query" were added to get data into
	  and out of Influx
	- "explainquery" is added as an alternative to csv/tsv/etc queries, which
	  does an "EXPLAIN ANALYZE" of the current SQL
	- "explainjsonquery" added for use with http://tatiyants.com/pev/
	- "time" prefix for all functions added to print execution time to stderr
	- Function to fail a specific job ID
	- "query user-recent-aggregate-jobs"
	- "query history-contents"
	- "query hdca-info"
	- "query hdca-datasets"
	- "mutate fail-history", when failing individual jobs is too slow
	- "galaxy migrate-tool-install-from-sqlite"
	- "query user-cpu-years"
	- Shellchecking of entire script
- Fixed:
	- Escaped commas in influx outputs, switched to tabs to further prevent
	  comma issues.
	- Correct date filter in "query {server-groups,server-datasets}"
	- Removed unnecessary function name duplication, e.g.
	    "query_filter() { ## query filter [id]: Does thing"
	  can now be written as
	    "query_filter() { ## [id]: Does thing"

# 12

- Added:
	- local functions, users can add functions to a separate file which are
	  made available in gxadmin. @erasche resisted implementing these for a
	  long time for fears that they won't be contributed back and everyone
	  would keep their precious sql private. So they may nag you occasionally
	  to contribute them back.
	- (semi) EU specific systemd handler/zergling management commands
	- filter digest-color: command to colour some text
	- filter identicon: command to generate an identicon
	- Some basic testing of the overall script
- Fixed:
	- Correct time zones in all queries to be client-side time zones rather
	  than UTC encoded timestamps as stored in db (Thanks [@slugger70](https://github.com/Slugger70))
	- Renamed: "query monthly-users" → "query monthly-users-active"
- Removed:
	- Removed highly EU specific handler and zerg functions
	- Deprecated: "query active-users"
- Changed:
	- Major internal reorganisation and split into parts for easier editing. A
	  bash script that needs a build system, truly horrifying.
	- User info query now shows recent WF invocations and whether their largest
	  histories are deleted or purged or not.
	- Separated out 'report' types that are markdown-only outputs and don't
	  support tsv/csv queries

# 11

- Added:
	- filter hexdecode: to help decode hex blobs in postgres queries
	- uwsgi stats\_influx: which fetches stats from uWSGI zerglings
	- filter pg2md: Convert postgres tables into markdown compatible tables
	- "GDPR_MODE" env var which blanks out usernames and emails
	- errored-jobs: Lists jobs that errored in the last N hours
	- workflow-connections: Exports worfklow connections, input to output tools
	- dump-config: Dump Galaxy configuration as JSON
	- tool-popularity: Most popular tools per month
- Changed:
	- user-details: now reports in markdown compatible output and with more
	  information about the user
	- job-info: now reports in markdown compatible output, including job inputs
	  + outputs
	- queue-overview: now includes user ID by default ("GDPR_MODE=1" will set
	  the value to "0"), and allows using shorter tool IDs
	- user-details: renamed to user-info
	- user-info: includes largest histories

# 10

- Added:
	- old-histories: Old histories query to find histories that haven't been
	  used for X weeks.
	- user-details: query details about a specific user ID
- Fixed:
	- Included date in cleanup logs

# 9

- Added:
	- Influx queries for:
		- active-users
		- collection-usage
		- disk-usage
		- groups-list
		- largest-collection
		- queue
		- queue-overview
		- tool-usage
		- ts-repos
		- users-count
		- users-total
	- tool-metrics
	- tool-available-metrics
	- largest-collection
- Fixed:
	- Improved "latest-users" to include any groups they're part of and their
	  registration status (ack'd email/not)
	- Influx queries now functional (but not automatic)
	- Collection usage

# 8

- Added:
	- Help documentation in-tool
- Fixed:
	- rewrote internals to use functions + case/esac

# 7

- Added:
	- cleanup
- Fixed:
	- removed all temporary tables for CTEs, allowing tsv/csv queries for
	  everything
	- internal cleanup

# 6

- Added functions:
	- active-users
	- training-memberof
	- training-remove-member
- Fixed:
	- update script
	- removed more EU paths for env vars


# 5

- Added functions:
	- disk-usage
	- users-count
	- users-total
	- groups-list
	- collection-usage
	- ts-repos
- Fixed job-inputs/job-outputs
- Fixed internal variable names

# 4

- Implemented update function
- removed all support for legacy "_" calls
- Fancy new automatically updated help function

# 3

- Added migration to sqlite for toolshed install database
- Removed some EU specific stuff

# 2

- Added training-queue and job-history functions
- replaced "_" with "-"

# 1 - Initial Release

Included functions for:

- validating configs
- zerg tasks
- handler tasks
- queries:
	- latest-users
	- tool-usage
	- job-info
	- job-outputs
	- queue
	- queue-detail
	- recent-jobs
	- jobs-per-user
	- runtime-per-user
	- training
	- training-members
	- queue-time
	- datasets-created-daily
EOF
)
registered_subcommands="$registered_subcommands config"
_config_short_help="Access Galaxy configuration and other assorted things"


config_validate() { ## : validate config files
	handle_help "$@" <<-EOF
		Validate the configuration files

		**Warning**:

		!> - This requires you to have \`\$GALAXY_DIST\` set and to have config under \`\$GALAXY_DIST/config\`.
		!> - This only validates that it is well formed XML, and does **not** validate against any schemas.
		!>
		!>     $ gxadmin validate
		!>       OK: galaxy-dist/data_manager_conf.xml
		!>       ...
		!>       OK: galaxy-dist/config/tool_data_table_conf.xml
		!>       OK: galaxy-dist/config/tool_sheds_conf.xml
		!>     All XML files validated
	EOF

	assert_set_env GALAXY_CONFIG_DIR
	assert_set_env GALAXY_MUTABLE_CONFIG_DIR

	fail_count=0
	for file in ${GALAXY_CONFIG_DIR}/*.xml; do
		xmllint "$file" > /dev/null 2>/dev/null;
		ec=$?
		if (( ec > 0 )); then
			fail_count=$(echo "$fail_count + 1" | bc)
			error "  FAIL: $file ($ec)";
		else
			success "  OK: $file";
		fi
	done;

	for file in ${GALAXY_MUTABLE_CONFIG_DIR}/*.xml; do
		xmllint "$file" > /dev/null 2>/dev/null;
		ec=$?
		if (( ec > 0 )); then
			fail_count=$(echo "$fail_count + 1" | bc)
			error "  FAIL: $file ($ec)";
		else
			success "  OK: $file";
		fi
	done;

	if (( fail_count == 0 )); then
		success "All XML files validated"
	else
		error "XML validation failed, cancelling any actions."
		exit 1
	fi
}

config_dump() { ## : Dump Galaxy configuration as JSON
	handle_help "$@" <<-EOF
		This function was added with the intention to use it internally, but it may be useful in your workflows. It uses the python code from the Galaxy codebase in order to properly load the configuration which is then dumped as JSON.

		    (.venv)$ gxadmin dump-config | jq -S . | head
		    {
		      "activation_grace_period": 3,
		      "admin_users": "hxr@local.host",
		      "admin_users_list": [
		        "hxr@local.host"
		      ],
		      "allow_library_path_paste": false,
		      "allow_path_paste": false,
		      "allow_user_creation": true,
		      "allow_user_dataset_purge": true,
	EOF

	assert_set_env GALAXY_ROOT
	assert_set_env GALAXY_CONFIG_FILE

	dump_config_python=$(cat <<EOF
import argparse
import json
import os
import sys

sys.path.insert(1, '$GALAXY_ROOT/lib')

import galaxy.config
from galaxy.util.script import app_properties_from_args, populate_config_args

parser = argparse.ArgumentParser()
populate_config_args(parser)
args = parser.parse_args()
args.config_file = '$GALAXY_CONFIG_FILE'

app_properties = app_properties_from_args(args)
config = galaxy.config.Configuration(**app_properties)

sys.stdout.write(json.dumps(config.__dict__, default=lambda o: '<fail>'))
EOF
)
	echo "$dump_config_python" | $GXADMIN_PYTHON
}

registered_subcommands="$registered_subcommands mutate"
_mutate_short_help="DB Mutations, CSV/TSV queries are NOT available"

txn_prefix() {
	if [[ "$1" == "--very-unsafe" ]]; then
		printf "";
	else
		printf "BEGIN TRANSACTION;\n"
	fi
}

txn_postfix() {
	if [[ "$1" == "--commit" ]] || [[ "$1" == "1" ]]; then
		printf "\nCOMMIT;\n"
	elif [[ "$1" == "--very-unsafe" ]]; then
		printf "";
	else
		printf "\nROLLBACK;\n"
	fi
}

mutate_fail-terminal-datasets() { ## [--commit]: Causes the output datasets of jobs which were manually failed, to be marked as failed
	handle_help "$@" <<-EOF
		Whenever an admin marks a job as failed manually (e.g. by updating the
		state in the database), the output datasets are not accordingly updated
		by default. And this causes users to mistakenly think their jobs are
		still running when they have long since failed.

		This command provides a way to select those jobs in error states
		(deleted, deleted_new, error, error_manually_dropped,
		new_manually_dropped), find their associated output datasets, and fail
		them with a blurb mentionining that they should contact the admin in
		case of any question

		Running without any arguments will execute the command within a
		transaction and then roll it back, allowing you to see counts of rows
		and giving you an idea if it is doing the right thing.

		**WARNINGS**

		!> This does NOT currently work on collections

		**EXAMPLES**

		The process is to first query how many datasets will be failed, if this looks correct you're ready to go.

		    $ gxadmin mutate fail-terminal-datasets
		    BEGIN
		    SELECT 1
		    jobs_per_month_to_be_failed | count
		    -----------------------------+-------
		    2019-02-01 00:00:00         |     1
		    (1 row)

		    UPDATE 1
		    UPDATE 1
		    ROLLBACK

		Then to run with the --commit flag to commit the changes

		    $ gxadmin mutate fail-terminal-datasets --commit
		    BEGIN
		    SELECT 1
		    jobs_per_month_to_be_failed | count
		    -----------------------------+-------
		    2019-02-01 00:00:00         |     1
		    (1 row)

		    UPDATE 1
		    UPDATE 1
		    COMMIT
	EOF
	# TODO(hxr): support collections

	read -r -d '' QUERY <<-EOF
		CREATE TEMP TABLE terminal_jobs_temp AS
			SELECT
				dataset.id as ds_id,
				history_dataset_association.id as hda_id,
				dataset.create_time AT TIME ZONE 'UTC' as ds_create
			FROM
				dataset,
				history_dataset_association,
				job_to_output_dataset,
				job
			WHERE
				dataset.id = history_dataset_association.dataset_id
				AND history_dataset_association.id = job_to_output_dataset.dataset_id
				AND job.id = job_to_output_dataset.job_id
				AND dataset.state IN ('queued', 'running', 'new')
				AND job.state
					IN ('deleted', 'deleted_new', 'error', 'error_manually_dropped', 'new_manually_dropped');

		SELECT
			date_trunc('month', ds_create) as jobs_per_month_to_be_failed, count(*)
		FROM terminal_jobs_temp
		GROUP BY jobs_per_month_to_be_failed
		ORDER BY date_trunc('month', ds_create) desc;

		UPDATE dataset
		SET
			state = 'error'
		WHERE id in (select ds_id from terminal_jobs_temp);

		UPDATE history_dataset_association
		SET
			blurb = 'execution error',
			info = 'This dataset''s job failed and has been manually addressed by a Galaxy administrator. Please use the bug icon to report this if you need assistance.'
		WHERE id in (select hda_id from terminal_jobs_temp)
	EOF

	txn_pre=$(txn_prefix "$1")
	txn_pos=$(txn_postfix "$1")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_fail-job() { ## <job_id> [--commit]: Sets a job state to error
	handle_help "$@" <<-EOF
		Sets a job's state to "error"
	EOF

	assert_count_ge $# 1 "Must supply a job ID"
	id=$1

	read -r -d '' QUERY <<-EOF
		UPDATE
			job
		SET
			state = 'error'
		WHERE
			id = '$id'
	EOF

	txn_pre=$(txn_prefix  "$2")
	txn_pos=$(txn_postfix "$2")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_fail-history() { ## <history_id> [--commit]: Mark all jobs within a history to state error
	handle_help "$@" <<-EOF
		Set all jobs within a history to error
	EOF

	assert_count_ge $# 1 "Must supply a history ID"
	id=$1

	read -r -d '' QUERY <<-EOF
		SELECT
			id, state
		FROM
			job
		WHERE
			id
			IN (
					SELECT
						job_id
					FROM
						job_to_output_dataset
					WHERE
						dataset_id
						IN (
								SELECT
									id
								FROM
									history_dataset_association
								WHERE
									history_id = $1
							)
				)
			AND state NOT IN ('ok', 'error')
	EOF

	txn_pre=$(txn_prefix  "$2")
	txn_pos=$(txn_postfix "$2")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_delete-group-role() { ## <group_name> [--commit]: Remove the group, role, and any user-group + user-role associations
	handle_help "$@" <<-EOF
		Wipe out a group+role, and user associations.
	EOF

	assert_count_ge $# 1 "Must supply a group name"
	id=$1

	read -r -d '' QUERY <<-EOF
		DELETE FROM group_role_association
		WHERE group_id = (SELECT id FROM galaxy_group WHERE name = '$1');

		DELETE FROM user_group_association
		WHERE group_id = (SELECT id FROM galaxy_group WHERE name = '$1');

		DELETE FROM user_role_association
		WHERE role_id = (SELECT role_id FROM group_role_association WHERE group_id= (SELECT id FROM galaxy_group WHERE name = '$1'));

		DELETE FROM role
		WHERE id = (SELECT role_id FROM group_role_association WHERE group_id = (SELECT id FROM galaxy_group WHERE name = '$1'));

		DELETE FROM galaxy_group
		WHERE name = '$1'
	EOF

	txn_pre=$(txn_prefix  "$2")
	txn_pos=$(txn_postfix "$2")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_assign-unassigned-workflows() { ## <handler_prefix> <handler_count> [--commit]: Randomly assigns unassigned workflows to handlers. Workaround for galaxyproject/galaxy#8209
	handle_help "$@" <<-EOF
		Workaround for https://github.com/galaxyproject/galaxy/issues/8209

		Handler names should have number as postfix, so "some_string_##". In
		this case handler_prefix is "some_string_" and count is however many
		handlers you want to schedule workflows across.
	EOF

	assert_count_ge $# 1 "Must supply a handler_prefix"
	assert_count_ge $# 2 "Must supply a count"

	prefix=$1
	count=$2

	read -r -d '' QUERY <<-EOF
		UPDATE workflow_invocation
		SET handler = '$prefix' || (random() * $count)::integer
		WHERE state = 'new' and handler = '_default_'
		RETURNING workflow_invocation.id
	EOF

	txn_pre=$(txn_prefix  "$3")
	txn_pos=$(txn_postfix "$3")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_reassign-workflows-to-handler() { ## <handler_from> <handler_to> [--commit]: Reassign workflows in 'new' state to a different handler.
	handle_help "$@" <<-EOF
		Another workaround for https://github.com/galaxyproject/galaxy/issues/8209

		Need to use the full handler names e.g. handler_main_0
	EOF

	assert_count_ge $# 1 "Must supply a handler_from"
	assert_count_ge $# 2 "Must supply a handler_to"

	read -r -d '' QUERY <<-EOF
		UPDATE workflow_invocation
		SET handler = '$2'
		WHERE state = 'new' and handler = '$1'
		RETURNING workflow_invocation.id
	EOF

	txn_pre=$(txn_prefix  "$3")
	txn_pos=$(txn_postfix "$3")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_approve-user() { ## <username|email|user_id>: Approve a user in the database
	handle_help "$@" <<-EOF
		There is no --commit flag on this because it is relatively safe
	EOF

	assert_count_ge $# 1 "Must supply a username/email/user-id"

	user_filter=$(get_user_filter "$1")

	read -r -d '' QUERY <<-EOF
		UPDATE galaxy_user
		SET active = true
		WHERE $user_filter
	EOF
}

mutate_oidc-role-find-affected() { ## : Find users affected by galaxyproject/galaxy#8244
	handle_help "$@" <<-EOF
		Workaround for https://github.com/galaxyproject/galaxy/issues/8244

		This finds all of the OIDC authenticated users which do not have any
		roles associated to them. (Should be sufficient?)
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			user_id
		FROM
			oidc_user_authnz_tokens AS ouat
		WHERE
			ouat.user_id NOT IN (SELECT user_id FROM user_role_association)
	EOF

	# Not proud of this.
	FLAVOR='tsv'
}

mutate_oidc-role-fix() { ## <username|email|user_id>: Fix permissions for users logged in via OIDC. Workaround for galaxyproject/galaxy#8244
	handle_help "$@" <<-EOF
		Workaround for https://github.com/galaxyproject/galaxy/issues/8244
	EOF

	user_filter=$(get_user_filter "$1")

	# Coerce to user ID
	read -r -d '' qstr <<-EOF
		SELECT id, email
		FROM galaxy_user
		WHERE $user_filter
	EOF
	echo "QUERY: $qstr"
	results="$(query_tsv "$qstr")"
	echo "RESULTS: $results"
	user_id=$(echo "$results" | awk '{print $1}')
	email=$(echo "$results" | awk '{print $2}')


	# check if there is an existing role.
	read -r -d '' qstr <<-EOF
		select count(*) from user_role_association left join role on user_role_association.role_id = role.id where user_id = $user_id and role.type = 'private'
	EOF
	echo "QUERY: $qstr"
	results="$(query_tsv "$qstr")"
	echo "RESULTS: $results"

	# Some (mild) sanity checking. Should probably wrap this up in some nice.
	if (( results != 0 )); then
		error "A default private role already exists for this account."
		exit 1
	fi


	# Create the role since it does not exist
	read -r -d '' qstr <<-EOF
		INSERT INTO role (create_time, update_time, name, description, type, deleted)
		VALUES (now(), now(), '$email', 'Private Role for $email', 'private', false)
		RETURNING id
	EOF
	echo "QUERY: $qstr"
	role_id="$(query_tsv "$qstr")"
	echo "RESULTS: $results"

	# Associate with the user
	read -r -d '' qstr <<-EOF
		INSERT INTO user_role_association (user_id, role_id, create_time, update_time)
		VALUES ($user_id, $role_id, now(), now())
	EOF
	echo "QUERY: $qstr"
	query_tbl "$qstr"

	# Insert into their personal default user permissions
	read -r -d '' qstr <<-EOF
		INSERT INTO default_user_permissions (user_id, action, role_id)
		VALUES ($user_id, 'manage permissions', $role_id), ($user_id, 'access', $role_id)
	EOF
	echo "QUERY: $qstr"
	results="$(query_tbl "$qstr")"
	echo "RESULTS: $results"

	# Fix dataset_permissions. Manage rows are created but have a null entry in their role_id
	read -r -d '' qstr <<-EOF
		UPDATE dataset_permissions
		SET role_id = $role_id
		WHERE
			id
			IN (
					SELECT
						id
					FROM
						dataset_permissions
					WHERE
						dataset_id
						IN (
								SELECT
									ds.id
								FROM
									history AS h
									LEFT JOIN history_dataset_association AS hda ON
											h.id = hda.history_id
									LEFT JOIN dataset AS ds ON hda.dataset_id = ds.id
								WHERE
									h.user_id = $user_id
							)
						AND role_id IS NULL
						AND action = 'manage permissions'
				)
	EOF
	echo "QUERY: $qstr"
	results="$(query_tbl "$qstr")"
	echo "RESULTS: $results"

	# We could update history permissions but this would make those histories
	# private and the current behaviour is that they're public, if the user has
	# tried to share, then we'd be changing permissions and going against user
	# expectations.
	read -r -d '' QUERY <<-EOF
		select 'done'
	EOF
}

mutate_reassign-job-to-handler() { ## <job_id> <handler_id> [--commit]: Reassign a job to a different handler
	handle_help "$@" <<-EOF
	EOF

	assert_count_ge $# 2 "Must supply a job and handler ID"
	job_id=$1
	handler_id=$2

	read -r -d '' QUERY <<-EOF
		UPDATE
			job
		SET
			handler = '$handler_id'
		WHERE
			job.id = $job_id
	EOF

	txn_pre=$(txn_prefix  "$3")
	txn_pos=$(txn_postfix "$3")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_drop-extraneous-workflow-step-output-associations() { ## [--commit]: #8418, drop extraneous connection
	handle_help "$@" <<-EOF
		Per https://github.com/galaxyproject/galaxy/pull/8418, this drops the
		workflow step output associations that are not necessary.

		This only needs to be run once, on servers which have run Galaxy<=19.05
		to remove duplicate entries in the following tables:

		- workflow_invocation_step_output_dataset_association
		- workflow_invocation_step_output_dataset_collection_association
	EOF

	read -r -d '' QUERY <<-EOF
		WITH exclude_list AS (
			SELECT max(w.id) as id
			FROM workflow_invocation_step_output_dataset_association as w
			GROUP BY workflow_invocation_step_id, dataset_id, output_name
		)

		DELETE
		FROM workflow_invocation_step_output_dataset_association wisoda
		WHERE NOT EXISTS (SELECT 1 FROM exclude_list WHERE wisoda.id = exclude_list.id);


		WITH exclude_list AS (
			SELECT max(w.id) as id
			FROM workflow_invocation_step_output_dataset_collection_association as w
			GROUP BY workflow_invocation_step_id, dataset_collection_id, output_name
		)

		DELETE
		FROM workflow_invocation_step_output_dataset_collection_association wisodca
		WHERE NOT EXISTS (SELECT 1 FROM exclude_list WHERE wisodca.id = exclude_list.id)
	EOF

	txn_pre=$(txn_prefix  "$1")
	txn_pos=$(txn_postfix "$1")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_restart-jobs() { ## [--commit] <-|job_id [job_id [...]]> : Restart some jobs
	handle_help "$@" <<-EOF
		Restart jobs
	EOF

	commit_flag=""
	if [[ $1 == "--commit" ]]; then
		commit_flag="$1"
		shift;
	fi

	if [[ "$1" == "-" ]]; then
		# read jobs from stdin
		job_ids=$(cat | paste -s -d' ')
	else
		# read from $@
		job_ids=$@;
	fi

	job_ids_string=$(join_by ',' ${job_ids[@]})

	read -r -d '' QUERY <<-EOF
		UPDATE job
		SET state = 'new'
		WHERE job.id in ($job_ids_string)
	EOF

	txn_pre=$(txn_prefix  "$commit_flag")
	txn_pos=$(txn_postfix "$commit_flag")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_generate-unset-api-keys() { ## [--commit]: Generate API keys for users which do not have one set.
	handle_help "$@" <<-EOF
		For some use cases (IEs), it is preferrable that everyone has an API
		key set for them, if they don't choose to set one themselves. So we set
		a base64'd key to be a bit extra secure just in case. These work just
		fine like hex keys.
	EOF

	commit_flag=""
	if [[ $1 == "--commit" ]]; then
		commit_flag="$1"
		shift;
	fi

	read -r -d '' QUERY <<-EOF
		INSERT INTO api_keys (create_time, user_id, key)
		(
			SELECT
				now(),
				galaxy_user.id,
				substring(regexp_replace(encode(random_bytea(64), 'base64')::TEXT, '[^a-z0-9A-Z]', '', 'g'), 0, 32)
			FROM
				galaxy_user LEFT JOIN api_keys ON galaxy_user.id = api_keys.user_id
			WHERE
				api_keys.key IS NULL
		)
	EOF

	txn_pre=$(txn_prefix  "$commit_flag")
	txn_pos=$(txn_postfix "$commit_flag")
	QUERY="$txn_pre $QUERY; $txn_pos"

}

mutate_anonymise-db-for-release() { ## [--commit|--very-unsafe]: This will attempt to make a database completely safe to release publicly.
	handle_help "$@" <<-EOF
		THIS WILL DESTROY YOUR DATABASE.

		--commit will do it and wrap it in a transaction
		--very-unsafe will just run it without the transaction
	EOF

	commit_flag=""
	if [[ "$1" == "--commit" ]] || [[ "$1" == "--very-unsafe" ]]; then
		commit_flag="$1"
		shift;
	fi

	read -r -d '' QUERY <<-EOF
--DONE
		CREATE OR REPLACE FUNCTION gxadmin_digest_value(input text)
		RETURNS float
		AS \$\$
			SELECT
				('0.' || abs(('x' || substr(md5(current_date ||input::text),1,16))::bit(64)::bigint))::float
		\$\$ LANGUAGE SQL STABLE;

--DONE
		CREATE OR REPLACE FUNCTION gxadmin_random_seed(input text, size integer)
		RETURNS table(value float)
		AS \$\$
			SELECT gxadmin_digest_value(input::text || generate_series) as value
			FROM generate_series(1, size)
		\$\$ LANGUAGE SQL STABLE;

-- DONE
		CREATE OR REPLACE FUNCTION gxadmin_random_number(input text, size integer)
		RETURNS integer
		AS \$\$
			SELECT (power(10, size) * gxadmin_digest_value(input))::integer
		\$\$ LANGUAGE SQL IMMUTABLE;

-- DONE
		CREATE OR REPLACE FUNCTION gxadmin_random_word(input text, size integer)
		RETURNS text
		AS \$\$
			SELECT array_to_string(
				ARRAY(
					SELECT chr((97 + round(value * 25)) :: integer)
					FROM gxadmin_random_seed(input, size)
				),
				''
			);
		\$\$ LANGUAGE SQL STABLE;

-- DONE
		CREATE OR REPLACE FUNCTION gxadmin_random_slug(input text, size integer)
		RETURNS text
		AS \$\$
			SELECT
				substr(
					gxadmin_random_word(input || 'a' || size, 5 + (gxadmin_digest_value(input || 'a' || size) * size)::integer)
					|| '-' ||
					gxadmin_random_word(input || 'b' || size, 3 + (gxadmin_digest_value(input || 'b' || size) * size)::integer)
					|| '-' ||
					gxadmin_random_word(input || 'c' || size, 4 + (gxadmin_digest_value(input || 'c' || size) * size)::integer)
				, 1, size)
		\$\$ LANGUAGE SQL STABLE;

-- DONEish
		CREATE OR REPLACE FUNCTION gxadmin_random_email(input text)
		RETURNS text
		AS \$\$
			with tmp as (
				select gxadmin_digest_value(input) as a
			)
			SELECT
				CASE
					WHEN a < 0.3 THEN gxadmin_random_number(input, 7) || '@example.com'
					WHEN a < 0.8 THEN gxadmin_random_slug(input, 6) || '@example.com'
					ELSE gxadmin_random_slug(input, 20) || '@example.com'
				END
			from tmp
		\$\$ LANGUAGE SQL STABLE;

-- DONE
		CREATE OR REPLACE FUNCTION gxadmin_random_ip(input text)
		RETURNS text
		AS \$\$
			-- Uses documentation range IPs
			with tmp as (
				select gxadmin_digest_value(input::text) as a
			)
			SELECT
				CASE
					WHEN a < 0.7 THEN '192.0.2.' || (gxadmin_digest_value(input) * 255)::integer
					ELSE '2001:0db8:85a3:8d3:1319:8a2e:0370:' || to_hex((gxadmin_digest_value(input) * 65536)::integer)
				END
			from tmp
		\$\$ LANGUAGE SQL STABLE;

-- DONE
		CREATE OR REPLACE FUNCTION gxadmin_random_string(input text, size integer, start integer, width integer)
		RETURNS text
		AS \$\$
			SELECT array_to_string(
				ARRAY(
					SELECT chr((start + round(value * width)) :: integer)
					FROM gxadmin_random_seed(input, size)
				),
				''
			);
		\$\$ LANGUAGE SQL STABLE;

-- DONE
		CREATE OR REPLACE FUNCTION gxadmin_random_tag(input text)
		RETURNS text
		AS \$\$
			with tmp as (
				select gxadmin_digest_value(input) as a
			)
			SELECT
				CASE
					WHEN a < 0.1 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 65, 26)
					WHEN a < 0.2 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 192, 200)
					WHEN a < 0.3 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 1025, 100)
					WHEN a < 0.4 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 5121, 200)
					WHEN a < 0.5 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 9728, 400)
					WHEN a < 0.6 THEN gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 14300, 100)
					WHEN a < 0.8 THEN gxadmin_random_slug(input, 3 + gxadmin_random_number(input, 1))
					ELSE gxadmin_random_string(input, 3 + gxadmin_random_number(input, 1), 48, 10)
				END
			from tmp
		\$\$ LANGUAGE SQL;

-- DONE
		CREATE OR REPLACE FUNCTION gxadmin_random_pw(input text, size integer)
		RETURNS text
		AS \$\$
			SELECT gxadmin_random_string(input, size, 48, 59);
		\$\$ LANGUAGE SQL STABLE;

-- DONE
		CREATE OR REPLACE FUNCTION gxadmin_random_text(input text, size integer)
		RETURNS text
		AS \$\$
			SELECT array_to_string(
				ARRAY(
					SELECT gxadmin_random_tag(input || generate_series)
					FROM generate_series(1, size)
				),
				' '
			);
		\$\$ LANGUAGE SQL;

-- DONE
		-- https://stackoverflow.com/a/48901446
		CREATE OR REPLACE FUNCTION gxadmin_round_sf(n numeric, digits integer)
		RETURNS numeric
		AS \$\$
			SELECT floor(n / (10 ^ floor(log(n) - digits + 1))) * (10 ^ floor(log(n) - digits + 1))
		\$\$ LANGUAGE SQL IMMUTABLE STRICT;


		-- api_keys
		COPY (SELECT 'api_keys') to STDOUT;
			update api_keys set key = 'random-key-' || user_id;

		-- cleanup_event                                        is OK
		-- cleanup_event_dataset_association                    is OK
		-- cleanup_event_hda_association                        is OK
		-- cleanup_event_his tory_association                   is OK
		-- cleanup_event_icda_association                       is OK
		-- cleanup_event_ldda_association                       is EMPTY on AU
		-- cleanup_event_library_association                    is EMPTY on AU
		-- cleanup_event_library_dataset_association            is EMPTY on AU
		-- cleanup_event_library_folder_association             is EMPTY on AU
		-- cleanup_event_metadata_file_association              is OK
		-- cleanup_event_user_association                       is EMPTY on AU, EU
		-- cloudauthz                                           is EMPTY on AU, EU
		-- custos_authnz_token                                  is EMPTY on AU, EU
		-- data_manager_history_association                     is OK (user_id, history_id)
		-- data_manager_job_association                         is OK (job_id, DM id)
		-- dataset
		COPY (SELECT 'dataset') to STDOUT;
			-- TODO: Do external_filename, and _extra_files_path need to be cleaned?
			-- TODO: this is imperfect, we do something better in GRT where we take 2 SDs
			-- https://stackoverflow.com/questions/48900936/postgresql-rounding-to-significant-figures
			-- MAYBE use synthetic data here?
			update dataset set
				file_size = round(file_size, -3),
				total_size = round(total_size, -3);
		-- dataset_collection                                   is OK (type, count, etc.)
		-- dataset_collection_element
		COPY (SELECT 'dataset_collection_element') to STDOUT;
			update dataset_collection_element set
				element_identifier = gxadmin_random_slug(id::text, 10);
		-- dataset_hash                                         is EMPTY on AU, EU
		-- dataset_permissions                                  is OK (role, dataset)
		-- dataset_source
		COPY (SELECT 'dataset_source') to STDOUT;
			update dataset_source set
				source_uri = 'https://example.org/test.dat';

		-- dataset_source_hash                                  is EMPTY on AU, EU
		-- dataset_tag_association                              is EMPTY on AU, EU
		-- default_history_permissions                          is OK (hist_id, action, role_id)
		-- default_quota_association                            is OK
		-- default_user_permissions                             is OK (user, action, role)
		-- deferred_job                                         is EMPTY on AU, EU
		-- dynamic_tool                                         is EMPTY on AU, EU
		-- event                                                is EMPTY on AU, EU
		-- extended_metadata                                    is EMPTY on AU, EU
		-- extended_metadata_index                              is EMPTY on AU, EU
		-- external_service                                     is EMPTY on AU, EU
		-- form_definition                                      is EMPTY on AU, EU
		-- form_definition_current                              is EMPTY on AU, EU
		-- form_values                                          is EMPTY on AU, EU
		-- galaxy_group
		COPY (SELECT 'galaxy_group') to STDOUT;
			update galaxy_group set
				name = 'group-' || id;

		-- galaxy_session
		COPY (SELECT 'galaxy_session') to STDOUT;
			update galaxy_session set
				remote_host = gxadmin_random_ip(remote_host)
				where remote_host is not null;

			update galaxy_session set
				remote_addr = gxadmin_random_ip(remote_addr)
				where remote_addr is not null;
			update galaxy_session set
				referer = 'https://example.org'
				where  referer is not null;

			update galaxy_session set
				session_key = gxadmin_random_pw(id::text, 16), prev_session_id = null;

		-- galaxy_session_to_history                            is OK (time, session_id, hist_id)
		-- galaxy_user
		COPY (SELECT 'galaxy_user') to STDOUT;
			-- TODO: better email length/content distribution
			-- TODO: better username length/content distribution. UNICODE.
			-- TODO: rounding to SDs.
			update galaxy_user set
				email = 'user-' || id || '@example.org',
				password = 'x',
				username = 'user-' || id,
				form_values_id = null,
				activation_token = '',
				disk_usage = round(disk_usage, -5);

		-- galaxy_user_openid
		COPY (SELECT 'galaxy_user_openid') to STDOUT;
			update galaxy_user_openid set
				openid = 'https://example.org/identity/' || user_id;

		-- genome_index_tool_data                               is EMPTY on AU, EU
		-- group_quota_association                              is OK (group_id, quota id)
		-- group_role_association                               is OK (group_id, role id)
		-- history
		COPY (SELECT 'history') to STDOUT;

			-- TODO: better name distribution. UNICODE. (I see greek, chinese, etc on EU)
			update history set
				name = gxadmin_random_tag(id::text)
				where name != 'Unnamed history';

			update history set
				slug = gxadmin_random_slug(id::text, 30)
				where  slug != '' or slug is not null;

		-- history_annotation_association
		COPY (SELECT 'history_annotation_association') to STDOUT;
			-- TODO: better distribution. UNICODE.
			update history_annotation_association set
				annotation = gxadmin_random_text(id::text, 30)
				where  annotation is not null;

		-- history_dataset_association
		COPY (SELECT 'history_dataset_association') to STDOUT;
			-- TODO: SIGNIFICANT validation needed.
			update history_dataset_association set
				name = gxadmin_random_tag(id::text) || '.' || extension;

			update history_dataset_association set
				peek = 'redacted' where peek is not null;

			update history_dataset_association set
				designation = 'hda-' || id::text
				where designation is not null;

			update history_dataset_association set
				info = 'redacted'
				where info is not null and info != 'Job output deleted by user before job completed';

			update history_dataset_association set
				metadata = null;

		-- history_dataset_association_annotation_association
		COPY (SELECT 'history_dataset_association_annotation_association') to STDOUT;

			update history_dataset_association_annotation_association set
				annotation = gxadmin_random_text(id::text, 30)
				where annotation is not null;

		-- history_dataset_association_display_at_authorization is OK (user_id, hda, site=ucsc_main)
		-- history_dataset_association_history
		COPY (SELECT 'history_dataset_association_history') to STDOUT;

			-- TODO: distribution. Consistency with HDA?
			update history_dataset_association_history set
				name = gxadmin_random_tag(history_dataset_association_id::text) || '.' || extension;


			update history_dataset_association_history set
				metadata = null;

		-- history_dataset_association_rating_association       is EMPTY on AU, EU
		-- history_dataset_association_subset                   is OK (hda id, hdas id, location)
		-- history_dataset_association_tag_association
		COPY (SELECT 'history_dataset_association_tag_association') to STDOUT;

			-- user_tname == 'group' and 'name' are special.
			update history_dataset_association_tag_association set
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname in ('group', 'name');

			-- Sometimes people use a diffferent prefix
			update history_dataset_association_tag_association set
				user_tname = gxadmin_random_tag(id::text || 'tname'),
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname not in ('group', 'name') and user_tname is not null;

			-- Otherwise just set some data
			update history_dataset_association_tag_association set
				user_tname = gxadmin_random_tag(id::text)
				where user_tname is null;

		-- history_dataset_collection_annotation_association    is EMPTY on AU, EU
		-- history_dataset_collection_association
		COPY (SELECT 'history_dataset_collection_association') to STDOUT;

			update history_dataset_collection_association set
				name = 'hdca-' || id;

		-- history_dataset_collection_rating_association        is EMPTY on AU
		-- history_dataset_collection_tag_association
		COPY (SELECT 'history_dataset_collection_tag_association') to STDOUT;

			-- user_tname == 'group' and 'name' are special.
			update history_dataset_collection_tag_association set
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname in ('group', 'name');

			-- Sometimes people use a diffferent prefix
			update history_dataset_collection_tag_association set
				user_tname = gxadmin_random_tag(id::text || 'tname'),
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname not in ('group', 'name') and user_tname is not null;

			-- Otherwise just set some data
			update history_dataset_collection_tag_association set
				user_tname = gxadmin_random_tag(id::text)
				where user_tname is null;

		-- history_rating_association is OK
		-- history_tag_association
		COPY (SELECT 'history_tag_association') to STDOUT;

			-- user_tname == 'group' and 'name' are special.
			update history_tag_association set
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname in ('group', 'name');

			-- Sometimes people use a diffferent prefix
			update history_tag_association set
				user_tname = gxadmin_random_tag(id::text || 'tname'),
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname not in ('group', 'name') and user_tname is not null;

			-- Otherwise just set some data
			update history_tag_association set
				user_tname = gxadmin_random_tag(id::text)
				where user_tname is null;

		-- history_user_share_association                       is MAYBE OK (hist_id, user_id) You can determine networks, but same with groups.
		-- implicit_collection_jobs                             is OK (id, pop state)
		-- implicit_collection_jobs_job_association             is OK
		-- implicitly_converted_dataset_association             is OK
		-- implicitly_created_dataset_collection_inputs         is OK
		-- interactivetool_entry_point                          is EMPTY on AU
		-- job
		COPY (SELECT 'job') to STDOUT;
			update job set
				command_line = 'redacted',
				destination_params = null,
				dependencies = null;

			update job set
				tool_stdout = 'redacted'
				where tool_stdout != null;

			update job set
				tool_stderr = 'redacted'
				where tool_stderr != null;

			update job set
				traceback = 'redacted'
				where traceback != null;

			update job set
				params = 'redacted'
				where params != null;

			update job set
				job_messages = 'redacted'
				where job_messages != null;

			update job set
				job_stdout = 'redacted'
				where job_stdout != null;

			update job set
				job_stderr = 'redacted'
				where job_stderr != null;

		-- job_container_association
		COPY (SELECT 'job_container_association') to STDOUT;

			update job_container_association set
				container_name = '',
				container_info = '';

		-- job_export_history_archive
		COPY (SELECT 'job_export_history_archive') to STDOUT;

			update job_export_history_archive set
				history_attrs_filename = '/tmp/tmp' || gxadmin_random_pw(id::text || 'h', 6),
				datasets_attrs_filename = '/tmp/tmp' || gxadmin_random_pw(id::text || 'd', 6),
				jobs_attrs_filename = '/tmp/tmp' || gxadmin_random_pw(id::text || 'j', 6);

		-- job_external_output_metadata
		COPY (SELECT 'job_external_output_metadata') to STDOUT;

			update job_external_output_metadata set
				filename_in = '/tmp/job_working_directory/' || job_id || '/metadata_in_' || gxadmin_random_pw(id::text || 'i', 6),
				filename_out = '/tmp/job_working_directory/' || job_id || '/metadata_out_' || gxadmin_random_pw(id::text || 'o', 6),
				filename_results_code = '/tmp/job_working_directory/' || job_id || '/metadata_results_' || gxadmin_random_pw(id::text || 'm', 6),
				filename_kwds = '/tmp/job_working_directory/' || job_id || '/metadata_kwds_' || gxadmin_random_pw(id::text || 'k', 6),
				filename_override_metadata = '/tmp/job_working_directory/' || job_id || '/metadata_override_' || gxadmin_random_pw(id::text || 'o', 6);

		-- job_import_history_archive
		COPY (SELECT 'job_import_history_archive') to STDOUT;

			update job_import_history_archive set
				archive_dir = '/tmp/tmp' || gxadmin_random_pw(id::text, 6);

		-- job_metric_numeric is OK
		-- job_metric_text
		COPY (SELECT 'job_metric_text') to STDOUT;

			truncate job_metric_text;

		-- job_parameter
		COPY (SELECT 'job_parameter') to STDOUT;

			-- TODO: length distribution? Name distribution?
			update job_parameter set
				name = 'param-name',
				value = 'param-value';

		-- job_state_history                          is OK
		-- job_to_implicit_output_dataset_collection  is MAYBE OK (param name?)
		-- job_to_input_dataset                       is MAYBE OK (param name?)
		-- job_to_input_dataset_collection            is MAYBE OK (param name?)
		-- job_to_input_library_dataset               is EMPTY on AU
		-- job_to_output_dataset                      is MAYBE OK (param name?)
		-- job_to_output_dataset_collection           is MAYBE OK (param name?)
		-- job_to_output_library_dataset              is OK
		-- kombu_message
		COPY (SELECT 'kombu_message') to STDOUT;
			truncate kombu_message;
		-- kombu_queue                                        is OK (queue name only)
		-- library
		COPY (SELECT 'library') to STDOUT;

			update library set
				name = gxadmin_random_tag(id::text),
				description = gxadmin_random_tag(id::text || 'desc'),
				synopsis = gxadmin_random_tag(id::text || 'synopsis');

		-- library_dataset
		COPY (SELECT 'library_dataset') to STDOUT;

			update library_dataset set
				name = 'lda-' || id,
				info = '';

		-- library_dataset_collection_annotation_association    is EMPTY on AU, EU
		-- library_dataset_collection_association               is EMPTY on AU, EU
		-- library_dataset_collection_rating_association        is EMPTY on AU, EU
		-- library_dataset_collection_tag_association           is EMPTY on AU, EU
		-- library_dataset_dataset_association
		COPY (SELECT 'library_dataset_dataset_association') to STDOUT;

			-- TODO: SIGNIFICANT validation needed.
			update library_dataset_dataset_association set
				name = gxadmin_random_tag(id::text) || '.' || extension;

			update library_dataset_dataset_association set
				peek = 'redacted' where peek is not null;

			update library_dataset_dataset_association set
				designation = 'hda-' || id::text
				where designation is not null;

			update library_dataset_dataset_association set
				info = 'redacted' where info is not null;

			update library_dataset_dataset_association set
				message = 'redacted' where message is not null;

			update library_dataset_dataset_association set
				metadata = null;


		-- library_dataset_dataset_association_permissions      is OK (permissions, ldda id, role id)
		-- library_dataset_dataset_association_tag_association  is EMPTY on AU, EU
		-- library_dataset_dataset_info_association             is EMPTY on AU, EU
		-- library_dataset_permissions                          is OK (permissions, ld id, role id)
		-- library_folder
		COPY (SELECT 'library_folder') to STDOUT;

			update library_folder set
				name = gxadmin_random_tag(id::text),
				description = gxadmin_random_text(id::text || 'desc', 10);

		-- library_folder_info_association                      is EMPTY on AU, EU
		-- library_folder_permissions                           is OK (permissions, lf id, role id)
		-- library_info_association                             is EMPTY on AU
		-- library_permissions                                  is OK (permissions, lib id, role id)
		-- metadata_file                                        is OK
		-- migrate_tools                                        is EMPTY on AU, EU
		-- migrate_version                                      is OK
		-- oidc_user_authnz_tokens
		COPY (SELECT 'oidc_user_authnz_tokens') to STDOUT;

			update oidc_user_authnz_tokens set
				uid = 'redacted',
				extra_data = null;

		-- page
		COPY (SELECT 'page') to STDOUT;

			update page set
				title = gxadmin_random_tag(id::text),
				slug = gxadmin_random_slug(id::text, 10);

		-- page_annotation_association
		COPY (SELECT 'page_annotation_association') to STDOUT;

			update page_annotation_association set
				annotation = gxadmin_random_tag(id::text);

		-- page_rating_association                              is EMPTY on AU
		-- page_revision
		COPY (SELECT 'page_revision') to STDOUT;

			update page_revision set
				content = '<p>Some <b>content</b></p>';

		-- page_tag_association                                 is EMPTY on AU

			-- user_tname == 'group' and 'name' are special.
			update page_tag_association set
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname in ('group', 'name');

			-- Sometimes people use a diffferent prefix
			update page_tag_association set
				user_tname = gxadmin_random_tag(id::text || 'tname'),
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname not in ('group', 'name') and user_tname is not null;

			-- Otherwise just set some data
			update page_tag_association set
				user_tname = gxadmin_random_tag(id::text)
				where user_tname is null;

		-- page_user_share_association                          is EMPTY on AU, fine though
		-- password_reset_token
		COPY (SELECT 'password_reset_token') to STDOUT;

			update password_reset_token set
				token = md5('x' || random());

		-- post_job_action
		COPY (SELECT 'post_job_action') to STDOUT;

			update post_job_action set
				action_arguments = convert_to('{}', 'UTF8')
				where action_type != 'RenameDatasetAction';

			update post_job_action set
				action_arguments = convert_to('{"newname": "Control check"}', 'UTF8')
				where action_type = 'RenameDatasetAction';

		-- post_job_action_association                          is OK
		-- psa_association
		COPY (SELECT 'psa_association') to STDOUT;

			update psa_association set
				handle = 'redacted',
				assoc_type = 'redacted';

		-- psa_code                                             is EMPTY on AU, EU
		-- psa_nonce                                            is EMPTY on AU, EU
		-- psa_partial                                          is EMPTY on AU, EU
		-- quota
		COPY (SELECT 'quota') to STDOUT;

			update quota set
				name = pg_size_pretty(bytes),
				description = '';

		-- repository_dependency                                is EMPTY on AU, OK on EU (ts repo id)
		-- repository_repository_dependency_association         is EMPTY on AU, OK on EU
		-- request                                              is EMPTY on AU, EU
		-- request_event                                        is EMPTY on AU, EU
		-- request_type                                         is EMPTY on AU, EU
		-- request_type_external_service_association            is EMPTY on AU, EU
		-- request_type_permissions                             is EMPTY on AU, EU
		-- request_type_run_association                         is EMPTY on AU, EU
		-- role
		COPY (SELECT 'role') to STDOUT;

			update role set
				name = gxadmin_random_email(id::text),
				description = 'Private role for ' || gxadmin_random_email(id::text)
				where type = 'private';

			update role set
				name = gxadmin_random_text(id::text, 3),
				description = 'System role'
				where type = 'system';

			update role set
				name = gxadmin_random_text(id::text, 5),
				description = 'Sharing role'
				where type = 'sharing';

			update role set
				name = gxadmin_random_text(id::text, 2),
				description = ''
				where type not in ('private', 'system', 'sharing');

		-- run                                                  is EMPTY on AU, EU
		-- sample                                               is EMPTY on AU, EU
		-- sample_dataset                                       is EMPTY on AU, EU
		-- sample_event                                         is EMPTY on AU, EU
		-- sample_run_association                               is EMPTY on AU, EU
		-- sample_state                                         is EMPTY on AU, EU
		-- stored_workflow
		COPY (SELECT 'stored_workflow') to STDOUT;

			update stored_workflow set
				name = gxadmin_random_tag(id::text),
				slug = gxadmin_random_slug(id::text, 10);

		-- stored_workflow_annotation_association
		COPY (SELECT 'stored_workflow_annotation_association') to STDOUT;

			update stored_workflow set
				annotation = gxadmin_random_text(id::text)
				where annotation is not null;

		-- stored_workflow_menu_entry                           is OK
		-- stored_workflow_rating_association                   is OK
		-- stored_workflow_tag_association
		COPY (SELECT 'stored_workflow_tag_association') to STDOUT;

			-- user_tname == 'group' and 'name' are special.
			update stored_workflow_tag_association set
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname in ('group', 'name');

			-- Sometimes people use a diffferent prefix
			update stored_workflow_tag_association set
				user_tname = gxadmin_random_tag(tag_id::text),
				value = lower(gxadmin_random_tag(id::text)),
				user_value = gxadmin_random_tag(id::text)
				where user_tname not in ('group', 'name') and user_tname is not null;

			-- Otherwise just set some data
			update stored_workflow_tag_association set
				user_tname = gxadmin_random_tag(tag_id::text)
				where user_tname is null;

		-- stored_workflow_user_share_connection is OK
		-- tag
		COPY (SELECT 'tag') to STDOUT;

			update tag set
				name = gxadmin_random_tag(id::text)
				where name not in ('name', 'group');

		-- task                                                 is EMPTY on AU, not on EU

			update task set
				command_line = '',
				param_filename = '',
				runner_name = '',
				tool_stdout = '',
				tool_stderr = '',
				task_runner_name = '',
				task_runner_external_id = '',
				prepare_input_files_cmd = '',
				working_directory = '',
				info = '',
				job_messages = '',
				job_stdout = '',
				job_stderr = '';

		-- task_metric_numeric                                  is EMPTY on AU, EU
		-- task_metric_text                                     is EMPTY on AU, EU
		-- tool_dependency                                      is EMPTY on AU, OK on EU
		-- tool_shed_repository                                 is EMPTY on AU, OK on EU
		-- tool_tag_association                                 is EMPTY on AU, EU
		-- tool_version                                         is EMPTY on AU, OK on EU
		-- tool_version_association                             is EMPTY on AU, EU
		-- transfer_job                                         is EMPTY on AU, OK on EU
		-- user_action                                          is EMPTY on AU, EU
		-- user_address
		COPY (SELECT 'user_address') to STDOUT;

			update user_address set
				"desc" = gxadmin_random_text(id::text, 1),
				name = gxadmin_random_text(id::text || 'name', 1),
				institution = gxadmin_random_text(id::text || 'inst', 1),
				address     = gxadmin_random_text(id::text || 'addr', 1),
				city        = gxadmin_random_text(id::text || 'city', 1),
				state       = gxadmin_random_text(id::text || 'stat', 1),
				postal_code = gxadmin_random_number(id::text, 5),
				country     = 'Australia',
				phone       = gxadmin_random_number(id::text, 10);

		-- user_group_association                                          is OK
		-- user_preference
		COPY (SELECT 'user_preference') to STDOUT;

			-- TODO: make this better? I just don't feel safe given genomespace tokens, etc.
			truncate user_preference;

		-- user_quota_association                                          is OK
		-- user_role_association                                           is OK
		-- validation_error                                                is EMPTY on AU
		-- visualization
		COPY (SELECT 'visualization') to STDOUT;

			update visualization set
				title = gxadmin_random_text(id::text, 3),
				slug = gxadmin_random_slug(id::text, 20);

		-- visualization_annotation_association                            is EMPTY on AU

			update visualization_annotation_association set
				annotation = gxadmin_random_text(id::text, 10);

		-- visualization_rating_association                                is EMPTY on AU
		-- visualization_revision                                          is MAYBE OK
		-- visualization_tag_association                                   is EMPTY on AU
		-- visualization_user_share_association                            is EMPTY on AU
		-- worker_process                                                  is OK
		-- workflow
		COPY (SELECT 'workflow') to STDOUT;

			update workflow set
				name = gxadmin_random_text(id::text, 3);

			update workflow set
				reports_config = convert_to('{}', 'UTF8')
				where  reports_config is not null;

		-- workflow_invocation                                             is OK
		-- workflow_invocation_output_dataset_association                  is OK
		-- workflow_invocation_output_dataset_collection_association       is OK
		-- workflow_invocation_output_value                                is EMPTY on AU
		-- workflow_invocation_step                                        is OK
		-- workflow_invocation_step_output_dataset_association             is MAYBE OK (output_name?)
		-- workflow_invocation_step_output_dataset_collection_association  is MAYBE OK (output_name?)
		-- workflow_invocation_to_subworkflow_invocation_association       is OK
		-- workflow_output                                                 is MAYBE OK (output_name)
		-- workflow_request_input_parameters                               is OK
		-- workflow_request_input_step_parameter
		COPY (SELECT 'workflow_request_input_step_parameter') to STDOUT;

			update workflow_request_input_step_parameter set
				parameter_value = convert_to('asdf', 'UTF8');
		-- workflow_request_step_states
		COPY (SELECT 'workflow_request_step_states') to STDOUT;

			update workflow_request_step_states set
				value = convert_to('{}', 'UTF8');

		-- workflow_request_to_input_collection_dataset                    is OK
		-- workflow_request_to_input_dataset                               is OK
		-- workflow_step
		COPY (SELECT 'workflow_step') to STDOUT;

			update workflow_step set
				tool_inputs = convert_to('{}', 'UTF8');

			update workflow_step set
				label = gxadmin_random_slug(id::text, 10)
				where label is not null;

		-- workflow_step_annotation_association
		COPY (SELECT 'workflow_step_annotation_association') to STDOUT;

			update workflow_step_annotation_association set
				annotation = gxadmin_random_text(id::text, 10);

		-- workflow_step_connection                                        is OK
		-- workflow_step_input                                             is OK
		-- workflow_step_tag_association                                   is EMPTY on AU, EU
		-- workflow_tag_association                                        is EMPTY on AU, EU
	EOF


	txn_pre=$(txn_prefix  "$commit_flag")
	txn_pos=$(txn_postfix "$commit_flag")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_fail-wfi() { ## <wf-invocation-d> [--commit]: Sets a workflow invocation state to failed
	handle_help "$@" <<-EOF
		Sets a workflow invocation's state to "failed"
	EOF

	assert_count_ge $# 1 "Must supply a wf-invocation-id"
	id=$1

	read -r -d '' QUERY <<-EOF
		UPDATE
			workflow_invocation
		SET
			state = 'failed'
		WHERE
			id = '$id'
	EOF

	txn_pre=$(txn_prefix  "$2")
	txn_pos=$(txn_postfix "$2")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_oidc-by-emails() { ## <email_from> <email_to> [--commit]: Reassign OIDC account between users.
	handle_help "$@" <<-EOF
		Workaround for users creating a new account by clicking the OIDC button, with case mismatching between existing accounts.
		Please note that this function is case-sensitive. Fixes https://github.com/galaxyproject/galaxy/issues/9981.
	EOF

	assert_count_ge $# 2 "Must supply an email_from and an email_to";

	read -r -d '' QUERY <<-EOF
		UPDATE oidc_user_authnz_tokens
		SET user_id=correctuser.id
		FROM (
			SELECT id FROM galaxy_user WHERE email='$2'
		) AS correctuser
		WHERE user_id = (SELECT id FROM galaxy_user WHERE email='$1')
	EOF

	txn_pre=$(txn_prefix  "$3")
	txn_pos=$(txn_postfix "$3")
	QUERY="$txn_pre $QUERY; $txn_pos"
}

mutate_set-quota-for-oidc-user() { ##? <provider_name> <quota_name> [--commit]: Set quota for OIDC users.
	handle_help "$@" <<-EOF
		Set quota for OIDC users.
	EOF

	read -r -d '' QUERY <<-EOF
		WITH qid AS (
		   SELECT id FROM quota WHERE name='$arg_quota_name'
		)
		DELETE FROM user_quota_association WHERE quota_id = ( SELECT id FROM qid );

		WITH qid AS (
			SELECT id FROM quota WHERE name='$arg_quota_name'
		), t AS (
			SELECT user_id, ( SELECT id FROM qid ), now(), now() FROM oidc_user_authnz_tokens WHERE provider='$arg_provider_name'
		)
		INSERT INTO user_quota_association (user_id, quota_id, create_time, update_time)
		SELECT * FROM t
	EOF

	txn_pre=$(txn_prefix "$arg_commit")
	txn_pos=$(txn_postfix "$arg_commit")
	QUERY="$txn_pre $QUERY; $txn_pos"
}
registered_subcommands="$registered_subcommands query"
_query_short_help="DB Queries"
_query_long_help="
	'query' can be exchanged with 'tsvquery' or 'csvquery' for tab- and comma-separated variants.
	In some cases 'iquery' is supported for InfluxDB compatible output.
	In all cases 'explainquery' will show you the query plan, in case you need to optimise or index data. 'explainjsonquery' is useful with PEV: http://tatiyants.com/pev/
"

query_latest-users() { ## : 40 recently registered users
	handle_help "$@" <<-EOF
		Returns 40 most recently registered users

		    $ gxadmin query latest-users
		     id |          create_time          | disk_usage | username |     email      |              groups               | active
		    ----+-------------------------------+------------+----------+----------------+-----------------------------------+--------
		      3 | 2019-03-07 13:06:37.945403+00 |            | beverly  | b@example.com  |                                   | t
		      2 | 2019-03-07 13:06:23.369201+00 | 826 bytes  | alice    | a@example.com  |                                   | t
		      1 | 2018-11-19 14:54:30.969713+00 | 869 MB     | helena   | hxr@local.host | training-asdf training-hogeschool | t
		    (3 rows)
	EOF

	username=$(gdpr_safe galaxy_user.username username)
	email=$(gdpr_safe galaxy_user.email email)

	read -r -d '' QUERY <<-EOF
		SELECT
			id,
			create_time AT TIME ZONE 'UTC' as create_time,
			pg_size_pretty(disk_usage) as disk_usage,
			$username,
			$email,
			array_to_string(ARRAY(
				select galaxy_group.name from galaxy_group where id in (
					select group_id from user_group_association where user_group_association.user_id = galaxy_user.id
				)
			), ' ') as groups,
			active
		FROM galaxy_user
		ORDER BY create_time desc
		LIMIT 40
	EOF
}

query_tool-usage() { ##? [weeks]: Counts of tool runs in the past weeks (default = all)
	handle_help "$@" <<-EOF
		    $ gxadmin tool-usage
		                                    tool_id                                 | count
		    ------------------------------------------------------------------------+--------
		     toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/1.1.0  | 958154
		     Grouping1                                                              | 638890
		     toolshed.g2.bx.psu.edu/repos/devteam/intersect/gops_intersect_1/1.0.0  | 326959
		     toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0      | 320236
		     addValue                                                               | 313470
		     toolshed.g2.bx.psu.edu/repos/devteam/join/gops_join_1/1.0.0            | 312735
		     upload1                                                                | 103595
		     toolshed.g2.bx.psu.edu/repos/rnateam/graphclust_nspdk/nspdk_sparse/9.2 |  52861
		     Filter1                                                                |  43253
	EOF

	where=
	if (( arg_weeks > 0 )); then
		where="WHERE j.create_time > (now() - '${arg_weeks} weeks'::interval)"
	fi

	fields="count=1"
	tags="tool_id=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			j.tool_id, count(*) AS count
		FROM job j
		$where
		GROUP BY j.tool_id
		ORDER BY count DESC
	EOF
}

query_tool-usage-over-time() { ##? [searchterm]: Counts of tool runs by month, filtered by a tool id search
	handle_help "$@" <<-EOF
		    $ gxadmin tool-usage-over-time
		                                    tool_id                                 | count
		    ------------------------------------------------------------------------+--------
		     toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/1.1.0  | 958154
		     Grouping1                                                              | 638890
		     toolshed.g2.bx.psu.edu/repos/devteam/intersect/gops_intersect_1/1.0.0  | 326959
		     toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0      | 320236
		     addValue                                                               | 313470
		     toolshed.g2.bx.psu.edu/repos/devteam/join/gops_join_1/1.0.0            | 312735
		     upload1                                                                | 103595
		     toolshed.g2.bx.psu.edu/repos/rnateam/graphclust_nspdk/nspdk_sparse/9.2 |  52861
		     Filter1                                                                |  43253
	EOF

	where=
	if [[ "$arg_searchterm" != "" ]]; then
		where="WHERE tool_id like '%$arg_searchterm%'"
	fi

	read -r -d '' QUERY <<-EOF
		WITH
			cte
				AS (
					SELECT
						date_trunc('month', create_time),
						tool_id
					FROM
						job
					$where
				)
		SELECT
			date_trunc, tool_id, count(*)
		FROM
			cte
		GROUP BY
			date_trunc, tool_id
		ORDER BY
			date_trunc ASC, count DESC
	EOF
}

query_tool-popularity() { ##? [months|24]: Most run tools by month (tool_predictions)
	handle_help "$@" <<-EOF
		See most popular tools by month

		    $ ./gxadmin query tool-popularity 1
		              tool_id          |   month    | count
		    ---------------------------+------------+-------
		     circos                    | 2019-02-01 |    20
		     upload1                   | 2019-02-01 |    12
		     require_format            | 2019-02-01 |     9
		     circos_gc_skew            | 2019-02-01 |     7
		     circos_wiggle_to_scatter  | 2019-02-01 |     3
		     test_history_sanitization | 2019-02-01 |     2
		     circos_interval_to_tile   | 2019-02-01 |     1
		     __SET_METADATA__          | 2019-02-01 |     1
		    (8 rows)
	EOF

	fields="count=2"
	tags="tool_id=0;month=1"

	read -r -d '' QUERY <<-EOF
		SELECT
			tool_id,
			date_trunc('month', create_time AT TIME ZONE 'UTC')::date as month,
			count(*)
		FROM job
		WHERE create_time > (now() AT TIME ZONE 'UTC' - '$arg_months months'::interval)
		GROUP BY tool_id, month
		ORDER BY month desc, count desc
	EOF
}

query_workflow-connections() { ##? [--all]: The connections of tools, from output to input, in the latest (or all) versions of user workflows (tool_predictions)
	handle_help "$@" <<-EOF
		This is used by the usegalaxy.eu tool prediction workflow, allowing for building models out of tool connections in workflows.

		    $ gxadmin query workflow-connections
		     wf_id |     wf_updated      | in_id |      in_tool      | in_tool_v | out_id |     out_tool      | out_tool_v | published | deleted | has_errors
		    -------+---------------------+-------+-------------------+-----------+--------+-------------------+----------------------------------------------
		         3 | 2013-02-07 16:48:00 |     5 | Grep1             | 1.0.1     |     12 |                   |            |    f      |    f    |    f
		         3 | 2013-02-07 16:48:00 |     6 | Cut1              | 1.0.1     |      7 | Remove beginning1 | 1.0.0      |    f      |    f    |    f
		         3 | 2013-02-07 16:48:00 |     7 | Remove beginning1 | 1.0.0     |      5 | Grep1             | 1.0.1      |    f      |    f    |    f
		         3 | 2013-02-07 16:48:00 |     8 | addValue          | 1.0.0     |      6 | Cut1              | 1.0.1      |    t      |    f    |    f
		         3 | 2013-02-07 16:48:00 |     9 | Cut1              | 1.0.1     |      7 | Remove beginning1 | 1.0.0      |    f      |    f    |    f
		         3 | 2013-02-07 16:48:00 |    10 | addValue          | 1.0.0     |     11 | Paste1            | 1.0.0      |    t      |    f    |    f
		         3 | 2013-02-07 16:48:00 |    11 | Paste1            | 1.0.0     |      9 | Cut1              | 1.0.1      |    f      |    f    |    f
		         3 | 2013-02-07 16:48:00 |    11 | Paste1            | 1.0.0     |      8 | addValue          | 1.0.0      |    t      |    t    |    f
		         4 | 2013-02-07 16:48:00 |    13 | cat1              | 1.0.0     |     18 | addValue          | 1.0.0      |    t      |    f    |    f
		         4 | 2013-02-07 16:48:00 |    13 | cat1              | 1.0.0     |     20 | Count1            | 1.0.0      |    t      |    t    |    f
	EOF

	read -r -d '' wf_filter <<-EOF
	WHERE
		workflow.id in (
			SELECT
			 workflow.id
			FROM
			 stored_workflow
			LEFT JOIN
			 workflow on stored_workflow.latest_workflow_id = workflow.id
		)
	EOF
	if [[ -n "$arg_all" ]]; then
		wf_filter=""
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			workflow.id as wf_id,
			workflow.update_time::DATE as wf_updated,
			ws_in.id as in_id,
			ws_in.tool_id as in_tool,
			ws_in.tool_version as in_tool_v,
			ws_out.id as out_id,
			ws_out.tool_id as out_tool,
			ws_out.tool_version as out_tool_v,
			sw.published as published,
			sw.deleted as deleted,
			workflow.has_errors as has_errors
		FROM workflow_step_connection wfc
		LEFT JOIN workflow_step ws_in ON ws_in.id = wfc.output_step_id
		LEFT JOIN workflow_step_input wsi ON wfc.input_step_input_id = wsi.id
		LEFT JOIN workflow_step ws_out ON ws_out.id = wsi.workflow_step_id
		LEFT JOIN workflow_output as wo ON wsi.workflow_step_id = wfc.output_step_id
		LEFT JOIN workflow on ws_in.workflow_id = workflow.id
		LEFT JOIN stored_workflow as sw on sw.latest_workflow_id = workflow.id
		$wf_filter
	EOF
}

query_history-connections() { ## : The connections of tools, from output to input, in histories (tool_predictions)
	handle_help "$@" <<-EOF
		This is used by the usegalaxy.eu tool prediction workflow, allowing for building models out of tool connections.
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			h.id AS h_id,
			h.update_time::DATE AS h_update,
			jtod.job_id AS in_id,
			j.tool_id AS in_tool,
			j.tool_version AS in_tool_v,
			jtid.job_id AS out_id,
			j2.tool_id AS out_tool,
			j2.tool_version AS out_ver
		FROM
			job AS j
			LEFT JOIN history AS h ON j.history_id = h.id
			LEFT JOIN job_to_output_dataset AS jtod ON j.id = jtod.job_id
			LEFT JOIN job_to_input_dataset AS jtid ON jtod.dataset_id = jtid.dataset_id
			LEFT JOIN job AS j2 ON jtid.job_id = j2.id
		WHERE
			jtid.job_id IS NOT NULL
	EOF
}

query_datasets-created-daily() { ## : The min/max/average/p95/p99 of total size of datasets created in a single day.
	handle_help "$@" <<-EOF
		    $ gxadmin query datasets-created-daily
		     min | quant_1st | median  |         mean          | quant_3rd |  perc_95  |  perc_99  |    max    |    sum     |    stddev
		    -----+-----------+---------+-----------------------+-----------+-----------+-----------+-----------+------------+---------------
		       2 |    303814 | 6812862 | 39653071.914285714286 |  30215616 | 177509882 | 415786146 | 533643009 | 1387857517 | 96920615.1745
		    (1 row)

		or more readably:

		    $ gxadmin query datasets-created-daily --human
		       min   | quant_1st | median  | mean  | quant_3rd | perc_95 | perc_99 |  max   |   sum   | stddev
		    ---------+-----------+---------+-------+-----------+---------+---------+--------+---------+--------
		     2 bytes | 297 kB    | 6653 kB | 38 MB | 29 MB     | 169 MB  | 397 MB  | 509 MB | 1324 MB | 92 MB
		    (1 row)
	EOF

	if [[ $1 == "--human" ]]; then
		summary="$(summary_statistics sum 1)"
	else
		summary="$(summary_statistics sum)"
	fi

	read -r -d '' QUERY <<-EOF
		WITH temp_queue_times AS
		(select
			date_trunc('day', create_time AT TIME ZONE 'UTC'),
			sum(coalesce(total_size, file_size))
		from dataset
		group by date_trunc
		order by date_trunc desc)
		select
			$summary
		from temp_queue_times
	EOF
}

query_largest-collection() { ## : Returns the size of the single largest collection
	handle_help "$@" <<-EOF
	EOF

	fields="count=0"
	tags=""

	read -r -d '' QUERY <<-EOF
		WITH temp_table_collection_count AS (
			SELECT count(*)
			FROM dataset_collection_element
			GROUP BY dataset_collection_id
			ORDER BY count desc
		)
		select max(count) as count from temp_table_collection_count
	EOF
}

query_queue-time() { ##? <tool_id>: The average/95%/99% a specific tool spends in queue state.
	handle_help "$@" <<-EOF
		    $ gxadmin query queue-time toolshed.g2.bx.psu.edu/repos/nilesh/rseqc/rseqc_geneBody_coverage/2.6.4.3
		           min       |     perc_95     |     perc_99     |       max
		    -----------------+-----------------+-----------------+-----------------
		     00:00:15.421457 | 00:00:55.022874 | 00:00:59.974171 | 00:01:01.211995
	EOF

	read -r -d '' QUERY <<-EOF
		WITH temp_queue_times AS
		(select
			min(a.create_time - b.create_time) as queue_time
		from
			job_state_history as a
		inner join
			job_state_history as b
		on
			(a.job_id = b.job_id)
		where
			a.job_id in (select id from job where tool_id like '%${arg_tool_id}%' and state = 'ok' and create_time > (now() AT TIME ZONE 'UTC' - '3 months'::interval))
			and a.state = 'running'
			and b.state = 'queued'
		group by
			a.job_id
		order by
			queue_time desc
		)
		select
			min(queue_time),
			percentile_cont(0.95) WITHIN GROUP (ORDER BY queue_time) as perc_95,
			percentile_cont(0.99) WITHIN GROUP (ORDER BY queue_time) as perc_99,
			max(queue_time)
		from temp_queue_times
	EOF
}

query_queue() { ## [--by (tool|destination|user)]: Brief overview of currently running jobs grouped by tool (default) or other columns
	handle_help "$@" <<-EOF
		    $ gxadmin query queue
		                                tool_id                                |  state  | count
		    -------------------------------------------------------------------+---------+-------
		     toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0      | queued  |     9
		     toolshed.g2.bx.psu.edu/repos/iuc/dexseq/dexseq_count/1.24.0.0     | running |     7
		     toolshed.g2.bx.psu.edu/repos/nml/spades/spades/1.2                | queued  |     6
		     ebi_sra_main                                                      | running |     6
		     toolshed.g2.bx.psu.edu/repos/iuc/trinity/trinity/2.8.3            | queued  |     5
		     toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.3.4.2      | running |     5
		     toolshed.g2.bx.psu.edu/repos/nml/spades/spades/3.11.1+galaxy1     | queued  |     4
		     toolshed.g2.bx.psu.edu/repos/iuc/mothur_venn/mothur_venn/1.36.1.0 | running |     2
		     toolshed.g2.bx.psu.edu/repos/nml/metaspades/metaspades/3.9.0      | running |     2
		     upload1                                                           | running |     2

		    $ gxadmin query queue --by destination

		     destination_id |  state  | job_count
		    ----------------+---------+-----------
		     normal         | running |       128
		     multicore      | running |        64
		     multicore      | queued  |        16

		    $ gxadmin iquery queue --by destination
		    queue-summary-by-destination,state=running,destination_id=normal count=128
		    queue-summary-by-destination,state=running,destination_id=multicore count=64
		    queue-summary-by-destination,state=queued,destination_id=multicore count=16
	EOF

	fields="count=2"
	tags="tool_id=0;state=1"
	column="tool_id"
	title="tool"

	if [[ "$1" == "--by" ]]; then
		if [[ "$2" == "user" ]]; then
			tags="user_id=0;state=1"
			column="user_id"
			title="user"
			query_name="queue_by_user"
		elif [[ "$2" == "destination" ]]; then
			tags="destination_id=0;state=1"
			column="destination_id"
			title="destination"
			query_name="queue_by_destination"
		elif [[ "$2" == "tool" ]]; then
			query_name="queue_by_tool"
			# nothing else needed
		else
			error "Unknown attribute"
			exit 1
		fi
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			${column}, state, count(${column}) as ${title}_count
		FROM
			job
		WHERE
			state in ('queued', 'running')
		GROUP BY
			${column}, state
		ORDER BY
			${title}_count desc
	EOF
}

query_queue-overview() { ##? [--short-tool-id]: View used mostly for monitoring
	handle_help "$@" <<-EOF
		Primarily for monitoring of queue. Optimally used with 'iquery' and passed to Telegraf.

		    $ gxadmin iquery queue-overview
		    queue-overview,tool_id=upload1,tool_version=0.0.1,state=running,handler=main.web.1,destination_id=condor,job_runner_name=condor,user=1 count=1

	EOF

	# Use full tool id by default
	tool_id="tool_id"
	if [[ -n "$arg_short_tool_id" ]]; then
		tool_id="regexp_replace(tool_id, '.*toolshed.*/repos/', '')"
	fi

	# Include by default
	if [ -z "$GDPR_MODE"  ]; then
		user_id='user_id'
	else
		user_id="'0'"
	fi

	fields="count=6"
	tags="tool_id=0;tool_version=1;destination_id=2;handler=3;state=4;job_runner_name=5;user_id=7"

	read -r -d '' QUERY <<-EOF
		WITH queue AS (
			SELECT
				regexp_replace($tool_id, '/[0-9.a-z+-]+$', '')::TEXT AS tool_id,
				tool_version::TEXT,
				COALESCE(destination_id, 'unknown')::TEXT AS destination_id,
				COALESCE(handler, 'unknown')::TEXT AS handler,
				state::TEXT,
				COALESCE(job_runner_name, 'unknown')::TEXT AS job_runner_name,
				count(*) AS count,
				$user_id::TEXT AS user_id
			FROM
				job
			WHERE
				state = 'running' OR state = 'queued' OR state = 'new'
			GROUP BY
				tool_id, tool_version, destination_id, handler, state, job_runner_name, user_id
		)
		SELECT
			tool_id, tool_version, destination_id, handler, state, job_runner_name, sum(count), user_id
		FROM
			queue
		GROUP BY
			tool_id, tool_version, destination_id, handler, state, job_runner_name, user_id

	EOF
}

query_queue-details() {
	query_queue-detail $@
}

query_queue-detail() { ##? [--all] [--seconds] [--since-update]: Detailed overview of running and queued jobs
	handle_help "$@" <<-EOF
		    $ gxadmin query queue-detail
		      state  |   id    |  extid  |                                 tool_id                                   | username | time_since_creation
		    ---------+---------+---------+---------------------------------------------------------------------------+----------+---------------------
		     running | 4360629 | 229333  | toolshed.g2.bx.psu.edu/repos/bgruening/infernal/infernal_cmsearch/1.1.2.0 | xxxx     | 5 days 11:00:00
		     running | 4362676 | 230237  | toolshed.g2.bx.psu.edu/repos/iuc/mothur_venn/mothur_venn/1.36.1.0         | xxxx     | 4 days 18:00:00
		     running | 4364499 | 231055  | toolshed.g2.bx.psu.edu/repos/iuc/mothur_venn/mothur_venn/1.36.1.0         | xxxx     | 4 days 05:00:00
		     running | 4366604 | 5183013 | toolshed.g2.bx.psu.edu/repos/iuc/dexseq/dexseq_count/1.24.0.0             | xxxx     | 3 days 20:00:00
		     running | 4366605 | 5183016 | toolshed.g2.bx.psu.edu/repos/iuc/dexseq/dexseq_count/1.24.0.0             | xxxx     | 3 days 20:00:00
		     queued  | 4350274 | 225743  | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0              | xxxx     | 9 days 05:00:00
		     queued  | 4353435 | 227038  | toolshed.g2.bx.psu.edu/repos/iuc/trinity/trinity/2.8.3                    | xxxx     | 8 days 08:00:00
		     queued  | 4361914 | 229712  | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0              | xxxx     | 5 days -01:00:00
		     queued  | 4361812 | 229696  | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.6.0              | xxxx     | 5 days -01:00:00
		     queued  | 4361939 | 229728  | toolshed.g2.bx.psu.edu/repos/nml/spades/spades/1.2                        | xxxx     | 4 days 21:00:00
		     queued  | 4361941 | 229731  | toolshed.g2.bx.psu.edu/repos/nml/spades/spades/1.2                        | xxxx     | 4 days 21:00:00
	EOF

	fields="count=9"
	tags="state=0;id=1;extid=2;tool_id=3;username=4;handler=6;job_runner_name=7;destination_id=8"

	d=""
	nonpretty="("
	time_column="job.create_time"
	time_column_name="time_since_creation"

	if [[ -n "$arg_all" ]]; then
		d=", 'new'"
	fi
	if [[ -n "$arg_seconds" ]]; then
		fields="$fields;time_since_creation=5"
		nonpretty="EXTRACT(EPOCH FROM "
	fi
	if [[ -n "$arg_since_update" ]]; then
		time_column="job.update_time"
		time_column_name="time_since_update"
	fi

	username=$(gdpr_safe galaxy_user.username username "Anonymous User")

	read -r -d '' QUERY <<-EOF
		SELECT
			job.state,
			job.id,
			job.job_runner_external_id as extid,
			job.tool_id,
			$username,
			$nonpretty now() AT TIME ZONE 'UTC' - $time_column) as $time_column_name,
			job.handler,
			job.job_runner_name,
			COALESCE(job.destination_id, 'none') as destination_id,
			1 as count
		FROM job
		FULL OUTER JOIN galaxy_user ON job.user_id = galaxy_user.id
		WHERE
			state in ('running', 'queued'$d)
		ORDER BY
			state desc,
			$time_column_name desc
	EOF
}

query_runtime-per-user() { ##? <email>: computation time of user (by email)
	handle_help "$@" <<-EOF
		    $ gxadmin query runtime-per-user hxr@informatik.uni-freiburg.de
		       sum
		    ----------
		     14:07:39
	EOF

	read -r -d '' QUERY <<-EOF
			SELECT sum((metric_value || ' second')::interval)
			FROM job_metric_numeric
			WHERE job_id in (
				SELECT id
				FROM job
				WHERE user_id in (
					SELECT id
					FROM galaxy_user
					where email = '$arg_email'
				)
			) AND metric_name = 'runtime_seconds'
	EOF
}

query_jobs-nonterminal() { ## [--states=new,queued,running] [--update-time] [--older-than=<interval>] [username|id|email]: Job info of nonterminal jobs separated by user
	handle_help "$@" <<-EOF
		You can request the user information by username, id, and user email

		    $ gxadmin query jobs-nonterminal helena-rasche
		       id    | tool_id             |  state  |        create_time         | runner | ext_id |     handler     | user_id
		    ---------+---------------------+---------+----------------------------+--------+--------+-----------------+---------
		     4760549 | featurecounts/1.6.3 | running | 2019-01-18 14:05:14.871711 | condor | 197549 | handler_main_7  | 599
		     4760552 | featurecounts/1.6.3 | running | 2019-01-18 14:05:16.205867 | condor | 197552 | handler_main_7  | 599
		     4760554 | featurecounts/1.6.3 | running | 2019-01-18 14:05:17.170157 | condor | 197580 | handler_main_8  | 599
		     4760557 | featurecounts/1.6.3 | running | 2019-01-18 14:05:18.25044  | condor | 197545 | handler_main_10 | 599
		     4760573 | featurecounts/1.6.3 | running | 2019-01-18 14:05:47.20392  | condor | 197553 | handler_main_2  | 599
		     4760984 | deseq2/2.11.40.4    | new     | 2019-01-18 14:56:37.700714 |        |        | handler_main_1  | 599
		     4766092 | deseq2/2.11.40.4    | new     | 2019-01-21 07:24:16.232376 |        |        | handler_main_5  | 599
		     4811598 | cuffnorm/2.2.1.2    | running | 2019-02-01 13:08:30.400016 | condor | 248432 | handler_main_0  | 599
		    (8 rows)

		You can also query all non-terminal jobs by all users

		    $ gxadmin query jobs-nonterminal | head
		       id    |  tool_id            |  state  |        create_time         | runner | ext_id |     handler     | user_id
		    ---------+---------------------+---------+----------------------------+--------+--------+-----------------+---------
		     4760549 | featurecounts/1.6.3 | running | 2019-01-18 14:05:14.871711 | condor | 197549 | handler_main_7  |     599
		     4760552 | featurecounts/1.6.3 | running | 2019-01-18 14:05:16.205867 | condor | 197552 | handler_main_7  |     599
		     4760554 | featurecounts/1.6.3 | running | 2019-01-18 14:05:17.170157 | condor | 197580 | handler_main_8  |     599
		     4760557 | featurecounts/1.6.3 | running | 2019-01-18 14:05:18.25044  | condor | 197545 | handler_main_10 |     599
		     4760573 | featurecounts/1.6.3 | running | 2019-01-18 14:05:47.20392  | condor | 197553 | handler_main_2  |     599
		     4760588 | featurecounts/1.6.3 | new     | 2019-01-18 14:11:03.766558 |        |        | handler_main_9  |      11
		     4760589 | featurecounts/1.6.3 | new     | 2019-01-18 14:11:05.895232 |        |        | handler_main_1  |      11
		     4760590 | featurecounts/1.6.3 | new     | 2019-01-18 14:11:07.328533 |        |        | handler_main_2  |      11

		By default jobs in the states 'new', 'queued', and 'running' are considered non-terminal, but this can
		be controlled by passing a comma-separated list to the '--states=' parameter. In addition, by default,
		all non-terminal jobs are displayed, but you can limit this to only jobs created or updated before a
		certain time with '--older-than='. This option takes a value in the PostgreSQL date/time interval
		format, see documentation: https://www.postgresql.org/docs/current/functions-datetime.html

		Be sure to quote intervals containing spaces. Finally, by default, the column returned (and filtered
		with in the case of '--older-than=') is 'job.create_time', but this can be changed to 'job.update_time'
		with '--update-time'. So to return all queued and running jobs that have not been updated in the past 2
		days:

		    $ gxadmin query jobs-nonterminal --states=queued,running --older-than='2 days' --update-time | head -5
		       id   |       tool_id        |  state  |     update_time     |     runner   | ext_id |      handler     | user_id
		    --------+----------------------+---------+---------------------+--------------+--------+------------------+---------
		     335897 | trinity/2.9.1        | queued  | 2021-03-10 10:44:09 | bridges      | 335897 | main_w3_handler2 | 599
		     338554 | repeatmasker/4.0.9   | running | 2021-03-09 10:41:30 | jetstream_iu | 338554 | main_w4_handler2 | 11
		     338699 | hisat2/2.1.0+galaxy7 | queued  | 2021-03-10 05:36:26 | jetstream_iu | 338699 | main_w3_handler2 | 42
	EOF

	states='new,queued,running'
	interval=
	user_filter='true'
	time_column='create_time'

	if (( $# > 0 )); then
		for args in "$@"; do
			if [ "$args" = '--update-time' ]; then
				time_column='update_time'
			elif [ "${args:0:9}" = '--states=' ]; then
				states="${args:9}"
			elif [ "${args:0:13}" = '--older-than=' ]; then
				interval="${args:13}"
			elif [ "${args:0:2}" != '==' ]; then
				user_filter=$(get_user_filter "$1")
			fi
		done
	fi

	states="'$(echo "$states" | sed "s/,/', '/g")'"

	if [ -n "$interval" ]; then
		interval="AND job.$time_column < NOW() - interval '$interval'"
	fi

	user_id=$(gdpr_safe job.user_id user_id "anon")

	read -r -d '' QUERY <<-EOF
		SELECT
			job.id, job.tool_id, job.state, job.$time_column AT TIME ZONE 'UTC' AS $time_column, job.job_runner_name, job.job_runner_external_id, job.handler, $user_id
		FROM
			job
		LEFT OUTER JOIN
			galaxy_user ON job.user_id = galaxy_user.id
		WHERE
			$user_filter AND job.state IN ($states) $interval
		ORDER BY job.id ASC
	EOF
}

query_jobs-per-user() { ##? <user>: Number of jobs run by a specific user
	handle_help "$@" <<-EOF
		    $ gxadmin query jobs-per-user helena
		     count | user_id
		    -------+---------
		       999 |       1
		    (1 row)
	EOF

	user_filter=$(get_user_filter "$arg_user")

	read -r -d '' QUERY <<-EOF
			SELECT count(*), user_id
			FROM job
			WHERE user_id in (
				SELECT id
				FROM galaxy_user
				WHERE $user_filter
			)
			GROUP BY user_id
	EOF
}

query_recent-jobs() { ##? <hours>: Jobs run in the past <hours> (in any state)
	handle_help "$@" <<-EOF
		    $ gxadmin query recent-jobs 2.1
		       id    |     create_time     |      tool_id          | state |    username
		    ---------+---------------------+-----------------------+-------+-----------------
		     4383997 | 2018-10-05 16:07:00 | Filter1               | ok    |
		     4383994 | 2018-10-05 16:04:00 | echo_main_condor      | ok    |
		     4383993 | 2018-10-05 16:04:00 | echo_main_drmaa       | error |
		     4383992 | 2018-10-05 16:04:00 | echo_main_handler11   | ok    |
		     4383983 | 2018-10-05 16:04:00 | echo_main_handler2    | ok    |
		     4383982 | 2018-10-05 16:04:00 | echo_main_handler1    | ok    |
		     4383981 | 2018-10-05 16:04:00 | echo_main_handler0    | ok    |
	EOF

	username=$(gdpr_safe galaxy_user.username username)

	read -r -d '' QUERY <<-EOF
		SELECT
			job.id,
			job.create_time AT TIME ZONE 'UTC' as create_time,
			job.tool_id,
			job.state, $username
		FROM job, galaxy_user
		WHERE job.create_time > (now() AT TIME ZONE 'UTC' - '$arg_hours hours'::interval) AND job.user_id = galaxy_user.id
		ORDER BY id desc
	EOF
}

query_training-list() { ##? [--all]: List known trainings
	handle_help "$@" <<-EOF
		This module is specific to EU's implementation of Training Infrastructure as a Service. But this specifically just checks for all groups with the name prefix 'training-'

		    $ gxadmin query training-list
		        name    |  created
		    ------------+------------
		     hogeschool | 2020-01-22
		     asdf       | 2019-08-28
		    (2 rows)

	EOF

	d1=""
	d2="AND deleted = false"
	if [[ -n "$arg_all" ]]; then
		d1=", deleted"
		d2=""
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			substring(name from 10) as name,
			date_trunc('day', create_time AT TIME ZONE 'UTC')::date as created
			$d1
		FROM galaxy_group
		WHERE name like 'training-%' $d2
		ORDER BY create_time DESC
	EOF
}

query_training-members() { ##? <tr_id>: List users in a specific training
	handle_help "$@" <<-EOF
		    $ gxadmin query training-members hts2018
		          username      |       joined
		    --------------------+---------------------
		     helena-rasche      | 2018-09-21 21:42:01
	EOF

	# Remove training- if they used it.
	ww=$(echo "$arg_tr_id" | sed 's/^training-//g')
	username=$(gdpr_safe galaxy_user.username username)

	read -r -d '' QUERY <<-EOF
			SELECT DISTINCT ON ($username)
				$username,
				date_trunc('second', user_group_association.create_time AT TIME ZONE 'UTC') as joined
			FROM galaxy_user, user_group_association, galaxy_group
			WHERE galaxy_group.name = 'training-$ww'
				AND galaxy_group.id = user_group_association.group_id
				AND user_group_association.user_id = galaxy_user.id
	EOF
}

query_training-members-remove() { ##? <training> <username> [--yesdoit]: Remove a user from a training
	handle_help "$@" <<-EOF
	EOF
	# TODO: Move to mutate

	# Remove training- if they used it.
	ww=$(echo "$arg_training" | sed 's/^training-//g')

	if [[ -n $arg_yesdoit ]]; then
		results="$(query_tsv "$qstr")"
		uga_id=$(echo "$results" | awk -F'\t' '{print $1}')
		if (( uga_id > -1 )); then
			qstr="delete from user_group_association where id = $uga_id"
		fi
		echo "$qstr"
	else
		read -r -d '' QUERY <<-EOF
			SELECT
				user_group_association.id,
				galaxy_user.username,
				galaxy_group.name
			FROM
				user_group_association
			LEFT JOIN galaxy_user ON user_group_association.user_id = galaxy_user.id
			LEFT JOIN galaxy_group ON galaxy_group.id = user_group_association.group_id
			WHERE
				galaxy_group.name = 'training-$ww'
				AND galaxy_user.username = '$arg_username'
		EOF
	fi
}

query_largest-histories() { ##? [--human]: Largest histories in Galaxy
	handle_help "$@" <<-EOF
		Finds all histories and print by decreasing size

		    $ gxadmin query largest-histories
		     total_size | id | substring  | username
		    ------------+----+------------+----------
		       17215831 |  6 | Unnamed hi | helena
		          45433 |  8 | Unnamed hi | helena
		          42846 |  9 | Unnamed hi | helena
		           1508 | 10 | Circos     | helena
		            365 |  2 | Tag Testin | helena
		            158 | 44 | test       | helena
		             16 | 45 | Unnamed hi | alice

		Or you can supply the --human flag, but this should not be used with iquery/InfluxDB

		    $ gxadmin query largest-histories --human
		     total_size | id | substring  | userna
		    ------------+----+------------+-------
		     16 MB      |  6 | Unnamed hi | helena
		     44 kB      |  8 | Unnamed hi | helena
		     42 kB      |  9 | Unnamed hi | helena
		     1508 bytes | 10 | Circos     | helena
		     365 bytes  |  2 | Tag Testin | helena
		     158 bytes  | 44 | test       | helena
		     16 bytes   | 45 | Unnamed hi | alice
	EOF

	username=$(gdpr_safe galaxy_user.username username)

	fields="size=0"
	tags="id=1;name=2;username=3"

	total_size="sum(coalesce(dataset.total_size, dataset.file_size, 0)) as total_size"
	if [[ -n "$arg_human" ]]; then
		total_size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0))) as total_size"
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			$total_size,
			history.id,
			substring(history.name, 1, 10),
			$username
		FROM
			dataset
			JOIN history_dataset_association on dataset.id = history_dataset_association.dataset_id
			JOIN history on history_dataset_association.history_id = history.id
			JOIN galaxy_user on history.user_id = galaxy_user.id
		GROUP BY history.id, history.name, history.user_id, galaxy_user.username
		ORDER BY sum(coalesce(dataset.total_size, dataset.file_size, 0)) DESC
	EOF
}

query_training-queue() { ##? <training_id>: Jobs currently being run by people in a given training
	handle_help "$@" <<-EOF
		Finds all jobs by people in that queue (including things they are executing that are not part of a training)

		    $ gxadmin query training-queue hts2018
		     state  |   id    | extid  | tool_id |   username    |       created
		    --------+---------+--------+---------+---------------+---------------------
		     queued | 4350274 | 225743 | upload1 |               | 2018-09-26 10:00:00
	EOF

	# Remove training- if they used it.
	ww=$(echo "$arg_training_id" | sed 's/^training-//g')

	username=$(gdpr_safe galaxy_user.username username)

	read -r -d '' QUERY <<-EOF
			SELECT
				job.state,
				job.id,
				job.job_runner_external_id AS extid,
				job.tool_id,
				$username,
				job.create_time AT TIME ZONE 'UTC' AS created
			FROM
				job, galaxy_user
			WHERE
				job.user_id = galaxy_user.id
				AND job.create_time > (now() AT TIME ZONE 'UTC' - '3 hours'::interval)
				AND galaxy_user.id
					IN (
							SELECT
								galaxy_user.id
							FROM
								galaxy_user, user_group_association, galaxy_group
							WHERE
								galaxy_group.name = 'training-$ww'
								AND galaxy_group.id = user_group_association.group_id
								AND user_group_association.user_id = galaxy_user.id
						)
			ORDER BY
				job.create_time ASC
	EOF
}

query_disk-usage() { ##? [--human]: Disk usage per object store.
	handle_help "$@" <<-EOF
		Query the different object stores referenced in your Galaxy database

		    $ gxadmin query disk-usage
		     object_store_id |    sum
		    -----------------+------------
		                     | 1387857517
		    (1 row)

		Or you can supply the --human flag, but this should not be used with iquery/InfluxDB

		    $ gxadmin query disk-usage --human
		     object_store_id |    sum
		    -----------------+------------
		                     | 1324 MB
		    (1 row)
	EOF

	fields="count=1"
	tags="object_store_id=0"

	size="sum(coalesce(dataset.total_size, dataset.file_size, 0))"
	if [[ -n "$arg_human" ]]; then
		size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0))) as sum"
	fi

	read -r -d '' QUERY <<-EOF
			SELECT
				object_store_id, $size
			FROM dataset
			WHERE NOT purged
			GROUP BY object_store_id
			ORDER BY sum(coalesce(dataset.total_size, dataset.file_size, 0)) DESC
	EOF
}

query_users-count() { ## : Shows sums of active/external/deleted/purged accounts
	handle_help "$@" <<-EOF
		     active | external | deleted | purged | count
		    --------+----------+---------+--------+-------
		     f      | f        | f       | f      |   182
		     t      | f        | t       | t      |     2
		     t      | f        | t       | f      |     6
		     t      | f        | f       | f      |  2350
		     f      | f        | t       | t      |    36
	EOF

	fields="count=4"
	tags="active=0;external=1;deleted=2;purged=3"

	read -r -d '' QUERY <<-EOF
			SELECT
				active, external, deleted, purged, count(*) as count
			FROM
				galaxy_user
			GROUP BY
				active, external, deleted, purged
	EOF
}

query_tool-last-used-date() { ## : When was the most recent invocation of every tool
	handle_help "$@" <<-EOF
		Example invocation:

		    $ gxadmin query tool-last-used-date
		             max         |          tool_id
		    ---------------------+---------------------------
		     2019-02-01 00:00:00 | test_history_sanitization
		     2018-12-01 00:00:00 | require_format
		     2018-11-01 00:00:00 | upload1
		    (3 rows)

		**WARNING**

		!> It is not truly every tool, there is no easy way to find the tools which have never been run.
	EOF

	read -r -d '' QUERY <<-EOF
		select max(date_trunc('month', create_time AT TIME ZONE 'UTC')), tool_id from job group by tool_id order by max desc
	EOF
}

query_users-total() { ## : Total number of Galaxy users (incl deleted, purged, inactive)
	handle_help "$@" <<-EOF
	EOF

	fields="count=0"
	tags=""

	read -r -d '' QUERY <<-EOF
			SELECT count(*) FROM galaxy_user
	EOF
}

query_groups-list() { ## : List all groups known to Galaxy
	handle_help "$@" <<-EOF
	EOF

	fields="count=1"
	tags="group_name=0"

	read -r -d '' QUERY <<-EOF
			SELECT
				galaxy_group.name, count(*)
			FROM
				galaxy_group, user_group_association
			WHERE
				user_group_association.group_id = galaxy_group.id
			GROUP BY name
	EOF
}

query_collection-usage() { ## : Information about how many collections of various types are used
	handle_help "$@" <<-EOF
	EOF

	fields="count=1"
	tags="group_name=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			dc.collection_type, count(*)
		FROM
			history_dataset_collection_association as hdca
		INNER JOIN
			dataset_collection as dc
			ON hdca.collection_id = dc.id
		GROUP BY
			dc.collection_type
	EOF
}

query_ts-repos() { ## : Counts of toolshed repositories by toolshed and owner.
	handle_help "$@" <<-EOF
	EOF

	fields="count=2"
	tags="tool_shed=0;owner=1"

	read -r -d '' QUERY <<-EOF
			SELECT
				tool_shed, owner, count(*)
			FROM
				tool_shed_repository
			GROUP BY
				tool_shed, owner
	EOF
}

query_tool-metrics() { ##? <tool_id> <metric_id> [--like]: See values of a specific metric
	handle_help "$@" <<-EOF
		A good way to use this is to fetch the memory usage of a tool and then
		do some aggregations. The following requires [data_hacks](https://github.com/bitly/data_hacks)

		    $ gxadmin tsvquery tool-metrics %rgrnastar/rna_star% memory.max_usage_in_bytes --like | \\
		        awk '{print \$1 / 1024 / 1024 / 1024}' | \\
		        histogram.py --percentage
		    # NumSamples = 441; Min = 2.83; Max = 105.88
		    # Mean = 45.735302; Variance = 422.952289; SD = 20.565804; Median 51.090900
		    # each ∎ represents a count of 1
		        2.8277 -    13.1324 [    15]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (3.40%)
		       13.1324 -    23.4372 [    78]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (17.69%)
		       23.4372 -    33.7419 [    47]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (10.66%)
		       33.7419 -    44.0466 [    31]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (7.03%)
		       44.0466 -    54.3514 [    98]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (22.22%)
		       54.3514 -    64.6561 [   102]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (23.13%)
		       64.6561 -    74.9608 [    55]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (12.47%)
		       74.9608 -    85.2655 [    11]: ∎∎∎∎∎∎∎∎∎∎∎ (2.49%)
		       85.2655 -    95.5703 [     3]: ∎∎∎ (0.68%)
		       95.5703 -   105.8750 [     1]: ∎ (0.23%)
	EOF

	tool_subquery="SELECT id FROM job WHERE tool_id = '$arg_tool_id'"
	if [[ -n "$arg_like" ]]; then
		tool_subquery="SELECT id FROM job WHERE tool_id like '$arg_tool_id'"
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			metric_value
		FROM job_metric_numeric
		WHERE
			metric_name = '$arg_metric_id'
			and
			job_id in (
				$tool_subquery
			)
	EOF
}

query_tool-available-metrics() { ##? <tool_id>: list all available metrics for a given tool
	handle_help "$@" <<-EOF
		Gives a list of available metrics, which can then be used to query.

		    $ gxadmin query tool-available-metrics upload1
		                 metric_name
		    -------------------------------------
		     memory.stat.total_rss
		     memory.stat.total_swap
		     memory.stat.total_unevictable
		     memory.use_hierarchy
		     ...
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			distinct metric_name
		FROM job_metric_numeric
		WHERE job_id in (
			SELECT id FROM job WHERE tool_id = '$arg_tool_id'
		)
		ORDER BY metric_name asc
	EOF
}

query_tool-memory-per-inputs() { ##? <tool_id> [--like]: See memory usage and inout size data
	handle_help "$@" <<-EOF
		Display details about tool input counts and sizes along with memory usage and the relation between them,
		to aid in determining appropriate memory allocations for tools.

		    $ gxadmin query tool-memory-per-inputs %/unicycler/% --like
		        id    |                           tool_id                            | input_count | total_input_size_mb | mean_input_size_mb | median_input_size_mb | memory_used_mb | memory_used_per_input_mb | memory_mean_input_ratio | memory_median_input_ratio
		    ----------+--------------------------------------------------------------+-------------+---------------------+--------------------+----------------------+----------------+--------------------------+-------------------------+---------------------------
		     34663027 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 |           2 |                 245 |                122 |                  122 |           4645 |                       19 |                      38 |                        38
		     34657045 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 |           2 |                  51 |                 25 |                   25 |           1739 |                       34 |                      68 |                        68
		     34655863 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 |           2 |                1829 |                915 |                  915 |          20635 |                       11 |                      23 |                        23
		     34650581 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 |           3 |                 235 |                 78 |                  112 |          30550 |                      130 |                     391 |                       274
		     34629187 | toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0 |           2 |                2411 |               1206 |                 1206 |          50018 |                       21 |                      41 |                        41

		A good way to use this is to fetch the data and then do some aggregations. The following requires
		[data_hacks](https://github.com/bitly/data_hacks):

		    $ gxadmin tsvquery tool-memory-per-inputs %/unicycler/% --like | \\
		        awk '{print \$10}' | histogram.py --percentage --max=256
		    # NumSamples = 870; Min = 4.00; Max = 256.00
		    # 29 values outside of min/max
		    # Mean = 67.804598; Variance = 15461.789404; SD = 124.345444; Median 37.000000
		    # each ∎ represents a count of 4
		        4.0000 -    29.2000 [   368]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (42.30%)
		       29.2000 -    54.4000 [   226]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (25.98%)
		       54.4000 -    79.6000 [   133]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ (15.29%)
		       79.6000 -   104.8000 [    45]: ∎∎∎∎∎∎∎∎∎∎∎ (5.17%)
		      104.8000 -   130.0000 [    28]: ∎∎∎∎∎∎∎ (3.22%)
		      130.0000 -   155.2000 [    12]: ∎∎∎ (1.38%)
		      155.2000 -   180.4000 [     9]: ∎∎ (1.03%)
		      180.4000 -   205.6000 [     6]: ∎ (0.69%)
		      205.6000 -   230.8000 [    10]: ∎∎ (1.15%)
		      230.8000 -   256.0000 [     4]: ∎ (0.46%)
	EOF

	tool_clause="j.tool_id = '$arg_tool_id'"
	if [[ -n "$arg_like" ]]; then
		tool_clause="j.tool_id like '$arg_tool_id'"
	fi

	read -r -d '' QUERY <<-EOF
		WITH job_cte AS (
			SELECT
				j.id,
				j.tool_id
			FROM
				job j
			WHERE
				$tool_clause
				AND
					j.state = 'ok'
		),
		mem_cte AS (
			SELECT
				j.id,
				jmn.metric_value AS memory_used
			FROM
				job_cte j
			JOIN
				job_metric_numeric jmn ON j.id = jmn.job_id
			WHERE
				jmn.plugin = 'cgroup'
				AND
					jmn.metric_name = 'memory.memsw.max_usage_in_bytes'
		),
		data_cte AS (
			SELECT
				j.id,
				count(jtid.id) AS input_count,
				sum(d.total_size) AS total_input_size,
				avg(d.total_size) AS mean_input_size,
				percentile_cont(0.5) WITHIN GROUP (ORDER BY d.total_size) AS median_input_size
			FROM
				job_cte j
			JOIN
				job_to_input_dataset jtid ON j.id = jtid.job_id
			JOIN
				history_dataset_association hda ON jtid.dataset_id = hda.id
			JOIN
				dataset d ON hda.dataset_id = d.id
			GROUP BY
				j.id
		)
		SELECT
			j.*,
			d.input_count,
			(d.total_input_size/1024/1024)::bigint AS total_input_size_mb,
			(d.mean_input_size/1024/1024)::bigint AS mean_input_size_mb,
			(d.median_input_size/1024/1024)::bigint AS median_input_size_mb,
			(m.memory_used/1024/1024)::bigint AS memory_used_mb,
			(m.memory_used/d.total_input_size)::bigint AS memory_used_per_input_mb,
			(m.memory_used/d.mean_input_size)::bigint AS memory_mean_input_ratio,
			(m.memory_used/d.median_input_size)::bigint AS memory_median_input_ratio
		FROM
			job_cte j
		JOIN
			mem_cte m on j.id = m.id
		JOIN
			data_cte d on j.id = d.id
		ORDER BY
			j.id DESC
	EOF
}

query_monthly-cpu-stats() { ##? [year] : CPU years/hours allocated to tools by month
	handle_help "$@" <<-EOF
		This uses the galaxy_slots and runtime_seconds metrics in order to
		calculate allocated CPU years/hours. This will not be the value of what is
		actually consumed by your jobs, you should use cgroups.

		    $ gxadmin query monthly-cpu-stats
		       month    | cpu_years | cpu_hours
		    ------------+-----------+-----------
		     2020-05-01 |     53.55 | 469088.02
		     2020-04-01 |     59.55 | 521642.60
		     2020-03-01 |     57.04 | 499658.86
		     2020-02-01 |     53.93 | 472390.31
		     2020-01-01 |     56.49 | 494887.37
		     ...
	EOF

	if [ ! -z $arg_year ] && date -d "$arg_year" >/dev/null
	then
	    filter_by_year="date_trunc('year', job.create_time AT TIME ZONE 'UTC') = '$arg_year-01-01'::date"
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			date_trunc('month', job.create_time  AT TIME ZONE 'UTC')::date as month,
			round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365 ), 2) as cpu_years,
			round(sum((a.metric_value * b.metric_value) / 3600 ), 2) as cpu_hours
		FROM
			job_metric_numeric a,
			job_metric_numeric b,
			job
		WHERE
			b.job_id = a.job_id
			AND a.job_id = job.id
			AND a.metric_name = 'runtime_seconds'
			AND b.metric_name = 'galaxy_slots'
			$filter_by_year
		GROUP BY month
		ORDER BY month DESC
	EOF
}

query_monthly-cpu-years() { ## : CPU years allocated to tools by month
	handle_help "$@" <<-EOF
		This uses the galaxy_slots and runtime_seconds metrics in order to
		calculate allocated CPU years. This will not be the value of what is
		actually consumed by your jobs, you should use cgroups.

		    $ gxadmin query monthly-cpu-years
		        month   | cpu_years
		    ------------+-----------
		     2019-04-01 |      2.95
		     2019-03-01 |     12.38
		     2019-02-01 |     11.47
		     2019-01-01 |      8.27
		     2018-12-01 |     11.42
		     2018-11-01 |     16.99
		     2018-10-01 |     12.09
		     2018-09-01 |      6.27
		     2018-08-01 |      9.06
		     2018-07-01 |      6.17
		     2018-06-01 |      5.73
		     2018-05-01 |      7.36
		     2018-04-01 |     10.21
		     2018-03-01 |      5.20
		     2018-02-01 |      4.53
		     2018-01-01 |      4.05
		     2017-12-01 |      2.44
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			date_trunc('month', job.create_time)::date as month,
			round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) as cpu_years
		FROM
			job_metric_numeric a,
			job_metric_numeric b,
			job
		WHERE
			b.job_id = a.job_id
			AND a.job_id = job.id
			AND a.metric_name = 'runtime_seconds'
			AND b.metric_name = 'galaxy_slots'
		GROUP BY date_trunc('month', job.create_time)
		ORDER BY date_trunc('month', job.create_time) DESC
	EOF
}


query_monthly-data(){ ##? [year] [--human]: Number of active users per month, running jobs
	handle_help "$@" <<-EOF
		Find out how much data was ingested or created by Galaxy during the past months.

		    $ gxadmin query monthly-data 2018
		        month   | pg_size_pretty
		    ------------+----------------
		     2018-12-01 | 62 TB
		     2018-11-01 | 50 TB
		     2018-10-01 | 59 TB
		     2018-09-01 | 32 TB
		     2018-08-01 | 26 TB
		     2018-07-01 | 42 TB
		     2018-06-01 | 34 TB
		     2018-05-01 | 33 TB
		     2018-04-01 | 27 TB
		     2018-03-01 | 32 TB
		     2018-02-01 | 18 TB
		     2018-01-01 | 16 TB
	EOF
	size="sum(coalesce(dataset.total_size, dataset.file_size, 0))"

	if [[ -n $arg_human ]]; then
		size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0)))"
	fi

	if [[ -n $arg_year ]]; then
		where="WHERE date_trunc('year', dataset.create_time AT TIME ZONE 'UTC') = '$arg_year-01-01'::date"
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			date_trunc('month', dataset.create_time AT TIME ZONE 'UTC')::date AS month,
			$size
		FROM
			dataset
		$where
		GROUP BY
			month
		ORDER BY
			month DESC
	EOF
}

query_monthly-gpu-years() { ## : GPU years allocated to tools by month
	handle_help "$@" <<-EOF
		This uses the CUDA_VISIBLE_DEVICES and runtime_seconds metrics in order to
		calculate allocated GPU years. This will not be the value of what is
		actually consumed by your jobs, you should use cgroups. Only works if the
		environment variable 'CUDA_VISIBLE_DEVICES' is recorded as job metric by Galaxy.
		Requires Nvidia GPUs.

		    $ gxadmin query monthly-gpu-years
		        month   | gpu_years
		    ------------+-----------
		     2019-04-01 |      2.95
		     2019-03-01 |     12.38
		     2019-02-01 |     11.47
		     2019-01-01 |      8.27
		     2018-12-01 |     11.42
		     2018-11-01 |     16.99
		     2018-10-01 |     12.09
		     2018-09-01 |      6.27
		     2018-08-01 |      9.06
		     2018-07-01 |      6.17
		     2018-06-01 |      5.73
		     2018-05-01 |      7.36
		     2018-04-01 |     10.21
		     2018-03-01 |      5.20
		     2018-02-01 |      4.53
		     2018-01-01 |      4.05
		     2017-12-01 |      2.44
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			date_trunc('month', job.create_time)::date as month,
			round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) as gpu_years
		FROM
			job_metric_numeric a,
			job_metric_text b,
			job
		WHERE
			b.job_id = a.job_id
			AND a.job_id = job.id
			AND a.metric_name = 'runtime_seconds'
			AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
		GROUP BY date_trunc('month', job.create_time)
		ORDER BY date_trunc('month', job.create_time) DESC
	EOF
}

query_user-cpu-years() { ## : CPU years allocated to tools by user
	handle_help "$@" <<-EOF
		This uses the galaxy_slots and runtime_seconds metrics in order to
		calculate allocated CPU years. This will not be the value of what is
		actually consumed by your jobs, you should use cgroups.

		rank  | user_id |  username   | cpu_years
		----- | ------- | ----------- | ----------
		1     |         | 123f911b5f1 |     20.35
		2     |         | cb0fabc0002 |     14.93
		3     |         | 7e9e9b00b89 |     14.24
		4     |         | 42f211e5e87 |     14.06
		5     |         | 26cdba62c93 |     12.97
		6     |         | fa87cddfcae |      7.01
		7     |         | 44d2a648aac |      6.70
		8     |         | 66c57b41194 |      6.43
		9     |         | 6b1467ac118 |      5.45
		10    |         | d755361b59a |      5.19

	EOF

	username=$(gdpr_safe galaxy_user.username username 'Anonymous')

	read -r -d '' QUERY <<-EOF
		SELECT
			row_number() OVER (ORDER BY round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) DESC) as rank,
			job.user_id,
			$username,
			round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) as cpu_years
		FROM
			job_metric_numeric a,
			job_metric_numeric b,
			job
			FULL OUTER JOIN galaxy_user ON job.user_id = galaxy_user.id
		WHERE
			b.job_id = a.job_id
			AND a.job_id = job.id
			AND a.metric_name = 'runtime_seconds'
			AND b.metric_name = 'galaxy_slots'
		GROUP BY job.user_id, galaxy_user.username
		ORDER BY round(sum((a.metric_value * b.metric_value) / 3600 / 24 / 365), 2) DESC
		LIMIT 50
	EOF
}

query_user-gpu-years() { ## : GPU years allocated to tools by user
	handle_help "$@" <<-EOF
		This uses the CUDA_VISIBLE_DEVICES and runtime_seconds metrics in order to
		calculate allocated GPU years. This will not be the value of what is
		actually consumed by your jobs, you should use cgroups. Only works if the
		environment variable 'CUDA_VISIBLE_DEVICES' is recorded as job metric by Galaxy.
		Requires Nvidia GPUs.

		rank  | user_id |  username   | gpu_years
		----- | ------- | ----------- | ----------
		1     |         | 123f911b5f1 |     20.35
		2     |         | cb0fabc0002 |     14.93
		3     |         | 7e9e9b00b89 |     14.24
		4     |         | 42f211e5e87 |     14.06
		5     |         | 26cdba62c93 |     12.97
		6     |         | fa87cddfcae |      7.01
		7     |         | 44d2a648aac |      6.70
		8     |         | 66c57b41194 |      6.43
		9     |         | 6b1467ac118 |      5.45
		10    |         | d755361b59a |      5.19

	EOF

	username=$(gdpr_safe galaxy_user.username username 'Anonymous')

	read -r -d '' QUERY <<-EOF
		SELECT
			row_number() OVER (ORDER BY round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) DESC) as rank,
			job.user_id,
			$username,
			round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) as gpu_years
		FROM
			job_metric_numeric a,
			job_metric_text b,
			job
			FULL OUTER JOIN galaxy_user ON job.user_id = galaxy_user.id
		WHERE
			b.job_id = a.job_id
			AND a.job_id = job.id
			AND a.metric_name = 'runtime_seconds'
			AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
		GROUP BY job.user_id, galaxy_user.username
		ORDER BY round(sum((a.metric_value * length(replace(b.metric_value, ',', ''))) / 3600 / 24 / 365), 2) DESC
		LIMIT 50
	EOF
}

query_user-disk-usage() { ##? [--human] [--use-precalc]: Retrieve an approximation of the disk usage for users
	handle_help "$@" <<-EOF
		This uses the dataset size and the history association in order to
		calculate total disk usage for a user. This is currently limited
		to the 50 users with the highest storage usage.
		By default it prints the storage usage in bytes but you can use --human:

		rank  | user id  |  username   |  email      | storage usage
		----- | -------- | ----------- | ----------- | -------------
		1     |  5       | 123f911b5f1 | 123@911.5f1 |      20.35 TB
		2     |  6       | cb0fabc0002 | cb0@abc.002 |      14.93 TB
		3     |  9       | 7e9e9b00b89 | 7e9@9b0.b89 |      14.24 TB
		4     |  11      | 42f211e5e87 | 42f@11e.e87 |      14.06 GB
		5     |  2       | 26cdba62c93 | 26c@ba6.c93 |      12.97 GB
		6     |  1005    | fa87cddfcae | fa8@cdd.cae |       7.01 GB
		7     |  2009    | 44d2a648aac | 44d@a64.aac |       6.70 GB
		8     |  432     | 66c57b41194 | 66c@7b4.194 |       6.43 GB
		9     |  58945   | 6b1467ac118 | 6b1@67a.118 |       5.45 MB
		10    |  10      | d755361b59a | d75@361.59a |       5.19 KB

		A flag, --use-precalc, is provided which reads the disk_usage column of the galaxy_user table, using the values precisely as displayed to users in Galaxy.
	EOF

	username=$(gdpr_safe galaxy_user.username user_name 'Anonymous')
	useremail=$(gdpr_safe galaxy_user.email user_email 'Anonymous')


	fields="size=4"
	tags="userid=1;username=2"

	if [[ -n $arg_use_precalc ]]; then
		size="disk_usage as \"storage usage\""
		if [[ -n $arg_human ]]; then
			size="pg_size_pretty(disk_usage) as \"storage usage\""
		fi

		read -r -d '' QUERY <<-EOF
			SELECT
				row_number() OVER (ORDER BY galaxy_user.disk_usage DESC) as rank,
				galaxy_user.id as "user id",
				$username,
				$useremail,
				$size
			FROM
				galaxy_user
			GROUP BY galaxy_user.id
			ORDER BY 1
			LIMIT 50
		EOF
	else
		size="sum(coalesce(dataset.total_size, dataset.file_size, 0)) as \"storage usage\""
		if [[ -n $arg_human ]]; then
			size="pg_size_pretty(sum(coalesce(dataset.total_size, dataset.file_size, 0))) as \"storage usage\""
		fi

		read -r -d '' QUERY <<-EOF
			SELECT
				row_number() OVER (ORDER BY sum(coalesce(dataset.total_size, dataset.file_size, 0)) DESC) as rank,
				galaxy_user.id as "user id",
				$username,
				$useremail,
				$size
			FROM
				dataset,
				galaxy_user,
				history_dataset_association,
				history
			WHERE
				NOT dataset.purged
				AND dataset.id = history_dataset_association.dataset_id
				AND history_dataset_association.history_id = history.id
				AND history.user_id = galaxy_user.id
			GROUP BY galaxy_user.id
			ORDER BY 1
			LIMIT 50
		EOF
	fi
}

query_user-disk-quota() { ## : Retrieves the 50 users with the largest quotas
	handle_help "$@" <<-EOF
		This calculates the total assigned disk quota to users.
		It only displays the top 50 quotas.

		rank  | user_id  |  username    |    quota
		----- | -------- | ------------ | ------------
		1     |          | 123f911b5f1  |       20.35
		2     |          | cb0fabc0002  |       14.93
		3     |          | 7e9e9b00b89  |       14.24
		4     |          | 42f211e5e87  |       14.06
		5     |          | 26cdba62c93  |       12.97
		6     |          | fa87cddfcae  |        7.01
		7     |          | 44d2a648aac  |        6.70
		8     |          | 66c57b41194  |        6.43
		9     |          | 6b1467ac118  |        5.45
		10    |          | d755361b59a  |        5.19
	EOF

	username=$(gdpr_safe galaxy_user.username username 'Anonymous')

	read -r -d '' QUERY <<-EOF
		WITH user_basequota_list AS (
			SELECT galaxy_user.id as "user_id",
				basequota.bytes as "quota"
			FROM galaxy_user,
				quota basequota,
				user_quota_association
			WHERE galaxy_user.id = user_quota_association.user_id
				AND basequota.id = user_quota_association.quota_id
				AND basequota.operation = '='
				AND NOT basequota.deleted
			GROUP BY galaxy_user.id, basequota.bytes
		),
		user_basequota AS (
			SELECT user_basequota_list.user_id,
				MAX(user_basequota_list.quota) as "quota"
			FROM user_basequota_list
			GROUP BY user_basequota_list.user_id
		),
		user_addquota_list AS (
			SELECT galaxy_user.id as "user_id",
				addquota.bytes as "quota"
			FROM galaxy_user,
				quota addquota,
				user_quota_association
			WHERE galaxy_user.id = user_quota_association.user_id
				AND addquota.id = user_quota_association.quota_id
				AND addquota.operation = '+'
				AND NOT addquota.deleted
			GROUP BY galaxy_user.id, addquota.bytes
		),
		user_addquota AS (
			SELECT user_addquota_list.user_id,
				sum(user_addquota_list.quota) AS "quota"
			FROM user_addquota_list
			GROUP BY user_addquota_list.user_id
		),
		user_minquota_list AS (
			SELECT galaxy_user.id as "user_id",
				minquota.bytes as "quota"
			FROM galaxy_user,
				quota minquota,
				user_quota_association
			WHERE galaxy_user.id = user_quota_association.user_id
				AND minquota.id = user_quota_association.quota_id
				AND minquota.operation = '-'
				AND NOT minquota.deleted
			GROUP BY galaxy_user.id, minquota.bytes
		),
		user_minquota AS (
			SELECT user_minquota_list.user_id,
				sum(user_minquota_list.quota) AS "quota"
			FROM user_minquota_list
			GROUP BY user_minquota_list.user_id
		),
		group_basequota_list AS (
			SELECT galaxy_user.id as "user_id",
				galaxy_group.id as "group_id",
				basequota.bytes as "quota"
			FROM galaxy_user,
				galaxy_group,
				quota basequota,
				group_quota_association,
				user_group_association
			WHERE galaxy_user.id = user_group_association.user_id
				AND galaxy_group.id = user_group_association.group_id
				AND basequota.id = group_quota_association.quota_id
				AND galaxy_group.id = group_quota_association.group_id
				AND basequota.operation = '='
				AND NOT basequota.deleted
			GROUP BY galaxy_user.id, galaxy_group.id, basequota.bytes
		),
		group_basequota AS (
			SELECT group_basequota_list.user_id,
				group_basequota_list.group_id,
				MAX(group_basequota_list.quota) as "quota"
			FROM group_basequota_list
			GROUP BY group_basequota_list.user_id, group_basequota_list.group_id
		),
		group_addquota_list AS (
			SELECT galaxy_user.id as "user_id",
				addquota.bytes as "quota"
			FROM galaxy_user,
				galaxy_group,
				quota addquota,
				group_quota_association,
				user_group_association
			WHERE galaxy_user.id = user_group_association.user_id
				AND galaxy_group.id = user_group_association.group_id
				AND addquota.id = group_quota_association.quota_id
				AND galaxy_group.id = group_quota_association.group_id
				AND addquota.operation = '+'
				AND NOT addquota.deleted
			GROUP BY galaxy_user.id, addquota.bytes
		),
		group_addquota AS (
			SELECT group_addquota_list.user_id,
				sum(group_addquota_list.quota) AS "quota"
			FROM group_addquota_list
			GROUP BY group_addquota_list.user_id
		),
		group_minquota_list AS (
			SELECT galaxy_user.id as "user_id",
				minquota.bytes as "quota"
			FROM galaxy_user,
				galaxy_group,
				quota minquota,
				group_quota_association,
				user_group_association
			WHERE galaxy_user.id = user_group_association.user_id
				AND galaxy_group.id = user_group_association.group_id
				AND minquota.id = group_quota_association.quota_id
				AND galaxy_group.id = group_quota_association.group_id
				AND minquota.operation = '-'
				AND NOT minquota.deleted
			GROUP BY galaxy_user.id, galaxy_group.id, galaxy_group.name, minquota.bytes
		),
		group_minquota AS (
			SELECT group_minquota_list.user_id,
				sum(group_minquota_list.quota) AS "quota"
			FROM group_minquota_list
			GROUP BY group_minquota_list.user_id
		),
		all_user_default_quota AS (
			SELECT galaxy_user.id as "user_id",
				quota.bytes
			FROM galaxy_user,
				quota
			WHERE quota.id = (SELECT quota_id FROM default_quota_association)
		),
		quotas AS (
			SELECT all_user_default_quota.user_id as "aud_uid",
				all_user_default_quota.bytes as "aud_quota",
				user_basequota.user_id as "ubq_uid",
				user_basequota.quota as "ubq_quota",
				user_addquota.user_id as "uaq_uid",
				user_addquota.quota as "uaq_quota",
				user_minquota.user_id as "umq_uid",
				user_minquota.quota as "umq_quota",
				group_basequota.user_id as "gbq_uid",
				group_basequota.quota as "gbq_quota",
				group_addquota.user_id as "gaq_uid",
				group_addquota.quota as "gaq_quota",
				group_minquota.user_id as "gmq_uid",
				group_minquota.quota as "gmq_quota"
			FROM all_user_default_quota
			FULL OUTER JOIN user_basequota ON all_user_default_quota.user_id = user_basequota.user_id
			FULL OUTER JOIN user_addquota ON all_user_default_quota.user_id = user_addquota.user_id
			FULL OUTER JOIN user_minquota ON all_user_default_quota.user_id = user_minquota.user_id
			FULL OUTER JOIN group_basequota ON all_user_default_quota.user_id = group_basequota.user_id
			FULL OUTER JOIN group_addquota ON all_user_default_quota.user_id = group_addquota.user_id
			FULL OUTER JOIN group_minquota ON all_user_default_quota.user_id = group_minquota.user_id
		),
		computed_quotas AS (
			SELECT aud_uid as "user_id",
				COALESCE(GREATEST(ubq_quota, gbq_quota), aud_quota) as "base_quota",
				(COALESCE(uaq_quota, 0) + COALESCE(gaq_quota, 0)) as "add_quota",
				(COALESCE(umq_quota, 0) + COALESCE(gmq_quota, 0)) as "min_quota"
			FROM quotas
		)

		SELECT row_number() OVER (ORDER BY (computed_quotas.base_quota + computed_quotas.add_quota - computed_quotas.min_quota) DESC) as rank,
			galaxy_user.id as "user_id",
			$username,
			pg_size_pretty(computed_quotas.base_quota + computed_quotas.add_quota - computed_quotas.min_quota) as "quota"
		FROM computed_quotas,
			galaxy_user
		WHERE computed_quotas.user_id = galaxy_user.id
		GROUP BY galaxy_user.id, galaxy_user.username, computed_quotas.base_quota, computed_quotas.add_quota, computed_quotas.min_quota
		ORDER BY (computed_quotas.base_quota + computed_quotas.add_quota - computed_quotas.min_quota) DESC
		LIMIT 50
	EOF
}

query_group-cpu-seconds() { ##? [group]: Retrieve an approximation of the CPU time in seconds for group(s)
	handle_help "$@" <<-EOF
		This uses the galaxy_slots and runtime_seconds metrics in order to
		calculate allocated CPU time in seconds. This will not be the value of
		what is actually consumed by jobs of the group, you should use cgroups instead.

		rank  | group_id |  group_name  | cpu_seconds
		----- | -------- | ------------ | ------------
		1     |          | 123f911b5f1  |       20.35
		2     |          | cb0fabc0002  |       14.93
		3     |          | 7e9e9b00b89  |       14.24
		4     |          | 42f211e5e87  |       14.06
		5     |          | 26cdba62c93  |       12.97
		6     |          | fa87cddfcae  |        7.01
		7     |          | 44d2a648aac  |        6.70
		8     |          | 66c57b41194  |        6.43
		9     |          | 6b1467ac118  |        5.45
		10    |          | d755361b59a  |        5.19
	EOF

	where=""
	if [[ -n $arg_group ]]; then
		where="AND galaxy_group.name = '$arg_group'"
	fi

	groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')

	read -r -d '' QUERY <<-EOF
		WITH jobs_info AS (
			SELECT job.user_id,
				round(sum(a.metric_value * b.metric_value), 2) AS cpu_seconds
			FROM job_metric_numeric AS a,
				job_metric_numeric AS b,
				job
			WHERE job.id = a.job_id
				AND job.id = b.job_id
				AND a.metric_name = 'runtime_seconds'
				AND b.metric_name = 'galaxy_slots'
			GROUP BY job.user_id
		), user_job_info AS (
			SELECT user_id,
				sum(cpu_seconds) AS cpu_seconds
			FROM jobs_info
			GROUP BY user_id
		)

		SELECT row_number() OVER (ORDER BY round(sum(user_job_info.cpu_seconds), 0) DESC) as rank,
			galaxy_group.id as group_id,
			$groupname,
			round(sum(user_job_info.cpu_seconds), 0) as cpu_seconds
		FROM user_job_info,
			galaxy_group,
			user_group_association
		WHERE user_job_info.user_id = user_group_association.user_id
			AND user_group_association.group_id = galaxy_group.id
			$where
		GROUP BY galaxy_group.id, galaxy_group.name
		ORDER BY round(sum(user_job_info.cpu_seconds), 0) DESC
		LIMIT 50
	EOF
}

query_group-gpu-time() { ##? [group]: Retrieve an approximation of the GPU time for users
	handle_help "$@" <<-EOF
		This uses the galaxy_slots and runtime_seconds metrics in order to
		calculate allocated GPU time. This will not be the value of what is
		actually consumed by jobs of the group, you should use cgroups instead.
		Only works if the  environment variable 'CUDA_VISIBLE_DEVICES' is
		recorded as job metric by Galaxy. Requires Nvidia GPUs.

		rank  | group_id |  group_name | gpu_seconds
		----- | -------- | ----------- | -----------
		1     |          | 123f911b5f1 |      20.35
		2     |          | cb0fabc0002 |      14.93
		3     |          | 7e9e9b00b89 |      14.24
		4     |          | 42f211e5e87 |      14.06
		5     |          | 26cdba62c93 |      12.97
		6     |          | fa87cddfcae |       7.01
		7     |          | 44d2a648aac |       6.70
		8     |          | 66c57b41194 |       6.43
		9     |          | 6b1467ac118 |       5.45
		10    |          | d755361b59a |       5.19
	EOF

	where=""
	if [[ -n $arg_group ]]; then
		where="AND galaxy_group.name = '$arg_group'"
	fi

	groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')

	read -r -d '' QUERY <<-EOF
		WITH jobs_info AS (
			SELECT job.user_id,
				round(sum(a.metric_value * length(replace(b.metric_value, ',', ''))), 2) AS gpu_seconds
			FROM job_metric_numeric AS a,
				job_metric_text AS b,
				job
			WHERE job.id = a.job_id
				AND job.id = b.job_id
				AND a.metric_name = 'runtime_seconds'
				AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
			GROUP BY job.user_id
		), user_job_info AS (
			SELECT user_id,
				sum(gpu_seconds) AS gpu_seconds
			FROM jobs_info
			GROUP BY user_id
		)
		SELECT row_number() OVER (ORDER BY round(sum(user_job_info.gpu_seconds), 0) DESC) as rank,
			galaxy_group.id as group_id,
			$groupname,
			round(sum(user_job_info.gpu_seconds), 0) as gpu_seconds
		FROM user_job_info,
			galaxy_group,
			user_group_association
		WHERE user_job_info.user_id = user_group_association.user_id
			AND user_group_association.group_id = galaxy_group.id
			$where
		GROUP BY galaxy_group.id, galaxy_group.name
		ORDER BY round(sum(user_job_info.gpu_seconds), 0) DESC
		LIMIT 50
	EOF
}

query_monthly-users-registered(){ ## [year] [--by_group]: Number of users registered each month
	handle_help "$@" <<-EOF
		Number of users that registered each month. **NOTE**: Does not include anonymous users or users in no group.
		Parameters:
		--by_group: Will separate out registrations by galaxy user group as well
		year: Will return monthly user registrations from the start of [year] till now

		$ gxadmin query monthly-users 2020 --by_group
			month    | Group name | count
		 ------------+------------+-------
		  2020-08-01 | Group_1    |     1
		  2020-08-01 | Group_2    |     1
		  2020-08-01 | Group_3    |     1
		  2020-08-01 | Group_4    |     3
		  2020-07-01 | Group_1    |     1
		  2020-07-01 | Group_2    |     6
		  2020-07-01 | Group_3    |     2
		  2020-07-01 | Group_4    |     6
		  2020-07-01 | Group_5    |     2
		  2020-07-01 | Group_6    |     1
		  ...
	EOF

	if (( $# > 0 )); then
		for args in "$@"; do
			if [ "$args" = "--by_group" ]; then
				where_g="galaxy_user.id = user_group_association.user_id and galaxy_group.id = user_group_association.group_id"
				select="galaxy_group.name,"
				from="galaxy_group, user_group_association,"
				group=", galaxy_group.name"
			else
				where_y="date_trunc('year', galaxy_user.create_time AT TIME ZONE 'UTC') = '$args-01-01'::date"
			fi
		done
		if (( $# > 1 )); then
			where="WHERE $where_y and $where_g"
		else
			where="WHERE $where_y $where_g"
		fi
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			date_trunc('month', galaxy_user.create_time)::DATE AS month,
			$select
			count(*)
		FROM
			$from
			galaxy_user
		$where
		GROUP BY
			month
			$group
		ORDER BY
			month DESC
	EOF
}

query_monthly-users-active(){ ## [year] [--by_group]: Number of active users per month, running jobs
	handle_help "$@" <<-EOF
		Number of unique users each month who ran jobs. **NOTE**: does not include anonymous users.
		Parameters:
		--by_group: Separate out active users by galaxy user group
		year: Will return monthly active users from the start of [year] till now

		    $ gxadmin query monthly-users-active 2018
		       month    | active_users
		    ------------+--------------
		     2018-12-01 |          811
		     2018-11-01 |          658
		     2018-10-01 |          583
		     2018-09-01 |          444
		     2018-08-01 |          342
		     2018-07-01 |          379
		     2018-06-01 |          370
		     2018-05-01 |          330
		     2018-04-01 |          274
		     2018-03-01 |          186
		     2018-02-01 |          168
		     2018-01-01 |          122
	EOF

	if (( $# > 0 )); then
		for args in "$@"; do
			if [ "$args" = "--by_group" ]; then
				where_g="job.user_id = user_group_association.user_id and user_group_association.group_id = galaxy_group.id"
				select="galaxy_group.name,"
				from=", user_group_association, galaxy_group"
				group=", galaxy_group.name"
			else
				where_y="date_trunc('year', job.create_time AT TIME ZONE 'UTC') = '$args-01-01'::date"
			fi
		done
		if (( $# > 1 )); then
			where="WHERE $where_y and $where_g"
		else
			where="WHERE $where_y $where_g"
		fi
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			date_trunc('month', job.create_time AT TIME ZONE 'UTC')::date as month,
			$select
			count(distinct job.user_id) as active_users
		FROM
			job
			$from
		$where
		GROUP BY
			month
			$group
		ORDER BY
			month DESC
	EOF
}

query_monthly-jobs(){ ## [year] [--by_group]: Number of jobs run each month
	handle_help "$@" <<-EOF
		Count jobs run each month
		Parameters:
		--by_group: Will separate out job counts for each month by galaxy user group
		year: Will return number of monthly jobs run from the start of [year] till now
		    $ gxadmin query monthly-jobs 2018
		        month   | count
		    ------------+--------
		     2018-12-01 |  96941
		     2018-11-01 |  94625
		     2018-10-01 | 156940
		     2018-09-01 | 103331
		     2018-08-01 | 128658
		     2018-07-01 |  90852
		     2018-06-01 | 230470
		     2018-05-01 | 182331
		     2018-04-01 | 109032
		     2018-03-01 | 197125
		     2018-02-01 | 260931
		     2018-01-01 |  25378
	EOF

	if (( $# > 0 )); then
		for args in "$@"; do
			if [ "$args" = "--by_group" ]; then
				where_g="job.user_id = user_group_association.user_id and galaxy_group.id = user_group_association.group_id"
				select="galaxy_group.name,"
				from="galaxy_group, user_group_association,"
				group=", galaxy_group.name"
			else
				where_y="date_trunc('year', job.create_time AT TIME ZONE 'UTC') = '$args-01-01'::date"
			fi
		done
		if (( $# > 1 )); then
			where="WHERE $where_y and $where_g"
		else
			where="WHERE $where_y $where_g"
		fi
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			date_trunc('month', job.create_time AT TIME ZONE 'UTC')::DATE AS month,
			$select
			count(*)
		FROM
			$from
			job
		$where
		GROUP BY
			month
			$group
		ORDER BY
			month DESC
	EOF
}

query_total-jobs(){ ## : Total number of jobs run by galaxy instance
	handle_help "$@" <<-EOF
		Count total number of jobs

		    $ gxadmin query total-jobs
		      state  | count
		    ---------+-------
		     deleted |    21
		     error   |   197
		     ok      |   798
		    (3 rows)
	EOF

	fields="count=1"
	tags="state=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			state, count(*)
		FROM
			job

		GROUP BY
			state
		ORDER BY
			state
	EOF

}

query_job-history() { ##? <id>: Job state history for a specific job
	handle_help "$@" <<-EOF
		    $ gxadmin query job-history 1
		                 time              | state
		    -------------------------------+--------
		     2018-11-20 17:15:09.297907+00 | error
		     2018-11-20 17:15:08.911972+00 | queued
		     2018-11-20 17:15:08.243363+00 | new
		     2018-11-20 17:15:08.198301+00 | upload
		     2018-11-20 17:15:08.19655+00  | new
		    (5 rows)
	EOF

	read -r -d '' QUERY <<-EOF
			SELECT
				create_time AT TIME ZONE 'UTC' as time,
				state
			FROM job_state_history
			WHERE job_id = $arg_id
	EOF
}

query_job-inputs() { ##? <id>: Input datasets to a specific job
	handle_help "$@" <<-EOF
	EOF

	read -r -d '' QUERY <<-EOF
			SELECT
				hda.id AS hda_id,
				hda.state AS hda_state,
				hda.deleted AS hda_deleted,
				hda.purged AS hda_purged,
				d.id AS d_id,
				d.state AS d_state,
				d.deleted AS d_deleted,
				d.purged AS d_purged,
				d.object_store_id AS object_store_id
			FROM job j
				JOIN job_to_input_dataset jtid
					ON j.id = jtid.job_id
				JOIN history_dataset_association hda
					ON hda.id = jtid.dataset_id
				JOIN dataset d
					ON hda.dataset_id = d.id
			WHERE j.id = $arg_id
	EOF
}

query_job-outputs() { ##? <id>: Output datasets from a specific job
	handle_help "$@" <<-EOF
	EOF

	read -r -d '' QUERY <<-EOF
			SELECT
				hda.id AS hda_id,
				hda.state AS hda_state,
				hda.deleted AS hda_deleted,
				hda.purged AS hda_purged,
				d.id AS d_id,
				d.state AS d_state,
				d.deleted AS d_deleted,
				d.purged AS d_purged,
				d.object_store_id AS object_store_id
			FROM job j
				JOIN job_to_output_dataset jtod
					ON j.id = jtod.job_id
				JOIN history_dataset_association hda
					ON hda.id = jtod.dataset_id
				JOIN dataset d
					ON hda.dataset_id = d.id
			WHERE j.id = $arg_id
	EOF
}

query_job-info() { ## <-|job_id [job_id [...]]> : Retrieve information about jobs given some job IDs
	handle_help "$@" <<-EOF
		Retrieves information on a job, like the host it ran on,
		how long it ran for and the total memory.

		id    | create_time  | update_time |  tool_id     |   hostname   | handler  | runtime_seconds | memtotal
		----- | ------------ | ----------- | ------------ | ------------ | -------- | --------------- | --------
		1     |              |             | 123f911b5f1  | 123f911b5f1  | handler0 |          20.35  | 20.35 GB
		2     |              |             | cb0fabc0002  | cb0fabc0002  | handler1 |          14.93  |  5.96 GB
		3     |              |             | 7e9e9b00b89  | 7e9e9b00b89  | handler1 |          14.24  |  3.53 GB
		4     |              |             | 42f211e5e87  | 42f211e5e87  | handler4 |          14.06  |  1.79 GB
		5     |              |             | 26cdba62c93  | 26cdba62c93  | handler0 |          12.97  |  1.21 GB
		6     |              |             | fa87cddfcae  | fa87cddfcae  | handler1 |           7.01  |   987 MB
		7     |              |             | 44d2a648aac  | 44d2a648aac  | handler3 |           6.70  |   900 MB
		8     |              |             | 66c57b41194  | 66c57b41194  | handler1 |           6.43  |   500 MB
		9     |              |             | 6b1467ac118  | 6b1467ac118  | handler0 |           5.45  |   250 MB
		10    |              |             | d755361b59a  | d755361b59a  | handler2 |           5.19  |   100 MB
	EOF

	assert_count_ge $# 1 "Missing job IDs"

	if [[ "$1" == "-" ]]; then
		# read jobs from stdin
		job_ids=$(cat | paste -s -d' ')
	else
		# read from $@
		job_ids=$@;
	fi

	job_ids_string=$(join_by ',' ${job_ids[@]})

	read -r -d '' QUERY <<-EOF
		WITH hostname_query AS (
			SELECT job_id,
				metric_value as hostname
			FROM job_metric_text
			WHERE job_id IN ($job_ids_string)
				AND metric_name='HOSTNAME'
		),
		metric_num_query AS (
			SELECT job_id,
				SUM(CASE WHEN metric_name='runtime_seconds' THEN metric_value END) runtime_seconds,
				pg_size_pretty(SUM(CASE WHEN metric_name='memtotal' THEN metric_value END)) memtotal
			FROM job_metric_numeric
			WHERE job_id IN ($job_ids_string)
				AND metric_name IN ('runtime_seconds', 'memtotal')
			GROUP BY job_id
		)

		SELECT job.id,
			job.create_time,
			job.update_time,
			job.tool_id,
			job.handler,
			hostname_query.hostname,
			metric_num_query.runtime_seconds,
			metric_num_query.memtotal
		FROM job
			FULL OUTER JOIN hostname_query ON hostname_query.job_id = job.id
			FULL OUTER JOIN metric_num_query ON metric_num_query.job_id = job.id
		WHERE job.id IN ($job_ids_string)
	EOF
}

query_old-histories(){ ##? <weeks>: Lists histories that haven't been updated (used) for <weeks>
	handle_help "$@" <<-EOF
		Histories and their users who haven't been updated for a specified number of weeks. Default number of weeks is 15.

		    $gxadmin query old-histories 52
		      id   |        update_time         | user_id |  email  |       name         | published | deleted | purged | hid_counter
		    -------+----------------------------+---------+---------+--------------------+-----------+---------+--------+-------------
		     39903 | 2017-06-13 12:35:07.174749 |     834 | xxx@xxx | Unnamed history    | f         | f       | f      |          23
		      1674 | 2017-06-13 14:08:30.017574 |       9 | xxx@xxx | SAHA project       | f         | f       | f      |          47
		     40088 | 2017-06-15 04:10:48.879122 |     986 | xxx@xxx | Unnamed history    | f         | f       | f      |           3
		     39023 | 2017-06-15 09:33:12.007002 |     849 | xxx@xxx | prac 4 new final   | f         | f       | f      |         297
		     35437 | 2017-06-16 04:41:13.15785  |     731 | xxx@xxx | Unnamed history    | f         | f       | f      |          98
		     40123 | 2017-06-16 13:43:24.948344 |     987 | xxx@xxx | Unnamed history    | f         | f       | f      |          22
		     40050 | 2017-06-19 00:46:29.032462 |     193 | xxx@xxx | Telmatactis        | f         | f       | f      |          74
		     12212 | 2017-06-20 14:41:03.762881 |     169 | xxx@xxx | Unnamed history    | f         | f       | f      |          24
		     39523 | 2017-06-21 01:34:52.226653 |       9 | xxx@xxx | OSCC Cell Lines    | f         | f       | f      |         139
	EOF

	email=$(gdpr_safe galaxy_user.email 'email')

	read -r -d '' QUERY <<-EOF
		SELECT
			history.id,
			history.update_time AT TIME ZONE 'UTC' as update_time,
			history.user_id,
			$email,
			history.name,
			history.published,
			history.deleted,
			history.purged,
			history.hid_counter
		FROM
			history,
			galaxy_user
		WHERE
			history.update_time < (now() AT TIME ZONE 'UTC' - '$arg_weeks weeks'::interval) AND
			history.user_id = galaxy_user.id AND
			history.deleted = FALSE AND
			history.published = FALSE
		ORDER BY
			history.update_time desc
	EOF
}

# TODO(hxr): generic summation by metric? Leave math to consumer?
query_jobs-max-by-cpu-hours() { ## : Top 10 jobs by CPU hours consumed (requires CGroups metrics)
	handle_help "$@" <<-EOF
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			job.id,
			job.tool_id,
			job.create_time,
			metric_value/1000000000/3600/24 as cpu_days
		FROM job, job_metric_numeric
		WHERE
			job.id = job_metric_numeric.job_id
			AND metric_name = 'cpuacct.usage'
		ORDER BY cpu_hours desc
		LIMIT 30
	EOF
}

query_errored-jobs(){ ##? <hours> [--details]: Lists jobs that errored in the last N hours.
	handle_help "$@" <<-EOF
		Lists details of jobs that have status = 'error' for the specified number of hours. Default = 24 hours

		    $ gxadmin query errored-jobs 2
		     id | create_time | tool_id | tool_version | handler  | destination_id | job_runner_external_id |      email
		    ----+-------------+---------+--------------+----------+----------------+------------------------+------------------
		      1 |             | upload1 | 1.1.0        | handler2 | slurm_normal   | 42                     | user@example.org
		      2 |             | cut1    | 1.1.1        | handler1 | slurm_normal   | 43                     | user@example.org
		      3 |             | bwa     | 0.7.17.1     | handler0 | slurm_multi    | 44                     | map@example.org
		      4 |             | trinity | 2.9.1        | handler1 | pulsar_bigmem  | 4                      | rna@example.org


	EOF

	email=$(gdpr_safe galaxy_user.email 'email')

	details=
	if [[ -n "$arg_details" ]]; then
		details="job.job_stderr,"
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			job.id,
			job.create_time AT TIME ZONE 'UTC' as create_time,
			job.tool_id,
			job.tool_version,
			job.handler,
			job.destination_id,
			job.job_runner_external_id,
			$details
			$email
		FROM
			job,
			galaxy_user
		WHERE
			job.create_time >= (now() AT TIME ZONE 'UTC' - '$arg_hours hours'::interval) AND
			job.state = 'error' AND
			job.user_id = galaxy_user.id
		ORDER BY
			job.id
	EOF
}


query_workflow-invocation-status() { ## : Report on how many workflows are in new state by handler
	handle_help "$@" <<-EOF
		Really only intended to be used in influx queries.
	EOF

	fields="count=3"
	tags="scheduler=0;handler=1;state=2"

	read -r -d '' QUERY <<-EOF
		SELECT
			COALESCE(scheduler, 'none'),
			COALESCE(handler, 'none'),
			state,
			count(*)
		FROM
			workflow_invocation
		WHERE state in ('new', 'ready')
		GROUP BY handler, scheduler, state
	EOF
}


query_workflow-invocation-totals() { ## : Report on overall workflow counts, to ensure throughput
	handle_help "$@" <<-EOF
		Really only intended to be used in influx queries.
	EOF

	fields="count=1"
	tags="state=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			COALESCE(state, 'unknown'), count(*)
		FROM
			workflow_invocation
		GROUP BY state
	EOF
}

query_tool-new-errors() { ##? [weeks|4] [--short-tool-id]: Summarize percent of tool runs in error over the past weeks for "new tools"
	handle_help "$@" <<-EOF
		See jobs-in-error summary for recent tools (tools whose first execution is in recent weeks).

		    $ gxadmin query tool-errors --short-tool-id 1
		        tool_id                        | tool_runs |  percent_errored  | percent_failed | count_errored | count_failed |     handler
		    -----------------------------------+-----------+-------------------+----------------+---------------+--------------+-----------------
		     rnateam/graphclust_align_cluster/ |        55 | 0.145454545454545 |              0 |             8 |            0 | handler_main_10
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        46 | 0.347826086956522 |              0 |            16 |            0 | handler_main_3
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        43 | 0.186046511627907 |              0 |             8 |            0 | handler_main_0
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        41 | 0.390243902439024 |              0 |            16 |            0 | handler_main_4
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        40 |             0.325 |              0 |            13 |            0 | handler_main_6
		     Filter1                           |        40 |             0.125 |              0 |             5 |            0 | handler_main_0
		     devteam/bowtie2/bowtie2/2.3.4.3   |        40 |             0.125 |              0 |             5 |            0 | handler_main_7
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        40 |               0.3 |              0 |            12 |            0 | handler_main_2
	EOF

	tool_id="j.tool_id"
	if [[ -n $arg_short_tool_id ]]; then
		tool_id="regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id"
	fi

	fields="tool_runs=1;percent_errored=2;percent_failed=3;count_errored=4;count_failed=5"
	tags="tool_id=0;handler=6"

	read -r -d '' QUERY <<-EOF
		SELECT
			$tool_id,
			count(*) AS tool_runs,
			sum(CASE WHEN j.state = 'error'  THEN 1 ELSE 0 END)::float / count(*) AS percent_errored,
			sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END)::float / count(*) AS percent_failed,
			sum(CASE WHEN j.state = 'error'  THEN 1 ELSE 0 END) AS count_errored,
			sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END) AS count_failed,
			j.handler
		FROM job AS j
		WHERE
			j.tool_id
			IN (
					SELECT tool_id
					FROM job AS j
					WHERE j.create_time > (now() - '$arg_weeks weeks'::INTERVAL)
					GROUP BY j.tool_id
				)
		GROUP BY j.tool_id, j.handler
		ORDER BY percent_failed_errored DESC
	EOF
}

query_tool-errors() { ##? [--short-tool-id] [weeks|4]: Summarize percent of tool runs in error over the past weeks for all tools that have failed (most popular tools first)
	handle_help "$@" <<-EOF
		See jobs-in-error summary for recently executed tools that have failed at least 10% of the time.

		    $ gxadmin query tool-errors --short-tool-id 1
		        tool_id                        | tool_runs |  percent_errored  | percent_failed | count_errored | count_failed |     handler
		    -----------------------------------+-----------+-------------------+----------------+---------------+--------------+-----------------
		     rnateam/graphclust_align_cluster/ |        55 | 0.145454545454545 |              0 |             8 |            0 | handler_main_10
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        46 | 0.347826086956522 |              0 |            16 |            0 | handler_main_3
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        43 | 0.186046511627907 |              0 |             8 |            0 | handler_main_0
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        41 | 0.390243902439024 |              0 |            16 |            0 | handler_main_4
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        40 |             0.325 |              0 |            13 |            0 | handler_main_6
		     Filter1                           |        40 |             0.125 |              0 |             5 |            0 | handler_main_0
		     devteam/bowtie2/bowtie2/2.3.4.3   |        40 |             0.125 |              0 |             5 |            0 | handler_main_7
		     iuc/rgrnastar/rna_star/2.6.0b-2   |        40 |               0.3 |              0 |            12 |            0 | handler_main_2
	EOF

	# TODO: Fix this nonsense for proper args
	tool_id="j.tool_id"
	if [[ -n $arg_short_tool_id ]]; then
		tool_id="regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id"
	fi

	fields="tool_runs=1;percent_errored=2;percent_failed=3;count_errored=4;count_failed=5"
	tags="tool_id=0;handler=6"

	read -r -d '' QUERY <<-EOF
		SELECT
			$tool_id,
			count(*) AS tool_runs,
			sum(CASE WHEN j.state = 'error'  THEN 1 ELSE 0 END)::float / count(*) AS percent_errored,
			sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END)::float / count(*) AS percent_failed,
			sum(CASE WHEN j.state = 'error'  THEN 1 ELSE 0 END) AS count_errored,
			sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END) AS count_failed,
			j.handler
		FROM
			job AS j
		WHERE
			j.create_time > (now() - '$arg_weeks weeks'::INTERVAL)
		GROUP BY
			j.tool_id, j.handler
		HAVING
			sum(CASE WHEN j.state IN ('error', 'failed') THEN 1 ELSE 0 END) * 100.0 / count(*) > 10.0
		ORDER BY
			tool_runs DESC
	EOF
}

query_tool-likely-broken() { ##? [--short-tool-id] [weeks|4]: Find tools that have been executed in recent weeks that are (or were due to job running) likely substantially broken
	handle_help "$@" <<-EOF
		This runs an identical query to tool-errors, except filtering for tools
		which were run more than 4 times, and have a failure rate over 95%.

		                             tool_id                       | tool_runs | percent_errored | percent_failed | count_errored | count_failed |     handler
		    -------------------------------------------------------+-----------+-----------------+----------------+---------------+--------------+-----------------
		     simon-gladman/velvetoptimiser/velvetoptimiser/2.2.6   |        14 |               1 |              0 |            14 |            0 | handler_main_7
		     bgruening/hicexplorer_hicplottads/hicexplorer_hicplott|         9 |               1 |              0 |             9 |            0 | handler_main_0
		     bgruening/text_processing/tp_replace_in_column/1.1.3  |         8 |               1 |              0 |             8 |            0 | handler_main_3
		     bgruening/text_processing/tp_awk_tool/1.1.1           |         7 |               1 |              0 |             7 |            0 | handler_main_5
		     rnateam/dorina/dorina_search/1.0.0                    |         7 |               1 |              0 |             7 |            0 | handler_main_2
		     bgruening/text_processing/tp_replace_in_column/1.1.3  |         6 |               1 |              0 |             6 |            0 | handler_main_9
		     rnateam/dorina/dorina_search/1.0.0                    |         6 |               1 |              0 |             6 |            0 | handler_main_11
		     rnateam/dorina/dorina_search/1.0.0                    |         6 |               1 |              0 |             6 |            0 | handler_main_8
	EOF

	# TODO: Fix this nonsense for proper args
	tool_id="j.tool_id"
	if [[ -n $arg_short_tool_id ]]; then
		tool_id="regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id"
	fi

	fields="tool_runs=1;percent_errored=2;percent_failed=3;count_errored=4;count_failed=5"
	tags="tool_id=0;handler=6"

	read -r -d '' QUERY <<-EOF
		SELECT
			$tool_id,
			count(*) AS tool_runs,
			sum(CASE WHEN j.state = 'error'  THEN 1 ELSE 0 END)::float / count(*) AS percent_errored,
			sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END)::float / count(*) AS percent_failed,
			sum(CASE WHEN j.state = 'error'  THEN 1 ELSE 0 END) AS count_errored,
			sum(CASE WHEN j.state = 'failed' THEN 1 ELSE 0 END) AS count_failed,
			j.handler
		FROM
			job AS j
		WHERE
			j.create_time > (now() - '$arg_weeks weeks'::INTERVAL)
		GROUP BY
			j.tool_id, j.handler
		HAVING
			sum(CASE WHEN j.state IN ('error', 'failed') THEN 1 ELSE 0 END) * 100.0 / count(*) > 95.0
			AND count(*) > 4
		ORDER BY
			tool_runs DESC
	EOF
}

query_user-recent-aggregate-jobs() { ##? <user> [days|7]: Show aggregate information for jobs in past N days for user (by email/id/username)
	handle_help "$@" <<-EOF
		Obtain an overview of tools that a user has run in the past N days
	EOF

	# args
	user_filter=$(get_user_filter "$arg_user")

	read -r -d '' QUERY <<-EOF
		SELECT
			date_trunc('day', create_time), tool_id, state, count(*)
		FROM
			job
		JOIN
			galaxy_user on galaxy_user.id = job.user_id
		WHERE
			$user_filter AND create_time > (now() - '$arg_days days'::INTERVAL)
		GROUP BY
			date_trunc, tool_id, state
		ORDER BY
			date_trunc DESC
	EOF
}

query_user-history-list() { ##? <user> [--size]: List a user's (by email/id/username) histories.
	handle_help "$@" <<-EOF
		Obtain an overview of histories of a user. By default orders the histories by date.
		When using '--size' it overrides the order to size.

		$ gxadmin query user-history-list <username|id|email>
		  ID   |                 Name                 |        Last Updated        |   Size
		-------+--------------------------------------+----------------------------+-----------
			52 | Unnamed history                      | 2019-08-08 15:15:32.284678 | 293 MB
		 30906 | Unnamed history                      | 2019-07-23 16:25:36.084019 | 13 kB
	EOF

	# args
	user_filter=$(get_user_filter "$arg_user")
	order_col="uh.update_time"
	if [[ -n "$arg_size" ]]; then
		order_col="hs.hist_size"
	fi

	read -r -d '' QUERY <<-EOF
		WITH user_histories AS (
			SELECT id,
				name,
				update_time
			FROM history
			WHERE user_id IN (
				SELECT id
				FROM galaxy_user
				WHERE $user_filter
			) AND NOT purged
		), history_sizes AS (
			SELECT history_id,
				sum(coalesce(dataset.total_size, dataset.file_size, 0)) as "hist_size"
			FROM history_dataset_association,
				dataset
			WHERE history_id IN (
				SELECT id
				FROM user_histories
			) AND history_dataset_association.dataset_id = dataset.id
			GROUP BY history_id
		)
		SELECT uh.id as "ID",
			uh.name as "Name",
			uh.update_time as "Last Updated",
			pg_size_pretty(hs.hist_size) as "Size"
		FROM user_histories uh,
			history_sizes hs
		WHERE uh.id = hs.history_id
		ORDER BY $order_col DESC
	EOF
}

query_history-contents() { ## <history_id> [--dataset|--collection]: List datasets and/or collections in a history
	handle_help "$@" <<-EOF
		Obtain an overview of tools that a user has run in the past N days
	EOF
	local dsq clq q

	dsq="select dataset_id, name, hid, visible, deleted, copied_from_history_dataset_association_id as copied_from from history_dataset_association where history_id = $1"
	clq="select collection_id, name, hid, visible, deleted, copied_from_history_dataset_collection_association_id as copied_from from history_dataset_collection_association where history_id = $1;"

	if [[ $2 == "--dataset" ]] || [[ $2 == "--datasets" ]]; then
			q="$dsq"
	elif [[ $2 == "--collection" ]] || [[ $2 == "--collections" ]]; then
			q="$clq"
	else
			q="$dsq;$clq"
	fi

	read -r -d '' QUERY <<-EOF
			$q
	EOF
}

query_hdca-info() { ##? <hdca_id>: Information on a dataset collection
	handle_help "$@" <<-EOF
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT *
		FROM dataset_collection
		WHERE id = $arg_hdca_id
	EOF
}

query_hdca-datasets() { ##? <hdca_id>: List of files in a dataset collection
	handle_help "$@" <<-EOF
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT element_index, hda_id, ldda_id, child_collection_id, element_identifier
		FROM dataset_collection_element
		WHERE dataset_collection_id = $arg_hdca_id
		ORDER by element_index asc
	EOF
}

query_jobs-queued-internal-by-handler() { ## : How many queued jobs do not have external IDs, by handler
	handle_help "$@" <<-EOF
		Identify which handlers have a backlog of jobs which should be
		receiving external cluster IDs but have not yet.

		handler          | count
		---------------- + ------
		handler_main_0   |    14
		handler_main_1   |     4
		handler_main_10  |    13
		handler_main_2   |    11
		handler_main_3   |    14
		handler_main_4   |    12
		handler_main_5   |     9
		handler_main_6   |     7
		handler_main_7   |    13
		handler_main_8   |     9
		handler_main_9   |    14
	EOF

	fields="count=1"
	tags="handler=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			handler,
			count(handler)
		FROM
			job
		WHERE
			state = 'queued'
			AND job_runner_external_id IS null
		GROUP BY
			handler
	EOF
}


query_jobs-queued() { ## : How many queued jobs have external cluster IDs
	handle_help "$@" <<-EOF
		Shows the distribution of jobs in queued state, whether or not they have received an external ID.


		n            | count
		------------ | ------
		unprocessed  |   118
		processed    |    37
	EOF

	fields="count=1"
	tags="group=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			CASE WHEN job_runner_external_id IS NOT null THEN 'processed' ELSE 'unprocessed' END as n,
			count(*)
		FROM
			job
		WHERE
			state = 'queued'
		GROUP BY n
	EOF
}

query_users-with-oidc() { ## : How many users logged in with OIDC
	handle_help "$@" <<-EOF
		provider | count
		-------- | ------
		elixir   |     5
	EOF

	fields="count=1"
	tags="provider=0"

	read -r -d '' QUERY <<-EOF
		SELECT provider, count(distinct user_id) FROM oidc_user_authnz_tokens GROUP BY provider
	EOF
}

query_history-runtime-system() { ##? <history_id>: Sum of runtimes by all jobs in a history
	handle_help "$@" <<-EOF
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			(sum(job_metric_numeric.metric_value)::INT8 || 'seconds')::INTERVAL
		FROM
			job LEFT JOIN job_metric_numeric ON job.id = job_metric_numeric.job_id
		WHERE
			job.history_id = $arg_history_id AND metric_name = 'runtime_seconds'
	EOF
}

query_history-runtime-wallclock() { ##? <history_id>: Time as elapsed by a clock on the wall
	handle_help "$@" <<-EOF
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			max(job.update_time) - min(job.create_time)
		FROM
			job
		WHERE
			job.history_id = $arg_history_id
	EOF
}

query_history-runtime-system-by-tool() { ##? <history_id>: Sum of runtimes by all jobs in a history, split by tool
	handle_help "$@" <<-EOF
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			job.tool_id,
			(sum(job_metric_numeric.metric_value)::INT || 'seconds')::INTERVAL
		FROM
			job LEFT JOIN job_metric_numeric ON job.id = job_metric_numeric.job_id
		WHERE
			job.history_id = $arg_history_id AND metric_name = 'runtime_seconds'
		GROUP BY
			job.tool_id
		ORDER BY
			"interval" DESC
	EOF
}

query_upload-gb-in-past-hour() { ##? [hours|1]: Sum in bytes of files uploaded in the past hour
	handle_help "$@" <<-EOF
		Quick output, mostly useful for graphing, to produce a nice graph of how heavily are people uploading currently.
	EOF

	fields="count=0"
	tags="hours=1"

	read -r -d '' QUERY <<-EOF
		SELECT
			coalesce(sum(coalesce(dataset.total_size, coalesce(dataset.file_size, 0))), 0),
			$arg_hours as hours
		FROM
			job
			LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id
			LEFT JOIN history_dataset_association ON
					job_to_output_dataset.dataset_id = history_dataset_association.id
			LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
		WHERE
			job.tool_id = 'upload1'
			AND job.create_time AT TIME ZONE 'UTC' > (now() - '$arg_hours hours'::INTERVAL)
	EOF
}

query_queue-detail-by-handler() { ##? <handler_id>: List jobs for a specific handler
	handle_help "$@" <<-EOF
		List the jobs currently being processed by a specific handler
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			id,
			create_time,
			state,
			regexp_replace(tool_id, '.*toolshed.*/repos/', ''),
			job_runner_name,
			job_runner_external_id,
			destination_id
		FROM
			job
		WHERE
			handler = '$arg_handler_id' AND state IN ('new', 'queued', 'running')
	EOF
}


query_pg-cache-hit() { ## : Check postgres in-memory cache hit ratio
	handle_help "$@" <<-EOF
		Query from: https://www.citusdata.com/blog/2019/03/29/health-checks-for-your-postgres-database/

		Tells you about the cache hit ratio, is Postgres managing to store
		commonly requested objects in memory or are they being loaded every
		time?

		heap_read  | heap_hit |         ratio
		----------- ---------- ------------------------
		29         |    64445 | 0.99955020628470391165
	EOF

	fields="read=0;hit=1;ratio=2"
	tags=""

	read -r -d '' QUERY <<-EOF
		SELECT
			sum(heap_blks_read) as heap_read,
			sum(heap_blks_hit)  as heap_hit,
			sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio
		FROM
			pg_statio_user_tables
	EOF

}

query_pg-table-bloat() { ##? [--human]: show table and index bloat in your database ordered by most wasteful
	handle_help "$@" <<-EOF
		Query from: https://www.citusdata.com/blog/2019/03/29/health-checks-for-your-postgres-database/
		Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
	EOF

	if [[ -n "$arg_human" ]]; then
		waste_query="pg_size_pretty(raw_waste)"
	else
		waste_query="raw_waste"
	fi

	fields="bloat=3;ratio=4"
	tags="type=0;schema=1;object=2"

	read -r -d '' QUERY <<-EOF
		WITH constants AS (
			SELECT current_setting('block_size')::numeric AS bs, 23 AS hdr, 4 AS ma
		), bloat_info AS (
			SELECT
				ma,bs,schemaname,tablename,
				(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
				(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
			FROM (
				SELECT
					schemaname, tablename, hdr, ma, bs,
					SUM((1-null_frac)*avg_width) AS datawidth,
					MAX(null_frac) AS maxfracsum,
					hdr+(
						SELECT 1+count(*)/8
						FROM pg_stats s2
						WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
					) AS nullhdr
				FROM pg_stats s, constants
				GROUP BY 1,2,3,4,5
			) AS foo
		), table_bloat AS (
			SELECT
				schemaname, tablename, cc.relpages, bs,
				CEIL((cc.reltuples*((datahdr+ma-
					(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta
			FROM bloat_info
			JOIN pg_class cc ON cc.relname = bloat_info.tablename
			JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema'
		), index_bloat AS (
			SELECT
				schemaname, tablename, bs,
				coalesce(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, c2.relpages,0 AS ipages,
				COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
			FROM bloat_info
			JOIN pg_class cc ON cc.relname = bloat_info.tablename
			JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema'
			JOIN pg_index i ON indrelid = cc.oid
			JOIN pg_class c2 ON c2.oid = i.indexrelid
		)
		SELECT
			type, schemaname, object_name, bloat, $waste_query as waste
		FROM
		(SELECT
			'table' as type,
			schemaname,
			tablename as object_name,
			ROUND(CASE WHEN otta=0 THEN 0.0 ELSE table_bloat.relpages/otta::numeric END,1) AS bloat,
			CASE WHEN relpages < otta THEN '0' ELSE (bs*(table_bloat.relpages-otta)::bigint)::bigint END AS raw_waste
		FROM
			table_bloat
				UNION
		SELECT
			'index' as type,
			schemaname,
			tablename || '::' || iname as object_name,
			ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) AS bloat,
			CASE WHEN ipages < iotta THEN '0' ELSE (bs*(ipages-iotta))::bigint END AS raw_waste
		FROM
			index_bloat) bloat_summary
		ORDER BY raw_waste DESC, bloat DESC
	EOF
}

query_pg-mandelbrot() { ## : show the mandlebrot set
	handle_help "$@" <<-EOF
		Copied from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
	EOF

	read -r -d '' QUERY <<-EOF
		WITH RECURSIVE Z(IX, IY, CX, CY, X, Y, I) AS (
				SELECT IX, IY, X::float, Y::float, X::float, Y::float, 0
				FROM (select -2.2 + 0.031 * i, i from generate_series(0,101) as i) as xgen(x,ix),
					 (select -1.5 + 0.031 * i, i from generate_series(0,101) as i) as ygen(y,iy)
				UNION ALL
				SELECT IX, IY, CX, CY, X * X - Y * Y + CX AS X, Y * X * 2 + CY, I + 1
				FROM Z
				WHERE X * X + Y * Y < 16::float
				AND I < 100
		)
		SELECT array_to_string(array_agg(SUBSTRING(' .,,,-----++++%%%%@@@@#### ', LEAST(GREATEST(I,1),27), 1)),'')
		FROM (
			SELECT IX, IY, MAX(I) AS I
			FROM Z
			GROUP BY IY, IX
			ORDER BY IY, IX
		 ) AS ZT
		GROUP BY IY
		ORDER BY IY
	EOF
}

query_pg-index-usage() { ## : calculates your index hit rate (effective databases are at 99% and up)
	handle_help "$@" <<-EOF
		Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands

		-1 means "Insufficient Data", this was changed to a numeric value to be acceptable to InfluxDB
	EOF

	fields="index_usage=1;rows=2"
	tags="relname=0"

	read -r -d '' QUERY <<-EOF
		SELECT relname,
			CASE COALESCE(idx_scan, 0)
				WHEN 0 THEN -1
				ELSE (100 * idx_scan / (seq_scan + idx_scan))
			END percent_of_times_index_used,
			n_live_tup rows_in_table
		 FROM
			pg_stat_user_tables
		ORDER BY
			n_live_tup DESC
	EOF
}

query_pg-index-size() { ##? [--human]: show table and index bloat in your database ordered by most wasteful
	handle_help "$@" <<-EOF
		Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
	EOF

	if [[ -n "$arg_human" ]]; then
		human_size="pg_size_pretty(sum(c.relpages::bigint*8192)::bigint)"
	else
		human_size="sum(c.relpages::bigint*8192)::bigint"
	fi

	fields="size=1"
	tags="relname=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			c.relname AS name,
			$human_size AS size
		FROM pg_class c
		LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
		WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
		AND n.nspname !~ '^pg_toast'
		AND c.relkind='i'
		GROUP BY c.relname
		ORDER BY sum(c.relpages) DESC
	EOF
}

query_pg-long-running-queries() { ## : show all queries longer than five minutes by descending duration
	handle_help "$@" <<-EOF
		Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			pid,
			now() - pg_stat_activity.query_start AS duration,
			query AS query
		FROM
			pg_stat_activity
		WHERE
			pg_stat_activity.query <> ''::text
			AND state <> 'idle'
			AND now() - pg_stat_activity.query_start > interval '5 minutes'
		ORDER BY
			now() - pg_stat_activity.query_start DESC
	EOF

}

query_pg-table-size() { ##? [--human]: show the size of the tables (excluding indexes), descending by size
	handle_help "$@" <<-EOF
		Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
	EOF

	if [[ -n "$arg_human" ]]; then
		# TODO: there has got to be a less ugly way to do this
		human_size="pg_size_pretty("
		human_after=")"
	else
		human_size=""
		human_after=""
	fi

	fields="table_size=1;index_size=2"
	tags="relname=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			c.relname AS name,
			${human_size}pg_table_size(c.oid)${human_after} AS size,
			${human_size}pg_indexes_size(c.oid)${human_after} AS index_size
		FROM pg_class c
		LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
		WHERE n.nspname NOT IN ('pg_catalog', 'information_schema')
		AND n.nspname !~ '^pg_toast'
		AND c.relkind='r'
		ORDER BY pg_table_size(c.oid) DESC
	EOF
}

query_pg-unused-indexes() { ##? [--human]: show unused and almost unused indexes
	handle_help "$@" <<-EOF
		Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands

		From their documentation:

		> "Ordered by their size relative to the number of index scans.
		> Exclude indexes of very small tables (less than 5 pages),
		> where the planner will almost invariably select a sequential scan,
		> but may not in the future as the table grows"
	EOF

	if [[ -n "$arg_human" ]]; then
		# TODO: there has got to be a less ugly way to do this
		human_size="pg_size_pretty("
		human_after=")"
	else
		human_size=""
		human_after=""
	fi

	fields="index_size=2;index_scans=3"
	tags="table=0;index=1"

	read -r -d '' QUERY <<-EOF
		SELECT
			schemaname || '.' || relname AS table,
			indexrelname AS index,
			${human_size}pg_relation_size(i.indexrelid)${human_after} AS index_size,
			COALESCE(idx_scan, 0) as index_scans
		FROM pg_stat_user_indexes ui
		JOIN pg_index i ON ui.indexrelid = i.indexrelid
		WHERE NOT indisunique AND idx_scan < 50 AND pg_relation_size(relid) > 5 * 8192
		ORDER BY
			pg_relation_size(i.indexrelid) / nullif(idx_scan, 0) DESC NULLS FIRST,
			pg_relation_size(i.indexrelid) DESC
	EOF
}

query_pg-vacuum-stats() { ## : show dead rows and whether an automatic vacuum is expected to be triggered
	handle_help "$@" <<-EOF
		Originally from: https://github.com/heroku/heroku-pg-extras/tree/master/commands
	EOF

	read -r -d '' QUERY <<-EOF
		WITH table_opts AS (
			SELECT
				pg_class.oid, relname, nspname, array_to_string(reloptions, '') AS relopts
			FROM
				 pg_class INNER JOIN pg_namespace ns ON relnamespace = ns.oid
		), vacuum_settings AS (
			SELECT
				oid, relname, nspname,
				CASE
					WHEN relopts LIKE '%autovacuum_vacuum_threshold%'
						THEN substring(relopts, '.*autovacuum_vacuum_threshold=([0-9.]+).*')::integer
						ELSE current_setting('autovacuum_vacuum_threshold')::integer
					END AS autovacuum_vacuum_threshold,
				CASE
					WHEN relopts LIKE '%autovacuum_vacuum_scale_factor%'
						THEN substring(relopts, '.*autovacuum_vacuum_scale_factor=([0-9.]+).*')::real
						ELSE current_setting('autovacuum_vacuum_scale_factor')::real
					END AS autovacuum_vacuum_scale_factor
			FROM
				table_opts
		)
		SELECT
			vacuum_settings.nspname AS schema,
			vacuum_settings.relname AS table,
			to_char(psut.last_vacuum, 'YYYY-MM-DD HH24:MI') AS last_vacuum,
			to_char(psut.last_autovacuum, 'YYYY-MM-DD HH24:MI') AS last_autovacuum,
			to_char(pg_class.reltuples, '9G999G999G999') AS rowcount,
			to_char(psut.n_dead_tup, '9G999G999G999') AS dead_rowcount,
			to_char(autovacuum_vacuum_threshold
					 + (autovacuum_vacuum_scale_factor::numeric * pg_class.reltuples), '9G999G999G999') AS autovacuum_threshold,
			CASE
				WHEN autovacuum_vacuum_threshold + (autovacuum_vacuum_scale_factor::numeric * pg_class.reltuples) < psut.n_dead_tup
				THEN 'yes'
			END AS expect_autovacuum
		FROM
			pg_stat_user_tables psut INNER JOIN pg_class ON psut.relid = pg_class.oid
				INNER JOIN vacuum_settings ON pg_class.oid = vacuum_settings.oid
		ORDER BY 1
	EOF
}

query_pg-stat-bgwriter() { ## : Stats about the behaviour of the bgwriter, checkpoints, buffers, etc.
	handle_help "$@" <<-EOF
	EOF

	fields="checkpoints_timed=0;checkpoints_req=1;checkpoint_write_time=2;checkpoint_sync_time=3;buffers_checkpoint=4;buffers_clean=5;maxwritten_clean=6;buffers_backend=7;buffers_backend_fsync=8;buffers_alloc=9"

	read -r -d '' QUERY <<-EOF
		SELECT
			checkpoints_timed,
			checkpoints_req,
			checkpoint_write_time,
			checkpoint_sync_time,
			buffers_checkpoint,
			buffers_clean,
			maxwritten_clean,
			buffers_backend,
			buffers_backend_fsync,
			buffers_alloc
		FROM
			pg_stat_bgwriter
	EOF
}


query_pg-stat-user-tables() { ## : stats about tables (tuples, index scans, vacuums, analyzes)
	handle_help "$@" <<-EOF
	EOF

	tags="schemaname=0;relname=1"
	fields="seq_scan=2;seq_tup_read=3;idx_scan=4;idx_tup_fetch=5;n_tup_ins=6;n_tup_upd=7;n_tup_del=8;n_tup_hot_upd=9;n_live_tup=10;n_dead_tup=11;vacuum_count=12;autovacuum_count=13;analyze_count=14;autoanalyze_count=15"

	read -r -d '' QUERY <<-EOF
		SELECT
			schemaname,
			relname,
			seq_scan,
			seq_tup_read,
			COALESCE(idx_scan, 0),
			COALESCE(idx_tup_fetch, 0),
			n_tup_ins,
			n_tup_upd,
			n_tup_del,
			n_tup_hot_upd,
			n_live_tup,
			n_dead_tup,
			vacuum_count,
			autovacuum_count,
			analyze_count,
			autoanalyze_count
		FROM
			pg_stat_user_tables
	EOF
}

query_data-origin-distribution-merged() {
	summary="$(summary_statistics data $human)"
	username=$(gdpr_safe job.user_id galaxy_user)

	read -r -d '' QUERY <<-EOF
		WITH asdf AS (
			SELECT
				'total' as origin,
				sum(coalesce(dataset.total_size, dataset.file_size, 0)) AS data,
				date_trunc('month', dataset.create_time) as created,
				$username
			FROM job
			LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id
			LEFT JOIN history_dataset_association ON job_to_output_dataset.dataset_id = history_dataset_association.id
			LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
			GROUP BY
				origin, job.user_id, created, galaxy_user
		)
		SELECT
			origin,
			round(data, 2 - length(data::text)),
			created,
			galaxy_user
		FROM asdf
		ORDER BY galaxy_user desc
	EOF
}

query_data-origin-distribution() { ## : data sources (uploaded vs derived)
	handle_help "$@" <<-EOF
		Break down the source of data in the server, uploaded data vs derived (created as output from a tool)

		Recommendation is to run with GDPR_MODE so you can safely share this information:

		    GDPR_MODE=\$(openssl rand -hex 24 2>/dev/null) gxadmin tsvquery data-origin-distribution | gzip > data-origin.tsv.gz

		Output looks like:

		    derived 130000000000    2019-07-01 00:00:00     fff4f423d06
		    derived 61000000000     2019-08-01 00:00:00     fff4f423d06
		    created 340000000       2019-08-01 00:00:00     fff4f423d06
		    created 19000000000     2019-07-01 00:00:00     fff4f423d06
		    derived 180000000000    2019-04-01 00:00:00     ffd28c0cf8c
		    created 21000000000     2019-04-01 00:00:00     ffd28c0cf8c
		    derived 1700000000      2019-06-01 00:00:00     ffd28c0cf8c
		    derived 120000000       2019-06-01 00:00:00     ffcb567a837
		    created 62000000        2019-05-01 00:00:00     ffcb567a837
		    created 52000000        2019-06-01 00:00:00     ffcb567a837
		    derived 34000000        2019-07-01 00:00:00     ffcb567a837

	EOF

	username=$(gdpr_safe job.user_id galaxy_user)
	echo "$username"

	read -r -d '' QUERY <<-EOF
		WITH asdf AS (
			SELECT
				case when job.tool_id = 'upload1' then 'created' else 'derived' end AS origin,
				sum(coalesce(dataset.total_size, dataset.file_size, 0)) AS data,
				date_trunc('month', dataset.create_time) as created,
				$username
			FROM job
			LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id
			LEFT JOIN history_dataset_association ON job_to_output_dataset.dataset_id = history_dataset_association.id
			LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
			GROUP BY
				origin, job.user_id, created, galaxy_user
		)
		SELECT
			origin,
			round(data, 2 - length(data::text)),
			created,
			galaxy_user
		FROM asdf
		ORDER BY galaxy_user, created desc
	EOF
}

query_data-origin-distribution-summary() { ##? [--human]: breakdown of data sources (uploaded vs derived)
	handle_help "$@" <<-EOF
		Break down the source of data in the server, uploaded data vs derived (created as output from a tool)

		This query builds a table with the volume of derivced and uploaded data per user, and then summarizes this:

		origin  |   min   | quant_1st | median  |  mean  | quant_3rd | perc_95 | perc_99 |  max  | stddev
		------- | ------- | --------- | ------- | ------ | --------- | ------- | ------- | ----- | --------
		created | 0 bytes | 17 MB     | 458 MB  | 36 GB  | 11 GB     | 130 GB  | 568 GB  | 11 TB | 257 GB
		derived | 0 bytes | 39 MB     | 1751 MB | 200 GB | 28 GB     | 478 GB  | 2699 GB | 90 TB | 2279 GB
	EOF

	tags="dataorigin=0"
	fields="min=1;q1=2;median=3;mean=4;q3=5;p95=6;p99=7;max=8;sum=9;stddev=10"

	summary="$(summary_statistics data $arg_human)"

	read -r -d '' QUERY <<-EOF
		WITH user_job_data AS (
			SELECT
				case when job.tool_id = 'upload1' then 'created' else 'derived' end AS origin,
				sum(coalesce(dataset.total_size, dataset.file_size, 0)) AS data,
				job.user_id
			FROM job
			LEFT JOIN job_to_output_dataset ON job.id = job_to_output_dataset.job_id
			LEFT JOIN history_dataset_association ON job_to_output_dataset.dataset_id = history_dataset_association.id
			LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
			GROUP BY
				origin, job.user_id
		)

		SELECT
			origin,
			$summary
		FROM user_job_data
		GROUP BY origin
	EOF
}

query_aq() { ## <table> <column> <-|job_id [job_id [...]]>: Given a list of IDs from a table (e.g. 'job'), access a specific column from that table
	handle_help "$@" <<-EOF
	EOF

	table=$1; shift
	column=$1; shift

	if [[ "$1" == "-" ]]; then
		# read jobs from stdin
		ids=$(cat | paste -s -d' ')
	else
		# read from $@
		ids=$@;
	fi

	ids_string=$(join_by ',' ${ids[@]})

	read -r -d '' QUERY <<-EOF
		SELECT
			$column
		FROM $table
		WHERE id in ($ids_string)
	EOF
}

query_q() { ## <query>: Passes a raw SQL query directly through to the database
	handle_help "$@" <<-EOF
	EOF

	QUERY="$@"
}

query_good-for-pulsar() { ## : Look for jobs EU would like to send to pulsar
	handle_help "$@" <<-EOF
		This selects all jobs and finds two things:
		- sum of input sizes
		- runtime

		and then returns a simple /score/ of (input/runtime) and sorts on that
		hopefully identifying things with small inputs and long runtimes.
	EOF

	read -r -d '' QUERY <<-EOF
		WITH job_data AS (
			SELECT
				regexp_replace(j.tool_id, '.*toolshed.*/repos/', '') as tool_id,
				SUM(d.total_size) AS size,
				MIN(jmn.metric_value) AS runtime,
				SUM(d.total_size) / min(jmn.metric_value) AS score
			FROM job j
			LEFT JOIN job_to_input_dataset jtid ON j.id = jtid.job_id
			LEFT JOIN history_dataset_association hda ON jtid.dataset_id = hda.id
			LEFT JOIN dataset d ON hda.dataset_id = d.id
			LEFT JOIN job_metric_numeric jmn ON j.id = jmn.job_id
			WHERE jmn.metric_name = 'runtime_seconds'
				AND d.total_size IS NOT NULL
			GROUP BY j.id
		)

		SELECT
			tool_id,
			percentile_cont(0.50) WITHIN GROUP (ORDER BY score) ::bigint AS median_score,
			percentile_cont(0.50) WITHIN GROUP (ORDER BY runtime) ::bigint AS median_runtime,
			pg_size_pretty(percentile_cont(0.50) WITHIN GROUP (ORDER BY size) ::bigint) AS median_size,
			count(*)
		FROM job_data
		GROUP BY tool_id
		ORDER BY median_score ASC
	EOF
}

query_jobs-ready-to-run() { ## : Find jobs ready to run (Mostly a performance test)
	handle_help "$@" <<-EOF
		Mostly a performance test
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			EXISTS(
				SELECT
					history_dataset_association.id,
					history_dataset_association.history_id,
					history_dataset_association.dataset_id,
					history_dataset_association.create_time,
					history_dataset_association.update_time,
					history_dataset_association.state,
					history_dataset_association.copied_from_history_dataset_association_id,
					history_dataset_association.copied_from_library_dataset_dataset_association_id,
					history_dataset_association.name,
					history_dataset_association.info,
					history_dataset_association.blurb,
					history_dataset_association.peek,
					history_dataset_association.tool_version,
					history_dataset_association.extension,
					history_dataset_association.metadata,
					history_dataset_association.parent_id,
					history_dataset_association.designation,
					history_dataset_association.deleted,
					history_dataset_association.visible,
					history_dataset_association.extended_metadata_id,
					history_dataset_association.version,
					history_dataset_association.hid,
					history_dataset_association.purged,
					history_dataset_association.hidden_beneath_collection_instance_id
				FROM
					history_dataset_association,
					job_to_output_dataset
				WHERE
					job.id = job_to_output_dataset.job_id
					AND history_dataset_association.id
						= job_to_output_dataset.dataset_id
					AND history_dataset_association.deleted = true
			)
				AS anon_1,
			EXISTS(
				SELECT
					history_dataset_collection_association.id
				FROM
					history_dataset_collection_association,
					job_to_output_dataset_collection
				WHERE
					job.id = job_to_output_dataset_collection.job_id
					AND history_dataset_collection_association.id
						= job_to_output_dataset_collection.dataset_collection_id
					AND history_dataset_collection_association.deleted
						= true
			)
				AS anon_2,
			job.id AS job_id,
			job.create_time AS job_create_time,
			job.update_time AS job_update_time,
			job.history_id AS job_history_id,
			job.library_folder_id AS job_library_folder_id,
			job.tool_id AS job_tool_id,
			job.tool_version AS job_tool_version,
			job.state AS job_state,
			job.info AS job_info,
			job.copied_from_job_id AS job_copied_from_job_id,
			job.command_line AS job_command_line,
			job.dependencies AS job_dependencies,
			job.param_filename AS job_param_filename,
			job.runner_name AS job_runner_name_1,
			job.stdout AS job_stdout,
			job.stderr AS job_stderr,
			job.exit_code AS job_exit_code,
			job.traceback AS job_traceback,
			job.session_id AS job_session_id,
			job.user_id AS job_user_id,
			job.job_runner_name AS job_job_runner_name,
			job.job_runner_external_id
				AS job_job_runner_external_id,
			job.destination_id AS job_destination_id,
			job.destination_params AS job_destination_params,
			job.object_store_id AS job_object_store_id,
			job.imported AS job_imported,
			job.params AS job_params,
			job.handler AS job_handler
		FROM
			job
		WHERE
			job.state = 'new'
			AND job.handler IS NULL
			AND job.handler = 'handler0'
	EOF
}

query_workers() { ## : Retrieve a list of Galaxy worker processes
	handle_help "$@" <<-EOF
		This retrieves a list of Galaxy worker processes.
		This functionality is only available on Galaxy
		20.01 or later.

		server_name         | hostname | pid
		------------------- | -------- | ---
		main.web.1          | server1  | 123
		main.job-handlers.1 | server2  | 456

	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			server_name,
			hostname,
			pid
		FROM
			worker_process
		WHERE
			pid IS NOT NULL
	EOF
}

query_pg-rows-per-table() { ## : Print rows per table
	handle_help "$@" <<-EOF
		This retrieves a list of tables in the database and their size
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
		    n.nspname AS table_schema,
		    c.relname AS table_name,
		    c.reltuples AS rows
		FROM
		    pg_class AS c
		    JOIN pg_namespace AS n ON
		            n.oid = c.relnamespace
		WHERE
		    c.relkind = 'r'
		    AND n.nspname
		        NOT IN (
		                'information_schema',
		                'pg_catalog'
		            )
		ORDER BY
		    c.reltuples DESC
	EOF
}

query_dump-users() { ##? [--apikey] [--email] : Dump the list of users and their emails
	handle_help "$@" <<-EOF
		This retrieves a list of all users
	EOF

	if [[ -n "$arg_email"  ]]; then
		email=",$(gdpr_safe email email)"
	else
		email=""
	fi

	if [[ -n "$arg_apikey"  ]]; then
		apikey="apikey"
		apikeyjoin="left join api_keys "
	else
		email=""
	fi

	read -r -d '' QUERY <<-EOF
		SELECT
			username
			$email
		FROM
			galaxy_user
		ORDER BY
		    id desc
	EOF
}

query_job-metrics() { ## : Retrieves input size, runtime, memory for all executed jobs
	handle_help "$@" <<-EOF
		Dump runtime stats for ALL jobs:

		    $ gxadmin query job-metrics
		    job_id  |               tool_id                |  state  | total_filesize | num_files | runtime_seconds |   slots   | memory_bytes |        create_time
		    --------+--------------------------------------+---------+----------------+-----------+-----------------+-----------+--------------+----------------------------
		    19      | require_format                       | ok      |           5098 |         1 |       4.0000000 | 1.0000000 |              | 2018-12-04 17:17:02.148239
		    48      | __SET_METADATA__                     | ok      |                |         0 |       4.0000000 | 1.0000000 |              | 2019-02-05 22:46:33.848141
		    49      | upload1                              | ok      |                |           |       6.0000000 | 1.0000000 |              | 2019-02-05 22:58:41.610146
		    50      | upload1                              | ok      |                |           |       5.0000000 | 1.0000000 |              | 2019-02-07 21:30:11.645826
		    51      | upload1                              | ok      |                |           |       5.0000000 | 1.0000000 |              | 2019-02-07 21:30:12.18259
		    52      | upload1                              | ok      |                |           |       7.0000000 | 1.0000000 |              | 2019-02-07 21:31:15.304868
		    54      | upload1                              | ok      |                |           |       5.0000000 | 1.0000000 |              | 2019-02-07 21:31:16.116164
		    53      | upload1                              | ok      |                |           |       7.0000000 | 1.0000000 |              | 2019-02-07 21:31:15.665948
			...
		    989     | circos                               | error   |         671523 |        12 |      14.0000000 | 1.0000000 |              | 2020-04-30 10:13:33.872872
		    990     | circos                               | error   |         671523 |        12 |      10.0000000 | 1.0000000 |              | 2020-04-30 10:19:36.72646
		    991     | circos                               | error   |         671523 |        12 |      10.0000000 | 1.0000000 |              | 2020-04-30 10:21:00.460471
		    992     | circos                               | ok      |         671523 |        12 |      21.0000000 | 1.0000000 |              | 2020-04-30 10:31:35.366913
		    993     | circos                               | error   |         588747 |         6 |       8.0000000 | 1.0000000 |              | 2020-04-30 11:12:17.340591
		    994     | circos                               | error   |         588747 |         6 |       9.0000000 | 1.0000000 |              | 2020-04-30 11:15:27.076502
		    995     | circos                               | error   |         588747 |         6 |      42.0000000 | 1.0000000 |              | 2020-04-30 11:16:41.19449
		    996     | circos                               | ok      |         588747 |         6 |      48.0000000 | 1.0000000 |              | 2020-04-30 11:21:51.49684
		    997     | circos                               | ok      |         588747 |         6 |      46.0000000 | 1.0000000 |              | 2020-04-30 11:23:52.455536

		**WARNING**

		!> This can be very slow for large databases and there is no tool filtering; every job + dataset table record are scanned.
	EOF

	read -r -d '' QUERY <<-EOF
		WITH dataset_filesizes AS (
			SELECT
				job_to_input_dataset.job_id, sum(file_size) AS total_filesize,
				count(file_size) AS num_files FROM dataset
			LEFT JOIN job_to_input_dataset ON dataset.id = job_to_input_dataset.dataset_id
			GROUP BY job_to_input_dataset.job_id
		)

		SELECT
			job.id AS job_id,
			job.tool_id,
			job.state,
			dataset_filesizes.total_filesize,
			dataset_filesizes.num_files,
			jmn1.metric_value AS runtime_seconds,
			jmn2.metric_value AS slots,
			jmn3.metric_value AS memory_bytes,
			job.create_time AS create_time
		FROM job
		LEFT JOIN dataset_filesizes ON job.id = dataset_filesizes.job_id
		LEFT JOIN (SELECT * FROM job_metric_numeric WHERE job_metric_numeric.metric_name = 'runtime_seconds') jmn1 ON jmn1.job_id = job.id
		LEFT JOIN (SELECT * FROM job_metric_numeric WHERE job_metric_numeric.metric_name = 'galaxy_slots') jmn2 ON jmn2.job_id = job.id
		LEFT JOIN (SELECT * FROM job_metric_numeric WHERE job_metric_numeric.metric_name = 'memory.memsw.max_usage_in_bytes') jmn3 ON jmn3.job_id = job.id
	EOF
}

query_history-core-hours()  { ##? [history-name-ilike]: Produces the median core hour count for histories matching a name filter
	handle_help "$@" <<-EOF
	EOF

	read -r -d '' QUERY <<-EOF
		WITH
			toolavg
				AS (
					SELECT
						tool_id, history_id, round(sum(a.metric_value * b.metric_value / 3600), 2) AS cpu_hours
					FROM
						job_metric_numeric AS a, job_metric_numeric AS b, job
					WHERE
						b.job_id = a.job_id
						AND a.job_id = job.id
						AND a.metric_name = 'runtime_seconds'
						AND b.metric_name = 'galaxy_slots'
						AND history_id in (select id from history where name ilike '%$arg_history_name%')
					GROUP BY
						tool_id, history_id
				),
			toolmedian
				AS (
					SELECT
						toolavg.tool_id, percentile_cont(0.5) WITHIN GROUP (ORDER BY cpu_hours)
					FROM
						toolavg
					GROUP BY
						toolavg.tool_id
				)
		SELECT
			sum(toolmedian.percentile_cont)
		FROM
			toolmedian
	EOF
}

query_pulsar-gb-transferred()  { ##? [--bymonth] [--byrunner] [--human]: Counts up datasets transferred and output file size produced by jobs running on destinations like pulsar_*
	handle_help "$@" <<-EOF
	EOF

	orderby=""
	declare -a ordering

	if [[ -n "$arg_bymonth" ]]; then
		orderby="ORDER BY sent.month ASC"
		ordering+=("sent.month")
	fi

	if [[ -n "$arg_byrunner" ]]; then
		if [[ ! -n "$arg_bymonth" ]]; then
			orderby="ORDER BY sent.runner ASC"
		fi
		ordering+=("sent.runner")
	fi

	if [[ -n "$arg_human" ]]; then
		pg_size_pretty_a="pg_size_pretty("
		pg_size_pretty_b=")"
	fi

	groupby=""
	data_string="${ordering[*]}"
	csvcols="${data_string//${IFS:0:1}/,}"
	if (( ${#ordering[@]} > 0 )); then
		groupby="GROUP BY $csvcols"
		csvcols="$csvcols,"
	fi

	read -r -d '' QUERY <<-EOF
		WITH
			sent
				AS (
					SELECT
						job.id AS job,
						date_trunc('month', job.create_time)::DATE AS month,
						job.job_runner_name AS runner,
						ds_in.total_size AS size
					FROM
						job
						LEFT JOIN job_to_input_dataset AS jtid ON job.id = jtid.job_id
						LEFT JOIN history_dataset_association AS hda_in ON jtid.dataset_id = hda_in.id
						LEFT JOIN dataset AS ds_in ON hda_in.dataset_id = ds_in.id
					WHERE
						job_runner_name LIKE 'pulsar%'
					ORDER BY
						job.id DESC
				),
			recv
				AS (
					SELECT
						job.id AS job,
						date_trunc('month', job.create_time)::DATE AS month,
						job.job_runner_name AS runner,
						ds_out.total_size AS size
					FROM
						job
						LEFT JOIN job_to_output_dataset AS jtid ON job.id = jtid.job_id
						LEFT JOIN history_dataset_association AS hda_out ON jtid.dataset_id = hda_out.id
						LEFT JOIN dataset AS ds_out ON hda_out.dataset_id = ds_out.id
					WHERE
						job_runner_name LIKE 'pulsar%'
					ORDER BY
						job.id DESC
				)
		SELECT
			$csvcols ${pg_size_pretty_a}sum(sent.size)${pg_size_pretty_b} AS sent, ${pg_size_pretty_a}sum(recv.size)${pg_size_pretty_b} AS recv, count(sent.size) as job_count
		FROM
			sent FULL JOIN recv ON sent.job = recv.job
		$groupby
		$orderby
	EOF
}
registered_subcommands="$registered_subcommands filter"
_filter_short_help="Some text filtering and processing commands"


filter_pg2md() { ## : Convert postgres table format outputs to something that can be pasted as markdown
	handle_help "$@" <<-EOF
		Imagine doing something like:

		    $ gxadmin query active-users 2018 | gxadmin filter pg2md
		    unique_users  |        month
		    ------------- | --------------------
		    811           | 2018-12-01 00:00:00
		    658           | 2018-11-01 00:00:00
		    583           | 2018-10-01 00:00:00
		    444           | 2018-09-01 00:00:00
		    342           | 2018-08-01 00:00:00
		    379           | 2018-07-01 00:00:00
		    370           | 2018-06-01 00:00:00
		    330           | 2018-05-01 00:00:00
		    274           | 2018-04-01 00:00:00
		    186           | 2018-03-01 00:00:00
		    168           | 2018-02-01 00:00:00
		    122           | 2018-01-01 00:00:00

		and it should produce a nicely formatted table
	EOF
	cat | sed 's/--+--/- | -/g;s/^\(\s\+\)\([^|]\+\) |/\2 \1|/g' | head -n -2
}

filter_identicon(){ ## : Convert an input data stream into an identicon (e.g. with hostname)
	handle_help "$@" <<-EOF
		Given an input data stream, digest it, and colour it using the same logic as digest-color

		    $ echo test | ./gxadmin filter identicon
		      ██████
		    ██      ██
		    ██  ██  ██
		      ██████
		    ██  ██  ██

		(Imagine that it is a nice pink/blue colour scheme)
	EOF

	cat | $GXADMIN_PYTHON -c "$identicon_script"
}

filter_digest-color() { ## : Color an input stream based on the contents (e.g. hostname)
	handle_help "$@" <<-EOF
		Colors entire input stream based on digest of entire input's contents.
		Mostly useful for colouring a hostname or some similar value.

		    $ echo test | ./gxadmin filter digest-color
		    test

		(Imagine that it is light blue text on a pink background)

		**NOTE** If the output isn't coloured properly, try:

		    export TERM=screen-256color
	EOF

	data="$(cat)"
	fg_color=$((16#$(echo "$data" | perl -pe "chomp if eof" | sha256sum | cut -c1-2)))
	bg_color=$((fg_color + 15))

	echo "$(tput setaf $fg_color)$(tput setab $bg_color)${data}$(tput sgr0)"
}
registered_subcommands="$registered_subcommands uwsgi"
_uwsgi_short_help="Galaxy Handler/Zergling management (SystemD only)"


uwsgi_stats-influx(){ ## <addr>: InfluxDB formatted output for the current stats
	handle_help "$@" <<-EOF
		Contact a specific uWSGI stats address (requires uwsgi binary on path)
		and requests the current stats + formats them for InfluxDB. For some
		reason it has trouble with localhost vs IP address, so recommend that
		you use IP.

		    $ gxadmin uwsgi stats-influx 127.0.0.1:9191
		    uwsgi.locks,addr=127.0.0.1:9191,group=user_0 count=0
		    uwsgi.locks,addr=127.0.0.1:9191,group=signal count=0
		    uwsgi.locks,addr=127.0.0.1:9191,group=filemon count=0
		    uwsgi.locks,addr=127.0.0.1:9191,group=timer count=0
		    uwsgi.locks,addr=127.0.0.1:9191,group=rbtimer count=0
		    uwsgi.locks,addr=127.0.0.1:9191,group=cron count=0
		    uwsgi.locks,addr=127.0.0.1:9191,group=thunder count=2006859
		    uwsgi.locks,addr=127.0.0.1:9191,group=rpc count=0
		    uwsgi.locks,addr=127.0.0.1:9191,group=snmp count=0
		    uwsgi.general,addr=127.0.0.1:9191 listen_queue=0,listen_queue_errors=0,load=0,signal_queue=0
		    uwsgi.sockets,addr=127.0.0.1:9191,name=127.0.0.1:4001,proto=uwsgi queue=0,max_queue=100,shared=0,can_offload=0
		    uwsgi.worker,addr=127.0.0.1:9191,id=1 accepting=1,requests=65312,exceptions=526,harakiri_count=26,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=17433008661,respawn_count=27,tx=15850829410,avg_rt=71724
		    uwsgi.worker,addr=127.0.0.1:9191,id=2 accepting=1,requests=67495,exceptions=472,harakiri_count=51,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=15467746010,respawn_count=52,tx=15830867066,avg_rt=65380
		    uwsgi.worker,addr=127.0.0.1:9191,id=3 accepting=1,requests=67270,exceptions=520,harakiri_count=35,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14162158015,respawn_count=36,tx=15799661545,avg_rt=73366
		    uwsgi.worker,addr=127.0.0.1:9191,id=4 accepting=1,requests=66434,exceptions=540,harakiri_count=34,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=15740205807,respawn_count=35,tx=16231969649,avg_rt=75468
		    uwsgi.worker,addr=127.0.0.1:9191,id=5 accepting=1,requests=67021,exceptions=534,harakiri_count=38,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14573155758,respawn_count=39,tx=16517287963,avg_rt=140855
		    uwsgi.worker,addr=127.0.0.1:9191,id=6 accepting=1,requests=66810,exceptions=483,harakiri_count=24,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=19107513635,respawn_count=25,tx=15945313469,avg_rt=64032
		    uwsgi.worker,addr=127.0.0.1:9191,id=7 accepting=1,requests=66544,exceptions=460,harakiri_count=35,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14240478391,respawn_count=36,tx=15499531841,avg_rt=114981
		    uwsgi.worker,addr=127.0.0.1:9191,id=8 accepting=1,requests=67577,exceptions=517,harakiri_count=35,signals=0,signal_queue=0,status="idle",rss=0,vsz=0,running_time=14767971195,respawn_count=36,tx=15780639229,avg_rt=201275

		For multiple zerglings you can run this for each and just 2>/dev/null

		    PATH=/opt/galaxy/venv/bin:/sbin:/bin:/usr/sbin:/usr/bin gxadmin uwsgi stats-influx 127.0.0.1:9190 2>/dev/null
		    PATH=/opt/galaxy/venv/bin:/sbin:/bin:/usr/sbin:/usr/bin gxadmin uwsgi stats-influx 127.0.0.1:9191 2>/dev/null
		    exit 0

		And it will fetch only data for responding uwsgis.
	EOF
	address="$1"; shift

	# fetch data
	uwsgi=$(which uwsgi)
	data="$($uwsgi --connect-and-read "$address" 2>&1)"

	echo "$data" | \
		jq -r '.locks[] | to_entries[] |  "uwsgi.locks,addr='"$address"',group=\(.key) count=\(.value)"' | \
		sed 's/group=user 0/group=user_0/g'

	echo "$data" | \
		jq -r '. | "uwsgi.general,addr='"$address"' listen_queue=\(.listen_queue),listen_queue_errors=\(.listen_queue_errors),load=\(.load),signal_queue=\(.signal_queue)"'

	echo "$data" | \
		jq -r '.sockets[] | "uwsgi.sockets,addr='"$address"',name=\(.name),proto=\(.proto) queue=\(.queue),max_queue=\(.max_queue),shared=\(.shared),can_offload=\(.can_offload)"'

	echo "$data" | \
		jq -r '.workers[] | "uwsgi.worker,addr='"$address"',id=\(.id) accepting=\(.accepting),requests=\(.requests),exceptions=\(.exceptions),harakiri_count=\(.harakiri_count),signals=\(.signals),signal_queue=\(.signal_queue),status=\"\(.status)\",rss=\(.rss),vsz=\(.vsz),running_time=\(.running_time),respawn_count=\(.respawn_count),tx=\(.tx),avg_rt=\(.avg_rt)"' | \
		sed 's/"busy"/1/g;s/"idle"/0/g;'
}

uwsgi_status() { ## : Current system status
	handle_help "$@" <<-EOF
		Current status of all uwsgi processes
	EOF

	echo "galaxy-zergpool:   $(systemctl status galaxy-zergpool | grep Active:)"
	for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dzergling.slice/ -mindepth 1 -type d -name 'galaxy-zergling*service'); do
		service=$(basename "$folder")
		echo "$service $(systemctl status $service | grep Active:)"
	done
	for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dhandler.slice/ -mindepth 1 -type d -name 'galaxy-handler*service'); do
		service=$(basename "$folder")
		echo "$service $(systemctl status $service | grep Active:)"
	done

	for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dworkflow\\x2dscheduler.slice/ -mindepth 1 -type d -name 'galaxy-workflow*service'); do
		service=$(basename "$folder")
		echo "$service $(systemctl status $service | grep Active:)"
	done
}

uwsgi_memory() { ## : Current system memory usage
	handle_help "$@" <<-EOF
		Obtain memory usage of the various Galaxy processes

		Also consider using systemd-cgtop
	EOF

	echo "galaxy-zergpool.service $(cat /sys/fs/cgroup/memory/system.slice/galaxy-zergpool.service/memory.memsw.usage_in_bytes)"
	for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dzergling.slice/ -mindepth 1 -type d -name 'galaxy-zergling*service'); do
		service=$(basename "$folder")
		echo "$service $(cat "$folder/memory.memsw.usage_in_bytes")"
	done

	for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dhandler.slice/ -mindepth 1 -type d -name 'galaxy-handler*service'); do
		service=$(basename "$folder")
		echo "$service $(cat "$folder/memory.memsw.usage_in_bytes")"
	done


	for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dworkflow\\x2dscheduler.slice/ -mindepth 1 -type d -name 'galaxy-workflow*service'); do
		service=$(basename "$folder")
		echo "$service $(cat "$folder/memory.memsw.usage_in_bytes")"
	done

}

uwsgi_pids() { ## : Galaxy process PIDs
	handle_help "$@" <<-EOF
		Obtain memory usage of the various Galaxy processes
	EOF

	echo "galaxy-zergpool:   $(systemctl status galaxy-zergpool   | grep PID: | cut -d' ' -f 4)"
	for i in {0..3}; do
		echo "galaxy-zergling@$i: $(systemctl status galaxy-zergling@$i | grep PID: | cut -d' ' -f 4)"
	done
	for i in {0..11}; do
		echo "galaxy-handler@$i:  $(systemctl status galaxy-handler@$i | grep PID: | cut -d' ' -f 4)"
	done
}

uwsgi_stats() { ## : uwsgi stats
	handle_help "$@" <<-EOF
	EOF

	uwsgi_stats-influx 127.0.0.1:4010 2>/dev/null || true
	uwsgi_stats-influx 127.0.0.1:4011 2>/dev/null || true
	uwsgi_stats-influx 127.0.0.1:4012 2>/dev/null || true
	uwsgi_stats-influx 127.0.0.1:4013 2>/dev/null || true

	echo "systemd service=galaxy-zergpool memory=$(cat /sys/fs/cgroup/memory/system.slice/galaxy-zergpool.service/memory.memsw.usage_in_bytes)"
	for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dzergling.slice/ -mindepth 1 -type d -name 'galaxy-zergling*service'); do
		service=$(basename "$folder")
		echo "systemd service=$service memory=$(cat "$folder/memory.memsw.usage_in_bytes")"
	done

	for folder in $(find /sys/fs/cgroup/memory/system.slice/system-galaxy\\x2dhandler.slice/ -mindepth 1 -type d -name 'galaxy-handler*service'); do
		service=$(basename "$folder")
		echo "systemd service=$service memory=$(cat "$folder/memory.memsw.usage_in_bytes")"
	done
}

uwsgi_zerg-swap() { ## : Swap zerglings in order (unintelligent version)
	handle_help "$@" <<-EOF
		This is the "dumb" version which loops across the zerglings and restarts them in series
	EOF

	active="$(uwsgi_status | grep -v inactive | grep zergling | cut -d: -f1 | wc -l)"
	if (( active == 1 )); then
		error "Won't restart when only a single zergling is active as this will trigger downtime"
		exit 1
	fi

	# Obtain current running ones
	for zergling in $(uwsgi_status | grep -v inactive | grep zergling | cut -d: -f1); do
		echo "Restarting $zergling"
		systemctl restart "$zergling"
		number=$(echo "$zergling" | sed 's/.*@//g')
		stats="127.0.0.1:401$number"
		wait_for_url "$stats"
		success "$zergling is alive"
	done
}

uwsgi_zerg-scale-up() { ## : Add another zergling to deal with high load
	handle_help "$@" <<-EOF
	EOF

	# Obtain current running ones
	an_inactive=$(status | grep inactive | grep zergling | head -n1 | cut -d: -f1)
	echo "$an_inactive is inactive, starting"
	systemctl start "$an_inactive"
}

#uwsgi_zerg-scale-down() { ## : Remove an extraneous zergling
	#handle_help "$@" <<-EOF
	#EOF

	## Obtain current running ones
	#active="$(status | grep -v inactive | grep zergling | cut -d: -f1)"
	#num_active=$(echo "$active" | wc -l)

	#if (( num_active <= 2 )); then
		#error "We do not permit scaling below 2 zerglings"
		#exit 1
	#fi

	## Kill highest numbered one
	#warning "$to_kill will be stopped"
	#systemctl stop "$to_kill"
#}

uwsgi_zerg-strace() { ## [number]: Strace a zergling
	handle_help "$@" <<-EOF
	EOF

	if (( $# > 0 )); then
		number=$1;
	fi
	procs=$(uwsgi_pids | grep "zergling@${number}" | cut -d':' -f2 | sed 's/\s*//g;' | tr '\n' ' ' | sed 's/^\s*//;s/\s*$//g;s/ / -p /g')
	strace -e open,openat -p "$procs"
}

uwsgi_handler-strace() { ## [number]: Strace a handler
	handle_help "$@" <<-EOF
	EOF

	if (( $# > 0 )); then
		number=$1;
	fi
	procs=$(uwsgi_pids | grep "handler@${number}" | cut -d':' -f2 | sed 's/\s*//g;' | tr '\n' ' ' | sed 's/^\s*//;s/\s*$//g;s/ / -p /g')
	strace -e open,openat -p "$procs"
}

uwsgi_handler-restart() { ## : Restart all handlers
	handle_help "$@" <<-EOF
	EOF

	for i in {0..11}; do
		systemctl restart galaxy-handler@$i &
	done
}

uwsgi_lastlog(){ ## : Fetch the number of seconds since the last log message was written
	handle_help "$@" <<-EOF
		Lets you know if any of your workers or handlers have maybe stopped processing jobs.

		    $ gxadmin uwsgi lastlog
		    journalctl.lastlog,service=galaxy-handler@0 seconds=8
		    journalctl.lastlog,service=galaxy-handler@1 seconds=2
		    journalctl.lastlog,service=galaxy-handler@2 seconds=186
		    journalctl.lastlog,service=galaxy-handler@3 seconds=19
		    journalctl.lastlog,service=galaxy-handler@4 seconds=6
		    journalctl.lastlog,service=galaxy-handler@5 seconds=80
		    journalctl.lastlog,service=galaxy-handler@6 seconds=52
		    journalctl.lastlog,service=galaxy-handler@7 seconds=1
		    journalctl.lastlog,service=galaxy-handler@8 seconds=79
		    journalctl.lastlog,service=galaxy-handler@9 seconds=40
		    journalctl.lastlog,service=galaxy-handler@10 seconds=123
		    journalctl.lastlog,service=galaxy-handler@11 seconds=13
		    journalctl.lastlog,service=galaxy-zergling@0 seconds=0
		    journalctl.lastlog,service=galaxy-zergling@1 seconds=0
		    journalctl.lastlog,service=galaxy-zergling@2 seconds=2866

	EOF

	NOW=$(date +%s)

	for i in {0..11}; do
		lines=$(journalctl -u galaxy-handler@$i -n 1 --no-pager)
		if [[ ! $lines == *"No entries"* ]]; then
			timestamp=$(journalctl -u galaxy-handler@$i -n 1 --no-pager | grep -v 'Logs begin' | awk '{print $1" "$2" "$3}');
			unix=$(date -d "$timestamp" +%s)
			date_diff=$((NOW - unix));
			echo "journalctl.lastlog,service=galaxy-handler@$i seconds=$date_diff";
		fi
	done

	for i in {0..4}; do
		lines=$(journalctl -u galaxy-zergling@$i -n 1 --no-pager)
		if [[ ! $lines == *"No entries"* ]]; then
			timestamp=$(journalctl -u galaxy-zergling@$i -n 1 --no-pager | grep -v 'Logs begin' | awk '{print $1" "$2" "$3}');
			unix=$(date -d "$timestamp" +%s)
			date_diff=$((NOW - unix));
			echo "journalctl.lastlog,service=galaxy-zergling@$i seconds=$date_diff";
		fi
	done
}

uwsgi_active-users() { ## : Count active users
	handle_help "$@" <<-EOF
		Count active users and return an influx compatible measurement
	EOF

	users=$(journalctl -u galaxy-zerg* --since '10 minutes ago' | grep pid: | awk '{print $10}' | sort -u | wc -l)
	echo "journalctl.activeusers,service=galaxy count=${users}"
}
registered_subcommands="$registered_subcommands galaxy"
_galaxy_short_help="Galaxy Administration"


galaxy_cleanup() { ## [days]: Cleanup histories/hdas/etc for past N days (default=30)
	handle_help "$@" <<-EOF
		Cleanup histories/hdas/etc for past N days using the python objects-based method
	EOF

	days=30
	if (( $# > 0 )); then
		days=$1
	fi

	assert_set_env GALAXY_ROOT
	assert_set_env GALAXY_LOG_DIR
	mkdir -p $GALAXY_LOG_DIR

	run_date=$(date --rfc-3339=seconds)

	for action in {delete_userless_histories,delete_exported_histories,purge_deleted_users,purge_deleted_histories,purge_deleted_hdas,purge_historyless_hdas,purge_hdas_of_purged_histories,delete_datasets,purge_datasets}; do
		start_time=$(date +%s)
		cd "$GALAXY_ROOT";
		$GXADMIN_PYTHON \
			"$GALAXY_ROOT/scripts/cleanup_datasets/pgcleanup.py" \
			-U \
			-o "$days" \
			-l "$GALAXY_LOG_DIR" \
			-s $action \
			-w 128MB \
			 >> "$GALAXY_LOG_DIR/cleanup-${run_date}-${action}.log" \
			2>> "$GALAXY_LOG_DIR/cleanup-${run_date}-${action}.err";
		finish_time=$(date +%s)
		runtime=$(( finish_time - start_time ))

		# Something that telegraf can consume
		ec=$?
		if (( ec == 0 )); then
			echo "cleanup_datasets,group=$action success=1,runtime=$runtime"
		else
			echo "cleanup_datasets,group=$action success=0,runtime=$runtime"
		fi
	done
}

galaxy_encode() { ## <encoded-id>: Encode an ID
	handle_help "$@" <<-EOF
		Encode an unecoded ID

		    $ GALAXY_CONFIG_FILE=/srv/galaxy/config/galaxy.yml GALAXY_ROOT=/srv/galaxy/server gxadmin galaxy encode 123
		    6fe4eea8c591a9c4
	EOF

	assert_set_env VIRTUAL_ENV
	assert_set_env GALAXY_ROOT
	assert_set_env GALAXY_CONFIG_FILE

	assert_count $# 1 "Missing ID"

	python "$GALAXY_ROOT/scripts/secret_decoder_ring.py" -c "$GALAXY_CONFIG_FILE" encode "$1"
}

galaxy_decode() { ## <encoded-id>: Decode an encoded ID
	handle_help "$@" <<-EOF
		Encode an unecoded ID

		    $ GALAXY_CONFIG_FILE=/srv/galaxy/config/galaxy.yml GALAXY_ROOT=/srv/galaxy/server gxadmin galaxy decode 6fe4eea8c591a9c4
		    123
	EOF

	assert_set_env VIRTUAL_ENV
	assert_set_env GALAXY_ROOT
	assert_set_env GALAXY_CONFIG_FILE

	assert_count $# 1 "Missing ID"

	python "$GALAXY_ROOT/scripts/secret_decoder_ring.py" -c "$GALAXY_CONFIG_FILE" decode "$1"
}

galaxy_migrate-tool-install-to-sqlite() { ## : Converts normal potsgres toolshed repository tables into the SQLite version
	handle_help "$@" <<-EOF
		    $ gxadmin migrate-tool-install-to-sqlite
		    Creating new sqlite database: galaxy_install.sqlite
		    Migrating tables
		      export: tool_shed_repository
		      import: tool_shed_repository
		      ...
		      export: repository_repository_dependency_association
		      import: repository_repository_dependency_association
		    Complete
	EOF

	# Export tables
	if [[ -f  galaxy_install.sqlite ]]; then
		error "galaxy_install.sqlite exists, not overwriting"
		exit 1
	fi

	success "Creating new sqlite database: galaxy_install.sqlite"
	empty_schema=$(mktemp)
	echo "
	PRAGMA foreign_keys=OFF;
	BEGIN TRANSACTION;
	CREATE TABLE migrate_version (
		repository_id VARCHAR(250) NOT NULL,
		repository_path TEXT,
		version INTEGER,
		PRIMARY KEY (repository_id)
	);
	CREATE TABLE tool_shed_repository (
		id INTEGER NOT NULL,
		create_time DATETIME,
		update_time DATETIME,
		tool_shed VARCHAR(255),
		name VARCHAR(255),
		description TEXT,
		owner VARCHAR(255),
		changeset_revision VARCHAR(255),
		deleted BOOLEAN,
		metadata BLOB,
		includes_datatypes BOOLEAN,
		installed_changeset_revision VARCHAR(255),
		uninstalled BOOLEAN,
		dist_to_shed BOOLEAN,
		ctx_rev VARCHAR(10),
		status VARCHAR(255),
		error_message TEXT,
		tool_shed_status BLOB,
		PRIMARY KEY (id),
		CHECK (deleted IN (0, 1))
	);
	CREATE TABLE tool_version (
		id INTEGER NOT NULL,
		create_time DATETIME,
		update_time DATETIME,
		tool_id VARCHAR(255),
		tool_shed_repository_id INTEGER,
		PRIMARY KEY (id),
		FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id)
	);
	CREATE TABLE tool_version_association (
		id INTEGER NOT NULL,
		tool_id INTEGER NOT NULL,
		parent_id INTEGER NOT NULL,
		PRIMARY KEY (id),
		FOREIGN KEY(tool_id) REFERENCES tool_version (id),
		FOREIGN KEY(parent_id) REFERENCES tool_version (id)
	);
	CREATE TABLE migrate_tools (
		repository_id VARCHAR(255),
		repository_path TEXT,
		version INTEGER
	);
	CREATE TABLE tool_dependency (
		id INTEGER NOT NULL,
		create_time DATETIME,
		update_time DATETIME,
		tool_shed_repository_id INTEGER NOT NULL,
		name VARCHAR(255),
		version VARCHAR(40),
		type VARCHAR(40),
		status VARCHAR(255),
		error_message TEXT,
		PRIMARY KEY (id),
		FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id)
	);
	CREATE TABLE repository_dependency (
		id INTEGER NOT NULL,
		create_time DATETIME,
		update_time DATETIME,
		tool_shed_repository_id INTEGER NOT NULL,
		PRIMARY KEY (id),
		FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id)
	);
	CREATE TABLE repository_repository_dependency_association (
		id INTEGER NOT NULL,
		create_time DATETIME,
		update_time DATETIME,
		tool_shed_repository_id INTEGER,
		repository_dependency_id INTEGER,
		PRIMARY KEY (id),
		FOREIGN KEY(tool_shed_repository_id) REFERENCES tool_shed_repository (id),
		FOREIGN KEY(repository_dependency_id) REFERENCES repository_dependency (id)
	);
	CREATE INDEX ix_tool_shed_repository_name ON tool_shed_repository (name);
	CREATE INDEX ix_tool_shed_repository_deleted ON tool_shed_repository (deleted);
	CREATE INDEX ix_tool_shed_repository_tool_shed ON tool_shed_repository (tool_shed);
	CREATE INDEX ix_tool_shed_repository_changeset_revision ON tool_shed_repository (changeset_revision);
	CREATE INDEX ix_tool_shed_repository_owner ON tool_shed_repository (owner);
	CREATE INDEX ix_tool_shed_repository_includes_datatypes ON tool_shed_repository (includes_datatypes);
	CREATE INDEX ix_tool_version_tool_shed_repository_id ON tool_version (tool_shed_repository_id);
	CREATE INDEX ix_tool_version_association_tool_id ON tool_version_association (tool_id);
	CREATE INDEX ix_tool_version_association_parent_id ON tool_version_association (parent_id);
	CREATE INDEX ix_tool_dependency_tool_shed_repository_id ON tool_dependency (tool_shed_repository_id);
	CREATE INDEX ix_repository_dependency_tool_shed_repository_id ON repository_dependency (tool_shed_repository_id);
	CREATE INDEX ix_repository_repository_dependency_association_tool_shed_repository_id ON repository_repository_dependency_association (tool_shed_repository_id);
	CREATE INDEX ix_repository_repository_dependency_association_repository_dependency_id ON repository_repository_dependency_association (repository_dependency_id);
	COMMIT;
	" > "${empty_schema}"
	sqlite3 galaxy_install.sqlite < "${empty_schema}"
	rm "${empty_schema}"

	success "Migrating tables"

	# tool_shed_repository is special :(
	table=tool_shed_repository
	success "  export: ${table}"
	export_csv=$(mktemp /tmp/tmp.gxadmin.${table}.XXXXXXXXXXX)
	psql -c "COPY (select
		id, create_time, update_time, tool_shed, name, description, owner, changeset_revision, case when deleted then 1 else 0 end, metadata, includes_datatypes, installed_changeset_revision, uninstalled, dist_to_shed, ctx_rev, status, error_message, tool_shed_status from $table) to STDOUT with CSV" > "$export_csv";

	success "  import: ${table}"
	echo ".mode csv
.import ${export_csv} ${table}" | sqlite3 galaxy_install.sqlite
	ec=$?
	if (( ec == 0 )); then
		rm "${export_csv}";
	else
		error "  sql: ${export_csv}"
	fi

	sqlite3 galaxy_install.sqlite "insert into migrate_version values ('ToolShedInstall', 'lib/galaxy/model/tool_shed_install/migrate', 17)"
	# the rest are sane!
	for table in {tool_version,tool_version_association,migrate_tools,tool_dependency,repository_dependency,repository_repository_dependency_association}; do
		success "  export: ${table}"
		export_csv=$(mktemp /tmp/tmp.gxadmin.${table}.XXXXXXXXXXX)
		psql -c "COPY (select * from $table) to STDOUT with CSV" > "$export_csv";

		success "  import: ${table}"
		echo ".mode csv
.import ${export_csv} ${table}" | sqlite3 galaxy_install.sqlite
		ec=$?
		if (( ec == 0 )); then
			rm "${export_csv}"
		else
			error "  sql: ${export_csv}"
		fi
	done

	success "Complete"
}

galaxy_migrate-tool-install-from-sqlite() { ## [sqlite-db]: Converts SQLite version into normal potsgres toolshed repository tables
	handle_help "$@" <<-EOF
		    $ gxadmin migrate-tool-install-from-sqlite db.sqlite
		    Migrating tables
		      export: tool_shed_repository
		      import: tool_shed_repository
		      ...
		      export: repository_repository_dependency_association
		      import: repository_repository_dependency_association
		    Complete
	EOF
	assert_count $# 1 "Must provide database"

	success "Migrating tables"

	# Truncate first, since need to be removed in a specific ordering (probably
	# cascade would work but cascade is SCARY)
	psql -c "TRUNCATE TABLE repository_repository_dependency_association, repository_dependency, tool_dependency, migrate_tools, tool_version_association, tool_version, tool_shed_repository"
	ec1=$?

	# If you truncate this one, then it also removes galaxy codebase version,
	# breaking everything.
	psql -c "delete from migrate_version where repository_id = 'ToolShedInstall'"
	ec2=$?

	if (( ec1 == 0  && ec2 == 0 )); then
		success "  Cleaned"
	else
		error "  Failed to clean"
	fi

	# Then load data in same 'safe' order as in sqlite version
	for table in {migrate_version,tool_shed_repository,tool_version,tool_version_association,migrate_tools,tool_dependency,repository_dependency,repository_repository_dependency_association}; do
		success "  export: ${table}"
		export_csv=$(mktemp /tmp/tmp.gxadmin.${table}.XXXXXXXXXXX)

		if [[ "$table" == "tool_shed_repository" ]]; then
			# Might have json instead of hexencoded json
			sqlite3 -csv "$1" "select * from $table" | python -c "$hexencodefield9" > "$export_csv";
		elif [[ "$table" == "tool_version" ]]; then
			# Might have null values quoted as empty string
			sqlite3 -csv "$1" "select * from $table" | sed 's/""$//' > "$export_csv";
		else
			sqlite3 -csv "$1" "select * from $table" > "$export_csv";
		fi

		psql -c "COPY $table FROM STDIN with CSV" < "$export_csv";
		ec=$?

		if (( ec == 0 )); then
			rm "${export_csv}"
			success "  import: ${table}"
		else
			error "  csv: ${export_csv}"
			break
		fi
	done

	# Update sequences
	success "Updating sequences"
	for table in {tool_shed_repository,tool_version,tool_version_association,tool_dependency,repository_dependency,repository_repository_dependency_association}; do
		psql -c "SELECT setval('${table}_id_seq', (SELECT MAX(id) FROM ${table}));"
	done

	success "Comparing table counts"

	for table in {migrate_version,tool_shed_repository,tool_version,tool_version_association,migrate_tools,tool_dependency,repository_dependency,repository_repository_dependency_association}; do
		postgres=$(psql -c "COPY (select count(*) from $table) to STDOUT with CSV")
		sqlite=$(sqlite3 -csv "$1" "select count(*) from $table")

		if (( postgres == sqlite )); then
			success "  $table: $postgres == $sqlite"
		else
			error "  $table: $postgres != $sqlite"
		fi
	done

	success "Complete"
}

galaxy_amqp-test() { ## <amqp_url>: Test a given AMQP URL for connectivity
	handle_help "$@" <<-EOF
		**Note**: must be run in Galaxy Virtualenv

		Simple script to test an AMQP URL. If connection works, it will
		immediately exit with a python object:

		    $ gxadmin galaxy amqp-test pyamqp://user:pass@host:port/vhost
		    <kombu.transport.pyamqp.Channel object at 0x7fe56a836290>

		    $ gxadmin galaxy amqp-test pyamqp://user:pass@host:port/vhost?ssl=1
		    <kombu.transport.pyamqp.Channel object at 0x7fe56a836290>

		Some errors look weird:

		*wrong password*:

		    $ gxadmin galaxy amqp-test ...
		    Traceback
		    ...
		    amqp.exceptions.AccessRefused: (0, 0): (403) ACCESS_REFUSED - Login was refused using authentication mechanism AMQPLAIN. For details see the broker logfile.

		*wrong host*, *inaccessible host*, basically any other problem:

		    $ gxadmin galaxy amqp-test ...
		    [hangs forever]

		Basically any error results in a hang forever. It is recommended you run it with a timeout:

		    $ timeout 1 gxadmin galaxy amqp-test
		    $

	EOF
	assert_count $# 1 "Must provide URL"

	URL="$1"

	script=$(cat <<EOF
from kombu import Connection
from kombu import Exchange

with Connection('$URL') as conn:
	print(conn.default_channel)
EOF
)

	python -c "$script"
}


galaxy_cleanup-jwd() { ## <working_dir> [1|months ago]: (NEW) Cleanup job working directories
	handle_help "$@" <<-EOF
		Scans through a provided job working directory subfolder, e.g.
		job_working_directory/ without the 005 subdir to find all folders which
		were changed less recently than N months.

		 Then it takes the first 1000 entries and cleans them up. This was more
		of a hack to handle the fact that the list produced by find is really
		long, and the for loop hangs until it's done generating the list.
	EOF

	assert_count_ge $# 1 "Must supply at least working dir"

	jwd=$1
	months=${2:-1}

	# scan a given directory for jwds.
	for possible_dir in $(find "$jwd" -maxdepth 3 -mindepth 3  -not -newermt "$months month ago" | grep -v _cleared_contents | head -n 1000); do
			job_id=$(basename "$possible_dir")
			if [[ "$job_id" =~ ^[0-9]{3,}$ ]]; then
					state=$(psql -c "COPY (select state from job where id = $job_id) to STDOUT with CSV")
					if [[ "$state" == "error" ]] || [[ "$state" == "ok" ]] || [[ "$state" == "deleted" ]] || [[ "$state" == "paused" ]] || [[ "$state" == "new_manually_dropped" ]]; then
							echo "- $possible_dir $job_id $state"
							rm -rf "$possible_dir"
					else
							echo "? $possible_dir $job_id $state"
					fi
			fi
	done
}

galaxy_fix-conda-env() { ## <conda_dir/envs/>: Fix broken conda environments
	handle_help "$@" <<-EOF
		Fixes any broken conda environments which are missing the activate scripts in their correct locations.

		MUST end in /envs/
	EOF

	conda_dir=$(dirname $1)

	for i in $(find ${conda_dir}/envs/ -mindepth 1 -maxdepth 1 -type d);
	do
		if [ ! -f $i/bin/conda ]; then
			${conda_dir}/bin/conda ..checkenv bash $i
		fi
	done
}

galaxy_fav_tools() { ## : Favourite tools in Galaxy DB
	handle_help "$@" <<-EOF
		What are people's fav tools
	EOF

	psql -qAt -c "select value::json->'tools' from user_preference where name = 'favorites'" | jq -s add | jq '. | @tsv' -r | sed 's/\t/\n/g' | sort | uniq -c | sort -n
}

galaxy_ie-list() { ## : List GIEs
	handle_help "$@" <<-EOF
		List running IEs (based on output of queue-detail)

		    $ gxadmin local ie-list
		    running  6134209  jupyter_notebook  helena-rasche

	EOF

	gxadmin query queue-detail  | grep interactive | awk -F'|' '{print $1" "$2" "$4" "$5}' | sed 's/interactive_tool_//g' | column -t
}

galaxy_ie-show() { ## [gie-galaxy-job-id]: Report on a GIE [HTCondor Only!]
	handle_help "$@" <<-EOF
		The new versions of IEs are IMPOSSIBLE to track down, so here's a handy
		function for you to make you hate life a lil bit less.


		    $ gxadmin local ie-show 6134209
		    Galaxy ID: 6134209
		    Condor ID: 1489026
		    Container: jupyter_notebook
		    Running on: cn032.bi.uni-freiburg.de
		    Job script: /data/dnb01/galaxy_db/pbs/galaxy_6134209.sh
		    Working dir: /data/dnb02/galaxy_db/job_working_directory/006/134/6134209
		    Runtime:
		    {
		      8888: {
		        host: 132.230.68.65,
		        port: 1035,
		        protocol: tcp,
		        tool_port: 8888
		      }
		    }

		    Mapping
		    Key: aaaabbbbccccdddd
		    KeyType: interactivetoolentrypoint
		    Token: 00001111222233334444555566667777
		    URL: https://aaaabbbbccccdddd-00001111222233334444555566667777.interactivetoolentrypoint.interactivetool.usegalaxy.eu

		It's probably only going to work for us? Sorry :)

	EOF

	assert_set_env GALAXY_CONFIG_FILE

	id=$1

	cluster_id=$(gxadmin jsonquery queue-detail | jq ".[] | select(.id == $1) | .extid" -r)
	cluster_id_c=$(echo -n "$cluster_id" | wc -c)
	echo "Galaxy ID: $id"
	echo "Condor ID: $cluster_id"
	if (( cluster_id_c  == 0 )); then
		exit 1
	fi

	container=$(condor_q $cluster_id -autoformat JobDescription | sed 's/interactive_tool_//g')
	echo "Container: $container"
	running_on=$(condor_q $cluster_id -autoformat RemoteHost | sed 's/.*@//g')
	echo "Running on: $running_on"

	job_script=$(condor_q $cluster_id -autoformat Cmd)
	echo "Job script: $job_script"

	working_dir=$(grep ^cd $job_script | grep working_directory | sed 's/^cd //g')
	echo "Working dir: $working_dir"

	echo "Runtime:"
	cat $working_dir/container_runtime.json  | jq -S

	port=$(cat $working_dir/container_runtime.json  | jq .[].port -r)
	host=$(cat $working_dir/container_runtime.json  | jq .[].host -r)

	echo
	echo "Mapping"

	token=$(gxadmin tsvquery q "select token from interactivetool_entry_point where job_id = $id")

	data_dir=$(cat "$GALAXY_CONFIG_FILE" | grep interactivetools_map |cut -f2 -d'='| tr -d " ")

	key=$(sqlite3 $data_dir 'select key from gxitproxy where token="'$token'"')
	key_type=$(sqlite3 $data_dir 'select key_type from gxitproxy where token="'$token'"')
	token=$(sqlite3 $data_dir 'select token from gxitproxy where token="'$token'"')

	echo "Key: $key"
	echo "KeyType: $key_type"
	echo "Token: $token"

	echo "URL: https://${key}-${token}.${key_type}.interactivetool.usegalaxy.eu"
}

registered_subcommands="$registered_subcommands report"
_report_short_help="Various (rich) reports. Consider https://github.com/ttscoff/mdless for easier reading in the terminal! Or | pandoc | lynx -stdin"


align_cols() {
	#cat | sed 's/\t/\t | /g' | column -t -s'	'
	cat | sed 's/\t/ | /g'
}


report_user-info(){ ## <user_id|username|email>: Quick overview of a Galaxy user in your system
	handle_help "$@" <<-EOF
		This command lets you quickly find out information about a user. The output is formatted as markdown by default.

		Consider [mdless](https://github.com/ttscoff/mdless) for easier reading in the terminal!

		    $ gxadmin report user-info helena-rasche
		    # Galaxy User 580

		      Property | Value
		    ---------- | -----
		            ID | helena-rasche (id=580) hxr@informatik.uni-freiburg.de
		       Created | 2017-07-26 14:47:37.575484
		    Properties | ext=f deleted=f purged=f active=t
		    Disk Usage | 137 GB

		    ## Groups/Roles

		    Groups: training-freiburg-rnaseq-2018, training-emc2018
		    Roles: admin, Backofen

		    ## Recent Jobs

		    Tool ID                      | Status | Created                    | Exit Code | Runtime
		    ----                         | ----   | ----                       | ---       | ----
		    Grep1                        | ok     | 2019-01-21 07:27:24.472706 | 0         | 00:01:19
		    CONVERTER_fasta_to_tabular   | ok     | 2019-01-21 07:27:24.339862 | 0         | 00:03:34
		    secure_hash_message_digest   | ok     | 2019-01-18 16:43:44.262265 | 0         | 00:00:08
		    CONVERTER_gz_to_uncompressed | ok     | 2019-01-18 10:18:23.99171  | 0         | 00:10:02
		    upload1                      | ok     | 2019-01-18 08:44:24.955622 | 0         | 01:11:07
		    echo_main_env                | ok     | 2019-01-17 16:45:04.019233 | 0         | 00:00:29
		    secure_hash_message_digest   | ok     | 2019-01-17 16:03:21.33665  | 0         | 00:00:07
		    secure_hash_message_digest   | ok     | 2019-01-17 16:03:20.937433 | 0         | 00:00:09

		    ## Largest Histories

		    History ID | Name                         | Size
		    ----       | ----                         | ----
		    20467      | imported: RNASEQ             | 52 GB
		    94748      | imported: ChIP-seq           | 49 GB
		    94761      | reduced history-export problem |   49 GB
		    102448     | Wheat Genome                 | 42 GB
		    38923      | imported: Zooplankton        | 29 GB
		    64974      | imported: 65991-A            | 17 GB
		    20488      | Unnamed history              | 15 GB
		    19414      | Unnamed history              | 12 GB
		    92407      | Testing                      | 11 GB
		    60522      | example1/wf3-shed-tools.ga   | 5923 MB
	EOF

	# Metada
	read -r -d '' qstr <<-EOF
		SELECT
			username, id, email, create_time AT TIME ZONE 'UTC' as create_time, external, deleted, purged, active, pg_size_pretty(disk_usage)
		FROM
			galaxy_user
		WHERE
			(galaxy_user.email = '$1' or galaxy_user.username = '$1' or galaxy_user.id = CAST(REGEXP_REPLACE(COALESCE('$1','0'), '[^0-9]+', '0', 'g') AS INTEGER))
	EOF
	results=$(query_tsv "$qstr")
	user_id=$(echo "$results" | awk '{print $2}')

	if [[ -z "$user_id" ]]; then
		error "Unknown user"
		exit 1
	fi

	# Groups
	read -r -d '' qstr <<-EOF
		select string_agg(galaxy_group.name, ', ') from user_group_association, galaxy_group where user_id = $user_id and user_group_association.group_id = galaxy_group.id
	EOF
	group_membership=$(query_tsv "$qstr")

	read -r -d '' qstr <<-EOF
		select string_agg(role.name, ', ') from user_role_association, role where user_id = $user_id and user_role_association.role_id = role.id and role.type not in ('private', 'sharing')
	EOF
	role_membership=$(query_tsv "$qstr")

	# Recent jobs
	read -r -d '' qstr <<-EOF
		SELECT
			job.id, tool_id, state, create_time AT TIME ZONE 'UTC' as create_time, exit_code, metric_value::text::interval
		FROM
			job, job_metric_numeric
		WHERE
			job.user_id = $user_id and job.id = job_metric_numeric.job_id and metric_name = 'runtime_seconds' order by job.id desc limit 10
	EOF
	recent_jobs=$(query_tsv "$qstr")
	recent_jobs2=$(printf "ID\tTool ID\tStatus\tCreated\tExit Code\tRuntime\n----\t----\t----\t----\t---\t----\n%s" "$recent_jobs" | align_cols)

	# running jobs
	read -r -d '' qstr <<-EOF
		SELECT
			id, tool_id, tool_version, handler, destination_id, state, create_time AT TIME ZONE 'UTC' as create_time, now() AT TIME ZONE 'UTC' - create_time  AT TIME ZONE 'UTC' as runtime
		FROM
			job
		WHERE
			job.user_id = $user_id and job.state in ('running', 'queued', 'new') order by job.id desc
	EOF
	running_jobs=$(query_tsv "$qstr")
	running_jobs2=$(printf "Tool ID\tTool Version\tHandler\tDestination\tState\tCreated\tRuntime\n----\t----\t----\t----\t----\t---\t----\n%s" "$running_jobs" | align_cols)

	# Recent workflows
	read -r -d '' qstr <<-EOF
		SELECT
			workflow_invocation.id,
			workflow_invocation.create_time,
			state,
			scheduler,
			handler,
			substring(workflow.name, 0, 20) AS workflow,
			substring(history.name, 0, 20) AS history
		FROM
			workflow_invocation
			JOIN workflow ON workflow_invocation.workflow_id = workflow.id
			JOIN history ON workflow_invocation.history_id = history.id
		WHERE
			history.user_id = $user_id
		ORDER BY
			create_time DESC
		LIMIT
			25
	EOF
	recent_wf=$(query_tsv "$qstr")
	recent_wf=$(printf "ID\tCreated\tState\tScheduler\tHandler\tWorkflow\tHistory\n----\t----\t----\t----\t----\t----\t----\n%s" "$recent_wf" | align_cols)

	# Largest Histories
	read -r -d '' qstr <<-EOF
		SELECT
			history.id, history.name, pg_size_pretty(sum(COALESCE(dataset.total_size, 0))) AS size, history.deleted, history.purged
		FROM
			history
			LEFT JOIN history_dataset_association ON history.id = history_dataset_association.history_id
			LEFT JOIN dataset ON history_dataset_association.dataset_id = dataset.id
		WHERE
			history.user_id = $user_id
		GROUP BY
			history.id, history.name
		ORDER BY
			sum(COALESCE(dataset.total_size, 0)) DESC
		LIMIT
			10
	EOF
	largest_histories=$(query_tsv "$qstr")
	largest_histories=$(printf "History ID\tName\tSize\tDeleted\tPurged\n----\t----\t----\t----\t----\n%s" "$largest_histories" | align_cols)

	read -r -d '' template <<EOF
# Galaxy User $user_id

  Property | Value
---------- | -----
        ID | %s (id=%s) %s
   Created | %s %s
Properties | ext=%s deleted=%s purged=%s active=%s
Disk Usage | %s %s

## Groups/Roles

Groups: %s
Roles: %s

## Recent Jobs

%s

## Running Jobs

%s

## Recent Workflow Invocations

%s

## Largest Histories

%s
\n
EOF
	# shellcheck disable=SC2086
	# shellcheck disable=SC2059
	printf "$template" $results "$group_membership" "$role_membership" "$recent_jobs2" "$running_jobs2" "$recent_wf" "$largest_histories"
}

report_job-info(){ ## <id>: Information about a specific job
	handle_help "$@" <<-EOF
		    $ gxadmin report job-info 1
		     tool_id | state | username |        create_time         | job_runner_name | job_runner_external_id
		    ---------+-------+----------+----------------------------+-----------------+------------------------
		     upload1 | ok    | admin    | 2012-12-06 16:34:27.492711 | local:///       | 9347
	EOF

	assert_count $# 1 "Missing Job ID"
	username=$(gdpr_safe galaxy_user.username username)
	job_id=$1

	###       ###
	#  JOB INFO #
	###       ###

	read -r -d '' qstr <<-EOF
		SELECT
			job.tool_id,
			job.state,
			job.handler,
			job.create_time AT TIME ZONE 'UTC' as create_time,
			COALESCE(job.job_runner_name, '?'),
			COALESCE(job.job_runner_external_id, '?')
		FROM job
		WHERE job.id = $job_id
	EOF
	results=$(query_tsv "$qstr")
	read -r -d '' template <<EOF
# Galaxy Job $job_id

Property      | Value
------------- | -----
         Tool | %s
        State | %s
      Handler | %s
      Created | %s %s
Job Runner/ID | %s / %s
EOF
	# shellcheck disable=SC2059
	# shellcheck disable=SC2086
	printf "$template"  $results

	###       ###
	# USER INFO #
	###       ###

	read -r -d '' qstr <<-EOF
		SELECT
			$username,
			job.user_id
		FROM job, galaxy_user
		WHERE job.id = $job_id AND job.user_id = galaxy_user.id
	EOF
	job_owner=$(query_tsv "$qstr")
	# shellcheck disable=SC2183
	# shellcheck disable=SC2086
	printf "\n        Owner | %s (id=%s)\n\n" $job_owner


	###         ###
	# DEST PARAMS #
	###         ###
	read -r -d '' qstr <<-EOF
		SELECT convert_from(destination_params::bytea, 'UTF8')
		FROM job
		WHERE job.id = $job_id
	EOF
	results=$(query_tsv_json "$qstr")
	printf "## Destination Parameters\n\n"
	# shellcheck disable=SC2016
	tbl=$(echo "$results" | jq -S '. | to_entries[] | [.key, .value] | @tsv' -r | sed 's/\t/\t`/g;s/$/`/g')
	# shellcheck disable=SC2059
	printf "Key\tValue\n---\t---\n$tbl" | align_cols

	###          ###
	# DEPENDENCIES #
	###          ###
	read -r -d '' qstr <<-EOF
		SELECT convert_from(dependencies::bytea, 'UTF8')
		FROM job
		WHERE job.id = $job_id
	EOF
	results=$(query_tsv_json "$qstr")
	printf "\n## Dependencies\n\n"
	tbl=$(echo "$results" | jq -S '.[] | [.name, .version, .dependency_type, .cacheable, .exact, .environment_path, .model_class] | @tsv' -r)
	# shellcheck disable=SC2059
	printf "Name\tVersion\tDependency Type\tCacheable\tExact\tEnvironment Path\tModel Class\n----\t-------\t---------------\t---------\t-----\t----------------\t-----------\n$tbl\n" | align_cols

	###        ###
	# JOB PARAMS #
	###        ###
	read -r -d '' qstr <<-EOF
		SELECT j.name, j.value
		FROM job_parameter j
		WHERE j.job_id = $job_id
	EOF
	results=$(query_tsv "$qstr")
	printf "\n## Tool Parameters\n\n"
	# shellcheck disable=SC2059
	printf "Name\tSettings\n-----\t------------\n$results\n\n" | sed 's/[\"\`]//g' | align_cols

	###      ###
	#  INPUTS  #
	###      ###
	read -r -d '' qstr <<-EOF
	SELECT
		jtod.job_id,
	    hda.name,
	    hda.extension,
	    hda.id,
	    hda.state,
	    hda.deleted,
	    hda.purged,
	    ds.id,
	    ds.state,
	    ds.deleted,
	    ds.purged,
	    pg_size_pretty(ds.total_size)
	FROM
	    job_to_input_dataset AS jtid,
		history_dataset_association AS hda,
		dataset AS ds,
		job_to_output_dataset as jtod
	WHERE
	    jtid.job_id = $1
		AND jtid.dataset_id = hda.id
		AND hda.dataset_id = ds.id
		AND jtod.dataset_id = ds.id
	EOF
	input_ds=$(query_tsv "$qstr")
	input_ds_tbl=$(printf "\nJob ID\tName\tExtension\thda-id\thda-state\thda-deleted\thda-purged\tds-id\tds-state\tds-deleted\tds-purged\tSize\n----\t----\t----\t----\t----\t----\t----\t----\t----\t----\t----\t----\n%s" "$input_ds" | align_cols)

	###       ###
	#  OUTPUTS  #
	###       ###
	read -r -d '' qstr <<-EOF
	SELECT
	    hda.name,
	    hda.extension,
	    hda.id,
	    hda.state,
	    hda.deleted,
	    hda.purged,
	    ds.id,
	    ds.state,
	    ds.deleted,
	    ds.purged,
	    pg_size_pretty(ds.total_size)
	FROM
	    job_to_output_dataset AS jtod,
		history_dataset_association AS hda,
		dataset AS ds
	WHERE
	    jtod.job_id = $1
		AND jtod.dataset_id = hda.id
		AND hda.dataset_id = ds.id
	EOF
	output_ds=$(query_tsv "$qstr")
	output_ds_tbl=$(printf "Name\tExtension\thda-id\thda-state\thda-deleted\thda-purged\tds-id\tds-state\tds-deleted\tds-purged\tSize\n----\t----\t----\t----\t----\t----\t----\t----\t----\t----\t----\n%s" "$output_ds" |  align_cols)


	read -r -d '' template3 <<EOF

## Inputs

%s

## Outputs

%s

\n
EOF
	# shellcheck disable=SC2059
	printf "$template3" "$input_ds_tbl" "$output_ds_tbl"
}


report_assigned-to-handler(){ ## <handler>: Report what items are assigned to a handler currently.
	handle_help "$@" <<-EOF
	EOF

	assert_count $# 1 "Missing Handler ID"
	printf "# Handler %s\n\n" "$1"

	# Jerbs
	printf "## Jobs\n\n"
	read -r -d '' qstr <<-EOF
		SELECT
			id, create_time, tool_id, state, object_store_id, user_id
		FROM job
		WHERE handler = '$1' and state in ('new', 'queued', 'running')
		ORDER BY create_time DESC
	EOF
	output_ds=$(query_tsv "$qstr")
	printf "ID\tCreate Time\tTool ID\tState\tObject Store\tUser ID\n----\t----\t----\t----\t----\t----\n%s" "$output_ds" | align_cols


	# Workflows
	printf "## Workflows\n\n"
	read -r -d '' qstr <<-EOF
		SELECT
			id, create_time, workflow_id, history_id, state, scheduler, uuid
		FROM workflow_invocation
		WHERE handler = '$1' and state = 'new'
	EOF
	output_ds=$(query_tsv "$qstr")
	printf "ID\tCreate Time\tWorkflow ID\tHistory ID\tState\tScheduler\tUUID\n----\t----\t----\t----\t----\t----\t----\n%s" "$output_ds" | align_cols
}
# Some unlisted functions that are probably not useful as things people might
# want to query (and thus have cluttering their query menu), but should be
# shared because that's the purpose of gxadmin, sharing all of these weird
# queries that we have :)

query_memory-and-cpu-on-same-node() {
	handle_help "$@" <<-EOF
		Helena needed to check the reported values of memory/cpu over a series
		of jobs on a specific host, to debug what went wrong with the cgroup
		metrics.

		TODO(hxr): find some way to note "unlisted" functions and let people do the help for them?
	EOF

	assert_count $# 1 "Missing host name"
	host="$1"

	read -r -d '' QUERY <<-EOF
		SELECT
			jmn_a.metric_value AS "memory.memsw.max_usage_in_bytes",
			jmn_b.metric_value AS "cpuacct.usage",
			job.update_time
		FROM
			job_metric_numeric AS jmn_a, job_metric_numeric AS jmn_b, job
		WHERE
			jmn_a.job_id
			IN (
					SELECT
						job_id
					FROM
						job_metric_text
					WHERE
						metric_value = '$host'
					ORDER BY
						job_id DESC
					LIMIT
						200
				)
			AND jmn_a.metric_name = 'memory.memsw.max_usage_in_bytes'
			AND jmn_b.job_id
				IN (
						SELECT
							job_id
						FROM
							job_metric_text
						WHERE
							metric_value = '$host'
						ORDER BY
							job_id DESC
						LIMIT
							200
					)
			AND jmn_b.metric_name = 'cpuacct.usage'
			AND jmn_a.job_id = job.id
			AND jmn_b.job_id = job.id
		ORDER BY
			job.create_time DESC
	EOF
}

registered_subcommands="$registered_subcommands cluster"
_cluster_short_help="WIP"


cluster_slurm-stats() { ## : Collect stats on slurm
	handle_help "$@" <<-EOF
	EOF

	error "Not yet implemented"
}
registered_subcommands="$registered_subcommands server"
_server_short_help="Various overall statistics"
_server_long_help="
	'query' can be exchanged with 'tsvquery' or 'csvquery' for tab- and comma-separated variants.
	In some cases 'iquery' is supported for InfluxDB compatible output.
	In all cases 'explainquery' will show you the query plan, in case you need to optimise or index data. 'explainjsonquery' is useful with PEV: http://tatiyants.com/pev/
"


server_users() { ## : Count of different classifications of users
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	fields="count=4"
	tags="active=0;external=1;deleted=2;purged=3"

	read -r -d '' QUERY <<-EOF
		SELECT
			active, external, deleted, purged, count(*) as count
		FROM
			galaxy_user
		$date_filter
		GROUP BY
			active, external, deleted, purged
	EOF
}

server_groups() { ## : Counts of group memberships
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="AND galaxy_group.create_time AT TIME ZONE 'UTC' <= '$1'::date AND date_trunc('day', user_group_association.create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi


	fields="count=1"
	tags="name=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			galaxy_group.name, count(*)
		FROM
			galaxy_group, user_group_association
		WHERE
			user_group_association.group_id = galaxy_group.id
			$date_filter
		GROUP BY name
	EOF
}

server_datasets() { ## : Counts of datasets
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	fields="count=4;size=5"
	tags="state=0;deleted=1;purged=2;object_store_id=3"

	read -r -d '' QUERY <<-EOF
		SELECT
			COALESCE(state, '__unknown__'),
			deleted,
			purged,
			COALESCE(object_store_id, 'none'),
			count(*),
			sum(coalesce(dataset.total_size, dataset.file_size, 0))
		FROM
			dataset
		$date_filter
		GROUP BY
			state, deleted, purged, object_store_id
	EOF
}

server_hda() { ## : Counts of HDAs
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="AND date_trunc('day', history_dataset_association.create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	fields="sum=2;avg=3;min=4;max=5;count=6"
	tags="extension=0;deleted=1"

	read -r -d '' QUERY <<-EOF
		SELECT
			COALESCE(history_dataset_association.extension, '__unknown__'),
			history_dataset_association.deleted,
			COALESCE(sum(dataset.file_size), 0),
			COALESCE(avg(dataset.file_size), 0)::bigint,
			COALESCE(min(dataset.file_size), 0),
			COALESCE(max(dataset.file_size), 0),
			count(*)
		FROM
			history_dataset_association, dataset
		WHERE
			history_dataset_association.dataset_id = dataset.id
			${date_filter}
		GROUP BY
			history_dataset_association.extension, history_dataset_association.deleted
	EOF
}

server_ts-repos() { ## : Counts of TS repos
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	fields="count=2"
	tags="tool_shed=0;owner=1"

	read -r -d '' QUERY <<-EOF
		SELECT
			tool_shed, owner, count(*)
		FROM
			tool_shed_repository
		$date_filter
		GROUP BY
			tool_shed, owner
		ORDER BY count desc
	EOF
}

server_histories() { ## : Counts of histories and sharing
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	fields="count=6"
	tags="deleted=0;purged=1;published=2;importable=3;importing=4;genome_build=5"

	read -r -d '' QUERY <<-EOF
		SELECT
			deleted, purged, published, importable, importing, COALESCE(genome_build, '__unknown__'), count(*)
		FROM history
		${date_filter}
		GROUP BY
			deleted, purged, published, importable, importing, genome_build
	EOF
}


server_jobs() { ## : Counts of jobs
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	fields="count=3"
	tags="state=0;job_runner_name=1;destination_id=2"

	read -r -d '' QUERY <<-EOF
		SELECT
			COALESCE(state, '__unknown__'),
			COALESCE(job_runner_name, '__unknown__'),
			COALESCE(destination_id, '__unknown__'), count(*)
		FROM
			job
		${date_filter}
		GROUP BY
			state, job_runner_name, destination_id
	EOF
}

server_allocated-cpu() { ## : CPU time per job runner
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	# TODO: here we select the hostname, don't do that.
	# Hack for EU's test vs main separation, both submitting job stats.
	# Solve by either running IN telegraf or doing it in the meta functions
	fields="cpu_seconds=1"
	tags="job_runner_name=0;host=2"

	read -r -d '' QUERY <<-EOF
		SELECT
			job.job_runner_name,
			round(sum(a.metric_value * b.metric_value), 2) AS cpu_seconds,
			'$HOSTNAME' as host
		FROM
			job_metric_numeric AS a,
			job_metric_numeric AS b,
			job
		WHERE
			b.job_id = a.job_id
			AND a.job_id = job.id
			AND a.metric_name = 'runtime_seconds'
			AND b.metric_name = 'galaxy_slots'
			${date_filter}
		GROUP BY
			job.job_runner_name
	EOF
}

server_allocated-gpu() { ## : GPU time per job runner
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	fields="gpu_seconds=1"
	tags="job_runner_name=0"

	read -r -d '' QUERY <<-EOF
		SELECT
			job.job_runner_name,
			round(sum(a.metric_value * length(replace(b.metric_value, ',', ''))), 2) AS gpu_seconds
		FROM
			job_metric_numeric AS a,
			job_metric_text AS b,
			job
		WHERE
			b.job_id = a.job_id
			AND a.job_id = job.id
			AND a.metric_name = 'runtime_seconds'
			AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
			${date_filter}
		GROUP BY
			job.job_runner_name
	EOF
}

server_workflows() { ## : Counts of workflows
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	fields="count=3"
	tags="deleted=0;importable=1;published=2"

	read -r -d '' QUERY <<-EOF
		SELECT
			deleted, importable, published, count(*)
		FROM
			stored_workflow
		${date_filter}
		GROUP BY
			deleted, importable, published
	EOF
}

server_workflow-invocations() { ## : Counts of workflow invocations
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="WHERE date_trunc('day', create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	fields="count=2"
	tags="scheduler=0;handler=1"

	read -r -d '' QUERY <<-EOF
		SELECT
			COALESCE(scheduler, '__unknown__'),
			COALESCE(handler, '__unknown__'),
			count(*)
		FROM
			workflow_invocation
		${date_filter}
		GROUP BY
			scheduler, handler
	EOF
}

server_groups-disk-usage() { ## [YYYY-MM-DD] [=, <=, >= operators]: Retrieve an approximation of the disk usage for groups
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="AND date_trunc('day', dataset.create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')

	fields="storage_usage=1"
	tags="group_name=0"

	read -r -d '' QUERY <<-EOF
		SELECT $groupname,
			sum(coalesce(dataset.total_size, dataset.file_size, 0)) as "storage_usage"
		FROM dataset,
			galaxy_group,
			user_group_association,
			history_dataset_association,
			history
		WHERE NOT dataset.purged
			AND dataset.id = history_dataset_association.dataset_id
			AND history_dataset_association.history_id = history.id
			AND history.user_id = user_group_association.user_id
			AND user_group_association.group_id = galaxy_group.id
			$date_filter
		GROUP BY galaxy_group.name
	EOF
}

server_groups-allocated-cpu() { ## [YYYY-MM-DD] [=, <=, >= operators]: Retrieve an approximation of the CPU allocation for groups
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')

	fields="cpu_seconds=1"
	tags="group_name=0"

	read -r -d '' QUERY <<-EOF
		WITH jobs_info AS (
			SELECT job.user_id,
				round(sum(a.metric_value * b.metric_value), 2) AS cpu_seconds
			FROM job_metric_numeric AS a,
				job_metric_numeric AS b,
				job
			WHERE job.id = a.job_id
				AND job.id = b.job_id
				AND a.metric_name = 'runtime_seconds'
				AND b.metric_name = 'galaxy_slots'
				$date_filter
			GROUP BY job.user_id
		), user_job_info AS (
			SELECT user_id,
				sum(cpu_seconds) AS cpu_seconds
			FROM jobs_info
			GROUP BY user_id
		)
		SELECT $groupname,
			round(sum(user_job_info.cpu_seconds), 2) as cpu_seconds
		FROM user_job_info,
			galaxy_group,
			user_group_association
		WHERE user_job_info.user_id = user_group_association.user_id
			AND user_group_association.group_id = galaxy_group.id
		GROUP BY galaxy_group.name

	EOF
}

server_groups-allocated-gpu() { ## [YYYY-MM-DD] [=, <=, >= operators]: Retrieve an approximation of the GPU allocation for groups
	handle_help "$@" <<-EOF
	EOF

	op="="
	if (( $# > 1 )); then
		op="$2"
	fi

	date_filter=""
	if (( $# > 0 )); then
		date_filter="AND date_trunc('day', job.create_time AT TIME ZONE 'UTC') $op '$1'::date"
	fi

	groupname=$(gdpr_safe galaxy_group.name group_name 'Anonymous')

	fields="gpu_seconds=1"
	tags="group_name=0"

	read -r -d '' QUERY <<-EOF
		WITH jobs_info AS (
			SELECT job.user_id,
				round(sum(a.metric_value * length(replace(b.metric_value, ',', ''))), 2) AS gpu_seconds
			FROM job_metric_numeric AS a,
				job_metric_text AS b,
				job
			WHERE job.id = a.job_id
				AND job.id = b.job_id
				AND a.metric_name = 'runtime_seconds'
				AND b.metric_name = 'CUDA_VISIBLE_DEVICES'
				$date_filter
			GROUP BY job.user_id
		), user_job_info AS (
			SELECT user_id,
				sum(gpu_seconds) AS gpu_seconds
			FROM jobs_info
			GROUP BY user_id
		)
		SELECT $groupname,
			round(sum(user_job_info.gpu_seconds), 2) as gpu_seconds
		FROM user_job_info,
			galaxy_group,
			user_group_association
		WHERE user_job_info.user_id = user_group_association.user_id
			AND user_group_association.group_id = galaxy_group.id
		GROUP BY galaxy_group.name
	EOF
}

server_workflow-trace-archive-metrics() { ## : [Workflow Trace Archive] Export numeric metrics
	handle_help "$@" <<-EOF
		Helper for WTA
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT * FROM job_metric_numeric
	EOF
}

server_workflow-trace-archive-jobs() { ## : [Workflow Trace Archive] Export jobs
	handle_help "$@" <<-EOF
		Helper for WTA
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			id,
			create_time,
			update_time,
			tool_id,
			tool_version,
			state,
			runner_name,
			job_runner_name,
			imported,
			object_store_id,
			handler,
			exit_code,
			destination_id,
			copied_from_job_id,
			dynamic_tool_id,
			galaxy_version
		FROM
			job
	EOF
}

server_workflow-trace-archive-workflows() { ## : [Workflow Trace Archive] Export workflows
	handle_help "$@" <<-EOF
		Helper for WTA
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			id,
			create_time,
			update_time,
			stored_workflow_id,
			has_cycles,
			has_errors,
			parent_workflow_id,
			uuid
		FROM workflow
	EOF
}

server_workflow-trace-archive-workflow-invocations() { ## : [Workflow Trace Archive] Export workflow invocations
	handle_help "$@" <<-EOF
		Helper for WTA
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			id,
			create_time,
			update_time,
			workflow_id,
			state,
			scheduler,
			handler
		FROM workflow_invocation
	EOF
}

server_workflow-trace-archive-workflow-steps() { ## : [Workflow Trace Archive] Export workflow steps
	handle_help "$@" <<-EOF
		Helper for WTA
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT id,
			create_time,
			update_time,
			workflow_id,
			type,
			tool_id,
			tool_version,
			order_index,
			subworkflow_id,
			dynamic_tool_id
		FROM workflow_step
	EOF
}

server_workflow-trace-archive-workflow-invocation-steps() { ## : [Workflow Trace Archive] Export workflow invocation steps
	handle_help "$@" <<-EOF
		Helper for WTA
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			id,  create_time,  update_time,  workflow_invocation_id,
			workflow_step_id,  job_id,  state
		FROM workflow_invocation_step
	EOF
}

server_workflow-trace-archive-workflow-connections() { ## : [Workflow Trace Archive] Export workflow connections
	handle_help "$@" <<-EOF
		Helper for WTA
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			id, output_step_id, input_step_input_id, output_name, input_subworkflow_step_id
		FROM workflow_step_connection
	EOF
}
server_workflow-trace-archive-workflow-step-input() { ## : [Workflow Trace Archive] Export workflow step-input
	handle_help "$@" <<-EOF
		Helper for WTA
	EOF

	read -r -d '' QUERY <<-EOF
		SELECT
			id, workflow_step_id, name
		FROM workflow_step_input
	EOF
}
registered_subcommands="$registered_subcommands meta"
_meta_short_help="Miscellaneous"


meta_find_slurpable() {
	grep -s -h -o '^server_[a-z-]*' "$0" "$GXADMIN_SITE_SPECIFIC" | grep -v 'workflow-trace-archive' | sort | sed 's/server_//g'
}

meta_update() { ## : Update the script
	handle_help "$@" <<-EOF
	EOF

	tmp=$(mktemp);
	curl https://raw.githubusercontent.com/galaxyproject/gxadmin/main/gxadmin > "$tmp";
	chmod ugo+rx "$tmp";
	mv "$tmp" "$0";
	exit 0;
}

meta_cmdlist() {
	handle_help "$@" <<-EOF
	EOF

	IFS=$'\n'
	# TOC
	for section in $(locate_cmds_nolocal | correct_cmd | awk '{print $1}' | sort -u); do
		# Progress
		echo $section

		# contents
		echo "# $section"            >  "docs/README.${section}.md"
		echo                         >> "docs/README.${section}.md"
		echo "Command | Description" >> "docs/README.${section}.md"
		echo "------- | -----------" >> "docs/README.${section}.md"
		for command in $(locate_cmds_nolocal | correct_cmd | grep "^$section"); do
			cmd_part="$(echo "$command" | sed 's/:.*//g;s/\s*<.*//g;s/\s*\[.*//')"
			desc_part="$(echo "$command" | sed 's/^[^:]*:\s*//g')"
			key_part="$(echo "$cmd_part" | sed 's/ /-/g')"

			if [[ "$command" != *"Deprecated"* ]]; then
				# Main ToC
				echo "[\`${cmd_part}\`](#${key_part}) | $desc_part" >> "docs/README.${section}.md"
			else
				echo "\`${cmd_part}\` | $desc_part" >> "docs/README.${section}.md"
			fi
		done

		for command in $(locate_cmds_nolocal | correct_cmd | grep "^$section"); do
			cmd_part="$(echo "$command" | sed 's/:.*//g;s/\s*<.*//g;s/\s*\[.*//;s/\s*$//')"
			desc_part="$(echo "$command" | sed 's/^[^:]*:\s*//g;s/\s*$//')"
			key_part="$(echo "$cmd_part" | sed 's/ /-/g')"
			url_part="$(echo "$cmd_part" | sed 's/ /_/g')"

			if [[ "$command" != *"Deprecated"* ]]; then
				# Subsec documentation
				echo                          >> "docs/README.${section}.md"
				echo "## $cmd_part"           >> "docs/README.${section}.md"
				echo                          >> "docs/README.${section}.md"
				echo "([*source*](https://github.com/galaxyproject/gxadmin/search?q=${url_part}&type=Code))" >> "docs/README.${section}.md"
				bash -c "$0 $cmd_part --help" >> "docs/README.${section}.md"
			fi
		done
	done
}

meta_cmdlist2() {
	handle_help "$@" <<-EOF
	EOF

	IFS=$'\n'
	# TOC
	for section in $(locate_cmds_nolocal | correct_cmd | awk '{print $1}' | sort -u); do
		for command in $(locate_cmds_nolocal | correct_cmd | grep "^$section"); do
			echo "$command" | sed 's/:.*//g;s/\s*<.*//g;s/\s*\[.*//;s/\s*$//'
		done
	done
}

meta_slurp-current() { ## [--date] [slurp-name [2nd-slurp-name [...]]]: Executes what used to be "Galaxy Slurp"
	handle_help "$@" <<-EOF
		Obtain influx compatible metrics regarding the current state of the
		server. UseGalaxy.EU uses this to display things like "Current user
		count" and "Current dataset size/distribution/etc."

		It is expected that you are passing this straight to telegraf, there is
		no non-influx output supported currently.

		Supplying --date will include the current timestamp at the end of the
		line, making it compatible for "gxadmin meta influx-post" usage

		You can add your own functions which are included in this output, using
		the \$GXADMIN_SITE_SPECIFIC file. They must start with the prefix
		"server_", e.g. "server_mymetric".

		    $ gxadmin meta slurp-current
		    server-allocated-cpu,job_runner_name=condor cpu_years=102.00
		    server-allocated-cpu,job_runner_name=local cpu_years=1346.00
		    server-datasets,deleted=f,object_store_id=,state=error,purged=f count=37,size=29895528
		    server-datasets,deleted=f,object_store_id=,state=ok,purged=f count=72,size=76739510
		    server-datasets,deleted=f,object_store_id=,state=discarded,purged=f count=2,size=0
		    server-hda,deleted=f,extension=gff3 max=2682565,sum=2682565,avg=2682565,min=2682565
		    server-hda,deleted=t,extension=tabular max=468549,sum=597843,avg=22142,min=2
		    server-hda,deleted=f,extension=fastqsanger max=3,sum=3,avg=3,min=3
		    server-hda,deleted=f,extension=tabular max=2819293,sum=3270268,avg=155727,min=3
		    server-hda,deleted=f,extension=png max=800459,sum=7047695,avg=503407,min=142863
		    server-hda,deleted=t,extension=auto max=9571,sum=9571,avg=9571,min=9571
		    server-hda,deleted=t,extension=data max=2,sum=2,avg=2,min=2
		    server-hda,deleted=t,extension=txt max=5023,sum=5039,avg=2520,min=16
		    server-hda,deleted=t,extension=bigwig max=2972569,sum=5857063,avg=1464266,min=0
		    server-hda,deleted=t,extension=tar.gz max=209034,sum=1318690,avg=188384,min=91580
		    server-hda,deleted=f,extension=vcf_bgzip max=3965889,sum=3965889,avg=3965889,min=3965889
		    server-hda,deleted=t,extension=png max=2969711,sum=6584812,avg=1097469,min=183
		    server-hda,deleted=f,extension=txt max=9584828,sum=132124407,avg=4556014,min=0
		    server-hda,deleted=f,extension=bigwig max=14722,sum=17407,avg=8704,min=2685
		    server-hda,deleted=f,extension=tar.gz max=209025,sum=390421,avg=195211,min=181396
		    server-histories,importable=f,deleted=f,genome_build=?,published=f,importing=f,purged=f count=14
		    server-jobs,state=ok,destination_id=slurm_singularity,job_runner_name=local count=26
		    server-jobs,state=error,destination_id=condor_resub,job_runner_name=condor count=1
		    server-jobs,state=deleted,destination_id=local,job_runner_name=local count=1
		    server-jobs,state=error,destination_id=condor,job_runner_name=condor count=8
		    server-jobs,state=ok,destination_id=local,job_runner_name=local count=13
		    server-jobs,state=ok,destination_id=condor,job_runner_name=condor count=2
		    server-jobs,state=error,destination_id=local,job_runner_name=local count=3
		    server-users,active=t,deleted=f,external=f,purged=f count=3
		    server-workflows,deleted=f,importable=f,published=f count=3
	EOF

	append=""
	if [[ $1 == "--date" ]]; then
		append=" "$(date +%s%N)
		shift;
	fi

	specific_slurp=($@)

	# shellcheck disable=SC2013
	for func in $(meta_find_slurpable); do
		# To allow only slurping the one that was requested, if this was done.
		if (( ${#specific_slurp[@]} > 0 )); then
			if [[ ! "${specific_slurp[*]}" =~ "${func}"  ]]; then
				continue
			fi
		fi

		obtain_func "server" "$func"
		$wrapper query_influx "$QUERY" "server-$query_name" "$fields" "$tags" | sed "s/$/$append/"
	done
}

meta_slurp-upto() { ## <yyyy-mm-dd> [slurp-name [2nd-slurp-name [...]]]: Slurps data up to a specific date.
	handle_help "$@" <<-EOF
		Obtain influx compatible metrics regarding the summed state of the
		server up to a specific date. UseGalaxy.EU uses this to display things
		like charts of "how many users were registered as of date X".

		This calls all of the same functions as 'gxadmin meta slurp-current',
		but with date filters for the entries' creation times.
	EOF

	date=$1; shift
	specific_slurp=($@)

	# shellcheck disable=SC2013
	for func in $(meta_find_slurpable); do
		# To allow only slurping the one that was requested, if this was done.
		if (( ${#specific_slurp[@]} > 0 )); then
			if [[ ! "${specific_slurp[*]}" =~ "${func}"  ]]; then
				continue
			fi
		fi

		obtain_func server "$func" "$date" "<="
		$wrapper query_influx "$QUERY" "server-$query_name.upto" "$fields" "$tags" | \
			sed "s/$/ $(date -d "$date" +%s%N)/"
	done
}

meta_slurp-day() { ## <yyyy-mm-dd> [slurp-name [2nd-slurp-name [...]]]: Slurps data on a specific date.
	handle_help "$@" <<-EOF
		Obtain influx compatible metrics regarding the state of the
		server on a specific date. UseGalaxy.EU uses this to display things
		like charts of "how many users were registered as of date X". You can
		backfill data from your server by running a for loop like:

		    #!/bin/bash
		    for i in range {1..365}; do
		        gxadmin meta slurp-day \$(date -d "$i days ago" "+%Y-%m-%d") | <get into influx somehow>
		    done

		It is expected that you are passing this straight to telegraf, there is
		no non-influx output supported currently.

		This calls all of the same functions as 'gxadmin meta slurp-current',
		but with date filters for the entries' creation times.

		You can add your own functions which are included in this output, using
		the \$GXADMIN_SITE_SPECIFIC file. They must start with the prefix
		"server_", e.g. "server_mymetric". They should include a date filter as
		well, or the metrics reported here will be less useful.

		    $ gxadmin meta slurp-day 2019-01-01
		    server-allocated-cpu.daily,job_runner_name=condor cpu_years=102.00
		    server-datasets.daily,deleted=f,object_store_id=,state=error,purged=f count=37,size=29895528
		    server-datasets.daily,deleted=f,object_store_id=,state=ok,purged=f count=72,size=76739510
		    server-datasets.daily,deleted=f,object_store_id=,state=discarded,purged=f count=2,size=0
		    server-hda.daily,deleted=t,extension=data max=2,sum=2,avg=2,min=2
		    server-hda.daily,deleted=t,extension=txt max=5023,sum=5039,avg=2520,min=16
		    server-hda.daily,deleted=f,extension=fastqsanger max=3,sum=3,avg=3,min=3
		    server-hda.daily,deleted=f,extension=tabular max=3,sum=51,avg=3,min=3
		    server-hda.daily,deleted=t,extension=tabular max=468549,sum=552788,avg=21261,min=2
		    server-histories.daily,importable=f,deleted=f,genome_build=?,published=f,importing=f,purged=f count=5
		    server-jobs.daily,state=error,destination_id=condor,job_runner_name=condor count=8
		    server-jobs.daily,state=error,destination_id=condor_resub,job_runner_name=condor count=1
		    server-jobs.daily,state=error,destination_id=condor_a,job_runner_name=condor count=11
		    server-jobs.daily,state=ok,destination_id=condor,job_runner_name=condor count=2
		    server-jobs.daily,state=ok,destination_id=condor_a,job_runner_name=condor count=23
		    server-users.daily,active=t,deleted=f,external=f,purged=f count=1
		    server-workflows.daily,deleted=f,importable=f,published=f count=1

	EOF

	date=$1; shift;
	specific_slurp=($@)

	# shellcheck disable=SC2013
	for func in $(meta_find_slurpable); do
		# To allow only slurping the one that was requested, if this was done.
		if (( ${#specific_slurp[@]} > 0 )); then
			if [[ ! "${specific_slurp[*]}" =~ "${func}"  ]]; then
				continue
			fi
		fi

		obtain_func server "$func" "$date"
		$wrapper query_influx "$QUERY" "server-$query_name.daily" "$fields" "$tags" | \
			sed "s/$/ $(date -d "$date" +%s%N)/"
	done
}

meta_slurp-initial() { ## <yyyy-mm-dd> <yyyy-mm-dd> [slurp-name [2nd-slurp-name [...]]]: Slurps data starting at the first date until the second date.
	handle_help "$@" <<-EOF
		Obtains influx compatible metrics between dates and posts this to Influx.
		This function calls 'gxadmin meta slurp-upto' and 'gxadmin meta slurp-day'.

		It requires a start and end date. Allows to run specific slurp queries.
	EOF

	# Variables
	begindate=$1; shift
	enddate=$1; shift
	specific_slurp=($@)
	specific_slurp_string=""
	if (( ${#specific_slurp[@]} > 0 )); then
		for specificslurppiece in "${specific_slurp[@]}"; do
			if (( ${#specific_slurp_string} == 0 )); then
				specific_slurp_string="$specificslurppiece"
			else
				specific_slurp_string="$specific_slurp_string $specificslurppiece"
			fi
		done
	fi

	tmpfile=/tmp/gxadmin-meta-slurp-initial-$(date +%s)

	# Create temporary file
	echo "" > $tmpfile

	# Validate environment
	assert_set_env INFLUX_DB
	assert_set_env INFLUX_URL
	assert_set_env INFLUX_PASS
	assert_set_env INFLUX_USER

	# Loop for the data
	d=$begindate
	while [ "$d" != $enddate ]; do
		echo "Slurping for $d"
		# Slurp the data
		meta_slurp-upto $d $specific_slurp_string > $tmpfile
		meta_slurp-day $d $specific_slurp_string >> $tmpfile

		# Post to influxdb
		meta_influx-post $INFLUX_DB $tmpfile

		d=$(date -I -d "$d + 1 day")
	done

	rm $tmpfile
}

meta_error() {
	error "$@"
}

meta_warning() {
	warning "$@"
}

meta_success() {
	success "$@"
}


meta_influx-post() { ## <db> <file>: Post contents of file (in influx line protocol) to influx
	handle_help "$@" <<-EOF
		Post data to InfluxDB. Must be [influx line protocol formatted](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)

		Posting data from a file:

		    $ gxadmin meta influx-post galaxy file.inflx

		Posting data from the output of a command

		    $ gxadmin meta influx-post galaxy <(echo "weather,location=us-midwest temperature=\$RANDOM \$(date +%s%N)")

		Posting data from the output of a gxadmin command

		    $ gxadmin meta influx-post galaxy <(gxadmin meta slurp-current --date)

		**WARNING**

		!> If you are sending a LOT of data points, consider splitting
		!> them. Influx recommends 5-10k lines:
		!>
		!>     $ split --lines=5000 data.iflx PREFIX
		!>     $ for file in PREFIX*; do gxadmin meta influx-post galaxy $file; done
	EOF

	assert_set_env INFLUX_URL
	assert_set_env INFLUX_PASS
	assert_set_env INFLUX_USER

	assert_count_ge $# 2 "Must supply DB and then data file path"
	DB="$1"
	FILE="$2"

	# If the user is reading the output of a command then it'll be a transient
	# FD and might fail this check? Or it has for me. So if proc is in there,
	# don't bother asserting that it exists.
	if [[ "$FILE" != "/proc/"* ]]; then
		assert_file "$FILE"
	fi

	curl --silent -XPOST "${INFLUX_URL}/write?db=${DB}&u=${INFLUX_USER}&p=${INFLUX_PASS}" --data-binary @"${FILE}"
}

meta_influx-query() { ## <db> "<query>": Query an influx DB
	handle_help "$@" <<-EOF
		Query an InfluxDB

		Query percentage of memory used over last hour.

		    $ gxadmin meta influx-query galaxy "select mean(used_percent) from mem where host='X' AND time > now() - 1h group by time(10m)" | \\
		        jq '.results[0].series[0].values[] | @tsv' -r
		    2019-04-26T09:30:00Z    64.83119975586277
		    2019-04-26T09:40:00Z    64.58284600472675
		    2019-04-26T09:50:00Z    64.62714491344244
		    2019-04-26T10:00:00Z    64.62339148181154
		    2019-04-26T10:10:00Z    63.95268353798708
		    2019-04-26T10:20:00Z    64.66849537282599
		    2019-04-26T10:30:00Z    65.06069941790024
	EOF

	assert_set_env INFLUX_URL
	assert_set_env INFLUX_PASS
	assert_set_env INFLUX_USER

	assert_count_ge $# 2 "Must supply DB and then query"
	DB="$1"
	QUERY="$2"

	curl --silent "${INFLUX_URL}/query?db=${DB}&u=${INFLUX_USER}&p=${INFLUX_PASS}" --data-urlencode "q=${QUERY}"
}

meta_iquery-grt-export() { ## : Export data from a GRT database for sending to influx
	handle_help "$@" <<-EOF
		**WARNING**:

		!> GRT database specific query, will not work with a galaxy database!
	EOF

	fields="count=4"
	timestamp="3"
	tags="tool_id=0;tool_version=1;instance=2"

	read -r -d '' QUERY <<-EOF
		SELECT
			api_job.tool_id,
			api_job.tool_version,
			api_galaxyinstance.title,
			extract(epoch from date_trunc('week', api_job.create_time)) || '000000000' as date,
			count(*)
		FROM
			api_job, api_galaxyinstance
		WHERE
			api_job.instance_id = api_galaxyinstance.id
		GROUP BY
			api_job.tool_id,
			api_job.tool_version,
			api_galaxyinstance.title,
			date
	EOF
}

meta_whatsnew() { ## : What's new in this version of gxadmin
	handle_help "$@" <<-EOF
		Informs users of what's new in the changelog since their version
	EOF

	current_version=$(version)
	prev_version=$(( current_version - 1 ))
	#sed -n '1,/^# 12/d;/^# 11/q;p'
	echo "$CHANGELOG" | sed -n "/^# ${prev_version}/q;p"
}

meta_export-grafana-dashboards() { ## [grafana_db|/var/lib/grafana/grafana.db]: Export all dashboards from a Grafana database to CWD and commit them to git.
	handle_help "$@" <<-EOF
		Given a grafana database, use sqlite3 to access all of the dashboards within, and then write them out to the current working directly. Next, commit them and update a README.

		This script forms the basis of https://github.com/usegalaxy-eu/grafana-dashboards

		**WARNING**

		!> This script will silently remove all json files from CWD
		!> as a first step. Additionally it will commit and push at the end, so it
		!> should be run in a directory that has a git repo initialised, where you
		!> are not concerned about accidentally pushing to wrong remote.
	EOF

	db_path=${1:-/var/lib/grafana/grafana.db}

	rm -f *.json
	sqlite3 --csv -separator "$(printf '\t')" $db_path \
		'select title,data from dashboard;' | \
		awk -F'\t' '{gsub("\"", "", $1); print $2 > $1".json" }'

	for i in *.json; do
		q=$(mktemp)
		cat "$i" | sed 's/^"//;s/"$//g;s/""/"/g' | jq -S . > "$q"
		mv "$q" "$i";

		lines=$(wc -l "$i" | awk '{print $1}')
		if (( lines < 10 )); then
			rm "$i"
		fi
	done;


	cat > README.md <<-EOF
	# Grafana Dashbaords

	Name | Tags | Version | Live | JSON
	--- | --- | --- | --- | ---
	EOF

	sqlite3 --csv -separator "$(printf '\t')" $db_path \
		'SELECT title,uid,title,version,GROUP_CONCAT(dashboard_tag.term) FROM dashboard left outer join dashboard_tag on dashboard.id = dashboard_tag.dashboard_id WHERE dashboard.is_folder = 0 GROUP BY title, uid, title order by title asc' | \
		awk -F'\t' '{gsub("\"", "", $1); gsub("\"", "", $3); gsub(" ", "%20", $3); print $1" | "$5" | "$4" | [Live](https://stats.galaxyproject.eu/d/"$2") | [File](./"$3".json)"}' \
		>> README.md

	git add *.json README.md
	git commit -a -m "Automated commit for $(date)"
	git push --quiet
}

meta_wta-report() { ## Export all workflow trace archive queries
	handle_help "$@" <<-EOF
		Run through several WTA commands and export those to CSV
	EOF

	tmpdir=$(mktemp -d)

	obtain_func "server" "workflow-trace-archive-metrics"
	query_csv "$QUERY" > "$tmpdir/job_metrics_numeric.csv"

	obtain_func "server" "workflow-trace-archive-jobs"
	query_csv "$QUERY" > "$tmpdir/jobs.csv"

	obtain_func "server" "workflow-trace-archive-workflows"
	query_csv "$QUERY" > "$tmpdir/workflows.csv"

	obtain_func "server" "workflow-trace-archive-workflow-invocations"
	query_csv "$QUERY" > "$tmpdir/workflow-invocations.csv"

	obtain_func "server" "workflow-trace-archive-workflow-steps"
	query_csv "$QUERY" > "$tmpdir/workflow-steps.csv"

	obtain_func "server" "workflow-trace-archive-workflow-invocation-steps"
	query_csv "$QUERY" > "$tmpdir/workflow-invoke-steps.csv"

	obtain_func "server" "workflow-trace-archive-workflow-connections"
	query_csv "$QUERY" > "$tmpdir/workflow-connections.csv"

	obtain_func "server" "workflow-trace-archive-workflow-step-input"
	query_csv "$QUERY" > "$tmpdir/workflow-step-input.csv"

	tar cfz $tmpdir.tar.gz $tmpdir/*

	echo $tmpdir.tar.gz
}
meta_complete-bash() { ## : export bash autocompletion
	handle_help "$@" <<-EOF
		Produces the bash autocompletion for gxadmin

		Write the output of this somewhere useful, and source it in your bash profile.

		    $ gxadmin meta bash-complete > ~/.cache/gxadmin-autocomplete.sh
			$ . ~/.cache/gxadmin-autocomplete.sh
			$ gxadmin<TAB>
	EOF

	IFS=$'\n'
	commands=$(locate_cmds_nolocal | correct_cmd | sed 's/://g')

	leading=$(echo "$commands" | awk '{print $1}' | sort -u | paste -s -d' ')
	subcommands=""
	for cmd in $(echo "$commands" | awk '{print $1}' | sort -u); do
		subcmds=$(echo "$commands" | awk '($1 == "'"$cmd"'"){ print $2 }' | sort -u | paste -s -d' ')
		if (( ${#subcmds} > 0 )); then
			subcommands="${subcommands}\n\n\t\t$cmd)\n\t\t\tCOMPREPLY=( \$(compgen -W \"${subcmds}\" \${cur}) )\n\t\t\treturn 0\n\t\t;;"
		fi
	done;
	# Fix newlines
	subcommands="$(echo -e "$subcommands")"

	# Template
BASH_COMPLETION_TEMPLATE=$(cat <<EOF
_gxadmin() {
	local cur prev opts
	COMPREPLY=()
	cur="\${COMP_WORDS[COMP_CWORD]}"
	prev="\${COMP_WORDS[COMP_CWORD-1]}"

	local -r cmds="%s"

	case "\${prev}" in
		%s
	esac

	COMPREPLY=( \$(compgen -W "\${cmds}" -- \${cur}) )
	return 0
}

complete -F _gxadmin gxadmin
EOF
)

	# shellcheck disable=SC2059
	printf "${BASH_COMPLETION_TEMPLATE}\n" "$leading" "$subcommands"
}
obtain_func() {
	category="$1"; shift
	query_name="$1"; shift

	if [[ "$category" != "" ]]; then
		category="${category}_"
	fi

	fn="${category}${query_name}"
	LC_ALL=C type "$fn" 2> /dev/null | grep -q 'function'
	ec=$?

	# ##? enables fancy argument parsing.
	grep -s -h -o "${fn}()\s*{ ##?\? .*" "$0" | fgrep --quiet -o '##?'
	# Nothing needs to be done, just let that function handle and auto-parse
	# and auto-export
	enable_wap=$?
	if (( enable_wap == 0 )); then
		wonderful_argument_parser "$fn" "$@"
	fi

	if (( ec == 0 )); then
		$fn "$@";
	else
		export QUERY="ERROR"
	fi
}

wonderful_argument_parser() {
	# from the top, the function to look for
	fn=$1;
	# make it drop, so we can parse the rest
	shift;
	declare -a parsed_keys
	declare -a parsed_vals
	declare -a positional_args
	declare -a optional_flag_args
	declare -a optional_args
	declare -a args
	positional_index=0
	optional_index=0

	# shellcheck disable=SC2207
	read -r -a signature <<< $(grep -s -h -o "${fn}()\s*{ ##?\? .*" "$0" | sed 's/.*##?//g;s/: .*//g;')
	signature+=('[--help]') # This is always available

	# shellcheck disable=SC2068
	for x in $@; do
		args+=("$x");

		# If the help flag is in there, we can short-circuit
		if [[ "$x" == "--help" ]] || [[ "$x" == "-h" ]]; then
			return
		fi

		shift;
	done

	for arg in "${signature[@]}"; do
		if [[ "$arg" == '<'* ]]; then
			# This is a required argument
			positional_args+=("$(echo "$arg" | sed 's/^<//;s/>$//')")
		elif [[ "$arg" == '[--'* ]]; then
			# This is an optional argument
			optional_flag_args+=("$arg")
		elif [[ "$arg" == '['* ]]; then
			# This is an optional argument
			optional_args+=("$(echo "$arg" | sed 's/^\[//;s/\]$//')")
		else
			# This is an error
			echo "ERROR!!! Bad argument specification: $arg"
		fi
	done

	# Size of the arrays
	positional_count=${#positional_args[@]}
	optional_count=${#optional_args[@]}

	offset=0
	while true; do
		# Get the first bit of content from the arguments
		a_cur=${args[$offset]}
		if [[ "$a_cur" == "" ]]; then
			break
		fi

		if [[ "$a_cur" == "--"* ]]; then
			# This is a flag. So find the matching flag definition.
			for arg in "${optional_flag_args[@]}"; do
				# Two types of args: with, without values
				if [[ "$arg" == "[--"* ]]; then
					if [[ "$arg" == *'|'* ]]; then
						# This has another argument.
						# So we need to pop something else.
						# And increment offset so we don't re-process it
						if [[ "$arg" == '['$a_cur'|'* ]]; then
							val="${args[$offset+1]}"
							offset=$(( offset + 1 ))
							k="$(echo "$a_cur" | sed 's/^--//;s/-/_/g')"
							parsed_keys+=("${k}")
							parsed_vals+=("$val")
						fi
					else
						# This is just a flag
						if [[ "$arg" == '['$a_cur']' ]]; then
							k="$(echo "$a_cur" | sed 's/^--//;s/-/_/g')"
							parsed_keys+=("${k}")
							parsed_vals+=(1)
						fi
					fi
				fi
			done
		else
			# This is a non-flag, so maybe a positional, maybe an optional argument.
			# So we need to find the Nth positional (via positional_index)
			if (( (positional_index + optional_index) >= (positional_count + optional_count) )); then
				echo "Error: more positional arguments than should be possible"
				exit 1;
			fi

			if (( positional_index < positional_count )); then
				parsed_keys+=("${positional_args[$positional_index]}")
				parsed_vals+=("${a_cur}")
				positional_index=$((positional_index + 1))
			else
				# We're past the positional and into the optional
				k="${optional_args[$optional_index]}"
				parsed_keys+=("$(echo "$k" | sed 's/|.*//g')")
				parsed_vals+=("${a_cur}")
				optional_index=$(( optional_index + 1 ))
			fi
		fi
		offset=$(( offset + 1 ))
	done

	# Set all default optional args, if they aren't set
	if (( optional_index < optional_count )); then
		for i in $(seq $optional_index $((optional_count - 1)) ); do
			if [[ "${optional_args[$i]}" == *'|'* ]]; then
				k="${optional_args[$i]}"
				parsed_keys+=("$(echo "$k" | sed 's/|.*//g')")
				parsed_vals+=("$(echo "$k" | sed 's/.*|//g')")
			fi
		done
	fi

	if (( positional_index < positional_count )); then
		for i in $(seq $positional_index $(( positional_count - 1 )) ); do
			error "Required argument <${positional_args[$i]}> is missing"
		done
		exit 1;
	fi

	size=${#parsed_keys[@]}
	for i in $(seq 0 $((size - 1))); do
		#printf "\t%10s=%-10s\n" "${parsed_keys[$i]}" "${parsed_vals[$i]}"
		export arg_${parsed_keys[$i]}=${parsed_vals[$i]}
	done
}

look_for() {
	query_type="$1"; shift
	group_name="$1"; shift
	query_name="$1"; shift

	# query_type group_name query_name
	# query      tsvquery   users-total
	# galaxy     none       somethingelse
	#echo "HIII $query_type $group_name $query_name"

	if [[ $query_type == "local" ]]; then
		if [[ ! -f ${GXADMIN_SITE_SPECIFIC} ]]; then
			error "No local functions are defined in ${GXADMIN_SITE_SPECIFIC}"
			exit 1
		else
			# Load functions
			# shellcheck disable=SC1090
			. "${GXADMIN_SITE_SPECIFIC}"
		fi
	fi

	# Check that FN exists
	fn="${query_type}_${query_name}"
	LC_ALL=C type "$fn" 2> /dev/null | grep -q 'function'
	ec=$?

	if (( ec != 0 )); then
		didyoumean "${query_type}" "${query_name}"
	fi

	# Set the application name to something useful. (Makes it easier to figure
	# out which gxadmin command is hogging resources, etc.)
	export PGAPPNAME=gxadmin.${query_type}.${query_name}

	if [[ $query_type == "query" ]]; then
		obtain_func "$query_type" "$query_name" "$@"

		# If query in error, exit.
		if [[ "$QUERY" == "ERROR" ]]; then
			error "Error"
			usage query
		fi

		# Run the queries
		case "$group_name" in
			tsvquery         ) query_tsv "$QUERY";;
			csvquery         ) query_csv "$QUERY";;
			query            ) query_tbl "$QUERY";;
			jsonquery        ) query_json "$QUERY";;
			iquery           ) query_influx "$QUERY" "$query_name" "$fields" "$tags" "$timestamp";;
			explainquery     ) query_exp "$QUERY";;
			explainjsonquery ) query_expj "$QUERY";;
			echoquery        ) query_echo "$QUERY";;
			# default
			*                )  usage "Error";;
		esac
	elif [[ $query_type == "server" ]]; then
		obtain_func "$query_type" "$query_name" "$@"
		# If query in error, exit.
		if [[ "$QUERY" == "ERROR" ]]; then
			error "Error"
			usage server
		fi

		# Run the queries
		case "$group_name" in
			tsvserver         ) query_tsv "$QUERY";;
			csvserver         ) query_csv "$QUERY";;
			server            ) query_tbl "$QUERY";;
			jsonserver        ) query_json "$QUERY";;
			iserver           ) query_influx "$QUERY" "$query_name" "$fields" "$tags" "$timestamp";;
			explainserver     ) query_exp "$QUERY";;
			explainjsonserver ) query_expj "$QUERY";;
			echoserver        ) query_echo "$QUERY";;
			# default
			*                )  usage "Error";;
		esac

	elif [[ $query_type == "mutate" ]]; then
		obtain_func "$query_type" "$query_name" "$@"
		# If query in error, exit.
		if [[ "$QUERY" == "ERROR" ]]; then
			error "Error"
			usage mutate
		fi

		# Run the queries
		case "$group_name" in
			mutate            ) query_tbl "$QUERY";;
			explainmutate     ) query_exp "$QUERY";;
			explainjsonmutate ) query_expj "$QUERY";;
			echomutate        ) query_echo "$QUERY";;
			# default
			*                 )  usage "Error";;
		esac

	elif [[ $query_type == "local" ]]; then
		if [[ -z "${GXADMIN_BUGGER_OFF}" ]] && (( (RANDOM % 25) == 0 )); then
			warning "Hey! It's great that you're using gxadmin! You should contribute these functions back :) Other people might find these useful, or could learn something from the code you've written, even if you think it is too specific to your site."
		fi

		$fn "$@";

		if [[ "${query_name}" == "query"* ]]; then
			query_tbl "$QUERY"
		fi
	else
		$fn "$@";

		if [[ "${query_name}" == "query"* ]]; then
			query_tbl "$QUERY"
		elif [[ "${query_name}" == "iquery"* ]]; then
			query_influx "$QUERY" "$query_name" "$fields" "$tags" "$timestamp"
		fi
	fi
}
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance
levenshtein='
function levenshtein(str1, str2, cost_ins, cost_rep, cost_del,    str1_len, str2_len, matrix, is_match, i, j, x, y, z) {
	if(cost_ins == "") cost_ins = 1
	if(cost_rep == "") cost_rep = 1
	if(cost_del == "") cost_del = 1
	str1_len = length(str1)
	str2_len = length(str2)
	if(str1_len == 0) return str2_len * cost_ins
	if(str2_len == 0) return str1_len * cost_del
	matrix[0, 0] = 0
	for(i = 1; i <= str1_len; i++) {
		matrix[i, 0] = i * cost_del
		for(j = 1; j <= str2_len; j++) {
			matrix[0, j] = j * cost_ins
			x = matrix[i - 1, j] + cost_del
			y = matrix[i, j - 1] + cost_ins
			z = matrix[i - 1, j - 1] + (substr(str1, i, 1) == substr(str2, j, 1) ? 0 : cost_rep)
			x = x < y ? x : y
			matrix[i, j] = x < z ? x : z
		}
	}
	return matrix[str1_len, str2_len]
}
{
	print levenshtein($1, $2)"\t"$1;
}'


levenshtein_filter(){
	cat | awk -f <(echo "$levenshtein") | sort -nk1 | head -n 10
}
wrap_time() {
	local TIMEFORMAT="Time to execute $1: %R seconds"
	time "$@"
}

search() { # <term>: Search for a specific command
	if (( $# > 0 )); then
		if command -v fzf &> /dev/null; then
			res=$(locate_cmds | correct_cmd | fzf -q "$1" | sed 's/[:<\[].*//g')
			fzf_autorun_cmd
		else
			locate_cmds | correct_cmd | grep "$1" | colour_word "$1" orange
		fi
	else
		if command -v fzf &> /dev/null; then
			res=$(locate_cmds | correct_cmd | fzf | sed 's/[:<\[].*//g')
			fzf_autorun_cmd
		else
			locate_cmds | correct_cmd
		fi
	fi
}

if (( $# == 0 )); then
	usage
fi

mode="$1"; shift
wrapper=
if [[ "$mode" == time* ]]; then
	wrapper=wrap_time
	mode=${mode:4}
fi

case "$mode" in
	#*query ) $wrapper query "$mode" "$@" ;;
	*query ) $wrapper look_for "query"  "$mode" "$@" ;;
	*server) $wrapper look_for "server" "$mode" "$@" ;;
	*mutate) $wrapper look_for "mutate" "$mode" "$@" ;;
	config ) $wrapper look_for "$mode" "none"  "$@" ;;
	cluster) $wrapper look_for "$mode" "none"  "$@" ;;
	filter ) $wrapper look_for "$mode" "none"  "$@" ;;
	galaxy ) $wrapper look_for "$mode" "none"  "$@" ;;
	meta   ) $wrapper look_for "$mode" "none"  "$@" ;;
	report ) $wrapper look_for "$mode" "none"  "$@" ;;
	uwsgi  ) $wrapper look_for "$mode" "none"  "$@" ;;
	local  ) $wrapper look_for "$mode" "none"  "$@" ;;
	s      ) search "$@" ;;
	find   ) search "$@" ;;
	search ) search "$@" ;;

	# version commands
	version   ) version ;;
	-v        ) version ;;
	--version ) version ;;
	# help
	help      ) usage;;
	-h        ) usage;;
	--help    ) usage;;
	# anything else
	*         ) didyoumean "$mode" "$@";;
esac