Skip to content

Instantly share code, notes, and snippets.

@ludenus
Last active July 15, 2023 22:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ludenus/9be3348633e938c6e13527b9d194bd0f to your computer and use it in GitHub Desktop.
Save ludenus/9be3348633e938c6e13527b9d194bd0f to your computer and use it in GitHub Desktop.
profile_eks_container.sh
#!/bin/bash
#
# Runs async-profiler against the Java process of every pod matching a label
# selector and uploads the resulting HTML reports to a Nexus repository.
#
# Required environment:
#   JA_PRF_NAMESPACE   namespace of the target pods
#   JA_PRF_SELECTOR    label selector used to pick the pods
#   JA_PRF_CONTAINER   name of the container inside each pod
#   JA_PRF_NEXUS_URL   base URL of the Nexus server receiving the reports
#   JA_PRF_NEXUS_USER  Nexus user name
#   JA_PRF_NEXUS_PASS  Nexus password
# Optional environment (default):
#   JA_PRF_VERSION (2.9), JA_PRF_WORKDIR (/tmp), JA_PRF_TIME_SEC (30),
#   JA_PRF_EVENTS (cpu), JA_PRF_PREPARE_NODE_SH_URL (gist URL below)

# allexport: every variable AND function defined from here on is exported, so
# child bash processes started by this script can call the functions below.
set -a
set -e
set -o pipefail
me=$(basename "$0")
date_fmt='+%Y-%m-%d %H:%M:%S %z'
export version=${JA_PRF_VERSION:-'2.9'}
export async_profiler_tarball_url="https://github.com/jvm-profiling-tools/async-profiler/releases/download/v${version}/async-profiler-${version}-linux-x64.tar.gz"
export workdir=${JA_PRF_WORKDIR:-/tmp}
mkdir -p -- "${workdir}"
# Canonicalise to an absolute path. Assignment is kept separate from `export`
# and uses `&&` so that a failing `cd` aborts the script (set -e) instead of
# silently turning workdir into the current directory.
workdir=$(cd -- "${workdir}" && pwd)
export workdir
export namespace=${JA_PRF_NAMESPACE:?"ERROR: JA_PRF_NAMESPACE must be specified!"}
export selector=${JA_PRF_SELECTOR:?"ERROR: JA_PRF_SELECTOR must be specified!"}
export container=${JA_PRF_CONTAINER:?"ERROR: JA_PRF_CONTAINER must be specified!"}
export prepare_node_sh_url=${JA_PRF_PREPARE_NODE_SH_URL:-'https://gist.githubusercontent.com/ludenus/577b6337ecd21944467026dfce2a2f35/raw/prepare_eks_node.sh'}
export time_sec=${JA_PRF_TIME_SEC:-30}
export events=${JA_PRF_EVENTS:-'cpu'} # valid options: cpu, alloc, lock, wall, itimer
export nexus_url=${JA_PRF_NEXUS_URL:?"ERROR: JA_PRF_NEXUS_URL must be specified!"}
export nexus_user=${JA_PRF_NEXUS_USER:?"ERROR: JA_PRF_NEXUS_USER must be specified!"}
export nexus_pass=${JA_PRF_NEXUS_PASS:?"ERROR: JA_PRF_NEXUS_PASS must be specified!"}
# functions ====================================================================
# Print one log line on stdout: "<timestamp> [<script name>] <message>".
# Globals:   date_fmt (read) - format passed to date(1)
#            me (read)       - tag printed in brackets
# Arguments: $1 - message text
function log() {
  printf '%s [%s] %s\n' "$(date "${date_fmt}")" "${me}" "${1}"
}
# Log a message on stderr and terminate the current shell.
# Arguments: $1 - message (default: "ERROR: die for reason unknown")
#            $2 - exit status (default: 254)
function die() {
  local exit_status=${2:-254}
  local reason=${1:-"ERROR: die for reason unknown"}
  log "${reason}" 1>&2
  exit "${exit_status}"
}
# Download the async-profiler release tarball into ${workdir}, unless a copy
# from an earlier run is already present there.
# Globals:   workdir, version, async_profiler_tarball_url (read)
# Outputs:   progress messages via log
# Returns:   0 on success or cache hit; curl's exit status when the download
#            fails (no tarball is left behind in that case)
function download_java_async_profiler() {
  log " ... download java async profiler: ${workdir}"
  mkdir -p -- "${workdir}"
  local tar_gz="${workdir}/async-profiler-${version}-linux-x64.tar.gz"
  if [ ! -f "${tar_gz}" ]; then
    log " ... download java async profiler package: ${async_profiler_tarball_url} "
    # Download into a scratch file and rename only on success. Redirecting
    # straight into ${tar_gz} would leave a truncated file after a failed
    # transfer, and the next run would mistake it for a valid cached package.
    local partial="${tar_gz}.part.$$"
    local rc=0
    curl -LsSf "${async_profiler_tarball_url}" > "${partial}" || rc=$?
    if [ "${rc}" -ne 0 ]; then
      rm -f -- "${partial}"
      log "ERROR: failed to download: ${async_profiler_tarball_url}" >&2
      return "${rc}"
    fi
    mv -f -- "${partial}" "${tar_gz}"
  else
    log " ... java async profiler package found: ${tar_gz}"
  fi
  log "[ok] download java async profiler: ${tar_gz}"
}
# Verify that the tools this script depends on are usable; terminates via die
# with status 11 on the first failed check.
# Checks: jq and kubectl can be resolved by the shell, kubectl can list pods
#         in all namespaces, and `kubectl krew list` mentions node-shell.
function check_prerequsites() {
  # `command -v` is a shell builtin; the external `which` is not guaranteed to
  # be installed, which would be misreported as "jq not found".
  command -v jq >/dev/null || die "ERROR: jq not found" 11
  command -v kubectl >/dev/null || die "ERROR: kubectl not found" 11
  kubectl get pods --all-namespaces >/dev/null || die "ERROR: kubectl failed to get pods" 11
  # Output is redirected rather than using `grep -q`: with pipefail enabled an
  # early-exiting grep could make the pipeline fail through SIGPIPE.
  kubectl krew list | grep node-shell >/dev/null || die "ERROR: kubectl plugin node-shell not found" 11
}
# Print the first column of `kubectl get pods` (the pod name), one per line,
# for the pods matching the configured selector.
# Globals: namespace, selector (read)
function get_pod_names() {
  local -a query=(get pods -n "${namespace}" --no-headers --selector="${selector}")
  kubectl "${query[@]}" \
    | awk '{ print $1 }'
}
# Copy the downloaded async-profiler tarball into the target container, at the
# same path it has locally, and unpack it there.
# Globals:   namespace, container, workdir, version (read)
# Arguments: $1 - pod name
function copy_profiler_into_pod() {
  local pod_name=${1:?"ERROR: copy_profiler_into_pod requires pod name!"}
  local tar_gz="${workdir}/async-profiler-${version}-linux-x64.tar.gz"
  kubectl exec -n "${namespace}" --container="${container}" "${pod_name}" -- mkdir -p "${workdir}"
  kubectl cp -n "${namespace}" --container="${container}" "${tar_gz}" "${pod_name}:${tar_gz}"
  # The paths are handed to the remote shell as positional parameters instead
  # of being pasted into the command string, so a workdir containing spaces or
  # shell metacharacters is not re-parsed inside the container.
  # shellcheck disable=SC2016 — $1/$2 are expanded by the shell in the container
  kubectl exec -n "${namespace}" --container="${container}" "${pod_name}" -- \
    /bin/sh -c 'cd "$1" && tar -xf "$2"' sh "${workdir}" "${tar_gz}"
}
# Print the JSON manifest of one pod as returned by `kubectl get pod -o json`.
# Globals:   namespace (read)
# Arguments: $1 - pod name
function get_pod_json() {
  local target_pod=${1:?"ERROR: get_pod_json requires pod name!"}
  local -a request=(-n "${namespace}" get pod "${target_pod}" -o json)
  kubectl "${request[@]}"
}
# Print the value of .spec.nodeName from the pod's JSON manifest.
# Arguments: $1 - pod name
function get_node_name_for_pod() {
  local target_pod=${1:?"ERROR: get_pod_node requires pod name!"}
  get_pod_json "${target_pod}" \
    | jq -r '.spec.nodeName'
}
# Print the image reference of the configured container within a pod.
# Globals:   container (read) - name of the container to look up
# Arguments: $1 - pod name
# Outputs:   the container's .image value on stdout (nothing if no container
#            of that name exists in the pod spec)
function get_image_version() {
  local pod_name=${1:?"ERROR: get_image_version requires pod name!"}
  # Compare with `==`. The previous filter used `=`, which in jq is an
  # assignment: it is always truthy, so select() let every container through
  # and the images of all containers (sidecars included) were printed.
  # The name is passed with --arg so it is never parsed as part of the filter.
  get_pod_json "${pod_name}" \
    | jq -r --arg name "${container}" '.spec.containers[] | select(.name == $name) | .image'
}
# Fetch the script at ${prepare_node_sh_url} and run it on a node through
# `kubectl node-shell`, passing the profiler version and work directory to it
# as JA_PRF_VERSION / JA_PRF_WORKDIR.
# Globals:   namespace, version, workdir, prepare_node_sh_url (read)
# Arguments: $1 - node name
function prepare_node() {
  local target_node=${1:?"ERROR: prepare_node requires node name!"}
  # Command line executed by /bin/sh on the node.
  local remote_script="export JA_PRF_VERSION='${version}'; export JA_PRF_WORKDIR='${workdir}'; curl -LSsf '$prepare_node_sh_url' | /bin/sh -e - "
  log " ... prepare_node: ${target_node} '${version}' '${workdir}'"
  kubectl node-shell -n "${namespace}" "${target_node}" -- /bin/sh -e -c "${remote_script}"
  log "[ok] prepare_node: ${target_node}"
}
# Build a regular expression from the first `ps -ef` line mentioning "java"
# inside the container: the first three ps columns are blanked, leading spaces
# are trimmed, and every character outside [a-zA-Z0-9/_.-] becomes ".".
# Globals:   namespace, container (read)
# Arguments: $1 - pod name
# Outputs:   the resulting pattern on stdout
function get_java_cmd_regex_in_pod() {
  local target_pod=${1:?"ERROR: get_java_cmd_in_pod requires pod name!"}
  # Runs inside the container; the escaped $-signs are resolved there, so awk
  # receives {$1=$2=$3=""; print $0}.
  # shellcheck disable=SC2016
  local remote_script='sleep 1; ps -ef | grep java | grep -v grep | head -1 | awk "{\$1=\$2=\$3=\"\"; print \$0}"'
  kubectl exec -n "${namespace}" --container="${container}" "${target_pod}" -- /bin/sh -e -c "${remote_script}" \
    | sed -r -e 's/^ *//' -e 's#[^a-zA-Z0-9/_.-]#.#g'
}
# Look up, in the node's process table, the PID column of the `ps -ef` lines
# that mention "java" and match the given pattern (grep -P), then strip every
# non-digit character from each output line.
# Globals:   namespace (read)
# Arguments: $1 - node name
#            $2 - pattern describing the java command line
# Outputs:   digits of the matching PID column, one line per match
function resolve_java_pid_on_node() {
  local target_node="${1:?'ERROR: resolve_java_pid_on_node requires node name!'}"
  local cmd_pattern="${2:?'ERROR: resolve_java_pid_on_node requires java_cmd!'}"
  # Command line executed by /bin/sh on the node.
  local remote_script="sleep 1; ps -ef | grep java | grep -v grep | grep -P \"${cmd_pattern}\" | awk '{print \$2}'"
  kubectl node-shell -n "${namespace}" "${target_node}" -- /bin/sh -e -c "${remote_script}" \
    | sed -r 's/[^0-9]//g'
}
# Profile one Java process, fetch the HTML report and upload it to Nexus.
# Globals:   workdir, version, events, time_sec, namespace, container,
#            nexus_url, nexus_user, nexus_pass (read)
# Arguments: $1 - node on which profiler.sh is launched (via node-shell)
#            $2 - pod the report is copied from
#            $3 - PID of the Java process, as seen on the node
# Outputs:   progress via log; the report is also left in the current
#            directory as ./<pod>_<events>_<timestamp>.html
# Returns:   terminates via die when $3 is not a single decimal number
function run_profiler() {
  local node_name="${1:?'ERROR: run_profiler requires node name!'}"
  local pod_name="${2:?'ERROR: run_profiler requires pod name!'}"
  local java_pid="${3:?'ERROR: run_profiler requires java_pid!'}"
  # The PID is pasted into a shell command executed on the node; reject
  # anything that is not one plain number (e.g. several PIDs on several lines).
  [[ "${java_pid}" =~ ^[0-9]+$ ]] \
    || die "ERROR: run_profiler requires a single numeric java_pid, got: '${java_pid}'"

  local profiler_sh="${workdir}/async-profiler-${version}-linux-x64/profiler.sh"
  # The image version only decorates the report title, so a failed lookup is
  # reported but does not abort the run.
  local image_ver
  image_ver=$(get_image_version "${pod_name}") \
    || log "WARN: could not determine image version of pod: ${pod_name}" >&2
  local timestamp
  timestamp=$(date '+%Y-%m-%d_%H-%M-%S')
  local report_file="${pod_name}_${events}_${timestamp}.html"
  local report_url="${nexus_url}/repository/raw-storage/async-profiler/${container}/${report_file}"
  local title="(${time_sec}) seconds [${events}] ${pod_name} ${image_ver} ${timestamp}"

  log " ... run profiler: ${title}"
  kubectl node-shell -n "${namespace}" "${node_name}" -- /bin/sh -e -c \
    "'${profiler_sh}' -e ${events} -d ${time_sec} --title '${title}' -f '${workdir}/${report_file}' ${java_pid}"
  log "[ok] run profiler"

  # NOTE(review): the profiler is started on the node but the report is read
  # from the pod at the same path — presumably the attached JVM writes the file
  # inside the container; confirm.
  log " ... fetch report from pod: ${pod_name}"
  kubectl cp -n "${namespace}" --container="${container}" \
    "${pod_name}:${workdir}/${report_file}" "./${report_file}"
  log "[ok] fetch report from pod: ${pod_name}"

  log " ... upload report"
  # Credentials are quoted so that whitespace or glob characters in the
  # password reach curl as one argument. They are still visible in the process
  # list while curl runs.
  curl -k -Ssf -u "${nexus_user}:${nexus_pass}" --upload-file "./${report_file}" "${report_url}"
  log "[ok] upload report to: ${report_url}"
}
# main ====================================================================
check_prerequsites
download_java_async_profiler

pod_names=$(get_pod_names)
if [[ -z "${pod_names}" ]]; then
  die "ERROR: no pods found in namespace: [${namespace}] for selector: '${selector}'"
fi

# A launcher script is assembled in a temp file and later run by a child bash.
# It starts one background run_profiler per pod and waits for all of them.
# The child can call run_profiler/log only because this script exports its
# functions (`set -a` at the top of the file).
tmp=$(mktemp)
cat <<- 'EOF1' >> "${tmp}"
me=`basename "$0"`
export exit_code=0
EOF1

# prepare nodes and compose profiler launch commands
# Word splitting of pod_names is intended here: one pod name per word.
# shellcheck disable=SC2086
for pod_name in ${pod_names}; do
  echo "----------------------------$pod_name-------------------------------"
  copy_profiler_into_pod "${pod_name}"

  # Assignments are kept separate from `export` so the lookup status is not
  # masked by export's own (always zero) status.
  node_name=$(get_node_name_for_pod "${pod_name}") \
    || die "ERROR: failed to resolve node for pod: ${pod_name}"
  export node_name
  prepare_node "${node_name}"

  # A failed lookup below is reported but not fatal: the empty value is passed
  # on, run_profiler rejects it inside the launcher, and the launcher counts
  # that job as failed while the remaining pods are still profiled.
  java_cmd=$(get_java_cmd_regex_in_pod "${pod_name}") \
    || log "WARN: failed to read java command line in pod: ${pod_name}" >&2
  export java_cmd
  log "java_cmd: '${java_cmd}'"
  java_pid=$(resolve_java_pid_on_node "${node_name}" "${java_cmd}") \
    || log "WARN: failed to resolve java pid on node: ${node_name}" >&2
  export java_pid
  log "java_pid: '${java_pid}'"

  # will launch profiler in background
  # %q shell-quotes each value, so whatever was resolved (empty, multi-line,
  # containing '%' or metacharacters) reaches run_profiler as exactly one
  # argument instead of being interpreted as a printf format or as shell code.
  printf 'run_profiler %q %q %q &\n' "${node_name}" "${pod_name}" "${java_pid}" >> "${tmp}"
done

# append background process wait
cat <<- 'EOF2' >> "${tmp}"
for job in `jobs -p`;do
log "waiting $job..."
wait $job || let "exit_code+=1"
done
log "exiting with code: ${exit_code}"
exit ${exit_code}
EOF2

ls -pilaF "${tmp}"
cat "${tmp}"
echo "-----------------------------------------------------------------------"
log " ... launching profiler for pods"
# The launcher is removed only after a successful run; on failure it is kept.
if /bin/bash -e "${tmp}"; then
  rm -f -- "${tmp}" || log "WARN: could not remove ${tmp}" >&2
else
  die "ERROR: something went wrong, examine log for details"
fi
log "[ok]"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment