Created
August 30, 2021 21:12
-
-
Save ctran/0c20887d2ef5d456ab4ede44c9615841 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
## The following is automatically generated code, do not manually modify.
## Template is available in # scripts/commons-templates.sh
## START AUTOGENERATED CODE

# Version stamp of this script; it is printed in the start-up banner.
# shellcheck disable=SC2034
SCRIPT_VERSION=1630078691

# Useful variables
# Physical (symlink-resolved) directory that contains this script.
HERE="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Logging functions
# Minimum level that gets printed; taken from CBSUPPORT_LOGGING_LEVEL when that
# is set and non-empty, otherwise 'DEBUG'.
LOGGING_LEVEL=${CBSUPPORT_LOGGING_LEVEL:-'DEBUG'}
#######################################
# Log a message at INFO level.
# Arguments: $1 - message text
# Outputs:   whatever __log emits for level 'INFO'
#######################################
function log_info() {
  local -r txt="${1}"
  __log 'INFO' "${txt}"
}
#######################################
# Log a message at DEBUG level.
# Arguments: $1 - message text
# Outputs:   whatever __log emits for level 'DEBUG'
#######################################
function log_debug() {
  local -r txt="${1}"
  __log 'DEBUG' "${txt}"
}
#######################################
# Log a message at ERROR level.
# The message is tagged 'ERROR' (not 'DEBUG') so that it is labelled correctly
# and ranks above INFO/DEBUG when __log applies the logging threshold.
# Arguments: $1 - message text
# Outputs:   whatever __log emits for level 'ERROR'
#######################################
function log_error() {
  local -r txt="${1}"
  __log 'ERROR' "${txt}"
}
# A simpler solution would be to use associative arrays, but we cannot assume bash 4...
#######################################
# Map a logging level name to its numeric rank.
# Arguments: $1 - level name
# Outputs:   2 for 'ERROR', 1 for 'INFO', 0 for anything else (including
#            'DEBUG' and the empty string) on stdout
#######################################
function __level_to_int() {
  local -r level="${1}"
  case "${level}" in
    ERROR) echo 2 ;;
    INFO) echo 1 ;;
    # default
    *) echo 0 ;;
  esac
}
#######################################
# Print a log line if its level reaches the configured threshold.
# Globals:   LOGGING_LEVEL (read) - minimum level name to print
# Arguments: $1 - level name of this message
#            $2 - message text
# Outputs:   "[LEVEL] text" on stdout, or nothing when the message ranks
#            below LOGGING_LEVEL
# Returns:   0
#######################################
function __log() {
  local -r level="${1}"
  local -r txt="${2}"
  local message_rank threshold_rank
  message_rank="$(__level_to_int "${level}")"
  threshold_rank="$(__level_to_int "${LOGGING_LEVEL}")"
  if [ "${message_rank}" -lt "${threshold_rank}" ]; then
    return 0
  fi
  # printf keeps backslashes in the text literal regardless of echo settings.
  printf '[%s] %s\n' "${level}" "${txt}"
}
#######################################
# Check whether a recommended tool is available and optionally log a note
# when it is not.
# Arguments: $1 - command name to look for
#            $2 - 'true' (default) to log a debug note when the tool is
#                 missing; any other value keeps the check silent
# Returns:   0 when the tool is installed; otherwise the status of the debug
#            log call when verbose, or non-zero when not verbose
#######################################
function check_tool() {
  local -r cmd="${1}"
  local -r verbose="${2:-true}"
  if is_tool_installed "${cmd}"; then
    return 0
  fi
  # Compare the flag instead of executing its value as a command.
  [ "${verbose}" = 'true' ] && log_debug "${cmd} is recommended but it's not installed."
}
#######################################
# Tell whether a command can be resolved by the shell.
# Arguments: $1 - command name or path
# Outputs:   nothing (stdout and stderr of the lookup are discarded)
# Returns:   status of `command -v` for the given name
#######################################
function is_tool_installed() {
  local -r cmd="${1}"
  command -v "${cmd}" >/dev/null 2>&1
}
## END AUTOGENERATED CODE | |
#################################################################################### | |
# This script is used to collect data for | |
# 'RequiredData: Performance, Hang or High CPU Issues for a Java process running on Linux' | |
# | |
##################################################################################### | |
#######################################
# Print the usage text for this script.
# Arguments: none (any arguments are ignored)
# Outputs:   usage text on stdout; the script name is derived from $0
#######################################
function print_help() {
  cat <<EOM
Unable to find required PID argument. Please rerun the script as follows:
$(basename "$0") PID [duration] [frequency]
PID: Java process (Jenkins, CI, CD) PID
duration: Tests duration time in seconds (default 60 seconds)
frequency: Number of seconds that will wait until next data require (default 5 seconds)
Optional environment vars
JAVA_HOME used to locate JDK
JATTACH_HOME path to directory containing jattach (optional: is used only if no JDK is found and jattach is not in the path)
JAVA_USERID Java userid if this script is run as root instead of the userid running the Java process
PERFORMANCE_DATA_OUTPUT_DIR output dir
In case no JDK is found, the script will try to use jattach: https://github.com/apangin/jattach
Run $(basename "$0") --help to print help.
EOM
}
#######################################
# Validate the environment before collecting data: the output directory must
# be writable and the recommended system tools are checked.
# Globals:   PERFORMANCE_DATA_OUTPUT_DIR (read), HERE (read, for logging)
# Arguments: none (any arguments are ignored)
# Outputs:   debug/error log lines
# Returns:   exits the script when the output directory cannot be entered or
#            cannot be written to
#######################################
function script_validation() {
  local probe_file
  log_debug "Script Validation Results"
  log_debug "Moving to ${PERFORMANCE_DATA_OUTPUT_DIR}"
  pushd "${PERFORMANCE_DATA_OUTPUT_DIR}" >/dev/null || exit
  # Check if the directory can be written to by the user that is running the
  # script. A uniquely named probe file is used so that a pre-existing file is
  # neither mistaken for a successful write nor deleted afterwards.
  if probe_file="$(mktemp ./.write-test.XXXXXX 2>/dev/null)"; then
    log_debug 'This directory can be written to by the script'
    rm -f -- "${probe_file}"
  else
    log_error 'This directory cannot be written to by the script. Please either run this script from a directory that can be written to or use the optional environment variable: PERFORMANCE_DATA_OUTPUT_DIR .'
    exit 1
  fi
  check_tool 'top'
  check_tool 'vmstat'
  check_tool 'netstat'
  check_tool 'iostat'
  log_debug "Moving back to current dir ${HERE}"
  popd >/dev/null || exit
}
# Command line: PID [duration] [frequency], or --help as the only argument.
# Defaults used when the optional arguments are omitted (both in seconds).
duration=60
frequency=5
case $# in
  1)
    if [ "$1" = "--help" ]; then
      print_help
      exit 0
    fi
    pid="${1}"
    ;;
  2)
    pid="${1}"
    duration="${2}"
    ;;
  3)
    pid="${1}"
    duration="${2}"
    frequency="${3}"
    ;;
  *)
    print_help
    exit 1
    ;;
esac

# Reject anything that is not a positive decimal integer. A zero or negative
# frequency would never decrease the remaining duration (endless collection
# loop), and non-numeric or zero-prefixed values break the integer tests and
# arithmetic used by the collection loop.
for __arg_name in pid duration frequency; do
  case "${!__arg_name}" in
    '' | *[!0-9]* | 0*)
      log_error "Invalid ${__arg_name} '${!__arg_name}': expected a positive integer."
      print_help
      exit 1
      ;;
  esac
done
unset __arg_name
# Default the output directory to the current working directory when the
# caller did not provide PERFORMANCE_DATA_OUTPUT_DIR.
if [ -z "${PERFORMANCE_DATA_OUTPUT_DIR}" ]; then
  PERFORMANCE_DATA_OUTPUT_DIR="$(pwd)"
  log_debug "Output dir ${PERFORMANCE_DATA_OUTPUT_DIR}"
fi
# Exits the script when the output directory is unusable.
script_validation "${0}"
# Locate a tool able to produce a thread dump. By default the tools are
# looked up by bare name on the PATH.
declare jcmd_bin="jcmd"
declare jstack_bin="jstack"
declare jattach_bin="jattach"
if [ -n "${JAVA_HOME}" ]; then
  # shellcheck disable=SC2016
  log_debug 'JAVA_HOME is set. Looking for JDK tools in ${JAVA_HOME}/bin.'
  jcmd_bin="${JAVA_HOME}/bin/jcmd"
  jstack_bin="${JAVA_HOME}/bin/jstack"
else
  log_debug 'JAVA_HOME is NOT set. Looking for a JDK on the PATH.'
fi
# jattach is only considered when neither jcmd nor jstack is available.
if ! is_tool_installed "${jcmd_bin}" && ! is_tool_installed "${jstack_bin}"; then
  log_debug 'jcmd or jstack not found. Looking for jattach'
  if [ -n "${JATTACH_HOME}" ]; then
    log_debug "JATTACH_HOME is set. Looking for the binary in ${JATTACH_HOME}"
    jattach_bin="${JATTACH_HOME}/jattach"
  else
    log_debug 'JATTACH_HOME is NOT set. Looking for jattach on the PATH.'
  fi
  if ! is_tool_installed "${jattach_bin}"; then
    log_error 'Could not find a JDK nor jattach. Either the full Java JDK and jattach are not installed or they are not the path of the user that is running the Java process.'
    exit 1
  fi
fi
# Optional command prefix used to run the thread dump tool as another user.
# It is kept as an array so that every word stays a separate argument
# without relying on unquoted word splitting.
declare -a cmd_prefix=()
if [ -n "${JAVA_USERID}" ]; then
  cmd_prefix=(sudo -u "${JAVA_USERID}")
  log_debug "user ${JAVA_USERID}"
fi

#######################################
# Write a thread dump of a Java process to a file, using the first available
# tool among jcmd, jstack and jattach.
# Globals:   jcmd_bin, jstack_bin, jattach_bin, cmd_prefix (read)
# Arguments: $1 - PID of the Java process
#            $2 - path of the file that receives the thread dump
# Returns:   status of the tool that was run, or 1 when none of the tools is
#            available (no file is written in that case)
#######################################
function write_threads() {
  local pid="$1"
  local threadFileName="$2"
  if is_tool_installed "${jcmd_bin}"; then
    "${cmd_prefix[@]}" "${jcmd_bin}" "${pid}" Thread.print -l >"${threadFileName}"
  elif is_tool_installed "${jstack_bin}"; then
    "${cmd_prefix[@]}" "${jstack_bin}" -l "${pid}" >"${threadFileName}"
  elif is_tool_installed "${jattach_bin}"; then
    "${cmd_prefix[@]}" "${jattach_bin}" "${pid}" threaddump >"${threadFileName}"
  else
    # Report the failure instead of silently pretending a dump was taken.
    log_error 'No thread dump tool (jcmd, jstack or jattach) is available.'
    return 1
  fi
}
# Create temporary directories: one working directory per run (named after
# the PID and the start time) with one sub-directory per kind of data.
TEMP_DIR="${PERFORMANCE_DATA_OUTPUT_DIR}/tmp.${pid}.$(date +%Y%m%d%H%M%S)"
log_debug "Temporary dir ${TEMP_DIR}"
# Stop right away when the directories cannot be created: every later
# redirection into them would fail otherwise.
if ! mkdir -p \
  "${TEMP_DIR}/iostat" \
  "${TEMP_DIR}/threads" \
  "${TEMP_DIR}/netstat" \
  "${TEMP_DIR}/topdashHOutput" \
  "${TEMP_DIR}/topOutput" \
  "${TEMP_DIR}/vmstat" \
  "${TEMP_DIR}/nfsiostat" \
  "${TEMP_DIR}/nfsstat"; then
  log_error "Could not create the temporary directories under ${TEMP_DIR}"
  exit 1
fi

# Begin script and notify the end user. The run parameters are also recorded
# in mode.txt, which is shipped in the final archive.
log_info "The collectPerformanceData.sh script $SCRIPT_VERSION is starting in custom mode." | tee "${TEMP_DIR}/mode.txt"
{
  log_info "The pid is $pid"
  log_info "The custom duration is $duration"
  log_info "The custom thread dump generation frequency is $frequency"
} >>"${TEMP_DIR}/mode.txt"

# Output the Default Settings to the end user
log_debug "The custom mode should only be used if requested && if data should be collected for longer than 1 minute"
log_info "The collectPerformanceData.sh script will run for $duration seconds."
log_info "It will generate a full data generation (threadDump, iostat, vmstat, netstat, top) every $frequency seconds."
log_debug ">>>>>>>>>>>>>>>The frequency Has To Divide into the duration by a whole integer.<<<<<<<<<<<<<<<"
log_debug ">>>>>>>>>>>>>>>The duration Divided by 60 should also be a whole integer.<<<<<<<<<<<<<<<"
log_debug ">>>>>>>>>>>>>>>The duration Divided by 5 should also be a whole integer.<<<<<<<<<<<<<<<"
log_debug ">>>>>>>>>>>>>>>Setting the frequency to low, i.e. 1 second, may cause the data to be inconclusive.<<<<<<<<<<<<<<<"
# Begin data generation once every $frequency seconds, until the requested
# duration has elapsed. Each round starts all system snapshots in the
# background, takes a thread dump, then waits for everything to finish.
while [ "${duration}" -gt 0 ]; do
  # One timestamp per round, so that all files of a round share the same
  # suffix and can be correlated.
  collection_stamp="$(date +%Y%m%d%H%M%S)"

  # Taking top data collection
  log_info "Taking top data collection."
  COLUMNS=300 top -bc -n 1 >"${TEMP_DIR}/topOutput/topOutput.${collection_stamp}.txt" &

  # Taking topdashH data collection (per-thread view of the Java process)
  log_info "Taking TopdashH data collection."
  top -bH -p "${pid}" -n 1 >"${TEMP_DIR}/topdashHOutput/topdashHOutput.${pid}.${collection_stamp}.txt" &

  # Taking vmstat data collection in the background
  log_info "Taking vmstat data collection."
  vmstat >"${TEMP_DIR}/vmstat/vmstat.${collection_stamp}.out" &

  # Taking netstat data
  log_info "Taking netstat collection."
  # redirecting stderr to /dev/null to get rid of the annoying message for non root users
  netstat -pan 2>/dev/null >"${TEMP_DIR}/netstat/netstat.${collection_stamp}.out" &

  # Taking iostat data collection
  log_info "Taking iostat data collection."
  if is_tool_installed iostat; then
    iostat -t >"${TEMP_DIR}/iostat/iostat.${collection_stamp}.out" &
  else
    log_debug 'The command iostat was not found'
  fi

  # Taking nfsiostat data collection
  log_info 'Taking nfsiostat data collection.'
  if is_tool_installed nfsiostat; then
    nfsiostat >"${TEMP_DIR}/nfsiostat/nfsiostat.${collection_stamp}.out" &
  else
    log_debug 'The command nfsiostat was not found'
  fi

  # Taking nfsstat data collection
  log_info 'Taking nfsstat data collection.'
  if is_tool_installed nfsstat; then
    nfsstat -c >"${TEMP_DIR}/nfsstat/nfsstat.${collection_stamp}.out" &
  else
    log_debug 'The command nfsstat was not found'
  fi

  # Taking a threadDump
  THREADS_FILENAME="${TEMP_DIR}/threads/threads.${pid}.${collection_stamp}.txt"
  write_threads "${pid}" "${THREADS_FILENAME}" &
  # Record the process PID
  THREAD_DUMP_PID=$!
  # Wait for the thread dump background process
  wait "${THREAD_DUMP_PID}"
  # Get the exit code of the $THREAD_DUMP_PID
  THREAD_DUMP_PID_STATUS=$?
  # Wait for all background process
  wait

  if [ "${THREAD_DUMP_PID_STATUS}" -ne 0 ]; then
    rm -r -- "${TEMP_DIR:?}"
    log_error 'The script failed to collect a thread dump. Maybe it is not launched with the same user that the Java process is running as. Try with sudo -u <JAVA_USERID> >>>>>>>>>>>>>>>'
    exit 1
  fi
  # Only report the thread dump once it is known to have succeeded.
  log_info "Collected a threadDump for PID $pid."

  # Pause for $frequency seconds.
  log_info "A new collection will start in ${frequency} seconds."
  sleep "${frequency}"

  # Update duration
  duration=$((duration - frequency))
done
# Package everything that was collected into one archive placed next to the
# temporary directory, then remove the temporary directory.
log_info "Packaging data and preparing for cleanup."
log_debug "Moving to ${TEMP_DIR}"
pushd "${TEMP_DIR}" >/dev/null || exit
PERFORMANCE_DATA_ARCHIVE_NAME="${CBSUPPORT_OUTPUT:-performanceData.$pid.output.tar.gz}"
# Never delete the raw data unless the archive was created and copied.
if ! tar -czf "${PERFORMANCE_DATA_ARCHIVE_NAME}" topOutput topdashHOutput mode.txt threads vmstat netstat iostat nfsiostat nfsstat ||
  ! cp -- "${PERFORMANCE_DATA_ARCHIVE_NAME}" ..; then
  log_error "Failed to package the collected data. The raw files are kept in ${TEMP_DIR}"
  exit 1
fi
log_debug "Moving back to current dir ${HERE}"
popd >/dev/null || exit

# Clean up only after leaving the directory, so that a relative TEMP_DIR still
# resolves and the shell is not left inside a deleted directory.
log_info "Cleanup files"
rm -r -- "${TEMP_DIR:?}"

# Notify end user. Do not do it when running in the context of cbsupport as the message is misleading for the end user.
if [ -z "${CBSUPPORT_OUTPUT}" ]; then
  log_info "The temporary dir \"${TEMP_DIR}\" has been deleted"
  log_info "The collectPerformanceData.sh script in CUSTOM MODE is complete."
  log_info "The Output files are contained within !>>>! ${PERFORMANCE_DATA_ARCHIVE_NAME} !<<<!"
  log_info "Please upload the ${PERFORMANCE_DATA_ARCHIVE_NAME} archive to your ticket for review."
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment