|
#! /usr/bin/env nix-shell |
|
#! nix-shell -i bash -p curl smenu jq textql coreutils |
|
# shellcheck shell=bash |
|
# |
|
# If you export a github access token as $gh_token before running this |
|
# it'll execute with a better API allowance. |
|
# |
|
# This is not smart about paging. It doesn't have to support terribly |
|
# deep historic analysis, so I'm hoping "run daily" or |
|
# "run every N hours" is enough to get useful data. |
|
# |
|
# ./gh_macos_stats.sh <gh_username> <gh_repo> |
|
|
|
# positional args: $1 = GitHub user/org, $2 = repo name
GH_REPO="$1/$2"

# every state file is prefixed per-repo so multiple repos can coexist in one dir
FILE_PREFIX="$1_$2_"

# cache for the numeric id of the workflow being analyzed
WORKFLOW_ID_FILE="${FILE_PREFIX}workflow_id"

# CSV accumulator; header: job_id,run_id,job,step,duration
STEP_DURATION_FILE="${FILE_PREFIX}run_job_steps.csv"

# user-selected job names (stored as JSON strings for jq --slurpfile)
RELEVANT_RUN_FILE="${FILE_PREFIX}relevant_runs"

# user-selected step names (stored as JSON strings for jq --slurpfile)
RELEVANT_STEP_FILE="${FILE_PREFIX}relevant_steps"

# jobs_url values already processed; consumed by comm(1), so kept sorted
PREVIOUSLY_SEEN_JOBS="${FILE_PREFIX}previously_seen_jobs_urls"
|
|
|
# _last: print the last positional argument.
# Returns 1 (printing nothing) when called with no arguments.
#
# Fixes the original test `[[ ${@#} > 0 ]]`: ${@#} expands to the positional
# params themselves (a [[ ]] syntax error for zero or >1 args), and `>` inside
# [[ ]] is a lexicographic string comparison, not numeric. `(( $# > 0 ))` is
# the intended arg-count check.
function _last(){
  if (( $# > 0 )); then
    printf '%s\n' "${@: -1}"
  else
    return 1
  fi
}
|
|
|
# pick_workflow_id: list the repo's active workflows, let the user choose one
# via smenu, and print the chosen workflow's numeric id (the last word of the
# selected menu line, extracted by _last).
# Globals: GH_REPO (read), gh_token (optional; enables authenticated API calls)
function pick_workflow_id(){
  # optional auth header — an empty array expands to zero words, so the
  # unauthenticated call is byte-identical to before
  local auth=()
  if [[ -n "$gh_token" ]]; then
    auth=(-H "Authorization: token $gh_token")
  fi
  _last $(curl -L "${auth[@]}" "https://api.github.com/repos/$GH_REPO/actions/workflows" 2>/dev/null | jq '.workflows[] | select(.state == "active") | "name: "+.name+", config: "+.path+", id: "+(.id|tostring)' | smenu -d -m 'Select the workflow you added the macOS performance test to') 2>/dev/null
}
|
|
|
# get_workflow_id: emit the workflow id, preferring the cached value in
# $WORKFLOW_ID_FILE; otherwise prompt interactively (pick_workflow_id) and
# cache the answer. Returns 1 if no id could be determined.
function get_workflow_id(){
  # fast path: a non-empty cache file wins
  if [[ -s "$WORKFLOW_ID_FILE" ]]; then
    cat "$WORKFLOW_ID_FILE"
    return
  fi

  local picked
  picked="$(pick_workflow_id)"
  [[ -n "$picked" ]] || return 1

  # print it AND persist it for next time
  echo "$picked" | tee "$WORKFLOW_ID_FILE"
}
|
|
|
# pick_relevant_jobs: interactively build $RELEVANT_RUN_FILE — the job names
# the user wants to compare. Names are appended as the JSON strings jq emits
# (no --raw-output) because $RELEVANT_RUN_FILE is later fed to jq --slurpfile,
# which expects JSON values.
#   $1 - file of JSON job objects (one per document), e.g. from jq '.jobs[]'
# Jobs with "macos" in the name are offered first; the user may then opt in
# to reviewing the remaining jobs.
# Fix: "$1" was unquoted in both jq calls (SC2086 word-splitting/glob risk).
function pick_relevant_jobs(){
  echo -e "\nI need your help picking out the relevant jobs."
  echo "First, we'll look at jobs with 'macos' in the name."
  while read -r job; do
    # ${job,,} lowercases for a case-insensitive substring match
    if [[ ${job,,} == *macos* ]]; then
      R=$(echo "Yes No" | smenu -d -m "Is $job one of the jobs you'd like to compare?" -s /Y)
      if [[ $R == "Yes" ]]; then
        echo "Adding $job to the list of jobs to compare."
        echo "$job" >> "$RELEVANT_RUN_FILE"
      fi
    else
      echo "Skipping $job for now."
    fi
  done < <(jq '.name' "$1" | sort -u)

  R=$(echo "Yes No" | smenu -d -m "Did I skip over any jobs we should include?" -s /N)
  if [[ $R == "Yes" ]]; then
    echo "Ok. I'll repeat the process for the remaining jobs."
    # second pass: only the jobs skipped above (no "macos" in the name)
    while read -r job; do
      if [[ ${job,,} != *macos* ]]; then
        R=$(echo "Yes No" | smenu -d -m "Is $job one of the jobs you'd like to compare?" -s /Y)
        if [[ $R == "Yes" ]]; then
          echo "Adding $job to the list of jobs to compare."
          echo "$job" >> "$RELEVANT_RUN_FILE"
        fi
      else
        echo "Skipping $job for now."
      fi
    done < <(jq '.name' "$1" | sort -u)
  fi
}
|
|
|
# pick_relevant_steps: interactively build $RELEVANT_STEP_FILE — the step names
# the user wants to compare. As with pick_relevant_jobs, names are stored as
# the JSON strings jq emits, ready for later jq --slurpfile consumption.
#   $1 - file of JSON job objects (one per document), e.g. from jq '.jobs[]'
# Steps with "nix" in the name are offered first; the user may then opt in
# to reviewing the remaining steps.
# Fix: "$1" was unquoted in both jq calls (SC2086 word-splitting/glob risk).
function pick_relevant_steps(){
  echo -e "\nI need your help picking out the relevant steps."

  echo "First, let's look for any steps with 'nix' in the name."
  while read -r step; do
    # ${step,,} lowercases for a case-insensitive substring match
    if [[ ${step,,} == *nix* ]]; then
      R=$(echo "Yes No" | smenu -d -m "Is $step one of the steps you'd like to compare?" -s /Y)
      if [[ $R == "Yes" ]]; then
        echo "Adding $step to the list of steps to compare."
        echo "$step" >> "$RELEVANT_STEP_FILE"
      fi
    else
      echo "Skipping $step for now."
    fi
  done < <(jq '.steps[].name' "$1" | sort -u)

  R=$(echo "Yes No" | smenu -d -m "Did I skip over any steps we should include?" -s /N)
  if [[ $R == "Yes" ]]; then
    echo "Ok. I'll repeat the process for the remaining steps."
    # second pass: only the steps skipped above (no "nix" in the name)
    while read -r step; do
      if [[ ${step,,} != *nix* ]]; then
        R=$(echo "Yes No" | smenu -d -m "Is $step one of the steps you'd like to compare?" -s /Y)
        if [[ $R == "Yes" ]]; then
          echo "Adding '$step' to the list of steps to compare."
          echo "$step" >> "$RELEVANT_STEP_FILE"
        fi
      else
        echo "Skipping '$step' for now."
      fi
    done < <(jq '.steps[].name' "$1" | sort -u)
  fi
}
|
|
|
# fetch_new_job_stats: find successful runs of $GH_WORKFLOW whose jobs we have
# not yet processed, and append per-step durations as CSV rows to
# $STEP_DURATION_FILE. On the first run (or after the filter files are
# removed) it interactively builds the job/step filters.
# Globals: GH_REPO, GH_WORKFLOW, STEP_DURATION_FILE, PREVIOUSLY_SEEN_JOBS,
#          RELEVANT_RUN_FILE, RELEVANT_STEP_FILE (read/write),
#          gh_token (optional, enables authenticated API calls)
# Fixes: dedupes the token/no-token curl branches and the triplicated jq
# filter; re-sorts $PREVIOUSLY_SEEN_JOBS after appending (comm(1) requires
# sorted inputs on the *next* run); removes the leaked mktemp file.
function fetch_new_job_stats(){
  # shared jq extraction filter: keep only wanted jobs/steps and emit
  # run_id,job_id,job,step,duration(seconds, from the step timestamps) as CSV
  local extract_csv='. | select(.name | IN($wanted_jobs[])) | {id:.id, run_id:.run_id, name:.name, step: (.steps[] | select(.name | IN($wanted_steps[])))} | [.run_id, .id, .name, .step.name, ((.step.completed_at | strptime("%Y-%m-%dT%H:%M:%S.000Z") | mktime) - (.step.started_at | strptime("%Y-%m-%dT%H:%M:%S.000Z") | mktime))] | @csv'

  # optional auth header; an empty array expands to zero words
  local auth=()
  if [[ -n "$gh_token" ]]; then
    auth=(-H "Authorization: token $gh_token")
  fi

  if [[ ! -s "$STEP_DURATION_FILE" ]]; then
    echo "job_id,run_id,job,step,duration" > "$STEP_DURATION_FILE"
  fi

  # comm(1) needs both of its inputs to exist
  if [[ ! -s "$PREVIOUSLY_SEEN_JOBS" ]]; then
    touch "$PREVIOUSLY_SEEN_JOBS"
  fi

  # jobs_url values returned by the API but absent from the seen-file;
  # tee -a records every new URL as seen at the same time
  new_job_urls="$(comm -13 "$PREVIOUSLY_SEEN_JOBS" <(curl -L "${auth[@]}" "https://api.github.com/repos/$GH_REPO/actions/workflows/$GH_WORKFLOW/runs?status=success" 2>/dev/null | jq --raw-output '.workflow_runs[].jobs_url' | sort -u) | sort -u | tee -a "$PREVIOUSLY_SEEN_JOBS")"
  # the append above can break the file's sort order; restore it so the
  # comm call in the next invocation stays correct
  sort -u -o "$PREVIOUSLY_SEEN_JOBS" "$PREVIOUSLY_SEEN_JOBS"

  if [[ -z "$new_job_urls" ]]; then
    echo "No new jobs found to process."
    return
  fi

  if [[ -s "$RELEVANT_RUN_FILE" && -s "$RELEVANT_STEP_FILE" ]]; then
    echo "New jobs found; fetching and applying previously-configured filters."
    echo "You can see previously-configured settings in $RELEVANT_RUN_FILE and $RELEVANT_STEP_FILE. If you'd like a fresh start, just rm them."

    # $new_job_urls is deliberately unquoted: one curl call, many URLs
    # shellcheck disable=SC2086
    curl -L "${auth[@]}" $new_job_urls 2>/dev/null | jq '.jobs[]' | jq --raw-output --slurpfile wanted_jobs "$RELEVANT_RUN_FILE" --slurpfile wanted_steps "$RELEVANT_STEP_FILE" "$extract_csv" >> "$STEP_DURATION_FILE"

    sort -ur -o "$STEP_DURATION_FILE" "$STEP_DURATION_FILE"
  else
    echo "No previously-configured filters; I'll set them up after fetching new jobs."
    runs_json="$(mktemp)"
    # shellcheck disable=SC2086
    curl -L "${auth[@]}" $new_job_urls 2>/dev/null | jq '.jobs[]' > "$runs_json"

    # both pickers can work from the same cached API result
    if [[ ! -s "$RELEVANT_RUN_FILE" ]]; then
      pick_relevant_jobs "$runs_json"
    fi

    if [[ ! -s "$RELEVANT_STEP_FILE" ]]; then
      pick_relevant_steps "$runs_json"
    fi

    jq --raw-output --slurpfile wanted_jobs "$RELEVANT_RUN_FILE" --slurpfile wanted_steps "$RELEVANT_STEP_FILE" "$extract_csv" "$runs_json" >> "$STEP_DURATION_FILE"
    rm -f -- "$runs_json"  # was previously leaked
  fi
}
|
|
|
# report_average_duration: print the average duration of each selected step,
# grouped under a per-step header and broken down by job, using textql to
# aggregate the CSV in $STEP_DURATION_FILE.
function report_average_duration(){
  echo -e "\nReporting for $GH_REPO (avg duration per step, broken down by job):"
  # the actual analysis is pretty nice at this point...
  local previous_step=""
  while IFS=$'\t' read -r step job duration; do
    # emit a header each time we move on to a new step
    if [[ "$previous_step" != "$step" ]]; then
      printf "\nStep: %s\n" "$step"
    fi
    printf " %10.4fs - %s\n" "$duration" "$job"
    previous_step="$step"
  done < <(textql -header -output-dlm=tab -sql "select step,job,avg(duration) group by step, job order by step, job" "$STEP_DURATION_FILE")
}
|
|
|
# Resolve (or interactively choose and cache) the workflow to analyze.
GH_WORKFLOW="$(get_workflow_id)"

# no point trying if this isn't set (offline? API rate limit?)
if [[ -n "$GH_WORKFLOW" ]]; then

fetch_new_job_stats

report_average_duration

fi
# Updated script; forgot I'd only added the token-using code to the first curl call; now applied to all of them.