Skip to content

Instantly share code, notes, and snippets.

@robbat2
Last active November 28, 2023 00:18
Show Gist options
  • Save robbat2/08dcb9a42ff912e835b49ec89a20ac51 to your computer and use it in GitHub Desktop.
Save robbat2/08dcb9a42ff912e835b49ec89a20ac51 to your computer and use it in GitHub Desktop.
performance review helper tooling from GitHub API
# SPDX-License: ???
# parseDate(date): convert an ISO-8601 timestamp string to Unix epoch seconds.
#
# `date` is a filter yielding a string in one of these shapes:
#   "2023-01-31T12:34:56.000-08:00"  - explicit +HH:MM / -HH:MM UTC offset
#   "2023-01-31T20:34:56Z"           - already UTC
# Fractional seconds are discarded, because fromdateiso8601 only accepts whole
# seconds. A string with neither a "Z" suffix nor a numeric offset produces no
# output (capture finds no match).
def parseDate(date):
  # Read "YYYY-MM-DDTHH:MM:SS[.fff]" as if it were UTC wall-clock time.
  def as_utc_epoch: sub("\\.[0-9]+$"; "") + "Z" | fromdateiso8601;
  date
  | if endswith("Z") then
      rtrimstr("Z") | as_utc_epoch
    else
      # tz_sgn + "60" is +60 or -60: seconds per minute of offset, signed.
      # Subtracting the signed offset turns local wall-clock time into UTC.
      capture("(?<no_tz>.*)(?<tz_sgn>[-+])(?<tz_hr>\\d{2}):(?<tz_min>\\d{2})$")
      | (.no_tz | as_utc_epoch)
        - (.tz_sgn + "60" | tonumber) * ((.tz_hr | tonumber) * 60 + (.tz_min | tonumber))
    end;
#!/bin/bash
# Robin's handy-dandy GitHub/GHE scraper for performance reviews.
#
# SPDX-FileType: SOURCE
# SPDX-FileCopyrightText: Copyright 2020-2023 Robin H. Johnson <robbat2@orbis-terrarum.net>
# SPDX-License-Identifier: AGPL-3.0-or-later
#
# Yes, it's really AGPL-3; this is glue code and I really want the community to
# share improvements.
#
# Requirements:
# - "hub" https://github.com/github/hub
# - jq
# Output files are stamped with today's UTC date, so a re-run on the same day
# reuses whatever has already been fetched.
# (Finer-grained alternative: T=$(date -u +%Y%m%dT%H%M%SZ))
T="$(date -u '+%Y%m%d')"
SUFFIX="v4.${T}.json"

# API root. Export ENDPOINT before running to target GitHub Enterprise, e.g.
#   ENDPOINT=https://github-enterprise.mycompany.com/api
# The `hub` client must be configured for the endpoint.
ENDPOINT="${ENDPOINT:-https://github.com/api}"

# The "events" endpoint does not go very far back; {USERNAME} is a placeholder
# that is substituted per account when the URL is used.
GHE_EVENTS="${ENDPOINT}"'/v3/users/{USERNAME}/events'
# The commit search endpoint goes VERY far back.
GHE_COMMITS="${ENDPOINT}"'/v3/search/commits'
# TODO: How to bypass the history age on GraphQL calls?

# Identities to search for. Add further work addresses (rjohnson@...) here.
EMAILS=('rjohnson' 'robbat2@gentoo.org' 'robbat2@orbis-terrarum.net')
NAMES=('robbat2' 'Robin*Johnson')
USERNAMES=('robbat2' 'rjohnson')

# Base command for every API call. Kept as a plain string because it is
# expanded unquoted (word-split) at each call site.
cmd="hub api -H Accept:application/vnd.github.cloak-preview --include --paginate --obey-ratelimit"
#######################################
# Run a command once and cache its stdout in a file.
# If the destination file already exists the command is not run at all, so
# an interrupted scrape can be restarted without re-fetching finished pieces.
# Output is written to "<dest>.tmp" first and only renamed on success, so a
# destination file never holds a partial result.
# Arguments:
#   $1    - destination file
#   $2... - command (and its arguments) whose stdout is captured
# Outputs:
#   "Fetching <command>" on stdout when the command is actually run
# Returns:
#   0 if the file already existed or was created; 2 on bad usage; otherwise
#   the non-zero status of the failed command (or of the rename)
#######################################
idempotent_scrape() {
  if (( $# < 2 )); then
    echo "usage: idempotent_scrape DEST COMMAND [ARG...]" >&2
    return 2
  fi
  local dest=$1
  shift
  # Already fetched on an earlier run: nothing to do.
  [[ -e "${dest}" ]] && return 0
  echo "Fetching $*"
  local partial="${dest}.tmp"
  local rc=0
  { "$@" >"${partial}" && mv -- "${partial}" "${dest}"; } || rc=$?
  if (( rc != 0 )); then
    # Do not leave a stale partial download lying around after a failure.
    rm -f -- "${partial}"
  fi
  return "${rc}"
}
# Recent activity for each account, one file per username.
for _username in "${USERNAMES[@]}"; do
  # shellcheck disable=SC2086 # ${cmd} must word-split into the command and its flags
  idempotent_scrape "events.user_${_username}.${SUFFIX}" ${cmd} "${GHE_EVENTS/\{USERNAME\}/${_username}}"
done

# Commit search, one query per calendar month, per identity, and per role
# (author / committer). Each result is cached in its own file.
for ymd in 2023-{01..12}-01; do
  # First day of the following month (relies on GNU date's -d).
  ymd_next=$(date -u -d "${ymd} + 1 month" +%Y-%m-%d)
  # d1/d2 are the dates as used in the query; _d1/_d2 are the same dates with
  # the dashes removed, for use in filenames.
  d1=${ymd}
  d2=${ymd_next}
  _d1=${ymd//-/}
  _d2=${ymd_next//-/}
  for _type in author committer; do
    for _email in "${EMAILS[@]}"; do
      # shellcheck disable=SC2086 # ${cmd} must word-split into the command and its flags
      idempotent_scrape "commits-${_type}-email_${_email}.${_d1}-${_d2}.${SUFFIX}" ${cmd} "${GHE_COMMITS}?q=${_type}-email:${_email}+${_type}-date:${d1}..${d2}"
    done
    for _name in "${NAMES[@]}"; do
      # Collapse every run of characters that might be invalid in a filename
      # into a single underscore.
      _name2="$(printf '%s\n' "${_name}" | sed -r -e 's,[^0-9a-zA-Z]+,_,g')"
      # shellcheck disable=SC2086 # ${cmd} must word-split into the command and its flags
      idempotent_scrape "commits-${_type}-name_${_name2}.${_d1}-${_d2}.${SUFFIX}" ${cmd} "${GHE_COMMITS}?q=${_type}-name:${_name}+${_type}-date:${d1}..${d2}"
    done
    for _username in "${USERNAMES[@]}"; do
      # Same filename-safe escaping for the username.
      _username2="$(printf '%s\n' "${_username}" | sed -r -e 's,[^0-9a-zA-Z]+,_,g')"
      # The filename carries the same ${_d1}-${_d2} range as the email and
      # name files above.
      # NOTE(review): GitHub's documented commit-search qualifiers for accounts
      # appear to be "author:" / "committer:"; confirm that
      # "${_type}-username:" is understood by the target endpoint.
      # shellcheck disable=SC2086 # ${cmd} must word-split into the command and its flags
      idempotent_scrape "commits-${_type}-username_${_username2}.${_d1}-${_d2}.${SUFFIX}" ${cmd} "${GHE_COMMITS}?q=${_type}-username:${_username}+${_type}-date:${d1}..${d2}"
    done
  done
done
echo "Finding unique commits"
# The scrape command is run with --include, so the saved files hold more than
# the JSON body; keep only the lines that start a JSON object, flatten every
# page's .items into one stream, then index by URL so that a commit found by
# several queries appears only once.
grep -h '^{' commits-*"${SUFFIX}" \
  | jq -c '.items|.[]' \
  | jq -s 'INDEX(.url)' >unique-commits.json

start_date='2022/12/15'
# Midnight UTC of start_date as epoch seconds. The date string is passed to
# `date -d` as-is: an "@" prefix means "seconds since the epoch" and is not
# valid in front of a calendar date.
start_ts=$(date -u -d "${start_date}" +%s) || {
  echo "cannot convert start_date '${start_date}' to a timestamp" >&2
  exit 1
}
echo "Filtering commits to start at $start_date"
# parseDate is provided by the parse_date.jq module, which jq must be able to
# find on its module search path.
# Commits are streamed one per line into the second jq so that each commit is
# evaluated as a separate input.
jq -c 'to_entries|.[]|.value' unique-commits.json \
  | jq --argjson start_ts "$start_ts" 'include "parse_date"; select(parseDate(.commit.committer.date) >= $start_ts)' \
  >unique-commits-filtered.json
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment