Skip to content

Instantly share code, notes, and snippets.

@mcornella

mcornella/.gitignore

Last active Feb 3, 2021
Embed
What would you like to do?
Stats and PR triage for ohmyzsh
github
pulls
repo
stats
types
*.txt
.env

Zsh utilities to fetch PRs from Oh My Zsh and perform some analysis.

To start, make a folder (mine is called 'automation'), then clone this gist inside this folder (I named it 'pr-stats'). Then, copy the example.env file and rename it .env, then set the GITHUB_TOKEN variable in the file with a valid, read-only token. Then, run refresh.zsh.

Note: triage.zsh applies labels when run as triage.zsh commit. Do not run it that way: I haven't checked it in a while, and it could undo changes made by the ohmyzsh-bot.

  • refresh.zsh: downloads GitHub data and runs the other files
  • pulls.zsh: runs through the PRs and generates the pulls.txt and pulls-title.txt files, and gets their git diff and puts them in the pulls/ folder.
  • stats.zsh: processes the git diffs and generates various stats, and puts them in the stats/ folder.
  • types.zsh: processes the git diffs and classifies PRs into various types, and puts them in the types/ folder.
  • triage.zsh: runs through the PRs and checks whether they are correctly labeled, and changes them if told.
  • year-review.zsh: when run with <year>, it prints a bunch of stats about that particular year. For example: year-review.zsh 2020.
# Example environment file: rename this file to .env
# Create a personal access token at https://github.com/settings/tokens/new
# and put it below in place of <github-token>.
# refresh.zsh and triage.zsh source .env when GITHUB_TOKEN is not already set.
GITHUB_TOKEN=<github-token>
#!/usr/bin/zsh
# pulls.zsh: list the pull requests found in the downloaded GitHub JSON pages
# and write three git diffs per pull request into the pulls/ folder.
#
# Reads:  $base/github/*          (GraphQL responses)
#         $base/repo              (git checkout with origin/pull-<n> refs)
# Writes: $base/pulls.txt         (one PR number per line)
#         $base/pulls-title.txt   (number<TAB>title)
#         $base/pulls/{pull,mod,diff}-<n>.txt
set -e

base=${0:h:A}
repo=$base/repo
pulls=$base/pulls
github=$base/github

# Drop the previous run's output; (N) keeps the glob quiet when nothing matches
rm -f $pulls/*.txt(N)

# Extract the PR numbers, then the number/title pairs, from every JSON page
jq '.data.repository.pullRequests.nodes[].number' $github/* > $base/pulls.txt
jq -r '.data.repository.pullRequests.nodes[] | [.number, .title] | @tsv' $github/* > $base/pulls-title.txt

pushd $repo
while read pr; do
  echo PR $pr
  # Three-dot range: changes on the PR branch since it diverged from master
  range=origin/master...origin/pull-$pr
  git diff --name-only $range > $pulls/pull-$pr.txt
  git diff --name-status $range > $pulls/mod-$pr.txt
  git diff $range > $pulls/diff-$pr.txt
done < $base/pulls.txt
popd
#!/usr/bin/zsh
set -e
base=${0:h:A}
repo=$base/repo
pulls=$base/pulls
github=$base/github
mkdir -p "$pulls" "$github"
if [[ ! -d "$repo" ]]; then
git clone git@github.com:ohmyzsh/ohmyzsh.git "$repo"
git -C "$repo" config remote.origin.fetch '+refs/pull/*/head:refs/remotes/origin/pull-*'
fi
if [[ -z $GITHUB_TOKEN ]]; then
source .env
fi
# refresh open PR JSONs
rm -f "$github/pulls_*.json"
i=1
endCursor=null
hasNextPage=true
while [[ $hasNextPage = true ]]; do
query='{
"query": "{
repository(name: \"ohmyzsh\", owner: \"ohmyzsh\") {
pullRequests(first: 100, orderBy: {field: UPDATED_AT, direction: ASC}, states: OPEN, after: '$endCursor') {
pageInfo { endCursor, hasNextPage },
nodes {
number,
title,
author { login },
headRefOid,
labels(first: 20) {
nodes {
name
}
}
}
}
}
}"
}'
query="${${query//$'\n'/}//$'\t'/}"
# Do the graphQL request
curl -H "Authorization: bearer $GITHUB_TOKEN" -X POST -d "$query" https://api.github.com/graphql > $github/pulls_$i.json
endCursor=\\\"$(jq -r '.data.repository.pullRequests.pageInfo.endCursor' $github/pulls_$i.json)\\\"
hasNextPage=$(jq '.data.repository.pullRequests.pageInfo.hasNextPage' $github/pulls_$i.json)
(( i++ ))
done
# refresh repository
pushd $repo
git fetch --all
git checkout -q master
git reset --hard origin/master
popd
# refresh pull requests
echo Refreshing pull request data...
./pulls.zsh
echo Refreshing pull request stats...
./stats.zsh
echo Refreshing pull request types...
./types.zsh
#!/usr/bin/zsh
# stats.zsh: aggregate the per-PR file lists found in pulls/ into the stats/
# folder.
#
# Reads:  $base/pulls/{pull,mod}-<n>.txt, $base/pulls-title.txt
# Writes: $base/stats/{mods,files,howmanychanged,modified}.txt
set -e

base=${0:h:A}
repo=$base/repo
pulls=$base/pulls
stats=$base/stats

mkdir -p "$stats"
rm -f $stats/*(N)

# mods: every distinct "status<TAB>path" line across all PRs
sort -u $pulls/mod-* > $stats/mods.txt

# files: every distinct path touched by any PR
sort -u $pulls/pull-* > $stats/files.txt

# howmanychanged.txt: "<changed files>\t<number>\t<title>", biggest PR first
while read number title; do
  # Read from stdin so wc prints only the number; the arithmetic expansion
  # strips the left padding that some wc implementations add.
  count=0
  if [[ -r $pulls/pull-$number.txt ]]; then
    count=$(( $(wc -l < $pulls/pull-$number.txt) ))
  fi
  # printf '%s' prints the title verbatim; echo would expand any backslash
  # sequence contained in it (a "\n" in a title would split the row).
  printf '%s\t%s\t%s\n' "$count" "$number" "$title"
done < $base/pulls-title.txt | sort -nr > $stats/howmanychanged.txt

# modified.txt: number of PRs touching each path, with everything below
# plugins/<name>/ collapsed into plugins/<name>
for pull ($pulls/pull-*); do
  sed 's/\(plugins\/[^/]*\)\/.*/\1/' $pull | sort -u
done | sort | uniq -c | sort -nr > $stats/modified.txt
#!/bin/zsh
# triage.zsh: check whether the open pull requests are labeled according to the
# files they touch, and optionally apply the label changes.
#
# Usage: triage.zsh [commit]
# Environment: GITHUB_TOKEN (read from ./.env or $base/.env when unset)
set -e
set -o pipefail

base=${0:h:A}
repo=$base/repo
# Folder holding the GraphQL responses downloaded by refresh.zsh
pulls=$base/github

# Handle --help first so the usage text is available without a token
if [[ "$1" = --help ]]; then
  echo "Usage: $0 [commit]" >&2
  echo "Use 'commit' if you want to apply the label changes" >&2
  exit
fi

# Load the token from .env (current directory first, then the script folder)
if [[ -z $GITHUB_TOKEN ]]; then
  if [[ -e .env ]]; then
    source ./.env
  elif [[ -e $base/.env ]]; then
    source $base/.env
  fi
fi

if [[ -z $GITHUB_TOKEN ]]; then
  echo No GITHUB_TOKEN present >&2
  exit 1
fi

# GitHub REST API settings shared by every request
URI=https://api.github.com
API_VERSION=v3
API_HEADER="Accept: application/vnd.github.${API_VERSION}+json"
AUTH_HEADER="Authorization: token ${GITHUB_TOKEN}"

# Short key -> label name as it exists in the repository
typeset -A LABELS
LABELS=(
  core              'Area: core'
  init              'Area: init'
  install           'Area: installer'
  update            'Area: updater'
  plugin            'Area: plugin'
  theme             'Area: theme'
  uninstall         'Area: uninstaller'
  new_plugin        'New: plugin'
  new_theme         'New: theme'
  plugin_aws        'Plugin: aws'
  plugin_git        'Plugin: git'
  plugin_mercurial  'Plugin: mercurial'
  plugin_tmux       'Plugin: tmux'
  alias             'Topic: alias'
  bindkey           'Topic: bindkey'
  completion        'Topic: completion'
  documentation     'Type: documentation'
  conflicts         'Status: conflicts'
)
# Report whether a trial merge of $GITHUB_SHA into the current checkout fails.
# Globals:  GITHUB_SHA (read) - commit to merge
# Returns:  0 when the merge fails (treated as "has conflicts"),
#           1 when it succeeds.
# The trial merge is always aborted afterwards.
has_conflicts() {
  local ret

  if git -c user.name=bot -c user.email=b@o.t \
    merge --no-commit --no-ff "$GITHUB_SHA" &>/dev/null; then
    ret=1
  else
    ret=0
  fi

  # Undo the trial merge; the abort fails when no merge is in progress,
  # which is fine here
  git merge --abort &>/dev/null || true

  return $ret
}
# Compute the labels that match the files changed between HEAD and $GITHUB_SHA.
# Globals:  GITHUB_SHA (read), LABELS (read)
# Outputs:  one label per line on stdout, sorted ascending and shell-quoted
#           with backslashes (the (q) flag); nothing when no label applies.
# Must run from inside the repository checkout: the "new plugin" and
# "new theme" checks test for paths relative to the current directory.
triage_pull_request() {
  local -aU labels files plugins themes
  local file plugin theme diff

  # Changed files
  files=("${(f)$(git diff --name-only HEAD...$GITHUB_SHA)}")

  # Filter files to only obtain core files (inside 'lib/' or 'tools/')
  if (( ${files[(I)lib/*|tools/*]} > 0 )); then
    labels+=($LABELS[core])
  fi

  # Filter files to only obtain changed plugins ('plugins/$name')
  plugins=(${(M)files#plugins/*/})
  if (( $#plugins > 0 )); then
    labels+=($LABELS[plugin])
    for plugin ($plugins); do
      # If the plugin doesn't exist mark it as new
      if [[ ! -e "$plugin" ]]; then
        labels+=($LABELS[new_plugin])
      fi
    done
  fi

  # Filter files to only obtain changed themes ('themes/$name.zsh-theme')
  themes=(${(M)files#themes/*.zsh-theme})
  if (( $#themes > 0 )); then
    labels+=($LABELS[theme])
    for theme ($themes); do
      # If the theme doesn't exist mark it as new
      if [[ ! -e "$theme" ]]; then
        labels+=($LABELS[new_theme])
      fi
    done
  fi

  # Loop over the rest of the files for miscellaneous tests
  for file ($files); do
    case $file in
      # The alternation used to read (sh|.zsh), which only matched
      # "oh-my-zsh.sh" and the implausible "oh-my-zsh..zsh"
      oh-my-zsh.(sh|zsh)) labels+=($LABELS[init]) ;;
      tools/*upgrade.sh) labels+=($LABELS[update]) ;;
      tools/install.sh) labels+=($LABELS[install]) ;;
      tools/uninstall.sh) labels+=($LABELS[uninstall]) ;;
      plugins/aws/*) labels+=($LABELS[plugin_aws]) ;;
      plugins/git/*) labels+=($LABELS[plugin_git]) ;;
      plugins/mercurial/*) labels+=($LABELS[plugin_mercurial]) ;;
      plugins/tmux/*) labels+=($LABELS[plugin_tmux]) ;;
      (|*/)README.*) labels+=($LABELS[documentation]) ;;
    esac

    case ${file:t} in
      *.zsh) # check if aliases or bindkeys are added, deleted or modified
        diff=$(git diff HEAD...$GITHUB_SHA -- $file)
        if grep -q -E '^[-+] *alias ' <<< $diff; then
          labels+=($LABELS[alias])
        fi
        if grep -q -E '^[-+] *bindkey ' <<< $diff; then
          labels+=($LABELS[bindkey])
        fi ;;
      _*) # check if completion files are added, deleted or modified
        labels+=($LABELS[completion]) ;;
    esac
  done

  # Print labels in ascending order and quote for labels with spaces
  if (( $#labels > 0 )); then
    print -l ${(oq)labels}
  fi
}
# Triage one pull request: work out which labels it should have and, when
# asked to, send the change to the GitHub API.
# Arguments: $1 - pull request number
#            $2 - JSON file (GraphQL response) that contains the pull request
#            $3 - "commit" to really apply the changes; anything else only
#                 prints what would be done
# Globals:   GITHUB_SHA (written), LABELS, URI, AUTH_HEADER, API_HEADER (read)
# Outputs:   a report on stdout when labels have to be added or replaced
process() {
  local number=$1 jsonfile=$2 run=0 replace=0
  if [[ $3 = commit ]]; then
    run=1
  fi

  # Node of the pull request inside the page
  local JSON
  JSON=$(jq --argjson number "$number" \
    '.data.repository.pullRequests.nodes | .[] | select(.number == $number)' $jsonfile)

  local -aU current_labels
  current_labels=("${(f)$(jq --raw-output '.labels.nodes | .[].name' <<< $JSON)}")

  # has_conflicts and triage_pull_request read the commit from this global
  local sha
  sha=$(jq --raw-output .headRefOid <<< $JSON)
  GITHUB_SHA=$sha

  # Creates an array of labels to apply to the PR being analyzed.
  # triage_pull_request prints them backslash-quoted ("Area:\ core"), so the
  # quoting has to be removed before comparing them with the current labels
  # or sending them to the API.
  local -aU labels
  labels=("${(f)$(triage_pull_request)}")
  labels=("${(@Q)labels}")

  # Check if PR has conflicts with master
  if has_conflicts; then
    labels+=($LABELS[conflicts])
  # No conflicts any more, but the PR still carries the "conflicts" label:
  # the whole label set has to be replaced to get rid of it
  elif (( $current_labels[(I)$LABELS[conflicts]] > 0 )); then
    replace=1
  fi

  if (( replace )); then
    # Add current labels to the set of labels to add (except the "conflicts" label)
    labels+=(${current_labels:#$LABELS[conflicts]})
  else
    # Remove current labels from the set of labels to add
    labels=(${labels:|current_labels})
  fi

  # Nothing to update
  if (( $#labels == 0 )); then
    return 0
  fi

  # Request body: {"labels": [...]}, empty names are dropped
  local data
  data=$(print -rl -- $labels | jq -cnR '{ labels: [inputs | select(length>0)] }')

  echo $'\n'Pull request "#$number": https://github.com/ohmyzsh/ohmyzsh/pull/$number

  if (( replace )); then
    # Replace labels: https://developer.github.com/v3/issues/labels/#replace-all-labels-for-an-issue
    echo "Old labels:" ${(j:, :)${(qq)current_labels}}
    echo "Replacing labels to:" ${(j:, :)${(qq)labels}}...
    (( run )) || return 0

    curl -XPUT -sSL \
      -H "${AUTH_HEADER}" \
      -H "${API_HEADER}" \
      --data $data \
      "${URI}/repos/ohmyzsh/ohmyzsh/issues/${number}/labels"
  else
    # Add labels: https://developer.github.com/v3/issues/labels/#add-labels-to-an-issue
    echo "Adding labels to PR #$number:" ${(j:, :)${(qq)labels}}...
    (( run )) || return 0

    curl -XPOST -sSL \
      -H "${AUTH_HEADER}" \
      -H "${API_HEADER}" \
      --data $data \
      "${URI}/repos/ohmyzsh/ohmyzsh/issues/${number}/labels"
  fi

  sleep 1 # wait before next API call
}
pushd $repo
for jsonfile in ${pulls:A}/*.json; do
echo Processing PRs from ${jsonfile:t}...
while read number; do
process $number $jsonfile $1
done < <(jq '.data.repository.pullRequests.nodes | .[].number' $jsonfile)
echo
done
popd
#!/usr/bin/zsh
# types.zsh: classify every pull request by the kind of change it makes and
# record it in the types/ folder.
#
# Reads:  $base/pulls-title.txt, $base/pulls/mod-<n>.txt, $base/repo
# Writes: $base/types/{newlibs,newplugins,modcompletions,pluginreadmes}.txt
#         $base/types/plugins/<plugin>.txt
set -e

base=${0:h:A}
repo=$base/repo
pulls=$base/pulls
types=$base/types

# setup
mkdir -p $types
rm -rf $types/*(N)
mkdir $types/plugins

# process
# The existence tests below are relative to the repository checkout. Every
# other path is absolute, so a single directory change covers the whole loop.
pushd $repo
while read number title; do
  declare -aU plugins=()
  declare newlib=0 newplugin=0 modcompletion=0 newreadme=0

  # Each line of the mod file is "<status> <path>"
  while read action file; do
    # A lib/ file that is not in the checkout yet
    if [[ $file = lib/* && ! -e $file ]]; then
      newlib=1
    fi

    if [[ $file = plugins/* ]]; then
      # "plugins/<name>/", or empty for a file sitting directly in plugins/
      plugin=${(M)file#plugins/*/}
      if [[ -n $plugin ]]; then
        plugins+=(${plugin:t})
        # Only a missing plugin folder means a new plugin; without the guard
        # above an empty $plugin would always pass this test
        if [[ ! -e $plugin ]]; then
          newplugin=1
        fi
      fi

      # Modified completion file
      if [[ ${file:t} = _* && $action = M ]]; then
        modcompletion=1
      fi

      # README that is not in the checkout yet
      if [[ ${file:t} = (README|readme).md && ! -e $file ]]; then
        newreadme=1
      fi
    fi
  done < $pulls/mod-$number.txt

  # printf '%s' keeps the title verbatim; echo would expand backslash
  # sequences contained in it
  if (( $newlib )); then
    printf '%s\t%s\n' "$number" "$title" >> $types/newlibs.txt
  fi
  if (( $newplugin )); then
    printf '%s\t%s\n' "$number" "$title" >> $types/newplugins.txt
  fi
  if (( $modcompletion )); then
    printf '%s\t%s\n' "$number" "$title" >> $types/modcompletions.txt
  fi
  # A new README counts only when the plugin itself already exists
  if (( $newreadme && ! $newplugin )); then
    printf '%s\t%s\n' "$number" "$title" >> $types/pluginreadmes.txt
  fi

  for plugin ($plugins); do
    printf '%s\t%s\n' "$number" "$title" >> $types/plugins/$plugin.txt
  done
done < $base/pulls-title.txt
popd
#!/usr/bin/zsh
set -e
base=${0:h:A}
repo=$base/repo
year=$1
# Print the abbreviated hash of the oldest commit in `git log` dated after
# 23:59 on December 31st of the year before $1.
# Arguments: $1 - year
function first_commit {
  local yr=$1
  local cutoff="$(( yr - 1 ))-12-31 23:59"

  # Oldest first, keep only the first line
  git log --format="%h" --after="$cutoff" --reverse | head -n 1
}
# Print the abbreviated hash of the newest commit in `git log` dated before
# 00:00 on January 1st of the year after $1.
# Arguments: $1 - year
function last_commit {
  local yr=$1
  local cutoff="$(( yr + 1 ))-01-01 00:00"

  # Newest first, keep only the first line
  git log --format="%h" --before="$cutoff" | head -n 1
}
# Count the plugins introduced by one commit.
# Arguments: $1 - commit hash
# Outputs:   the number of new plugins on stdout (only when greater than zero)
#            and one "new plugin: ..." line per plugin on stderr
# Returns:   0 when at least one new plugin was found, 1 otherwise
function has_new_plugins {
  local hash=$1 ret=0 tmpplugin='' plugin_name

  git diff-tree --name-status --no-commit-id -r "$hash" | while read mod file; do
    # Only added files can introduce a plugin
    if [[ $mod != A ]]; then
      continue
    fi

    # Plugin name from a completion or *.plugin.zsh file inside a plugin
    # folder, or from a *.plugin.zsh file directly under plugins/
    plugin_name=''
    case "$file" in
      plugins/*/(_*|*.plugin.zsh)) plugin_name=${${file#plugins/}%%/*} ;;
      plugins/*.plugin.zsh) plugin_name=${file:t:r:r} ;;
      *) continue ;;
    esac

    # check if the plugin file added matches with a previous plugin file seen in the same commit
    if [[ $plugin_name = $tmpplugin ]]; then
      continue
    fi
    tmpplugin=$plugin_name

    # check if $plugin_name was added in a previous commit
    if [[ -n "$(git log -1 --reverse --oneline "$hash^" -- "plugins/$plugin_name" | head -1)" ]]; then
      continue
    fi

    echo >&2 "new plugin: $plugin_name (at ${hash:0:7})"
    (( ret += 1 ))
  done

  if (( ret > 0 )); then
    echo $ret
  else
    return 1
  fi
}
# Count the theme files (themes/*.zsh-theme) added by one commit.
# Arguments: $1 - commit hash
# Outputs:   the number of added themes on stdout (only when greater than
#            zero) and one "new theme: ..." line per theme on stderr
# Returns:   0 when at least one theme was added, 1 otherwise
function has_new_themes {
  local hash=$1 ret=0

  git diff-tree --name-status --no-commit-id -r "$hash" | while read mod file; do
    # Skip everything that is not an added theme file
    [[ $mod = A ]] || continue
    [[ "$file" = themes/*.zsh-theme ]] || continue

    echo >&2 "new theme: ${file:t:r} (at ${hash:0:7})"
    (( ret += 1 ))
  done

  if (( ret > 0 )); then
    echo $ret
  else
    return 1
  fi
}
# Print the sorted, de-duplicated author names (%an) of the commits dated
# before 00:00 on January 1st of $1.
# To list e-mail addresses instead, switch the format to %ae.
# Arguments: $1 - year
function contributors_before {
  local cutoff="${1}-01-01 00:00"

  git log --format="%an" --before="$cutoff" | sort -u
}
pushd $repo

# Stats

# Commits: first and last commit of the year
first="$(first_commit $year)"
last="$(last_commit $year)"

# Contributor count
contributors_lastyear="$(contributors_before $(( $year )))"
contributors_thisyear="$(contributors_before $(( $year + 1 )) )"

# New contributors are the names that appear only in this year's list. Counting
# those lines directly (instead of subtracting two `wc -l` results) stays
# correct when the earlier list is empty.
new_contributor_names=(${(f)"$(comm -23 \
  <(print -r -- "$contributors_thisyear") \
  <(print -r -- "$contributors_lastyear"))"})
new_contributors=$#new_contributor_names
for name in $new_contributor_names; do
  print -r -- "new contributor: $name" >&2
done

# Number of commits
commits=0
# New plugins
new_plugins=0
# New themes
new_themes=0

# Define the range
if [[ -z $first || -z $last ]]; then
  # No commit on one side of the year: nothing to iterate over
  range=''
elif git rev-parse --verify --quiet "${first}^" &>/dev/null; then
  range="${first}^..${last}"
else
  # The very first commit has no parent
  range="${last}"
fi

# Iterate over the list of commits in $range
if [[ -n $range ]]; then
  git rev-list --date-order --reverse $range | while read commit_hash; do
    (( commits += 1 ))

    # Both helpers print a count and succeed only when they found something
    if _n=$(has_new_plugins $commit_hash); then
      (( new_plugins += _n ))
    fi

    if _n=$(has_new_themes $commit_hash); then
      (( new_themes += _n ))
    fi
  done
fi

popd

echo first: $first, last: $last, commits: $commits, new contributors: $new_contributors, new plugins: $new_plugins, new themes: $new_themes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment