Skip to content

Instantly share code, notes, and snippets.

@PaulKinlan
Created March 30, 2026 20:45
Show Gist options
  • Select an option

  • Save PaulKinlan/c9abaad41c218b64cab00196b88e4d46 to your computer and use it in GitHub Desktop.

Select an option

Save PaulKinlan/c9abaad41c218b64cab00196b88e4d46 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# gh-ai-prs.sh
# Fetch 6 months of AI-created PR counts from GitHub Search API.
# Uses total_count per query (not actual results), so no 1,000-result cap.
# Output: CSV to stdout, progress to stderr.
#
# Tracks:
# 1. PRs created by known AI bot accounts
# 2. PRs containing AI-generated markers in title/body
#
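# Usage:
# ./gh-ai-prs.sh # weekly buckets, last 6 months
# ./gh-ai-prs.sh --granularity monthly # daily, weekly (default) or monthly
# ./gh-ai-prs.sh --start 2025-08-01 --end 2026-02-27
# ./gh-ai-prs.sh --sleep 5 # seconds to pause between API requests
#
# Requirements: gh (authenticated), jq, GNU date (the script relies on `date -d`)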
# Rate limit: 30 search requests/min authenticated. Script sleeps 2.5s/request.
set -euo pipefail

# ── Config ────────────────────────────────────────────────────────────────────
# Defaults; every value below can be overridden on the command line.
GRANULARITY="weekly"
END_DATE=$(date +%Y-%m-%d)
START_DATE=$(date -d "6 months ago" +%Y-%m-%d)
SLEEP_SECS=2.5

# Print an error message to stderr and abort the script with status 1.
usage_error() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Parse command-line options into the config globals and validate them.
# Globals:
#   GRANULARITY, START_DATE, END_DATE, SLEEP_SECS (written)
# Arguments:
#   The script's command-line arguments:
#     --granularity daily|weekly|monthly
#     --start DATE   --end DATE   (anything `date -d` understands)
#     --sleep SECONDS
# Outputs:
#   Error messages on stderr.
# Returns:
#   0 on success; exits the script with status 1 on an unknown option,
#   a missing option value, an unsupported granularity or an unparseable date.
#######################################
parse_args() {
  local opt parsed

  while [[ $# -gt 0 ]]; do
    opt="$1"
    case "$opt" in
      --granularity | --start | --end | --sleep)
        # Report a readable error instead of tripping `set -u` on "$2".
        [[ $# -ge 2 ]] || usage_error "${opt} requires a value"
        case "$opt" in
          --granularity) GRANULARITY="$2" ;;
          --start) START_DATE="$2" ;;
          --end) END_DATE="$2" ;;
          --sleep) SLEEP_SECS="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown arg: $opt" >&2
        exit 1
        ;;
    esac
  done

  # Reject typos up front; an unknown granularity would otherwise produce
  # an empty report without any error.
  case "$GRANULARITY" in
    daily | weekly | monthly) ;;
    *) usage_error "unsupported granularity '${GRANULARITY}' (expected daily, weekly or monthly)" ;;
  esac

  # Normalise both dates to YYYY-MM-DD: the range generator compares dates as
  # strings, which is only correct for zero-padded ISO dates.
  parsed=$(date -d "$START_DATE" +%Y-%m-%d 2>/dev/null) \
    || usage_error "invalid --start date: ${START_DATE}"
  START_DATE="$parsed"
  parsed=$(date -d "$END_DATE" +%Y-%m-%d 2>/dev/null) \
    || usage_error "invalid --end date: ${END_DATE}"
  END_DATE="$parsed"
}

parse_args "$@"
# ── Queries to track ─────────────────────────────────────────────────────────
# Format: "display_name|search_query"
# All queries get "is:pr" prepended and "created:FROM..TO" appended automatically.
# Search definitions, one "display_name|search_query" string per entry.
# The list is built group by group; entry order is the order of the report.
declare -a TOOLS=()

# ── AI Coding Agents (PR authors) ──
TOOLS+=(
  'Copilot SWE Agent (author)|author:copilot-swe-agent[bot]'
  'Codex Connector (author)|author:chatgpt-codex-connector[bot]'
  'Claude (author)|author:claude[bot]'
  'Devin (author)|author:devin-ai-integration[bot]'
  'Jules (author)|author:google-labs-jules[bot]'
  'Amazon Q (author)|author:amazon-q-developer[bot]'
  'Sweep (author)|author:sweep-ai[bot]'
  'Pixeebot (author)|author:pixeebot[bot]'
  'Codeflash (author)|author:codeflash-ai[bot]'
  'Codegen (author)|author:codegen-sh[bot]'
  'Mentat (author)|author:mentatbot[bot]'
  'GitAuto (author)|author:gitauto-ai[bot]'
)

# ── Code Review / Other Bots (may also create PRs) ──
TOOLS+=(
  'CodeRabbit (author)|author:coderabbitai[bot]'
  'Sourcery (author)|author:sourcery-ai[bot]'
  'Gemini Code Assist (author)|author:gemini-code-assist[bot]'
  'Cursor (author)|author:cursor[bot]'
)

# ── Dependency / Automation (context) ──
TOOLS+=(
  'Dependabot (author)|author:dependabot[bot]'
  'Renovate (author)|author:renovate[bot]'
)

# ── Body/title markers ──
# Single quotes keep the embedded double quotes (exact-phrase search) literal.
TOOLS+=(
  'Claude Code (body)|"Generated with Claude Code"'
  'Claude (co-author in body)|"Co-Authored-By: Claude"'
  'Devin (body)|"Created by Devin"'
  'Sweep (title)|"Sweep:"'
)
# ── Date range generation ─────────────────────────────────────────────────────
#######################################
# Emit one "FROM|TO" line (inclusive YYYY-MM-DD bounds) per reporting period.
#
#   daily    one line per day from start to end.
#   weekly   Monday-to-Sunday weeks. When start is not a Monday the first
#            bucket begins on the following Monday, so the leading partial
#            week is not reported. The last bucket is clipped to end.
#   monthly  calendar months. The first bucket begins on the 1st of start's
#            month (possibly before start); the last bucket is clipped to end.
#
# Dates are compared as strings, so start/end must be zero-padded YYYY-MM-DD.
# All date arithmetic runs in UTC (`date -u`) so that local DST transitions
# cannot make a calendar date unparseable or shift it.
#
# Arguments:
#   $1 - granularity: daily | weekly | monthly
#   $2 - start date (YYYY-MM-DD)
#   $3 - end date (YYYY-MM-DD, inclusive)
# Outputs:
#   "FROM|TO" lines on stdout; an error message on stderr for a bad granularity.
# Returns:
#   0 on success; 1 for an unknown granularity or when `date` rejects a value.
#######################################
generate_ranges() {
  local granularity="$1"
  local start="$2"
  local end="$3"
  local current period_end month_start

  case "$granularity" in
    daily)
      current="$start"
      while [[ ! "$current" > "$end" ]]; do
        echo "${current}|${current}"
        # `|| return 1`: an empty $current would otherwise loop forever.
        current=$(date -u -d "$current + 1 day" +%Y-%m-%d) || return 1
      done
      ;;
    weekly)
      # %u is the ISO weekday (Mon=1..Sun=7): step back to that week's Monday.
      current=$(date -u -d "$start - $(date -u -d "$start" +%u) days + 1 day" +%Y-%m-%d) \
        || return 1
      # Only whole weeks at the front: skip a Monday that precedes start.
      if [[ "$current" < "$start" ]]; then
        current=$(date -u -d "$current + 7 days" +%Y-%m-%d) || return 1
      fi
      while [[ ! "$current" > "$end" ]]; do
        period_end=$(date -u -d "$current + 6 days" +%Y-%m-%d) || return 1
        if [[ "$period_end" > "$end" ]]; then
          period_end="$end"
        fi
        echo "${current}|${period_end}"
        current=$(date -u -d "$current + 7 days" +%Y-%m-%d) || return 1
      done
      ;;
    monthly)
      current="$start"
      while [[ ! "$current" > "$end" ]]; do
        month_start=$(date -u -d "$current" +%Y-%m-01) || return 1
        period_end=$(date -u -d "$month_start + 1 month - 1 day" +%Y-%m-%d) || return 1
        if [[ "$period_end" > "$end" ]]; then
          period_end="$end"
        fi
        echo "${month_start}|${period_end}"
        current=$(date -u -d "$month_start + 1 month" +%Y-%m-%d) || return 1
      done
      ;;
    *)
      # Previously an unknown value fell through silently and produced no ranges.
      echo "ERROR: unknown granularity '${granularity}' (expected daily, weekly or monthly)" >&2
      return 1
      ;;
  esac
}
# ── Query function ────────────────────────────────────────────────────────────
#######################################
# Percent-encode a string for use as a URL query-string value.
# Unreserved characters (A-Z a-z 0-9 - . _ ~) pass through, a space becomes
# "+", and every other byte becomes %XX (uppercase hex). Characters such as
# '"', '&', '#', '+' and '%' are therefore encoded instead of being copied
# into the URL verbatim.
# Arguments:
#   $1 - raw string
# Outputs:
#   The encoded string followed by a newline on stdout.
#######################################
url_encode() {
  local str="$1"
  # Byte-wise locale: ${#str} and ${str:i:1} then operate on single bytes, so
  # multi-byte characters are encoded one byte at a time.
  local LC_ALL=C
  local out="" ch code hex i

  for ((i = 0; i < ${#str}; i++)); do
    ch="${str:i:1}"
    case "$ch" in
      [a-zA-Z0-9._~-]) out+="$ch" ;;
      ' ') out+='+' ;;
      *)
        # "'c" makes printf yield the numeric code of character c.
        printf -v code '%d' "'$ch"
        # Mask to one byte in case the C library reports a wider/negative value.
        printf -v hex '%%%02X' "$((code & 0xFF))"
        out+="$hex"
        ;;
    esac
  done

  printf '%s\n' "$out"
}
# Retry policy for rate-limited requests: up to MAX_RETRIES retries, waiting
# RETRY_BASE_SECS * attempt seconds before each one (60s, 120s, ... by default).
MAX_RETRIES=5
RETRY_BASE_SECS=60

#######################################
# Ask the GitHub Search API how many PRs match a query in a date window.
# The search run is "is:pr <query> created:<from>..<to>"; only total_count is
# used, and per_page=1 keeps the response small.
#
# Rate-limit handling is applied only when `gh api` itself fails. A successful
# response contains the title/body of a matching PR, so scanning it for
# "rate limit" text would misclassify PRs that merely mention rate limits.
# Any failure whose output mentions "rate limit" (primary or secondary) is
# retried with a linearly growing wait.
#
# Globals:
#   MAX_RETRIES, RETRY_BASE_SECS (read)
# Arguments:
#   $1 - search query fragment
#   $2 - window start (YYYY-MM-DD)
#   $3 - window end (YYYY-MM-DD)
# Outputs:
#   stdout: the match count, or the literal string ERROR on failure.
#   stderr: diagnostics and retry progress.
# Returns:
#   Always 0; callers detect failure by comparing stdout with "ERROR".
#######################################
query_count() {
  local query="$1"
  local from="$2"
  local to="$3"
  local full_query="is:pr ${query} created:${from}..${to}"
  local attempt=0
  local result exit_code wait_secs count err_msg

  while true; do
    exit_code=0
    # stderr is captured too, because gh reports HTTP errors there.
    result=$(gh api \
      --method GET \
      "search/issues" \
      -f q="${full_query}" \
      -f per_page=1 \
      2>&1) || exit_code=$?

    if [[ "$exit_code" -ne 0 ]]; then
      if grep -qi "rate limit" <<<"$result"; then
        attempt=$((attempt + 1))
        if [[ "$attempt" -gt "$MAX_RETRIES" ]]; then
          echo "ERROR: rate limit — gave up after $MAX_RETRIES retries" >&2
          echo "ERROR"
          return
        fi
        wait_secs=$((RETRY_BASE_SECS * attempt))
        echo "  RATE LIMITED: waiting ${wait_secs}s before retry $attempt/$MAX_RETRIES..." >&2
        sleep "$wait_secs"
        continue
      fi
      echo "ERROR: gh api failed (exit $exit_code): $result" >&2
      echo "ERROR"
      return
    fi

    # `// empty` yields nothing when total_count is absent; a count of 0 is kept.
    count=$(jq -r '.total_count // empty' <<<"$result" 2>/dev/null) || count=""
    if [[ -z "$count" ]]; then
      err_msg=$(jq -r '.message // .errors[0].message // "unknown error"' <<<"$result" 2>/dev/null \
        || echo "unparseable response")
      echo "ERROR: API response: $err_msg" >&2
      echo "ERROR"
      return
    fi

    echo "$count"
    return
  done
}
#######################################
# Build the github.com search URL that shows the same PRs a count refers to.
# Arguments:
#   $1 - search query fragment
#   $2 - window start (YYYY-MM-DD)
#   $3 - window end (YYYY-MM-DD)
# Outputs:
#   The URL on stdout.
#######################################
build_search_url() {
  local query="$1" from="$2" to="$3"
  # Same search expression the API call uses, passed through url_encode.
  local search_terms="is:pr ${query} created:${from}..${to}"
  local q_param
  q_param=$(url_encode "$search_terms")
  printf '%s\n' "https://github.com/search?q=${q_param}&type=pullrequests"
}
# ── Main ──────────────────────────────────────────────────────────────────────
#######################################
# Format one CSV field: values containing a comma, a double quote or a newline
# are wrapped in double quotes with embedded quotes doubled; anything else is
# written unchanged.
# Arguments:
#   $1 - raw field value
# Outputs:
#   The field on stdout, without a trailing newline.
#######################################
csv_field() {
  local field="$1"
  local dq='"'
  case "$field" in
    *"$dq"* | *,* | *$'\n'*)
      printf '"%s"' "${field//$dq/$dq$dq}"
      ;;
    *)
      printf '%s' "$field"
      ;;
  esac
}

# Build the list of periods first, so a bad configuration is reported before
# any output is written.
mapfile -t RANGES < <(generate_ranges "$GRANULARITY" "$START_DATE" "$END_DATE")
if [[ "${#RANGES[@]}" -eq 0 ]]; then
  # Without this check the script would print only the CSV header and exit 0.
  echo "ERROR: no periods generated for granularity '${GRANULARITY}' between ${START_DATE} and ${END_DATE}" >&2
  exit 1
fi

# CSV goes to stdout; all progress reporting goes to stderr.
echo "tool,period_start,period_end,granularity,total_count,url"

total_requests=$((${#TOOLS[@]} * ${#RANGES[@]}))
done_requests=0
# Fixed 3 s/request estimate; it does not track a custom --sleep value.
est_secs=$((total_requests * 3))
est_mins=$((est_secs / 60))
echo "==> ${#TOOLS[@]} tools × ${#RANGES[@]} periods = ${total_requests} API calls" >&2
echo "==> Estimated time: ~${est_mins} minutes" >&2
echo "" >&2

for tool_entry in "${TOOLS[@]}"; do
  # Entries are "display_name|search_query": split on the first "|".
  display="${tool_entry%%|*}"
  query="${tool_entry#*|}"
  echo "--- ${display} ---" >&2

  for range in "${RANGES[@]}"; do
    from="${range%%|*}"
    to="${range#*|}"
    count=$(query_count "$query" "$from" "$to")
    url=$(build_search_url "$query" "$from" "$to")

    # Free-text fields are CSV-quoted when needed; the rest are dates, a
    # keyword and a number (or ERROR).
    printf '%s,%s,%s,%s,%s,%s\n' \
      "$(csv_field "$display")" "$from" "$to" "$GRANULARITY" "$count" "$(csv_field "$url")"

    if [[ "$count" == "ERROR" ]]; then
      echo "  ${from}..${to}: ERROR (see above)" >&2
    else
      echo "  ${from}..${to}: ${count}  ${url}" >&2
    fi

    done_requests=$((done_requests + 1))
    # Throttle between requests, but do not wait after the final one.
    if [[ "$done_requests" -lt "$total_requests" ]]; then
      sleep "$SLEEP_SECS"
    fi
  done
done

echo "" >&2
echo "==> Done. ${done_requests} requests made." >&2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment