Created
March 30, 2026 20:45
-
-
Save PaulKinlan/c9abaad41c218b64cab00196b88e4d46 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # gh-ai-prs.sh | |
| # Fetch 6 months of AI-created PR counts from GitHub Search API. | |
| # Uses total_count per query (not actual results), so no 1,000-result cap. | |
| # Output: CSV to stdout, progress to stderr. | |
| # | |
| # Tracks: | |
| # 1. PRs created by known AI bot accounts | |
| # 2. PRs containing AI-generated markers in title/body | |
| # | |
| # Usage: | |
| # ./gh-ai-prs.sh # weekly buckets, last 6 months | |
| # ./gh-ai-prs.sh --granularity monthly | |
| # ./gh-ai-prs.sh --start 2025-08-01 --end 2026-02-27 | |
| # | |
| # Requirements: gh (authenticated), jq | |
| # Rate limit: 30 search requests/min authenticated. Script sleeps 2.5s/request. | |
set -euo pipefail

# ── Config ────────────────────────────────────────────────────────────────────
# Defaults; each one can be overridden on the command line (see parse_args).
GRANULARITY="weekly"                            # daily | weekly | monthly
END_DATE=$(date +%Y-%m-%d)                      # inclusive, YYYY-MM-DD
START_DATE=$(date -d "6 months ago" +%Y-%m-%d)  # inclusive; relies on GNU date -d
SLEEP_SECS=2.5                                  # pause between search requests

# Print a message to stderr and abort the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeed only if $1 is a real calendar date written exactly as YYYY-MM-DD.
# The round trip through `date` rejects values such as 2025-13-01.
is_iso_date() {
  [[ "$1" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]] || return 1
  [[ "$(date -u -d "$1" +%Y-%m-%d 2>/dev/null)" == "$1" ]]
}

#######################################
# Parse command-line options into the config globals, then validate them.
# Globals:   GRANULARITY, START_DATE, END_DATE, SLEEP_SECS (written)
# Arguments: the script's command-line arguments
# Outputs:   an error message on stderr when the input is rejected
# Returns:   0 on success; exits 1 on an unknown option, an option without
#            a value, or an invalid value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --granularity | --start | --end | --sleep)
        # Without this check a trailing option dies under `set -u` with a
        # cryptic "unbound variable" message instead of a usage error.
        [[ $# -ge 2 ]] || die "Missing value for $1"
        case "$1" in
          --granularity) GRANULARITY="$2" ;;
          --start) START_DATE="$2" ;;
          --end) END_DATE="$2" ;;
          --sleep) SLEEP_SECS="$2" ;;
        esac
        shift 2
        ;;
      *)
        die "Unknown arg: $1"
        ;;
    esac
  done

  # An unrecognised granularity would otherwise yield zero periods and a
  # silent, empty run.
  case "$GRANULARITY" in
    daily | weekly | monthly) ;;
    *) die "Invalid --granularity: ${GRANULARITY} (expected daily, weekly or monthly)" ;;
  esac

  # Periods are compared as strings later on, so only zero-padded ISO dates
  # order correctly.
  is_iso_date "$START_DATE" || die "Invalid --start date: ${START_DATE} (expected YYYY-MM-DD)"
  is_iso_date "$END_DATE" || die "Invalid --end date: ${END_DATE} (expected YYYY-MM-DD)"
  if [[ "$START_DATE" > "$END_DATE" ]]; then
    die "--start (${START_DATE}) is after --end (${END_DATE})"
  fi

  # Accept what sleep(1) accepts: a non-negative number, optionally with a
  # unit suffix.
  local sleep_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)[smhd]?$'
  [[ "$SLEEP_SECS" =~ $sleep_re ]] || die "Invalid --sleep value: ${SLEEP_SECS}"
}

parse_args "$@"
# ── Queries to track ─────────────────────────────────────────────────────────
# Format: "display_name|search_query"
# All queries get "is:pr" prepended and "created:FROM..TO" appended automatically.
#
# Marker queries carry an explicit in: qualifier. Without one, GitHub search
# matches the phrase in the title, the body AND the comments, so a
# "(body)" / "(title)" row would also count PRs that merely have a matching
# comment.
declare -a TOOLS=(
  # ── AI Coding Agents (PR authors) ──
  "Copilot SWE Agent (author)|author:copilot-swe-agent[bot]"
  "Codex Connector (author)|author:chatgpt-codex-connector[bot]"
  "Claude (author)|author:claude[bot]"
  "Devin (author)|author:devin-ai-integration[bot]"
  "Jules (author)|author:google-labs-jules[bot]"
  "Amazon Q (author)|author:amazon-q-developer[bot]"
  "Sweep (author)|author:sweep-ai[bot]"
  "Pixeebot (author)|author:pixeebot[bot]"
  "Codeflash (author)|author:codeflash-ai[bot]"
  "Codegen (author)|author:codegen-sh[bot]"
  "Mentat (author)|author:mentatbot[bot]"
  "GitAuto (author)|author:gitauto-ai[bot]"
  # ── Code Review / Other Bots (may also create PRs) ──
  "CodeRabbit (author)|author:coderabbitai[bot]"
  "Sourcery (author)|author:sourcery-ai[bot]"
  "Gemini Code Assist (author)|author:gemini-code-assist[bot]"
  "Cursor (author)|author:cursor[bot]"
  # ── Dependency / Automation (context) ──
  "Dependabot (author)|author:dependabot[bot]"
  "Renovate (author)|author:renovate[bot]"
  # ── Body/title markers ──
  "Claude Code (body)|\"Generated with Claude Code\" in:body"
  "Claude (co-author in body)|\"Co-Authored-By: Claude\" in:body"
  "Devin (body)|\"Created by Devin\" in:body"
  "Sweep (title)|\"Sweep:\" in:title"
)
# ── Date range generation ─────────────────────────────────────────────────────
#######################################
# Emit the reporting periods for a date window, one "FROM|TO" pair per line.
#
# Bucket alignment:
#   daily   - one single-day period for every date from start to end.
#   weekly  - Monday-to-Sunday periods beginning at the first Monday on or
#             after start; days before that Monday are not covered. The last
#             period is cut off at end.
#   monthly - calendar months beginning at the 1st of start's month (which
#             may be earlier than start). The last period is cut off at end.
#
# All date arithmetic runs in UTC (date -u): in time zones whose DST change
# happens at midnight, local midnight does not exist on the switch-over day
# and `date -d` would reject that date.
#
# Arguments: $1 - granularity: daily | weekly | monthly
#            $2 - first date of the window, YYYY-MM-DD
#            $3 - last date of the window (inclusive), YYYY-MM-DD
# Outputs:   "FROM|TO" lines on stdout; a diagnostic on stderr on failure
# Returns:   0 on success (nothing is printed when start is after end);
#            1 on an unknown granularity or when date(1) rejects a value
#######################################
generate_ranges() {
  local granularity="$1"
  local start="$2"
  local end="$3"
  local current period_end dow

  case "$granularity" in
    daily | weekly | monthly) ;;
    *)
      printf 'generate_ranges: unknown granularity: %s (expected daily, weekly or monthly)\n' \
        "$granularity" >&2
      return 1
      ;;
  esac

  # Empty window: nothing to emit.
  if [[ "$start" > "$end" ]]; then
    return 0
  fi

  case "$granularity" in
    daily)
      current="$start"
      while [[ ! "$current" > "$end" ]]; do
        printf '%s|%s\n' "$current" "$current"
        current=$(date -u -d "$current + 1 day" +%Y-%m-%d) || return 1
      done
      ;;
    weekly)
      # %u is the ISO weekday (1 = Monday .. 7 = Sunday); (8 - dow) % 7 is the
      # number of days forward to the first Monday on or after start.
      dow=$(date -u -d "$start" +%u) || return 1
      current=$(date -u -d "$start + $(( (8 - dow) % 7 )) days" +%Y-%m-%d) || return 1
      while [[ ! "$current" > "$end" ]]; do
        period_end=$(date -u -d "$current + 6 days" +%Y-%m-%d) || return 1
        if [[ "$period_end" > "$end" ]]; then
          period_end="$end"
        fi
        printf '%s|%s\n' "$current" "$period_end"
        current=$(date -u -d "$current + 7 days" +%Y-%m-%d) || return 1
      done
      ;;
    monthly)
      # Snap to the first day of start's month; every later step stays on a 1st.
      current=$(date -u -d "$start" +%Y-%m-01) || return 1
      while [[ ! "$current" > "$end" ]]; do
        period_end=$(date -u -d "$current + 1 month - 1 day" +%Y-%m-%d) || return 1
        if [[ "$period_end" > "$end" ]]; then
          period_end="$end"
        fi
        printf '%s|%s\n' "$current" "$period_end"
        current=$(date -u -d "$current + 1 month" +%Y-%m-%d) || return 1
      done
      ;;
  esac
}
# ── Query function ────────────────────────────────────────────────────────────
#######################################
# Percent-encode a string for use as a URL query-string value.
# Letters, digits and "-", ".", "_", "~" pass through unchanged, a space
# becomes "+", and every other byte becomes %XX (uppercase hex). This covers
# characters such as '"', '&', '#', '+', '%' and ',' that would otherwise
# corrupt the URL or the CSV row it is written into.
# Arguments: $1 - the raw string
# Outputs:   the encoded string, followed by a newline, on stdout
#######################################
url_encode() {
  local str="$1"
  # C locale: iterate byte by byte and keep the bracket ranges ASCII-only.
  local LC_ALL=C
  local out="" ch code hex i

  for (( i = 0; i < ${#str}; i++ )); do
    ch="${str:i:1}"
    case "$ch" in
      [a-zA-Z0-9.~_-])
        out+="$ch"
        ;;
      ' ')
        out+='+'
        ;;
      *)
        # A leading quote makes printf yield the character's numeric value;
        # the mask keeps high bytes in the 0-255 range.
        printf -v code '%d' "'$ch"
        printf -v hex '%%%02X' "$(( code & 0xFF ))"
        out+="$hex"
        ;;
    esac
  done

  printf '%s\n' "$out"
}
MAX_RETRIES=5       # rate-limit retries per query before giving up
RETRY_BASE_SECS=60  # back-off unit: retry N waits N * RETRY_BASE_SECS seconds

#######################################
# Ask the GitHub Search API how many PRs match a query within a date window.
# Globals:   MAX_RETRIES, RETRY_BASE_SECS (read)
# Arguments: $1 - search query fragment (e.g. "author:claude[bot]")
#            $2 - first creation date of the window, YYYY-MM-DD
#            $3 - last creation date of the window, YYYY-MM-DD
# Outputs:   stdout: the total_count as an integer, or the literal string
#            "ERROR" when no count could be obtained
#            stderr: rate-limit notices, warnings and error details
# Returns:   always 0; callers detect failure by comparing stdout to "ERROR"
#######################################
query_count() {
  local query="$1"
  local from="$2"
  local to="$3"
  local full_query="is:pr ${query} created:${from}..${to}"
  local attempt=0
  local result exit_code lowered wait_secs parsed count incomplete err_msg

  while true; do
    exit_code=0
    # stderr is merged into the capture because gh reports HTTP errors
    # (including rate-limit messages) there.
    result=$(gh api \
      --method GET \
      "search/issues" \
      -f q="${full_query}" \
      -f per_page=1 \
      2>&1) || exit_code=$?

    if [[ "$exit_code" -ne 0 ]]; then
      # Rate-limit text is only looked for in FAILED calls: a successful
      # response embeds one PR whose title/body may itself mention rate limits.
      # A plain string match also avoids `echo | grep -q`, which can report a
      # false negative under pipefail when grep exits before echo finishes.
      lowered="${result,,}"
      if [[ "$lowered" == *"secondary rate limit"* \
        || "$lowered" == *"api rate limit exceeded"* ]]; then
        attempt=$(( attempt + 1 ))
        if (( attempt > MAX_RETRIES )); then
          echo "ERROR: rate limit — gave up after $MAX_RETRIES retries" >&2
          echo "ERROR"
          return 0
        fi
        wait_secs=$(( RETRY_BASE_SECS * attempt ))
        echo "  RATE LIMITED: waiting ${wait_secs}s before retry $attempt/$MAX_RETRIES..." >&2
        sleep "$wait_secs"
        continue
      fi

      echo "ERROR: gh api failed (exit $exit_code): $result" >&2
      echo "ERROR"
      return 0
    fi

    # One jq pass yields "COUNT|INCOMPLETE"; jq's own diagnostics are dropped
    # on purpose because an unparseable response is reported just below.
    parsed=$(jq -r '"\(.total_count // "")|\(.incomplete_results // false)"' \
      <<<"$result" 2>/dev/null) || parsed=""
    count="${parsed%%|*}"
    incomplete="${parsed#*|}"

    if [[ ! "$count" =~ ^[0-9]+$ ]]; then
      err_msg=$(jq -r '.message // .errors[0].message // "unknown error"' \
        <<<"$result" 2>/dev/null || echo "unparseable response")
      echo "ERROR: API response: $err_msg" >&2
      echo "ERROR"
      return 0
    fi

    # The search API sets incomplete_results when the query timed out, in
    # which case total_count may be too low.
    if [[ "$incomplete" == "true" ]]; then
      echo "  WARNING: incomplete results for '${full_query}'; count may be low" >&2
    fi

    echo "$count"
    return 0
  done
}
#######################################
# Build the github.com search-page URL that shows the same PRs a query counts.
# The query sent to the site mirrors the API one: "is:pr" is prepended and
# "created:FROM..TO" is appended before encoding.
# Arguments: $1 - search query fragment
#            $2 - first creation date of the window, YYYY-MM-DD
#            $3 - last creation date of the window, YYYY-MM-DD
# Outputs:   the URL, followed by a newline, on stdout
# Returns:   0 on success; 1 (printing nothing) if url_encode fails
#######################################
build_search_url() {
  local query="$1"
  local from="$2"
  local to="$3"
  local encoded

  # Declared separately from the assignment so a url_encode failure is not
  # masked by `local`; bail out rather than emit a URL with an empty query.
  encoded=$(url_encode "is:pr ${query} created:${from}..${to}") || return 1

  printf 'https://github.com/search?q=%s&type=pullrequests\n' "$encoded"
}
# ── Main ──────────────────────────────────────────────────────────────────────

# Render one CSV field: values containing a comma, a double quote or a newline
# are wrapped in double quotes with embedded quotes doubled (RFC 4180); any
# other value is printed unchanged.
csv_field() {
  local value="$1"
  local q='"'
  if [[ "$value" == *,* || "$value" == *"$q"* || "$value" == *$'\n'* ]]; then
    printf '"%s"' "${value//$q/$q$q}"
  else
    printf '%s' "$value"
  fi
}

#######################################
# Run every query in TOOLS over every generated period and emit one CSV row
# per (tool, period) pair.
# Globals:   TOOLS, GRANULARITY, START_DATE, END_DATE, SLEEP_SECS (read)
# Outputs:   CSV (header + rows) on stdout; progress and errors on stderr
# Returns:   0 when the run completes (rows may carry ERROR as the count);
#            exits 1 if gh or jq is missing or no periods could be generated
#######################################
main() {
  local required
  for required in gh jq; do
    if ! command -v "$required" >/dev/null 2>&1; then
      echo "ERROR: required command not found: ${required}" >&2
      exit 1
    fi
  done

  local -a ranges=()
  mapfile -t ranges < <(generate_ranges "$GRANULARITY" "$START_DATE" "$END_DATE")
  # A failure inside the process substitution is invisible here, so an empty
  # list is the only signal that the granularity or the dates were unusable.
  if (( ${#ranges[@]} == 0 )); then
    echo "ERROR: no periods generated for granularity '${GRANULARITY}' between ${START_DATE} and ${END_DATE}" >&2
    exit 1
  fi

  echo "tool,period_start,period_end,granularity,total_count,url"

  local total_requests=$(( ${#TOOLS[@]} * ${#ranges[@]} ))
  local done_requests=0
  local est_mins
  # Estimate from the configured pause plus ~0.5s per API round trip; awk is
  # used because SLEEP_SECS may be fractional. Falls back to 3s per request.
  est_mins=$(awk -v n="$total_requests" -v s="$SLEEP_SECS" \
    'BEGIN { printf "%d", n * (s + 0.5) / 60 }' 2>/dev/null) \
    || est_mins=$(( total_requests * 3 / 60 ))

  echo "==> ${#TOOLS[@]} tools × ${#ranges[@]} periods = ${total_requests} API calls" >&2
  echo "==> Estimated time: ~${est_mins} minutes" >&2
  echo "" >&2

  local tool_entry display query range from to count url
  for tool_entry in "${TOOLS[@]}"; do
    # Entries are "display_name|search_query"; split on the first "|".
    display="${tool_entry%%|*}"
    query="${tool_entry#*|}"
    echo "--- ${display} ---" >&2

    for range in "${ranges[@]}"; do
      from="${range%%|*}"
      to="${range#*|}"
      count=$(query_count "$query" "$from" "$to")
      url=$(build_search_url "$query" "$from" "$to")

      printf '%s,%s,%s,%s,%s,%s\n' \
        "$(csv_field "$display")" "$from" "$to" "$GRANULARITY" "$count" \
        "$(csv_field "$url")"

      if [[ "$count" == "ERROR" ]]; then
        echo "  ${from}..${to}: ERROR (see above)" >&2
      else
        echo "  ${from}..${to}: ${count}  ${url}" >&2
      fi

      done_requests=$(( done_requests + 1 ))
      # No pause after the final request.
      if (( done_requests < total_requests )); then
        sleep "$SLEEP_SECS"
      fi
    done
  done

  echo "" >&2
  echo "==> Done. ${done_requests} requests made." >&2
}

main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment