Created
March 27, 2026 23:32
-
-
Save PaulKinlan/68210b7a1c7edfce2fc2f8789065b227 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # ai-commit-6month.sh | |
| # Fetch 6 months of AI coding agent commit counts from GitHub Search API. | |
| # Uses total_count per query (not actual results), so no 1,000-result cap. | |
| # Output: CSV to stdout, progress to stderr. | |
| # | |
| # Tracks TWO detection methods: | |
| # 1. Co-Author trailers in commit messages (e.g. "Co-Authored-By: Claude") | |
| # 2. Bot user accounts as commit author (e.g. "copilot-swe-agent[bot]") | |
| # | |
| # Usage: | |
| # ./ai-commit-6month.sh # weekly buckets, last 6 months | |
| # ./ai-commit-6month.sh --granularity monthly | |
| # ./ai-commit-6month.sh --start 2025-08-01 --end 2026-02-27 | |
| # | |
| # Requirements: gh (authenticated), jq, GNU date (uses `date -d`), bash 4+ (uses `mapfile`) | |
| # Rate limit: 30 search requests/min authenticated. Script sleeps 2.5s/request. | |
set -euo pipefail

# ── Config ────────────────────────────────────────────────────────────────────
# Defaults; each can be overridden by the command-line flags handled below.
GRANULARITY="weekly"   # weekly | monthly | daily
END_DATE=$(date +%Y-%m-%d)
START_DATE=$(date -d "6 months ago" +%Y-%m-%d)
SLEEP_SECS=2.5         # stay under 30 req/min rate limit

#######################################
# Parse command-line flags into the config globals.
# Globals:   GRANULARITY, START_DATE, END_DATE, SLEEP_SECS (written)
# Arguments: the script's arguments ("$@")
# Outputs:   error message on stderr for bad input
# Returns:   0 on success; exits 1 on an unknown flag, a flag without a
#            value, an unsupported granularity or an unparseable date
#######################################
parse_args() {
  local opt
  while [[ $# -gt 0 ]]; do
    opt="$1"
    case "$opt" in
      --granularity | --start | --end | --sleep)
        # Every flag takes a value; check explicitly so a trailing flag gives
        # a readable error rather than an "unbound variable" abort.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for $opt" >&2
          exit 1
        fi
        ;;
      *)
        echo "Unknown arg: $opt" >&2
        exit 1
        ;;
    esac
    case "$opt" in
      --granularity) GRANULARITY="$2" ;;
      --start) START_DATE="$2" ;;
      --end) END_DATE="$2" ;;
      --sleep) SLEEP_SECS="$2" ;;
    esac
    shift 2
  done

  case "$GRANULARITY" in
    daily | weekly | monthly) ;;
    *)
      echo "Invalid granularity: $GRANULARITY (expected daily, weekly or monthly)" >&2
      exit 1
      ;;
  esac

  # Normalise to YYYY-MM-DD: the range logic compares dates as strings, which
  # is only correct for zero-padded ISO dates.
  START_DATE=$(date -d "$START_DATE" +%Y-%m-%d) || {
    echo "Invalid --start date" >&2
    exit 1
  }
  END_DATE=$(date -d "$END_DATE" +%Y-%m-%d) || {
    echo "Invalid --end date" >&2
    exit 1
  }
}

parse_args "$@"
# ── Tools to track ────────────────────────────────────────────────────────────
# Every entry has the form "display_name|search_query".
#
# The search_query is one of two kinds:
#   - message match: text looked for in the commit message
#     (Co-Authored-By trailers)
#   - author match:  an "author:" qualifier naming a bot user account
#
# A tool that shows up through both signals (e.g. Copilot) gets one row per
# signal, so the two can be compared side by side.
TOOLS=()

# Co-Author trailer matches (commit message)
TOOLS+=(
  "Claude (co-author)|Co-Authored-By: Claude"
  "GitHub Copilot (co-author)|Co-Authored-By: GitHub Copilot"
  "Cursor (co-author)|Co-Authored-By: Cursor"
  "Codex (co-author)|Co-Authored-By: Codex"
  "Aider (co-author)|Co-Authored-By: Aider"
  "Devin (co-author)|Co-Authored-By: Devin"
  "Windsurf (co-author)|Co-Authored-By: Windsurf"
  "Happy (co-author)|Co-Authored-By: Happy"
)

# Bot user account matches (commit author)
TOOLS+=(
  "Copilot Agent (author)|author:copilot-swe-agent[bot]"
  "Jules (author)|author:google-labs-jules[bot]"
  "Devin (author)|author:devin-ai-integration[bot]"
  "Amazon Q (author)|author:amazon-q-developer[bot]"
)
# ── Date range generation ─────────────────────────────────────────────────────
#######################################
# Print one "from|to" line (YYYY-MM-DD|YYYY-MM-DD) per period.
#   daily:   one line per day from start to end inclusive.
#   weekly:  Monday-to-Sunday weeks beginning with the first Monday on or
#            after start; the last week is clipped to end.
#   monthly: calendar months beginning with the 1st of the month containing
#            start; the last month is clipped to end.
# Arguments: $1 - granularity (daily | weekly | monthly)
#            $2 - start date, $3 - end date (anything `date -d` accepts)
# Outputs:   ranges on stdout, errors on stderr
# Returns:   0 on success; 1 on unknown granularity or a date that the
#            `date` command cannot parse
#######################################
generate_ranges() {
  local granularity="$1"
  local start="$2"
  local end="$3"
  local current

  # Normalise first: all comparisons below are string comparisons, which only
  # order correctly for zero-padded ISO dates.
  start=$(date -d "$start" +%Y-%m-%d) || return 1
  end=$(date -d "$end" +%Y-%m-%d) || return 1

  # Each `date` call is checked explicitly. Inside command substitution
  # errexit is off, and an empty $current would otherwise loop forever.
  case "$granularity" in
    daily)
      current="$start"
      while [[ ! "$current" > "$end" ]]; do
        echo "${current}|${current}"
        current=$(date -d "$current + 1 day" +%Y-%m-%d) || return 1
      done
      ;;
    weekly)
      local dow week_end
      # %u is the ISO weekday (1 = Monday), so this lands on the Monday of
      # the week containing start.
      dow=$(date -d "$start" +%u) || return 1
      current=$(date -d "$start - ${dow} days + 1 day" +%Y-%m-%d) || return 1
      # That Monday precedes a mid-week start: begin with the next full week.
      if [[ "$current" < "$start" ]]; then
        current=$(date -d "$current + 7 days" +%Y-%m-%d) || return 1
      fi
      while [[ ! "$current" > "$end" ]]; do
        week_end=$(date -d "$current + 6 days" +%Y-%m-%d) || return 1
        if [[ "$week_end" > "$end" ]]; then
          week_end="$end"
        fi
        echo "${current}|${week_end}"
        current=$(date -d "$current + 7 days" +%Y-%m-%d) || return 1
      done
      ;;
    monthly)
      local year month last_day range_start
      current="$start"
      while [[ ! "$current" > "$end" ]]; do
        year=$(date -d "$current" +%Y) || return 1
        month=$(date -d "$current" +%m) || return 1
        last_day=$(date -d "$year-$month-01 + 1 month - 1 day" +%Y-%m-%d) || return 1
        if [[ "$last_day" > "$end" ]]; then
          last_day="$end"
        fi
        range_start=$(date -d "$year-$month-01" +%Y-%m-%d) || return 1
        echo "${range_start}|${last_day}"
        current=$(date -d "$year-$month-01 + 1 month" +%Y-%m-%d) || return 1
      done
      ;;
    *)
      echo "ERROR: unknown granularity '${granularity}' (expected daily, weekly or monthly)" >&2
      return 1
      ;;
  esac
}
# ── Query function ────────────────────────────────────────────────────────────
#######################################
# Percent-encode a string for use as a URL query-string value.
# Letters, digits and "-._~" pass through, a space becomes "+", and every
# other byte becomes %XX (uppercase hex).
# Arguments: $1 - string to encode
# Outputs:   encoded string on stdout
#######################################
url_encode() {
  local str="$1"
  local encoded="" ch hex i
  # C locale: ${#str} and ${str:i:1} then work on bytes, so a multi-byte
  # character is escaped one byte at a time, and the ranges below are ASCII.
  local LC_ALL=C

  for (( i = 0; i < ${#str}; i++ )); do
    ch="${str:i:1}"
    case "$ch" in
      [a-zA-Z0-9._~-])
        encoded+="$ch"
        ;;
      ' ')
        encoded+="+"
        ;;
      *)
        # A leading quote makes printf yield the character's numeric code.
        printf -v hex '%%%02X' "'${ch}"
        encoded+="$hex"
        ;;
    esac
  done

  printf '%s\n' "$encoded"
}
MAX_RETRIES=5
RETRY_BASE_SECS=60 # first rate-limit wait; grows linearly with each retry

#######################################
# Ask the GitHub commit search API how many commits match a query in a
# date range.
# Globals:   MAX_RETRIES, RETRY_BASE_SECS (read)
# Arguments: $1 - search query
#            $2 - first author-date of the range (YYYY-MM-DD)
#            $3 - last author-date of the range (YYYY-MM-DD)
# Outputs:   the total_count on stdout, or the literal string ERROR if the
#            count could not be obtained; diagnostics on stderr
# Returns:   always 0, so callers must test the output for "ERROR"
#######################################
query_count() {
  local query="$1"
  local from="$2"
  local to="$3"
  local full_query="${query} author-date:${from}..${to}"
  local attempt=0
  local result exit_code wait_secs count err_msg

  while true; do
    # GitHub search API: /search/commits
    # Only total_count is needed, so per_page=1 minimises data transfer.
    # stderr is merged in because gh reports HTTP errors there.
    exit_code=0
    result=$(gh api \
      --method GET \
      "search/commits" \
      -f q="${full_query}" \
      -f per_page=1 \
      2>&1) || exit_code=$?

    if [[ "$exit_code" -ne 0 ]]; then
      # Only failed calls are scanned for rate-limit text: a successful
      # response carries a commit message that may itself contain the
      # phrase. "rate limit" matches both the primary and secondary messages.
      if grep -qi "rate limit" <<<"$result"; then
        attempt=$(( attempt + 1 ))
        if [[ "$attempt" -gt "$MAX_RETRIES" ]]; then
          echo "ERROR: rate limit — gave up after $MAX_RETRIES retries: $result" >&2
          echo "ERROR"
          return
        fi
        wait_secs=$(( RETRY_BASE_SECS * attempt ))
        echo "  RATE LIMITED: waiting ${wait_secs}s before retry $attempt/$MAX_RETRIES..." >&2
        sleep "$wait_secs"
        continue
      fi
      echo "ERROR: gh api failed (exit $exit_code): $result" >&2
      echo "ERROR"
      return
    fi

    # An unparseable response yields an empty count and is reported below.
    count=$(jq -r '.total_count // empty' 2>/dev/null <<<"$result") || count=""
    if [[ -z "$count" ]]; then
      err_msg=$(jq -r '.message // .errors[0].message // "unknown error"' 2>/dev/null <<<"$result" || echo "unparseable response")
      echo "ERROR: API response: $err_msg" >&2
      echo "ERROR"
      return
    fi

    echo "$count"
    return
  done
}
#######################################
# Build the github.com commit-search URL for a query and date range, for
# opening in a browser.
# Arguments: $1 - search query
#            $2 - first author-date of the range
#            $3 - last author-date of the range
# Outputs:   the URL on stdout
#######################################
build_search_url() {
  local query="$1" from="$2" to="$3"
  local search_terms="${query} author-date:${from}..${to}"
  local q_param
  q_param=$(url_encode "$search_terms")
  printf '%s\n' "https://github.com/search?q=${q_param}&type=commits"
}
# ── Main ──────────────────────────────────────────────────────────────────────
# Writes one CSV row per (tool, period) to stdout and progress to stderr.

# Fail early, with a clear message, if a required command is missing.
for dep in gh jq; do
  if ! command -v "$dep" >/dev/null 2>&1; then
    echo "ERROR: required command not found: ${dep}" >&2
    exit 1
  fi
done

mapfile -t RANGES < <(generate_ranges "$GRANULARITY" "$START_DATE" "$END_DATE")

# Without this check an empty range list produces a header-only CSV with a
# success exit status, hiding the mistake.
if [[ "${#RANGES[@]}" -eq 0 ]]; then
  echo "ERROR: no date ranges for granularity '${GRANULARITY}' between ${START_DATE} and ${END_DATE}" >&2
  exit 1
fi

echo "tool,period_start,period_end,granularity,total_count,url"

total_requests=$(( ${#TOOLS[@]} * ${#RANGES[@]} ))
done_requests=0

# Estimate time without bc (pure bash arithmetic, integer seconds).
# Assumes roughly 3 seconds per request, whatever SLEEP_SECS is set to.
est_secs=$(( total_requests * 3 ))
est_mins=$(( est_secs / 60 ))
echo "==> ${#TOOLS[@]} tools × ${#RANGES[@]} periods = ${total_requests} API calls" >&2
echo "==> Estimated time: ~${est_mins} minutes" >&2
echo "" >&2

for tool_entry in "${TOOLS[@]}"; do
  # Entries are "display_name|search_query"; split on the first "|".
  display="${tool_entry%%|*}"
  query="${tool_entry#*|}"
  echo "--- ${display} ---" >&2

  for range in "${RANGES[@]}"; do
    from="${range%%|*}"
    to="${range#*|}"

    # count is the literal string ERROR when the lookup failed; the row is
    # still written so that every period appears in the CSV.
    count=$(query_count "$query" "$from" "$to")
    url=$(build_search_url "$query" "$from" "$to")
    echo "${display},${from},${to},${GRANULARITY},${count},${url}"

    if [[ "$count" == "ERROR" ]]; then
      echo "  ${from}..${to}: ERROR (see above)" >&2
    else
      echo "  ${from}..${to}: ${count}  ${url}" >&2
    fi

    done_requests=$(( done_requests + 1 ))
    # Pace requests to respect the rate limit; no wait after the last one.
    if [[ "$done_requests" -lt "$total_requests" ]]; then
      sleep "$SLEEP_SECS"
    fi
  done
done

echo "" >&2
echo "==> Done. ${done_requests} requests made." >&2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment