Skip to content

Instantly share code, notes, and snippets.

@giuli007
Last active May 15, 2025 12:27
Show Gist options
  • Save giuli007/c0ad2a78a538d11dbd07a9ef93f714ae to your computer and use it in GitHub Desktop.
Save giuli007/c0ad2a78a538d11dbd07a9ef93f714ae to your computer and use it in GitHub Desktop.
Pull Request data for llm
#!/bin/bash
#
# Dump a GitHub pull request as plain text (handy as LLM context):
# metadata, description, unified diff, conversation comments and
# review discussions grouped by thread.
#
# Usage: <script> <github-pull-request-url>
#   e.g. https://github.com/OWNER/REPO/pull/123
#        https://ghe.example.com/OWNER/REPO/pull/123   (GitHub Enterprise)
#
# Environment:
#   GH_TOKEN  optional; when set it is sent as "Authorization: token ...".
#
# Requires: curl, jq
set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Check if a URL is provided. "${1:-}" is required because, under `set -u`,
# a bare "$1" aborts with "unbound variable" before the usage text is shown.
if [[ -z "${1:-}" ]]; then
  echo "Usage: $0 <github-pull-request-url>" >&2
  exit 1
fi
url="$1"

for tool in curl jq; do
  command -v "$tool" >/dev/null \
    || die "error: required tool '$tool' not found in PATH"
done

# -f makes HTTP errors (404, 401, rate limiting, ...) fail the command instead
# of handing an error document to jq; -S keeps curl's error text despite -s.
curl_opts=("-L" "-s" "-S" "-f")
if [[ -n "${GH_TOKEN:-}" ]]; then
  curl_opts+=("-H" "Authorization: token ${GH_TOKEN}")
fi

# Extract components from the URL.
# Browser URLs use ".../pull/<n>"; ".../pulls/<n>" is accepted as well for
# backward compatibility. Anything after the number (/files, ?query, #anchor)
# is ignored.
pr_url_re='^https://([^/]+)/([^/]+)/([^/]+)/pulls?/([0-9]+)([/?#].*)?$'
if [[ ! "$url" =~ $pr_url_re ]]; then
  die "error: not a pull request URL: $url"
fi
domain="${BASH_REMATCH[1]}"
owner="${BASH_REMATCH[2]}"
repo="${BASH_REMATCH[3]}"
pr_number="${BASH_REMATCH[4]}"

if [[ "$domain" == "github.com" || "$domain" == "www.github.com" ]]; then
  # Public GitHub: the REST API lives on a dedicated host.
  base_url="https://api.github.com/repos/$owner/$repo/pulls/$pr_number"
else
  # GitHub Enterprise: the REST API is served under /api/v3 on the same host.
  base_url="https://$domain/api/v3/repos/$owner/$repo/pulls/$pr_number"
fi

# Fetch the pull request data
pr_data=$(curl "${curl_opts[@]}" "$base_url") \
  || die "error: failed to fetch pull request from $base_url"

# Extract relevant information
pr_date=$(jq -r '.created_at' <<<"$pr_data")
pr_author=$(jq -r '.user.login' <<<"$pr_data")
pr_title=$(jq -r '.title' <<<"$pr_data")
# A pull request without a description has a null body; print nothing
# rather than the literal string "null".
pr_body=$(jq -r '.body // ""' <<<"$pr_data")
comments_url=$(jq -r '.comments_url' <<<"$pr_data")
review_comments_url=$(jq -r '.review_comments_url' <<<"$pr_data")

# Fetch comments. The API pages its results (30 per page by default); ask for
# the maximum page size. Only the first page is fetched, so threads with more
# than 100 comments are still truncated.
comments=$(curl "${curl_opts[@]}" "${comments_url}?per_page=100") \
  || die "error: failed to fetch comments from $comments_url"
review_comments=$(curl "${curl_opts[@]}" "${review_comments_url}?per_page=100") \
  || die "error: failed to fetch review comments from $review_comments_url"

# Fetch the diff. On the REST API the diff representation is selected with
# the Accept media type on the pull request endpoint itself; the ".diff"
# suffix is a convention of the web URLs, not of the API.
diff=$(curl "${curl_opts[@]}" \
  -H "Accept: application/vnd.github.v3.diff" "$base_url") \
  || die "error: failed to fetch diff from $base_url"

# Print the structured information
echo "Pull Request Information:"
echo "-------------------------"
printf 'Date: %s\n' "$pr_date"
printf 'Author: %s\n' "$pr_author"
printf 'Title: %s\n' "$pr_title"
printf 'Description: %s\n' "$pr_body"
echo
echo "Unified Diff:"
echo "-------------"
printf '%s\n' "$diff"
echo
echo "Comments:"
echo "---------"
# Print comments
jq -r '.[] | "User: \(.user.login)\nDate: \(.created_at)\nComment: \(.body)\n---"' \
  <<<"$comments"
echo
echo "Review Comments:"
echo "----------------"
# Print review comments, one section per discussion thread.
jq -r '
# Group comments by their root comment ID (replies carry in_reply_to_id).
group_by(.in_reply_to_id // .id)[] |
# The root comment supplies the file path and the commented line. If the root
# is not part of the fetched page, fall back to the first comment of the group
# instead of silently dropping the whole discussion.
(first(.[] | select(.in_reply_to_id == null)) // .[0]) as $root |
"Discussion on file: \($root.path)\n" +
"Line: \((($root.diff_hunk // "") | split("\n") | last) // "")\n" +
"\n---\n" +
(map(
"Date: \(.created_at)\n" +
"Author: \(.user.login)\n" +
"Comment: \(.body)\n"
) | join("\n---\n")) +
"\n====================\n"
' <<<"$review_comments"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment