Skip to content

Instantly share code, notes, and snippets.

@sohang3112
Last active April 21, 2024 00:38
Show Gist options
  • Save sohang3112/ced966fd7bf2d2a1b69a92f1386f790f to your computer and use it in GitHub Desktop.
Save sohang3112/ced966fd7bf2d2a1b69a92f1386f790f to your computer and use it in GitHub Desktop.
Using Github API, fetch details of all issues raised by me and save as spreadsheet
"""
Install requirements: pip install requests pandas parse
Set your Github Personal Access Token in environment variable GITHUB_PERSONAL_ACCESS_TOKEN before running this script.
Output Files:
- gh_issues.xlsx (MAIN OUTPUT) - issues' info in spreadsheet form
- gh_issues.json - original JSON response from Github Issues API
"""
from typing import Any, List, Dict
import os
import json
import requests
import pandas as pd
import parse
def parse_github_issue_url(html_url: str) -> Dict[str, str]:
"""@return: dict with keys "repo_owner", "repo" """
ans = parse.parse('https://github.com/{repo_owner}/{repo}/issues/{}', html_url)
return ans.named
def github_issues(author: str, github_token: str) -> List[Dict[str, Any]]:
"""Fetch issues raised by author via Github REST API"""
headers = { 'Authorization': f'token {github_token}' }
params = { 'q': f"author:{author} type:issue" }
response = requests.get('https://api.github.com/search/issues', params=params, headers=headers)
response.raise_for_status()
data = response.json()
with open('gh_issues.json', 'w') as f:
json.dump(data, f, indent=4)
assert not data['incomplete_results']
print('Total issues fetched:', data['total_count'])
return data['items']
# I have saved a read-only Github Personal Access Token in Windows Environment Variables Settings (personal account)
# Generate token by doing: github.com > login > Settings > Developer Settings > Personal Access Tokens > Tokens (classic)
gh_token = os.environ['GITHUB_PERSONAL_ACCESS_TOKEN']
author = 'sohang3112' # my Github username - change to your username to search in issues raised by you
# fetch issues JSON
issues_list = github_issues(author, gh_token)
# convert issues from JSON to pandas DataFrame
columns = [
'html_url', 'title',
'state', 'state_reason', 'locked',
'assignees', 'comments', 'score',
'updated_at',
'labels', 'reactions'
]
issues_df = pd.DataFrame(issues_list, columns=columns)
issues_df = issues_df.rename(columns={'html_url': 'issue_url'})
issues_df['labels'] = issues_df['labels'].apply(lambda labels: ', '.join([ label['name'] for label in labels ]))
issues_df['assignees'] = issues_df['assignees'].apply(lambda assignees: ', '.join([ x['login'] for x in assignees ]))
issues_df['reactions'] = issues_df['reactions'].apply(lambda reactions: reactions['total_count'])
issues_df['updated_at'] = issues_df['updated_at'].str.partition('T')[0] # from datetime format, keep only date, remove time
issues_df[['repo_owner', 'repo']] = pd.DataFrame(issues_df['issue_url'].apply(parse_github_issue_url).tolist())
issues_df.to_excel('gh_issues.xlsx', index=False) # save result spreadsheet
# now filters can be applied easily in the saved spreadsheet
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment