Last active
April 21, 2024 00:38
-
-
Save sohang3112/ced966fd7bf2d2a1b69a92f1386f790f to your computer and use it in GitHub Desktop.
Using Github API, fetch details of all issues raised by me and save as spreadsheet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Install requirements: pip install requests pandas parse | |
Set your Github Personal Access Token in environment variable GITHUB_PERSONAL_ACCESS_TOKEN before running this script. | |
Output Files: | |
- gh_issues.xlsx (MAIN OUTPUT) - issues' info in spreadsheet form | |
- gh_issues.json - original JSON response from Github Issues API | |
""" | |
from typing import Any, List, Dict | |
import os | |
import json | |
import requests | |
import pandas as pd | |
import parse | |
def parse_github_issue_url(html_url: str) -> Dict[str, str]: | |
"""@return: dict with keys "repo_owner", "repo" """ | |
ans = parse.parse('https://github.com/{repo_owner}/{repo}/issues/{}', html_url) | |
return ans.named | |
def github_issues(author: str, github_token: str) -> List[Dict[str, Any]]: | |
"""Fetch issues raised by author via Github REST API""" | |
headers = { 'Authorization': f'token {github_token}' } | |
params = { 'q': f"author:{author} type:issue" } | |
response = requests.get('https://api.github.com/search/issues', params=params, headers=headers) | |
response.raise_for_status() | |
data = response.json() | |
with open('gh_issues.json', 'w') as f: | |
json.dump(data, f, indent=4) | |
assert not data['incomplete_results'] | |
print('Total issues fetched:', data['total_count']) | |
return data['items'] | |
# I have saved a read-only Github Personal Access Token in Windows Environment Variables Settings (personal account) | |
# Generate token by doing: github.com > login > Settings > Developer Settings > Personal Access Tokens > Tokens (classic) | |
gh_token = os.environ['GITHUB_PERSONAL_ACCESS_TOKEN'] | |
author = 'sohang3112' # my Github username - change to your username to search in issues raised by you | |
# fetch issues JSON | |
issues_list = github_issues(author, gh_token) | |
# convert issues from JSON to pandas DataFrame | |
columns = [ | |
'html_url', 'title', | |
'state', 'state_reason', 'locked', | |
'assignees', 'comments', 'score', | |
'updated_at', | |
'labels', 'reactions' | |
] | |
issues_df = pd.DataFrame(issues_list, columns=columns) | |
issues_df = issues_df.rename(columns={'html_url': 'issue_url'}) | |
issues_df['labels'] = issues_df['labels'].apply(lambda labels: ', '.join([ label['name'] for label in labels ])) | |
issues_df['assignees'] = issues_df['assignees'].apply(lambda assignees: ', '.join([ x['login'] for x in assignees ])) | |
issues_df['reactions'] = issues_df['reactions'].apply(lambda reactions: reactions['total_count']) | |
issues_df['updated_at'] = issues_df['updated_at'].str.partition('T')[0] # from datetime format, keep only date, remove time | |
issues_df[['repo_owner', 'repo']] = pd.DataFrame(issues_df['issue_url'].apply(parse_github_issue_url).tolist()) | |
issues_df.to_excel('gh_issues.xlsx', index=False) # save result spreadsheet | |
# now filters can be applied easily in the saved spreadsheet |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment