sohang3112/get_my_github_issues.py

## get_my_github_issues.py
"""
Install requirements: pip install requests pandas parse
Set your Github Personal Access Token in environment variable GITHUB_PERSONAL_ACCESS_TOKEN before running this script.

Output Files:
- gh_issues.xlsx (MAIN OUTPUT) - issues' info in spreadsheet form
- gh_issues.json - original JSON response from Github Issues API
"""
from typing import Any, List, Dict
import os
import json

import requests
import pandas as pd
import parse

def parse_github_issue_url(html_url: str) -> Dict[str, str]:
    """@return: dict with keys "repo_owner", "repo" """
    ans = parse.parse('https://github.com/{repo_owner}/{repo}/issues/{}', html_url)
    return ans.named

def github_issues(author: str, github_token: str) -> List[Dict[str, Any]]:
    """Fetch issues raised by author via Github REST API"""
    headers = { 'Authorization': f'token {github_token}' }
    params = { 'q': f"author:{author} type:issue" }
    response = requests.get('https://api.github.com/search/issues', params=params, headers=headers)
    response.raise_for_status()
    data = response.json()
    with open('gh_issues.json', 'w') as f:
        json.dump(data, f, indent=4)
    assert not data['incomplete_results']
    print('Total issues fetched:', data['total_count'])
    return data['items']

# I have saved a read-only Github Personal Access Token in Windows Environment Variables Settings (personal account)
# Generate token by doing: github.com > login > Settings > Developer Settings > Personal Access Tokens > Tokens (classic)
gh_token = os.environ['GITHUB_PERSONAL_ACCESS_TOKEN']
author = 'sohang3112'        # my Github username - change to your username to search in issues raised by you

# fetch issues JSON
issues_list = github_issues(author, gh_token)

# convert issues from JSON to pandas DataFrame
columns = [
    'html_url', 'title',
    'state', 'state_reason', 'locked',
    'assignees', 'comments', 'score',
    'updated_at',
    'labels', 'reactions'
]
issues_df = pd.DataFrame(issues_list, columns=columns)
issues_df = issues_df.rename(columns={'html_url': 'issue_url'})
issues_df['labels'] = issues_df['labels'].apply(lambda labels: ', '.join([ label['name'] for label in labels ]))
issues_df['assignees'] = issues_df['assignees'].apply(lambda assignees: ', '.join([ x['login'] for x in assignees ]))
issues_df['reactions'] = issues_df['reactions'].apply(lambda reactions: reactions['total_count'])
issues_df['updated_at'] = issues_df['updated_at'].str.partition('T')[0]  # from datetime format, keep only date, remove time
issues_df[['repo_owner', 'repo']] = pd.DataFrame(issues_df['issue_url'].apply(parse_github_issue_url).tolist())

issues_df.to_excel('gh_issues.xlsx', index=False)         # save result spreadsheet
# now filters can be applied easily in the saved spreadsheet
	"""
	Install requirements: pip install requests pandas parse
	Set your Github Personal Access Token in environment variable GITHUB_PERSONAL_ACCESS_TOKEN before running this script.

	Output Files:
	- gh_issues.xlsx (MAIN OUTPUT) - issues' info in spreadsheet form
	- gh_issues.json - original JSON response from Github Issues API
	"""
	from typing import Any, List, Dict
	import os
	import json

	import requests
	import pandas as pd
	import parse

	def parse_github_issue_url(html_url: str) -> Dict[str, str]:
	"""@return: dict with keys "repo_owner", "repo" """
	ans = parse.parse('https://github.com/{repo_owner}/{repo}/issues/{}', html_url)
	return ans.named

	def github_issues(author: str, github_token: str) -> List[Dict[str, Any]]:
	"""Fetch issues raised by author via Github REST API"""
	headers = { 'Authorization': f'token {github_token}' }
	params = { 'q': f"author:{author} type:issue" }
	response = requests.get('https://api.github.com/search/issues', params=params, headers=headers)
	response.raise_for_status()
	data = response.json()
	with open('gh_issues.json', 'w') as f:
	json.dump(data, f, indent=4)
	assert not data['incomplete_results']
	print('Total issues fetched:', data['total_count'])
	return data['items']

	# I have saved a read-only Github Personal Access Token in Windows Environment Variables Settings (personal account)
	# Generate token by doing: github.com > login > Settings > Developer Settings > Personal Access Tokens > Tokens (classic)
	gh_token = os.environ['GITHUB_PERSONAL_ACCESS_TOKEN']
	author = 'sohang3112' # my Github username - change to your username to search in issues raised by you

	# fetch issues JSON
	issues_list = github_issues(author, gh_token)

	# convert issues from JSON to pandas DataFrame
	columns = [
	'html_url', 'title',
	'state', 'state_reason', 'locked',
	'assignees', 'comments', 'score',
	'updated_at',
	'labels', 'reactions'
	]
	issues_df = pd.DataFrame(issues_list, columns=columns)
	issues_df = issues_df.rename(columns={'html_url': 'issue_url'})
	issues_df['labels'] = issues_df['labels'].apply(lambda labels: ', '.join([ label['name'] for label in labels ]))
	issues_df['assignees'] = issues_df['assignees'].apply(lambda assignees: ', '.join([ x['login'] for x in assignees ]))
	issues_df['reactions'] = issues_df['reactions'].apply(lambda reactions: reactions['total_count'])
	issues_df['updated_at'] = issues_df['updated_at'].str.partition('T')[0] # from datetime format, keep only date, remove time
	issues_df[['repo_owner', 'repo']] = pd.DataFrame(issues_df['issue_url'].apply(parse_github_issue_url).tolist())

	issues_df.to_excel('gh_issues.xlsx', index=False) # save result spreadsheet
	# now filters can be applied easily in the saved spreadsheet