Skip to content

Instantly share code, notes, and snippets.

@sr-murthy
Last active May 28, 2022 17:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sr-murthy/e3713372d0a64de185753c85f4388e47 to your computer and use it in GitHub Desktop.
Save sr-murthy/e3713372d0a64de185753c85f4388e47 to your computer and use it in GitHub Desktop.
Viewing top contributors for public GitHub repositories
import json
import pandas as pd
import requests
def get_github_repo_top_contributors(org_user_name, repo_name, top_n=50):
    """
    Gets the top n contributors data for the given GitHub repository - the
    repository should be specified via an organisational or user name and
    a repository name.

    Results are fetched page by page from the GitHub REST API and the
    combined rows are truncated to at most ``top_n`` entries. Fewer rows
    are returned if the repository has fewer contributors.

    :param org_user_name: An organisational or username
    :type org_user_name: ``str``

    :param repo_name: A repository name
    :type repo_name: ``str``

    :param top_n: The size of the top contributors list - default
                  is ``50``; a value of ``0`` or less yields an empty
                  dataframe without any request being made
    :type top_n: ``int``

    :return: The top n contributors data for the given GitHub
             repository, as a dataframe
    :rtype: ``pd.DataFrame``

    :raises ValueError: If a request returns a non-success HTTP status, or
                        the response body is not a JSON array
    """
    # GitHub's documented upper limit for the ``per_page`` parameter.
    max_per_page = 100

    # Nothing to ask for - avoid sending ``per_page=0`` or a negative page.
    if top_n <= 0:
        return pd.DataFrame()

    url_template = (
        'https://api.github.com/repos/{org_user_name}/{repo_name}/contributors'
        '?q=contributions&per_page={per_page}&page={page_num}&order=desc'
    )

    # The page size MUST be the same for every request: the API computes the
    # offset of a page as ``(page - 1) * per_page``, so shrinking ``per_page``
    # on the last page would re-fetch rows already seen instead of the next
    # ones. Over-fetch on the last page and truncate afterwards instead.
    per_page = min(top_n, max_per_page)
    num_pages = -(-top_n // per_page)  # ceiling division

    rows = []
    for page_num in range(1, num_pages + 1):
        req_url = url_template.format(
            org_user_name=org_user_name,
            repo_name=repo_name,
            per_page=per_page,
            page_num=page_num,
        )
        res = requests.get(req_url)

        # 204 No Content is what the API returns for an empty repository;
        # there is no body to parse and no further pages to fetch.
        if res.status_code == 204:
            break

        # Error payloads are JSON objects, not arrays - report them clearly
        # (as ``ValueError``, the type the dataframe constructor would have
        # raised on such a payload) rather than failing obscurely later.
        if not res.ok:
            raise ValueError(
                'GitHub API request to {} failed with status {} ({})'.format(
                    req_url, res.status_code, res.reason
                )
            )

        page = json.loads(res.text)
        if not isinstance(page, list):
            raise ValueError(
                'Unexpected response from {}: expected a JSON array, got {}'.format(
                    req_url, type(page).__name__
                )
            )

        rows.extend(page)

        # A short page means the contributor list is exhausted.
        if len(page) < per_page:
            break

    # Build the frame once from all rows rather than concatenating per page.
    return pd.DataFrame(rows[:top_n])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment