Skip to content

Instantly share code, notes, and snippets.

@ara4n
Created June 12, 2024 10:35
Show Gist options
  • Save ara4n/79447347c4b3e1852c343be749df41df to your computer and use it in GitHub Desktop.
Save ara4n/79447347c4b3e1852c343be749df41df to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests
# Repository to analyse and the credential used to query it.
# The personal access token is taken from the GITHUB_TOKEN environment
# variable so that a real credential never has to be written into (and shared
# with) this script; the literal below is only a placeholder fallback.
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', 'hunter42')
REPO_OWNER = 'matrix-org'
REPO_NAME = 'matrix-doc'
# Function to get pull requests with labels and created date from GitHub
def get_pull_requests(repo_owner, repo_name, token, session=None, timeout=30):
    """Fetch all proposal-labelled pull requests of a GitHub repository.

    Walks the paginated ``/repos/{owner}/{repo}/pulls`` endpoint with
    ``state=all`` until an empty page is returned, keeps only pull requests
    carrying the ``proposal`` label, and tags each one with a single state.

    Args:
        repo_owner: Owner (user or organisation) of the repository.
        repo_name: Name of the repository.
        token: GitHub access token sent in the ``Authorization`` header.
        session: Optional object exposing a ``requests``-compatible ``get``
            method, e.g. a ``requests.Session`` to reuse one connection
            across pages. Defaults to the ``requests`` module itself.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of dicts in API order, each with the keys ``number``,
        ``labels`` (a one-element list holding ``'open'``, ``'merged'`` or
        ``'closed'``) and ``created_at`` (the timestamp string exactly as
        returned by the API).

    Raises:
        RuntimeError: If any page request returns a status other than 200.
    """
    http = requests if session is None else session
    url = f'https://api.github.com/repos/{repo_owner}/{repo_name}/pulls'
    headers = {'Authorization': f'token {token}'}
    # Ask for the largest page the API allows; the default page size would
    # need several times as many round trips for the same data.
    params = {'state': 'all', 'per_page': 100}
    pull_requests = []
    page = 1
    while True:
        # Without a timeout a stalled connection would block forever.
        response = http.get(
            url,
            headers=headers,
            params={**params, 'page': page},
            timeout=timeout,
        )
        if response.status_code != 200:
            raise RuntimeError(f"Failed to fetch pull requests: {response.status_code}")
        prs = response.json()
        if not prs:
            # An empty page marks the end of the listing.
            break
        for pr in prs:
            label_names = {label['name'] for label in pr['labels']}
            if 'proposal' not in label_names:
                continue
            # A closed PR counts as merged only when it carries the
            # repository's 'merged' label; otherwise it is plain closed.
            if pr['state'] != 'closed':
                state = 'open'
            elif 'merged' in label_names:
                state = 'merged'
            else:
                state = 'closed'
            pull_requests.append({
                'number': pr['number'],
                'labels': [state],
                'created_at': pr['created_at'],
            })
        page += 1
    return pull_requests
# Fetch pull requests
pull_requests = get_pull_requests(REPO_OWNER, REPO_NAME, GITHUB_TOKEN)
if not pull_requests:
    # An empty DataFrame has no 'created_at' column, so the steps below would
    # fail with an opaque KeyError; stop with a clear message instead.
    raise SystemExit('No proposal pull requests found; nothing to plot.')

# Convert the data to a DataFrame, keeping only the calendar day of creation
df = pd.DataFrame(pull_requests)
df['created_at'] = pd.to_datetime(df['created_at']).dt.date

# Expand the DataFrame to have one row per label
labels_df = (
    df[['created_at', 'labels']]
    .explode('labels')
    .rename(columns={'labels': 'label'})
)

# Group by date and label to count occurrences (rows: days, columns: labels)
labels_counts = labels_df.groupby(['created_at', 'label']).size().unstack(fill_value=0)

# Compute the cumulative sum
cumulative_counts = labels_counts.cumsum()

# Plot the data as a stacked area graph. The series are the open / merged /
# closed states assigned by get_pull_requests, hence "by State" in the title.
fig, ax = plt.subplots(figsize=(14, 8))
cumulative_counts.plot.area(ax=ax)
ax.set_xlabel('Date')
ax.set_ylabel('Cumulative Number of MSCs')
ax.set_title('Cumulative Number of MSCs by State Over Time')
ax.legend(title='Labels', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment