Skip to content

Instantly share code, notes, and snippets.

@drew2a
Last active June 14, 2024 08:17
Show Gist options
  • Save drew2a/3eec7389359a57737b06c1991bf2c2a3 to your computer and use it in GitHub Desktop.
Save drew2a/3eec7389359a57737b06c1991bf2c2a3 to your computer and use it in GitHub Desktop.
Fetch issues with 'type: bug' label and visualise them
import json
import requests
def fetch_github_data(repo, endpoint, params=None):
    """Fetch all pages of a GitHub repository endpoint.

    Args:
        repo: Repository in ``owner/name`` form; it is inserted into the
            ``https://api.github.com/repos/...`` URL.
        endpoint: Path segment below the repository, e.g. ``'issues'``.
        params: Optional mapping of extra query parameters. It is copied,
            never modified. A ``per_page`` entry is honoured; ``page`` is
            always controlled by this function.

    Returns:
        A list with the items of every page, concatenated in page order.

    Raises:
        requests.HTTPError: If a response carries a 4xx/5xx status.
        ValueError: If a page is not a JSON array.
    """
    base_url = f'https://api.github.com/repos/{repo}/{endpoint}'

    # Work on a private copy so neither the caller's mapping nor a shared
    # default object is ever mutated.
    query = dict(params or {})
    query.setdefault('per_page', 100)

    data = []
    page = 1
    while True:
        print(f"Processing {endpoint}, page {page}...")
        query['page'] = page
        # Without a timeout a stalled connection would block forever.
        response = requests.get(base_url, params=query, timeout=30)
        # Fail loudly on error responses instead of treating the error
        # payload as a page of results.
        response.raise_for_status()
        page_data = response.json()
        if not isinstance(page_data, list):
            raise ValueError(
                f"Expected a JSON array from {base_url}, got {type(page_data).__name__}"
            )
        if not page_data:
            # An empty page marks the end of the listing.
            break
        data.extend(page_data)
        page += 1
    return data
# Repository whose data is exported
repo = 'Tribler/tribler'

# Download the issues labelled as bugs, regardless of their state
print("Fetching issues...")
bug_filter = {'state': 'all', 'labels': 'type: bug'}
issues = fetch_github_data(repo, 'issues', bug_filter)

# Write the issues to disk as JSON
with open('issues.json', 'w') as issues_file:
    issues_file.write(json.dumps(issues))
print("Issues saved to 'issues.json'.")

# Download the releases
print("Fetching releases...")
releases = fetch_github_data(repo, 'releases')

# Write the releases to disk as JSON
with open('releases.json', 'w') as releases_file:
    releases_file.write(json.dumps(releases))
print("Releases saved to 'releases.json'.")
import json
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.dates import DateFormatter, DayLocator, MonthLocator, WeekdayLocator
# Plot the number of open issues per day over a recent window of days.

GITHUB_TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

# Number of days shown on the chart, counted back from today.
WINDOW_DAYS = 60


def _build_issue_frame(raw_issues):
    """Turn the loaded issue entries into a DataFrame.

    The result has the columns ``created_at`` and ``closed_at`` (datetimes,
    ``closed_at`` missing when the entry has no closing time) and
    ``is_pull_request`` (True when the entry has a ``'pull_request'`` key).
    """
    rows = [
        {
            'created_at': datetime.strptime(item['created_at'], GITHUB_TIME_FORMAT),
            'closed_at': (datetime.strptime(item['closed_at'], GITHUB_TIME_FORMAT)
                          if item['closed_at'] else None),
            'is_pull_request': 'pull_request' in item,
        }
        for item in raw_issues
    ]
    frame = pd.DataFrame(rows, columns=['created_at', 'closed_at', 'is_pull_request'])
    # Force the dtypes: with no rows, or with no closed issue at all, pandas
    # would otherwise leave object columns on which `.dt` fails.
    frame['created_at'] = pd.to_datetime(frame['created_at'])
    frame['closed_at'] = pd.to_datetime(frame['closed_at'])
    frame['is_pull_request'] = frame['is_pull_request'].astype(bool)
    return frame


def _count_open_issues_per_day(frame, first_day, last_day):
    """Count open non-pull-request issues for each day of an inclusive range.

    An issue counts as open on a day when it was created on or before that
    day and is either not closed or closed on a later day.
    """
    # These per-issue values do not depend on the day, so compute them once.
    created_dates = frame['created_at'].dt.date
    closed_dates = frame['closed_at'].dt.date
    never_closed = frame['closed_at'].isnull()
    is_issue = ~frame['is_pull_request']

    rows = []
    day = first_day
    while day <= last_day:
        open_mask = (created_dates <= day) & (never_closed | (closed_dates > day)) & is_issue
        rows.append({'date': day, 'open_issues': open_mask.sum()})
        day += timedelta(days=1)
    return pd.DataFrame(rows)


# Load issues from the JSON file.
# The release data is not read here: this zoomed-in view draws no release
# markers, so the former grouping of releases was dead code.
with open('issues.json', 'r') as file:
    issues = json.load(file)

# Processing issue data
print("Processing issue data...")
df_issues = _build_issue_frame(issues)

# The chart covers the last WINDOW_DAYS days up to today
last_date = datetime.now().date()
first_date = last_date - timedelta(days=WINDOW_DAYS)

# Counting open issues by date
df_open_issues = _count_open_issues_per_day(df_issues, first_date, last_date)

# NOTE(review): the progress message and the title still mention release
# groups/periods although nothing release-related is drawn here — confirm
# whether the wording or the missing shading is the intended state.
print("Plotting Open Issues Over Time with Release Groups...")
plt.figure(figsize=(18, 6), facecolor='white')  # White figure background
plt.plot(df_open_issues['date'], df_open_issues['open_issues'],
         marker='o', linestyle='-', label='Open Issues', markersize=4)
plt.title('Open Issues Over Time with Colored Release Periods')
plt.xlabel('Date')
plt.ylabel('Number of Open Issues')
plt.grid(True)
plt.legend()

# Limit the X-axis to the plotted window
plt.xlim([first_date, last_date])

# Major ticks on the first day of each month, minor ticks every Monday
axis = plt.gca().xaxis
axis.set_major_locator(MonthLocator())  # First day of each month
# Weekdays are numbered from Monday = 0, so 0 (not 1) selects Mondays.
axis.set_minor_locator(WeekdayLocator(byweekday=0))
axis.set_major_formatter(DateFormatter('%Y-%m-%d'))
axis.set_minor_formatter(DateFormatter('%d'))

# Rotate date labels for better readability
plt.gcf().autofmt_xdate()
plt.tight_layout()
plt.show()
print("Visualization completed.")
import json
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import pandas as pd
# Plot the number of open issues per day since the first issue, with a
# vertical marker for every release.

GITHUB_TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


def _build_issue_frame(raw_issues):
    """Turn the loaded issue entries into a DataFrame.

    The result has the columns ``created_at`` and ``closed_at`` (datetimes,
    ``closed_at`` missing when the entry has no closing time) and
    ``is_pull_request`` (True when the entry has a ``'pull_request'`` key).
    """
    rows = [
        {
            'created_at': datetime.strptime(item['created_at'], GITHUB_TIME_FORMAT),
            'closed_at': (datetime.strptime(item['closed_at'], GITHUB_TIME_FORMAT)
                          if item['closed_at'] else None),
            'is_pull_request': 'pull_request' in item,
        }
        for item in raw_issues
    ]
    frame = pd.DataFrame(rows, columns=['created_at', 'closed_at', 'is_pull_request'])
    # Force the dtypes: with no rows, or with no closed issue at all, pandas
    # would otherwise leave object columns on which `.dt` fails.
    frame['created_at'] = pd.to_datetime(frame['created_at'])
    frame['closed_at'] = pd.to_datetime(frame['closed_at'])
    frame['is_pull_request'] = frame['is_pull_request'].astype(bool)
    return frame


def _count_open_issues_per_day(frame, first_day, last_day):
    """Count open non-pull-request issues for each day of an inclusive range.

    An issue counts as open on a day when it was created on or before that
    day and is either not closed or closed on a later day.
    """
    # These per-issue values do not depend on the day, so compute them once.
    created_dates = frame['created_at'].dt.date
    closed_dates = frame['closed_at'].dt.date
    never_closed = frame['closed_at'].isnull()
    is_issue = ~frame['is_pull_request']

    rows = []
    day = first_day
    while day <= last_day:
        open_mask = (created_dates <= day) & (never_closed | (closed_dates > day)) & is_issue
        rows.append({'date': day, 'open_issues': open_mask.sum()})
        day += timedelta(days=1)
    return pd.DataFrame(rows)


# Load issues and releases from JSON files
with open('issues.json', 'r') as file:
    issues = json.load(file)
with open('releases.json', 'r') as file:
    releases = json.load(file)

# Process the issue data
print("Processing issue data...")
df_issues = _build_issue_frame(issues)
if df_issues.empty:
    # Without any issue there is no first date to start the time axis from.
    raise SystemExit("No issues found in 'issues.json'; nothing to plot.")

# The time axis runs from the first created issue until today
first_date = df_issues['created_at'].min().date()
last_date = datetime.now().date()

# Count open issues by date
df_open_issues = _count_open_issues_per_day(df_issues, first_date, last_date)

# Plotting
print("Plotting Open Issues Over Time with Release Dates...")
plt.figure(figsize=(18, 6))

# Plot open issues
plt.plot(df_open_issues['date'], df_open_issues['open_issues'],
         marker='o', linestyle='-', label='Open Issues', markersize=4)

# Add vertical lines for releases; labels sit at the top of the curve
label_height = df_open_issues['open_issues'].max()
for release in releases:
    published_at = release.get('published_at')
    if not published_at:
        # An entry without a publication date cannot be placed on the axis.
        continue
    release_date = datetime.strptime(published_at, GITHUB_TIME_FORMAT).date()
    if first_date <= release_date <= last_date:
        # Fall back to the tag when the release has no display name.
        label = release.get('name') or release.get('tag_name') or ''
        plt.axvline(x=release_date, color='red', linestyle='--', lw=0.5)
        plt.text(release_date, label_height, label, rotation=90,
                 verticalalignment='bottom', fontsize=8, color='red')

plt.title('Open Issues Over Time with Release Dates')
plt.xlabel('Date')
plt.ylabel('Number of Open Issues')
plt.grid(True)
plt.legend()
plt.tight_layout()  # Adjust layout to fit labels and text
plt.show()
print("Visualization completed.")
import json
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import pandas as pd
# Plot the number of open issues per day since the first issue, shading the
# period that follows the first release of each major.minor version.

GITHUB_TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


def _build_issue_frame(raw_issues):
    """Turn the loaded issue entries into a DataFrame.

    The result has the columns ``created_at`` and ``closed_at`` (datetimes,
    ``closed_at`` missing when the entry has no closing time) and
    ``is_pull_request`` (True when the entry has a ``'pull_request'`` key).
    """
    rows = [
        {
            'created_at': datetime.strptime(item['created_at'], GITHUB_TIME_FORMAT),
            'closed_at': (datetime.strptime(item['closed_at'], GITHUB_TIME_FORMAT)
                          if item['closed_at'] else None),
            'is_pull_request': 'pull_request' in item,
        }
        for item in raw_issues
    ]
    frame = pd.DataFrame(rows, columns=['created_at', 'closed_at', 'is_pull_request'])
    # Force the dtypes: with no rows, or with no closed issue at all, pandas
    # would otherwise leave object columns on which `.dt` fails.
    frame['created_at'] = pd.to_datetime(frame['created_at'])
    frame['closed_at'] = pd.to_datetime(frame['closed_at'])
    frame['is_pull_request'] = frame['is_pull_request'].astype(bool)
    return frame


def _count_open_issues_per_day(frame, first_day, last_day):
    """Count open non-pull-request issues for each day of an inclusive range.

    An issue counts as open on a day when it was created on or before that
    day and is either not closed or closed on a later day.
    """
    # These per-issue values do not depend on the day, so compute them once.
    created_dates = frame['created_at'].dt.date
    closed_dates = frame['closed_at'].dt.date
    never_closed = frame['closed_at'].isnull()
    is_issue = ~frame['is_pull_request']

    rows = []
    day = first_day
    while day <= last_day:
        open_mask = (created_dates <= day) & (never_closed | (closed_dates > day)) & is_issue
        rows.append({'date': day, 'open_issues': open_mask.sum()})
        day += timedelta(days=1)
    return pd.DataFrame(rows)


def _earliest_release_per_minor(raw_releases):
    """Map each ``major.minor`` tag prefix to its earliest publication date.

    The prefix is the first two dot-separated parts of ``tag_name``. Entries
    without a publication date are skipped.
    """
    earliest = {}
    for release in raw_releases:
        published_at = release.get('published_at')
        if not published_at:
            # An entry without a publication date cannot be placed on the axis.
            continue
        major_minor = '.'.join(release['tag_name'].split('.')[:2])
        release_date = datetime.strptime(published_at, GITHUB_TIME_FORMAT).date()
        earliest[major_minor] = min(earliest.get(major_minor, release_date), release_date)
    return earliest


# Load issues and releases from JSON files
with open('issues.json', 'r') as file:
    issues = json.load(file)
with open('releases.json', 'r') as file:
    releases = json.load(file)

# Processing issue data
print("Processing issue data...")
df_issues = _build_issue_frame(issues)
if df_issues.empty:
    # Without any issue there is no first date to start the time axis from.
    raise SystemExit("No issues found in 'issues.json'; nothing to plot.")

# The time axis runs from the first created issue until today
first_date = df_issues['created_at'].min().date()
last_date = datetime.now().date()

# Counting open issues by date
df_open_issues = _count_open_issues_per_day(df_issues, first_date, last_date)

# Grouping releases by major.minor version, ordered by first release date
grouped_releases = _earliest_release_per_minor(releases)
sorted_releases = sorted(grouped_releases.items(), key=lambda x: x[1])

# Rainbow colors, reused cyclically when there are more groups than colors
colors = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet']

# Plotting
print("Plotting Open Issues Over Time with Release Groups...")
plt.figure(figsize=(18, 6))
plt.plot(df_open_issues['date'], df_open_issues['open_issues'],
         marker='o', linestyle='-', label='Open Issues', markersize=4)

# Adding colored rectangles for release groups: each period ends where the
# next group starts, and the last one ends today.
label_height = df_open_issues['open_issues'].max()
period_ends = [start for _, start in sorted_releases[1:]] + [last_date]
for i, ((version, start_date), end_date) in enumerate(zip(sorted_releases, period_ends)):
    color = colors[i % len(colors)]
    plt.axvspan(start_date, end_date, color=color, alpha=0.3)
    plt.text(start_date, label_height, version, fontsize=8, color=color, ha='left')

plt.title('Open Issues Over Time with Colored Release Periods')
plt.xlabel('Date')
plt.ylabel('Number of Open Issues')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
print("Visualization completed.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment