Skip to content

Instantly share code, notes, and snippets.

@jennynz
Last active August 24, 2022 01:20
Show Gist options
  • Save jennynz/6a29ab14da932b114a7328fb0f1ae096 to your computer and use it in GitHub Desktop.
Save jennynz/6a29ab14da932b114a7328fb0f1ae096 to your computer and use it in GitHub Desktop.
Datavis for how long contributors wait for a review on open source projects
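# Given a dataframe called `df` which you've wrangled to have the columns:
# - org (str)
# - year (int or float)
# - bin (str) (e.g. "Under 1 day", "1 day to 1 week")
# - percent (float; the y-axis below is fixed to [0, 1], so presumably a fraction, not 0-100)
# and two sequences that this script uses but does not define:
# - bins: the bin labels, in stacking order (the first one is drawn at the bottom of each bar)
# - BIN_COLOURS: one matplotlib colour per entry in `bins`, in the same order
# here's the matplotlib code to generate the datavis in this blog post:
# https://levelup.gitconnected.com/how-does-pr-review-wait-time-affect-your-open-source-project-d79bd0af0ea3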
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# "Plus Jakarta Sans" open source font here: https://github.com/tokotype/PlusJakartaSans/releases
sns.set(style="whitegrid", font_scale=1.4, font="Plus Jakarta Sans")

# Labels written beside the charts as figure text, top to bottom.
org_repos_anonymised = [
    'facebook/react',
    'Ruby framework 1 (anonymised)',
    'Ruby framework 2 (anonymised)',
    'tensorflow/tensorflow',
    'travis-ci (various repos)',
    'vuejs/vue',
]

fig = plt.figure(figsize=(9.5, 13))
orgs = df['org'].unique()
# One row of axes per organisation, plus one extra row that is left empty
# as padding at the bottom of the figure.
axs = fig.subplots(len(orgs) + 1, 1)
years = list(range(2017, 2022))  # 2017 through 2021 inclusive
BAR_WIDTH = 1  # yearly bars are one unit apart, so adjacent bars touch
# Group on `org`, the same column `orgs` is read from, so that the
# (org, bin) lookups against this grouping use matching keys.
grouped = df.groupby(['org', 'bin'])
# Draw one stacked bar chart per organisation: for every year, the bins'
# percentages are stacked on top of each other in the order given by `bins`.
# NOTE: `bins` (bin labels, bottom of the stack first) and `BIN_COLOURS`
# (one colour per bin, same order) are not defined in this file and must
# already exist when this runs.
for i, org in enumerate(orgs):
    ax = axs[i]
    # Running top of the stack for each year. An explicit float array keeps
    # the accumulation below element-wise.
    current_height = np.zeros(len(years))
    for j, bin_label in enumerate(bins):
        # Assumes exactly one row per entry of `years` for this (org, bin);
        # otherwise the heights will not line up with `years`.
        series_vals = (
            grouped.get_group((org, bin_label))
            .sort_values('year')['percent']
            .values
        )
        ax.bar(
            x=years,
            width=BAR_WIDTH,
            bottom=current_height,
            height=series_vals,
            color=BIN_COLOURS[j],
        )
        current_height = current_height + series_vals

    # Strip the chart down to just the bars: no vertical grid lines, no
    # frame, no ticks.
    ax.grid(axis='x', visible=False)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_visible(False)
    ax.set_ylim([0, 1])
    ax.set_yticks([])
    ax.set_xticks([])
# The last axis holds no data: it is blanked out completely and exists only
# to pad the bottom of the figure so the bottommost org name isn't cut off.
spacer_ax = axs[-1]
for edge in ('bottom', 'top', 'right', 'left'):
    spacer_ax.spines[edge].set_visible(False)
spacer_ax.set_yticks([])
spacer_ax.set_xticks([])
spacer_ax.grid(visible=False)
# Title, padded far above the first chart to leave room for the legend.
axs[0].set_title(
    "How long do contributors wait for a review?",
    loc='left',
    x=0.045,
    pad=150,
    fontsize=24,
)

# Legend heading, written as figure-level text.
fig.text(0.06, 0.945, '% of contributors with a median review wait time of...', ha='left', va='top', size=16)
# Empty line plots carry the labels, so the legend shows one thick coloured
# swatch per bin (the bars themselves are drawn without labels).
for j, bin_label in enumerate(bins):
    axs[0].plot([], [], color=BIN_COLOURS[j], lw=12, label=bin_label)
axs[0].legend(loc='lower left', bbox_to_anchor=(0.04, 1.5), frameon=False, ncol=3)

# Year labels go along the top of the first chart only.
axs[0].xaxis.tick_top()
axs[0].set_xticks(years)
axs[0].tick_params(axis="x", length=0, pad=10)

# Organisation names are placed with hard-coded figure coordinates, one
# every 0.12 of the figure height.
# NOTE(review): assumes `org_repos_anonymised` lists the organisations in the
# same order as the charts were drawn - confirm against `orgs`.
for i, org in enumerate(org_repos_anonymised):
    fig.text(0.058, 0.73 - i * 0.12, org, ha='left', va='top', size=14)

plt.tight_layout(h_pad=4.5)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment