Skip to content

Instantly share code, notes, and snippets.

@iainlane
Last active September 21, 2023 13:22
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iainlane/3d77d8b09b5c5532154a51a1f3183181 to your computer and use it in GitHub Desktop.
Save iainlane/3d77d8b09b5c5532154a51a1f3183181 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import csv

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from dateutil import parser
# Custom date parser
def custom_date_parser(date_string):
    """Parse a timestamp, looking only at its date and time parts.

    At most the first two whitespace-separated tokens of ``date_string`` are
    handed to ``dateutil``; anything after them (for example a trailing
    ``+0000 UTC``) is ignored.  A value consisting of a single token, such
    as an ISO 8601 timestamp like ``2023-09-20T12:34:56Z``, is parsed as a
    whole instead of failing with ``IndexError``.

    Args:
        date_string: Timestamp text, e.g. ``"2023-09-20 12:34:56 +0000 UTC"``.

    Returns:
        The ``datetime.datetime`` produced by ``dateutil.parser.parse``.  It
        is timezone-aware only if the retained tokens carry an offset.

    Raises:
        ValueError: If ``dateutil`` cannot make sense of the retained text.
    """
    # split() without an argument also tolerates repeated whitespace, which
    # split(" ") would turn into empty tokens (silently dropping the time).
    date_and_time = " ".join(date_string.split()[:2])
    return parser.parse(date_and_time)
# Generate this tsv file using the following command (takes a while):
# gh pr list --limit 9999999 --state all > pr.tsv
#
# The input is treated as plain tab-separated text, not quoted CSV, so quote
# handling is switched off.  With pandas' default quoting, a double quote in
# a PR title can swallow the tabs and newlines that follow it and corrupt the
# column layout of the affected rows.
df = pd.read_csv(
    "pr.tsv",
    sep="\t",
    header=None,
    names=["pr_number", "pr_title", "branch", "status", "date"],
    quoting=csv.QUOTE_NONE,
)
# Convert the textual timestamps to datetimes, order the rows chronologically
# and index the frame by that timestamp so it can be resampled below.
df["date"] = df["date"].apply(custom_date_parser)
df = df.sort_values("date")
df = df.set_index(pd.DatetimeIndex(df["date"]))
# Resample the DataFrame by day and count the number of PRs
prs_per_day = df.resample("D").size()
# Plotting: one large figure with the daily PR counts drawn as a light grey
# line; title and axis labels are set on the Axes that seaborn drew into.
plt.figure(figsize=(36, 18))
axes = sns.lineplot(data=prs_per_day, color="lightgrey")
axes.set_title("Rate of PR creation over time")
axes.set_xlabel("Date")
axes.set_ylabel("PRs per day")
# Select the PRs whose number is a multiple of 10,000 and record, for each of
# these milestones, its date and the time elapsed since the previous one.
milestone_prs = df[df["pr_number"] % 10000 == 0]
milestones = pd.DataFrame(
    {"milestone": milestone_prs["pr_number"], "date": milestone_prs.index}
)
milestones["time_to_achieve"] = milestones["date"].diff()
# diff() leaves the first row without a value.  For PR 10000 the gap is
# measured from the earliest timestamp in the data instead.  The filled
# column is assigned back explicitly: calling fillna(inplace=True) on a
# column selection is deprecated in pandas and does not update the DataFrame
# when Copy-on-Write is active.
time_to_first_milestone = (
    milestones.loc[milestones["milestone"] == 10000, "date"] - df.index.min()
)
milestones["time_to_achieve"] = milestones["time_to_achieve"].fillna(
    time_to_first_milestone
)
# Append the day with the highest PR count as an extra "Busiest Day" row so
# that it is annotated on the chart together with the numeric milestones.
busiest_day = prs_per_day.idxmax()
milestones = pd.concat(
    [
        milestones,
        pd.DataFrame(
            {"milestone": "Busiest Day", "date": busiest_day, "time_to_achieve": None},
            index=[0],
        ),
    ],
    ignore_index=True,
)
# Mark every milestone row with a dashed vertical line and a rotated label:
# green for the busiest day, red for the numeric PR milestones.
busiest_day_count = prs_per_day.loc[busiest_day]
for _, entry in milestones.iterrows():
    when = entry["date"]
    is_busiest = entry["milestone"] == "Busiest Day"
    line_color = "green" if is_busiest else "red"
    if is_busiest:
        # Keep only the date part of the timestamp's string form
        day = str(when).split(" ")[0]
        label = f"Busiest Day ({day})-{busiest_day_count} PRs"
    else:
        label = f'PR {entry["milestone"]}-{entry["time_to_achieve"].days} days'
    plt.axvline(x=when, color=line_color, linestyle="--")
    plt.annotate(
        label,
        xy=(when, 0),
        xytext=(5, 0),  # nudge the text slightly away from the line
        textcoords="offset points",
        rotation=90,
        verticalalignment="bottom",
        color="blue",
        fontsize=12,
    )
# Write the chart to disk and dump the milestone table to stdout
plt.savefig("pr_rate.svg", format="svg")
print(milestones)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment