Skip to content

Instantly share code, notes, and snippets.

@ageorgou
Created June 23, 2022 16:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ageorgou/1b348a253e396bf2bfe09788531964a3 to your computer and use it in GitHub Desktop.
Save ageorgou/1b348a253e396bf2bfe09788531964a3 to your computer and use it in GitHub Desktop.
Plot the distribution of commit frequency for multiple submissions (WIP)
# WIP! Have not tested with multiple submissions yet!
from pathlib import Path
import warnings
import matplotlib.pyplot as plt
import pandas as pd
def get_dates_from_file(dates_file) -> pd.Series:
    """Count the commits made on each calendar day listed in a dates file.

    The file is expected to hold one commit timestamp per line, as written by
    ``git log --format="%ci"`` (for example ``2022-06-23 16:35:00 +0100``).

    Args:
        dates_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        Number of commits per day, indexed by ``YYYY-MM-DD`` strings (index
        named ``"date"``) in ascending order. The Series is empty when the
        file contains no timestamps.
    """
    no_commits = pd.Series([], index=pd.Index([], name="date"), dtype="int64")
    try:
        timestamps = pd.read_csv(dates_file, header=None, names=["timestamp"])
    except pd.errors.EmptyDataError:
        # A repository without commits produces an empty file.
        return no_commits
    if timestamps.empty:
        return no_commits

    # Parse every timestamp on its own instead of converting the whole column
    # at once: a single history can mix UTC offsets (e.g. across a daylight
    # saving change), which column-wise parsing does not handle uniformly
    # across pandas versions. Formatting the individual Timestamp keeps the
    # calendar day as seen in the commit's own offset.
    timestamps["date"] = timestamps.timestamp.map(
        lambda stamp: f"{pd.Timestamp(stamp):%Y-%m-%d}"
    )
    # groupby sorts its keys, and ISO dates sort chronologically as strings.
    return timestamps.groupby("date").size()
if __name__ == "__main__":
    # Before running this, we need to generate the dates for each repository.
    # Specifically, `git log --format="%ci" > dates` must be run in the right
    # directory for each submission, and the resulting file moved up to the
    # "top level" of that submission's folder.

    # NOTE(review): the loop below originally iterated over `submission_ids`,
    # a name that was never defined. The IDs are recovered here from the
    # folders in the current directory that follow the naming pattern used
    # for `student_file` below -- confirm this is the intended source of IDs.
    prefix, suffix = "Participant_", "_assignsubmission_file_"
    submission_ids = sorted(
        folder.name[len(prefix):-len(suffix)]
        for folder in Path(".").glob(f"{prefix}*{suffix}")
        if folder.is_dir()
    )

    # Create a DataFrame to hold the number of commits per day for all
    # students. Indexed by date, each column corresponds to a student.
    all_numbers = pd.DataFrame()
    for submission in submission_ids:
        # Run git log if needed or retrieve file
        student_file = Path(f"Participant_{submission}_assignsubmission_file_") / "dates"
        try:
            commits_per_day = get_dates_from_file(student_file)
        except FileNotFoundError:
            # Not every submission is guaranteed to contain a repository;
            # skip it rather than aborting the whole run.
            warnings.warn(f"No dates file found for student {submission}")
            continue
        commits_per_day.name = submission
        try:
            all_numbers = all_numbers.join(commits_per_day, how="outer", sort=True)
        except Exception as err:
            # Best effort: keep going with the remaining students, but report
            # why the merge failed. Unlike a bare `except`, this lets
            # KeyboardInterrupt and SystemExit propagate.
            warnings.warn(f"Could not merge data for student {submission}: {err}")

    if all_numbers.empty:
        raise SystemExit("No commit data found; nothing to plot")

    # The outer joins leave NaNs for dates where a student has no data;
    # we'd prefer 0.
    all_numbers = all_numbers.fillna(0).astype(int)

    # Plot
    # Earlier single-student version, kept for reference:
    # f = plt.figure()
    # ax = f.add_subplot(1, 1, 1)
    # ax.imshow([[c[1] for c in commits_per_day.items()]])
    # ax.set_aspect(20)
    # f.savefig("heatmap.png")
    plt.imshow(all_numbers.to_numpy())  # needs some massaging
    plt.savefig("heatmap.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment