Created
June 23, 2022 16:35
-
-
Save ageorgou/1b348a253e396bf2bfe09788531964a3 to your computer and use it in GitHub Desktop.
Plot the distribution of commit frequency for multiple submissions (WIP)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# WIP! Have not tested with multiple submissions yet!
from pathlib import Path | |
import warnings | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
def get_dates_from_file(dates_file) -> pd.Series:
    """Count the commits made on each calendar day listed in *dates_file*.

    The file is expected to hold one commit timestamp per line and no
    header, e.g. the output of ``git log --format="%ci"``.

    Args:
        dates_file: Anything ``pd.read_csv`` accepts (a path or an open
            file-like object).

    Returns:
        A Series of commit counts, indexed by the commit's date formatted as
        a ``"YYYY-MM-DD"`` string (index name ``"date"``) and sorted by date.
    """
    timestamps = pd.read_csv(dates_file, header=None, names=["timestamp"])
    # Parse each entry on its own instead of converting the whole column at
    # once: a log can mix UTC offsets (e.g. commits made either side of a
    # daylight-saving change), which a column-wide pd.to_datetime cannot
    # represent as a single dtype. Parsing per entry also keeps every commit
    # on the calendar day its author saw, rather than shifting it to UTC.
    parsed = timestamps["timestamp"].map(pd.Timestamp)
    # Dates are kept as strings so they can be used directly as index labels.
    timestamps["date"] = parsed.map(lambda moment: f"{moment:%Y-%m-%d}")
    commits_per_day = timestamps.groupby("date").size()
    return commits_per_day
if __name__ == "__main__":
    # Before running this, we need to generate the dates for each repository.
    # Specifically, `git log --format="%ci" > dates` must be run in the right
    # directory for each submission, and the resulting file moved up to the
    # "top level" of that submission's folder.

    # The submission ids are recovered from the folder names found in the
    # current working directory (Participant_<id>_assignsubmission_file_).
    # Sorting keeps the column order of the heatmap reproducible.
    folder_prefix = "Participant_"
    folder_suffix = "_assignsubmission_file_"
    submission_ids = sorted(
        folder.name[len(folder_prefix):-len(folder_suffix)]
        for folder in Path().glob(f"{folder_prefix}*{folder_suffix}")
        if folder.is_dir()
    )

    # DataFrame holding the number of commits per day for all students.
    # Indexed by date; each column corresponds to one student.
    all_numbers = pd.DataFrame()
    for submission in submission_ids:
        student_file = Path(f"{folder_prefix}{submission}{folder_suffix}") / "dates"
        # Not everyone necessarily has a repo (or any commits): skip those
        # submissions with a warning instead of aborting the whole run.
        try:
            commits_per_day = get_dates_from_file(student_file)
        except (FileNotFoundError, pd.errors.EmptyDataError) as err:
            warnings.warn(f"Could not read dates for student {submission}: {err}")
            continue
        # The Series name becomes the column label once joined.
        commits_per_day.name = submission
        try:
            all_numbers = all_numbers.join(commits_per_day, how="outer", sort=True)
        except Exception as err:
            # Best effort: keep going with the remaining students, but report
            # why this one failed. Unlike a bare `except`, this does not
            # swallow KeyboardInterrupt or SystemExit.
            warnings.warn(f"Could not merge data for student {submission}: {err}")

    if all_numbers.empty:
        raise SystemExit("No commit data found; nothing to plot.")

    # The outer join leaves NaNs for dates where a student has no commits;
    # we'd prefer 0.
    all_numbers = all_numbers.fillna(0).astype(int)

    # Plot: rows are dates, columns are students.
    plt.imshow(all_numbers.to_numpy())  # TODO: needs some massaging
    plt.savefig("heatmap.png")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment