Skip to content

Instantly share code, notes, and snippets.

@dpshelio
Forked from ageorgou/analyse_grades.py
Last active June 23, 2022 18:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dpshelio/aa57ecd98a253b390d6c0156d27891af to your computer and use it in GitHub Desktop.
Save dpshelio/aa57ecd98a253b390d6c0156d27891af to your computer and use it in GitHub Desktop.
"""Plot the distribution of commit frequency for multiple submissions (WIP)."""
from pathlib import Path
import re
import tempfile
import shutil
import shlex
import subprocess
import warnings
import matplotlib.pyplot as plt
import pandas as pd
def get_dates_from_file(dates_file) -> pd.Series:
    """Count the commits made on each calendar day listed in *dates_file*.

    The file is read as a headerless, single-column CSV with one commit
    timestamp per line, e.g. ``2021-11-20 10:15:00 +0000`` (the format
    produced by ``git log --pretty=format:'%ci'``).

    Parameters
    ----------
    dates_file : str or path-like
        File holding one timestamp per line.

    Returns
    -------
    pd.Series
        Number of commits per day, indexed by ``"YYYY-MM-DD"`` strings
        (index name ``"date"``) in ascending order.
    """
    timestamps = pd.read_csv(dates_file, header=None, names=["timestamp"])
    # Parse row by row instead of with one pd.to_datetime() call over the
    # whole column: a history may mix UTC offsets (e.g. commits made before
    # and after a daylight-saving change), which the vectorised parser warns
    # about or rejects depending on the pandas version.  Per-row parsing keeps
    # each commit's own offset, so the day counted is the committer's local day.
    parsed = timestamps["timestamp"].map(pd.Timestamp)
    timestamps["date"] = parsed.map(lambda stamp: f"{stamp:%Y-%m-%d}")
    # groupby sorts its keys, and ISO dates sort chronologically as strings.
    return timestamps.groupby("date").size()
# directory: extract submissions
def extract_submissions(directory_submissions):
    """Locate the submitted archives below *directory_submissions*.

    The directory is expected to contain one folder per participant, named
    like ``Participant_3548328_assignsubmission_file_``, each holding the
    submitted archive.  Only archives whose file name starts with five
    alphanumeric characters or eight digits followed by ``.tar.gz`` are kept.

    Parameters
    ----------
    directory_submissions : str or path-like
        Folder containing the ``Participant_*`` directories.

    Returns
    -------
    dict
        Maps the participant number (the text between the first and second
        underscore of the folder name) to
        ``{'path': Path, 'filename': str}`` for the archive found.  If a
        participant has several matching archives, the one sorting last wins.
    """
    # Compiled once, outside the loop; .match() anchors at the start only.
    student_id = re.compile(r'([a-zA-Z0-9]{5}|[0-9]{8})\.tar\.gz')
    submission_ids = {}
    # sorted() makes the result independent of the order in which the
    # filesystem happens to list the entries.
    for submission in sorted(Path(directory_submissions).glob('Participant_*/*tar*')):
        if not student_id.match(submission.name):
            continue
        id_submission = submission.parent.name.split('_')[1]
        submission_ids[id_submission] = {'path': submission, 'filename': submission.name}
    return submission_ids
# for each submission folder: extract tar file
# for each tar file: expand and find the git directory
def extract_tar(tarpath, sub_id):
    """Dump the commit dates of every git repository inside an archive.

    The archive is unpacked into a temporary directory.  For each ``.git``
    entry found in it, ``git log --pretty=format:%ci`` is run in the
    containing folder and its output is written to a file in the current
    working directory: ``<sub_id>_dates`` for the first repository, then
    ``<sub_id>_dates_01``, ``<sub_id>_dates_02``, ... for any further ones.

    Parameters
    ----------
    tarpath : str or path-like
        Archive to unpack (any format ``shutil.unpack_archive`` understands).
    sub_id : str
        Identifier used as the prefix of the output file names.
    """
    command = "git log --pretty=format:'%ci'"
    with tempfile.TemporaryDirectory() as tmpsub:
        # NOTE(review): archives come from outside; on Python versions that
        # support it, consider passing filter='data' to unpack_archive to
        # guard against path traversal.
        shutil.unpack_archive(tarpath, tmpsub)
        # The temporary tree disappears when the ``with`` block exits, so all
        # the work on it has to happen inside.  Sorting keeps the numbering of
        # the output files stable between runs.
        git_dirs = sorted(Path(tmpsub).glob('**/.git'))
        # enumerate() supplies the running number; the previous hand-kept
        # counter was never incremented, so every repository overwrote the
        # same output file.
        for git_counter, git_dir in enumerate(git_dirs):
            location = git_dir.parent
            git_dates = subprocess.run(shlex.split(command), capture_output=True,
                                       cwd=location)
            if git_dates.returncode != 0:
                warnings.warn(f"git log failed for submission {sub_id} in {location}")
            output = f"{sub_id}_dates"
            if git_counter != 0:
                output += f"_{git_counter:02d}"
            with open(output, 'bw') as out:
                out.write(git_dates.stdout)
                out.write(b'\n')  # not needed, but looks nicer when checking the files.
# extract the dates into a dataframe.
def merge_dates(datespath=Path('.'), plot=True, deadline="2022-01-01"):
    """Combine the per-submission commit counts into one table and plot it.

    Every file matching ``*_dates*`` in *datespath* is read with
    ``get_dates_from_file`` and becomes one column, named after the part of
    the file name before the first underscore.  Rows are calendar days
    (``"YYYY-MM-DD"`` strings); days without commits hold 0.

    Parameters
    ----------
    datespath : pathlib.Path
        Directory holding the ``*_dates*`` files.
    plot : bool
        When true, save a heatmap of the table (rows = days, columns =
        submissions) as ``heatmap.png`` in the current working directory,
        with a red horizontal line marking the deadline.
    deadline : str
        Deadline day as ``"YYYY-MM-DD"``.

    Returns
    -------
    pd.DataFrame
        The merged table of commits per day and submission.
    """
    all_numbers = pd.DataFrame()
    # sorted() gives a stable column order regardless of directory listing order.
    for submission in sorted(datespath.glob('*_dates*')):
        commits_per_day = get_dates_from_file(submission)
        commits_per_day.name = submission.name.split('_')[0]
        try:
            all_numbers = all_numbers.join(commits_per_day, how="outer", sort=True)
        except (ValueError, TypeError):
            # e.g. a second file for a student whose column already exists.
            warnings.warn(f"Could not merge data for student {submission}")
    # This will leave NaNs for dates where no data existed, we'd prefer 0
    all_numbers = all_numbers.fillna(0).astype(int).sort_index()

    if plot:
        if all_numbers.empty:
            warnings.warn("No commit data found; skipping the heatmap")
            return all_numbers
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.imshow(all_numbers.to_numpy())
        # Replace the numeric row ticks chosen by matplotlib with the dates
        # they correspond to; ticks outside the table get an empty label.
        locs = ax.get_yticks()
        labels = [all_numbers.index[int(loc)] if 0 <= loc < len(all_numbers.index) else ""
                  for loc in locs]
        ax.set(yticklabels=[])
        ax.tick_params(left=False)
        ax.set_yticks(locs)
        ax.set_yticklabels(labels, rotation=35)
        # Row of the deadline.  If nobody committed on that day it is not in
        # the index, so draw the line on the boundary between the last day
        # before and the first day after it instead of failing.
        if deadline in all_numbers.index:
            value = all_numbers.index.get_loc(deadline)
        else:
            value = all_numbers.index.searchsorted(deadline) - 0.5
        ax.hlines(value, 0, all_numbers.shape[1], colors='red')
        fig.savefig("heatmap.png")
        plt.close(fig)  # release the figure once it is on disk
    return all_numbers
if __name__ == "__main__":
    # Imported here because it is only needed when running as a script.
    import argparse

    parser = argparse.ArgumentParser(
        description="Plot the distribution of commit frequency for multiple submissions."
    )
    # Defaults reproduce the values that used to be hard-coded, so running
    # the script without arguments behaves as before.
    parser.add_argument(
        "submissions_dir",
        nargs="?",
        default='./submissions/coursework_01/02_20211130',
        help="directory containing the Participant_* submission folders",
    )
    parser.add_argument(
        "--deadline",
        default="2021-11-23",
        help="submission deadline as YYYY-MM-DD (marked in red on the heatmap)",
    )
    args = parser.parse_args()

    submissions = extract_submissions(args.submissions_dir)
    for submission, properties in submissions.items():
        extract_tar(properties['path'], submission)
    merge_dates(Path('.'), deadline=args.deadline)
    # TODO Read data from moodle csv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment