|
#!/usr/bin/python3 |
|
import argparse |
|
import pandas as pd |
|
from math import ceil |
|
|
|
def process(input_files, videos, output_file, aggregated):
    """Compute per-user video-watching statistics from Moodle activity logs.

    Reads one or more Moodle log CSV files (no header row) and a video
    catalogue CSV (index = lesson title, column ``length`` in minutes),
    keeps each user's most recent access to each video lesson, and writes:

    * ``output_file`` — the filtered per-access rows (Time, User, Context, length);
    * ``aggregated`` — per-user totals (Classes, Minutes, % Minutes).

    Parameters
    ----------
    input_files : iterable of path-or-file
        Moodle log CSVs, most recent entries first within each file.
    videos : path-or-file
        CSV mapping lesson title to its ``length`` in minutes.
    output_file : path-or-file
        Destination for the full (non-aggregated) rows.
    aggregated : path-or-file
        Destination for the per-user aggregated statistics.
    """
    # Context strings are prefixed depending on resource type / Moodle locale.
    prefixes = ["URL: Videoaula: ", "Arquivo: Videoaula: ", "File: Videoaula: "]

    lessons = pd.read_csv(videos, index_col=0)

    total_length = lessons['length'].sum()

    # One "class" is worth this many minutes of video.
    minutes_per_class = 100

    fieldnames = ["Time", "User", "Affected", "Context", "Component", "Event",
                  "Description", "Origin", "IP"]

    logs = pd.concat((pd.read_csv(file, names=fieldnames) for file in input_files),
                     ignore_index=True)

    # Pattern that swallows everything up to and including a known prefix.
    re_prefixes = '.*(?:' + '|'.join(prefixes) + ')'

    # Restrict to video lessons' logs; na=False drops rows with missing Context.
    # .copy() avoids SettingWithCopyWarning on the assignment below.
    logs = logs[logs.Context.str.contains(re_prefixes, na=False)].copy()

    # Remove unnecessary prefixes. regex=True must be explicit: pandas >= 2.0
    # defaults str.replace to literal matching, which would leave prefixes intact.
    logs['Context'] = logs['Context'].str.replace(re_prefixes, '', regex=True)

    # Keep only the most recent logs (they appear first in the files)
    logs = logs.drop_duplicates(subset=['User', 'Context'], keep='first')

    logs = logs[['Time', 'User', 'Context']]

    # Add length column; inner join drops contexts absent from the catalogue.
    logs = logs.join(lessons, on='Context', how='inner')

    logs = logs.reset_index(drop=True)

    logs = logs.sort_values(by=['User', 'length'], ascending=[True, False])

    # Sum only the numeric length column — summing the string columns
    # (Time, Context) concatenates them in pandas 2.x.
    agg_logs = logs.groupby('User')[['length']].sum()

    agg_logs['Classes'] = agg_logs['length'].divide(minutes_per_class).apply(ceil)

    agg_logs['Minutes'] = agg_logs['length'].apply(ceil)

    agg_logs['% Minutes'] = agg_logs['Minutes'].divide(total_length).multiply(100).apply(ceil)

    # Export results to two CSV files (full and aggregated data)
    logs.to_csv(output_file, index=False)

    col_names = ["Classes", "Minutes", "% Minutes"]

    agg_logs[col_names].to_csv(aggregated, index=True)
|
|
|
if __name__ == "__main__":

    # Command-line entry point: collect the file names and run the pipeline.
    cli = argparse.ArgumentParser(description='Get statistics from Moodle logs.')

    cli.add_argument('-l', '--logs', nargs='+',
                     help='names of the csv log files download from Moodle')

    cli.add_argument('-v', '--videos',
                     help='name of the csv file title and length of videos')

    cli.add_argument('-s', '--stats',
                     help='name of the file where the full output will be saved')

    cli.add_argument('-a', '--aggregated',
                     help='name of the file for aggregated statistics')

    options = cli.parse_args()

    process(options.logs, options.videos, options.stats, options.aggregated)