Skip to content

Instantly share code, notes, and snippets.

@stefano-maggiolo
Created July 12, 2014 09:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stefano-maggiolo/efab11d4e34d3fde92d1 to your computer and use it in GitHub Desktop.
Save stefano-maggiolo/efab11d4e34d3fde92d1 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Contest Management System - http://cms-dev.github.io/
# Copyright © 2014 Stefano Maggiolo <s.maggiolo@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Utility to compute histograms related to the submission queue status.
This tool works offline (that is, after a contest) to produce some histograms:
- the number of times the queue had size n (counted every second);
- the queue size by time;
- the time from submission to evaluation.
This script will get confused if a change of dataset happened during
the competition.
"""
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import sys
import matplotlib.pyplot as plt
from datetime import datetime
from cms import utf8_decoder
from cms.db import SessionGen, Submission, Contest, ask_for_contest
def _epoch_seconds(moment):
    """Return the whole seconds between 1970-01-01 and a datetime.

    The wall-clock fields are used as they are (any tzinfo is dropped),
    so two values converted by this function can be subtracted safely.
    This replaces strftime("%s"), which is a platform-specific extension
    and is not available everywhere.

    """
    naive = moment.replace(tzinfo=None)
    return int((naive - datetime(1970, 1, 1)).total_seconds())


def _log_line_seconds(fields):
    """Parse the date and time stored in the first two log fields.

    fields (list): a log line split on whitespace.

    return (int|None): the parsed time in seconds (see _epoch_seconds),
        or None if the fields are missing or not in the expected
        "%Y/%m/%d %H:%M:%S" format.

    """
    try:
        moment = datetime.strptime(fields[0] + " " + fields[1],
                                   "%Y/%m/%d %H:%M:%S")
    except (IndexError, ValueError):
        return None
    return _epoch_seconds(moment)


def _save_figure(title, xlabel, ylabel, path):
    """Decorate the current pyplot figure, save it to path and clear it."""
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.savefig(path)
    plt.clf()


def compute_histograms(contest_id, log_file, time_offset=0):
    """Plot queue size and waiting time statistics for a contest.

    Submission times are read from the database, queue sizes and
    evaluation completions are read from the log file. Three images are
    written in the current directory: pending.png (queue size by contest
    second), seconds_per_queue_size.png (histogram of the non-zero queue
    sizes) and waiting_time.png (histogram of the seconds between
    submission and first successful non-compile job).

    Log lines that cannot be parsed are skipped. When the log provides
    no data for a histogram, the corresponding image is still written
    but contains no bars.

    contest_id (int): id of the contest to analyze.
    log_file (str): path of the log file to parse.
    time_offset (int): seconds to subtract from the log times to make
        them comparable with the times stored in the database.

    """
    with SessionGen() as session:
        contest = Contest.get_from_id(contest_id, session)
        start_time = _epoch_seconds(contest.start)
        submission_times = dict(
            (submission.id, _epoch_seconds(submission.timestamp))
            for submission in contest.get_submissions())

    seen_submission_ids = set()
    evaluation_times = []
    pending_per_time = {}

    # The context manager guarantees the log file is closed.
    with open(log_file) as log:
        for line in log:
            if line.endswith("pending.\n"):
                fields = line.split()
                try:
                    pending_number = int(fields[5])
                except (IndexError, ValueError):
                    continue
                logged_at = _log_line_seconds(fields)
                if logged_at is None:
                    continue
                # Key: seconds elapsed since the start of the contest.
                contest_second = logged_at - time_offset - start_time
                pending_per_time[contest_second] = pending_number
            elif "completed. Success" in line:
                fields = line.split()
                # Field 6 is compared with the job type and field 9 is
                # used as submission id: shorter lines are not usable.
                if len(fields) < 10 or fields[6] == 'compile':
                    continue
                submission_id = fields[9]
                # Only the first completion of each submission counts.
                if submission_id in seen_submission_ids:
                    continue
                seen_submission_ids.add(submission_id)
                logged_at = _log_line_seconds(fields)
                if logged_at is None:
                    continue
                try:
                    timestamp = submission_times[int(submission_id)]
                except (KeyError, ValueError):
                    # Not a submission of this contest, or not an id.
                    continue
                evaluation_times.append(logged_at - time_offset - timestamp)

    # One sample per second from 0 to one past the last logged second;
    # with no samples at all the series is the single point (0, 0).
    max_time = max(pending_per_time) if pending_per_time else -1
    x_axis = list(range(max(max_time + 2, 1)))
    y_axis = [pending_per_time.get(second, 0) for second in x_axis]

    # Default check time is 2s (original author's note), so a second
    # without a sample between two sampled seconds gets their average.
    time_with_n_pending = []
    for second in x_axis:
        if second not in pending_per_time \
                and 0 < second < len(x_axis) - 1 \
                and second - 1 in pending_per_time \
                and second + 1 in pending_per_time:
            y_axis[second] = (y_axis[second - 1] + y_axis[second + 1]) / 2.0
        if y_axis[second] > 0:
            time_with_n_pending.append(y_axis[second])

    plt.plot(x_axis, y_axis)
    _save_figure('Queue size during the contest',
                 'Contest time',
                 'Number of submissions not evaluated',
                 'pending.png')

    if time_with_n_pending:
        # Interpolated sizes are multiples of 0.5, so the maximum may be
        # a float: convert it before handing it to range().
        half_steps = int(round(max(time_with_n_pending) * 2)) + 4
        plt.hist(time_with_n_pending,
                 bins=[x / 2.0 for x in range(half_steps)])
    _save_figure(
        'Number of second the queue had a certain size (0 not pictured)',
        'Queue size',
        'Number of seconds',
        'seconds_per_queue_size.png')

    if evaluation_times:
        # At least two bin edges are needed even if all waits are < 0.
        last_edge = max(max(evaluation_times) + 2, 2)
        plt.hist(evaluation_times, bins=list(range(last_edge)))
    _save_figure('Waiting time between submission and evaluation',
                 'Number of seconds',
                 'Number of submissions',
                 'waiting_time.png')
def main():
    """Parse arguments and launch process.

    If no contest id is given on the command line, it is asked
    interactively through ask_for_contest().

    return (int): 0, used as the exit status of the script.

    """
    # The description and the --contest-id help used to describe an
    # unrelated tool ("Remove a user from a contest in CMS.").
    parser = argparse.ArgumentParser(
        description="Compute histograms about the submission queue of a "
                    "contest in CMS.")
    parser.add_argument("-c", "--contest-id", action="store", type=int,
                        help="id of the contest to analyze")
    parser.add_argument("-l", "--log-file", action="store", type=str,
                        required=True,
                        help="path to ES log file")
    parser.add_argument("-t", "--time-offset", action="store", type=int,
                        help="time difference in seconds between UTC and "
                        "log time", default=0)
    args = parser.parse_args()

    if args.contest_id is None:
        args.contest_id = ask_for_contest()

    compute_histograms(args.contest_id,
                       args.log_file,
                       time_offset=args.time_offset)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment