Last active
August 29, 2015 14:12
-
-
Save dskarataev/7ef994119876ac8e7463 to your computer and use it in GitHub Desktop.
logstat.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# coding: utf-8
# Implementation of the screening test for the Backend Developer position
# 2014, Denis Karataev
import collections
import re

import numpy
# Default location of the router log that print_statistics() reads.
LOG_PATH = '/home/dsk/sample.log'

# Request templates ("<METHOD> <path>") to aggregate statistics for.
# The report is printed in exactly this order.
TEMPLATES = [
    'GET /api/users/{user_id}/count_pending_messages',
    'GET /api/users/{user_id}/get_messages',
    'GET /api/users/{user_id}/get_friends_progress',
    'GET /api/users/{user_id}/get_friends_score',
    'POST /api/users/{user_id}',
    'GET /api/users/{user_id}',
]
def _collect_samples(lines, templates):
    """Group timing samples from log lines by request template.

    Args:
        lines: iterable of log lines (for example an open file).
        templates: iterable of "<METHOD> <path>" strings to keep; requests
            that do not normalize to one of them are ignored.

    Returns:
        A dict mapping each template that occurred to a dict with the lists
        'connect_time', 'service_time', 'total_time' (integers, taken from
        the "ms" fields) and 'dynos' (strings), one entry per matching line.
    """
    # Captures method, path, dyno, connect time and service time of
    # GET/POST requests whose path starts with /api/users/<digits>.
    pattern_main = re.compile(r'.*method=(GET|POST) path=(/api/users/\d+?.*?) host=.*? dyno=(.*?)'
                              r' connect=(\d+?)ms service=(\d+?)ms.*')
    # Matches the concrete user id so it can be swapped for {user_id}.
    pattern_sub = re.compile(r'^/api/users/\d+?(/|$)')

    wanted = set(templates)
    samples = {}
    for line in lines:
        m = pattern_main.match(line)
        if not m:
            continue
        method, path, dyno = m.group(1), m.group(2), m.group(3)
        connect_time, service_time = int(m.group(4)), int(m.group(5))

        # Replace the exact user id with the {user_id} placeholder.  The
        # substitution always leaves a slash after the placeholder, so a
        # trailing slash is dropped afterwards to match the template form.
        path = pattern_sub.sub('/api/users/{user_id}/', path)
        if path.endswith('/'):
            path = path[:-1]
        key = ' '.join((method, path))
        if key not in wanted:
            continue

        entry = samples.setdefault(key, {
            'connect_time': [],
            'service_time': [],
            'total_time': [],
            'dynos': [],
        })
        entry['connect_time'].append(connect_time)
        entry['service_time'].append(service_time)
        entry['total_time'].append(connect_time + service_time)
        entry['dynos'].append(dyno)
    return samples


def _print_template_report(name, entry):
    """Print call count, mean/median times and the busiest dyno for one template."""
    connect_time = entry['connect_time']
    service_time = entry['service_time']
    total_time = entry['total_time']

    # One Counter pass instead of calling list.count() per distinct dyno.
    most_dyno = collections.Counter(entry['dynos']).most_common(1)[0][0]

    print(name)
    # One sample was appended per call, so the list length is the call count.
    print('Was called %d times' % len(connect_time))
    # Three digits after the point, like the ping command.
    print('Mean time (connect/service/total) = %.3f/%.3f/%.3f ms' % (numpy.mean(connect_time),
                                                                    numpy.mean(service_time),
                                                                    numpy.mean(total_time)))
    # The median of integers is either an integer or an integer + 0.5,
    # so one digit after the point is enough.
    print('Median time (connect/service/total) = %.1f/%.1f/%.1f ms' % (numpy.median(connect_time),
                                                                      numpy.median(service_time),
                                                                      numpy.median(total_time)))
    print('The most responded dyno: %s' % most_dyno)
    print('')


def print_statistics(log_path=None, templates=None):
    """Print per-endpoint timing statistics gathered from a router log.

    For every template that occurs in the log the report shows how many
    times it was called, the mean and median connect/service/total times
    in milliseconds, and the dyno that served it most often.  Templates
    are reported in the order given by ``templates``.

    Args:
        log_path: path of the log file; defaults to the module-level
            LOG_PATH when None.
        templates: sequence of "<METHOD> <path>" templates to report;
            defaults to the module-level TEMPLATES when None.

    Returns:
        None.  All output goes to standard output.  An IOError raised while
        opening or reading the log is reported as a message, not raised.
    """
    # Defaults are resolved at call time so the module constants stay the
    # single source of truth for callers that pass nothing.
    if log_path is None:
        log_path = LOG_PATH
    if templates is None:
        templates = TEMPLATES

    # Only file access sits inside the try block, so an error raised while
    # printing the report is not mislabelled as a logfile problem.
    try:
        with open(log_path) as f:
            samples = _collect_samples(f, templates)
    except IOError as e:
        print('Problem with logfile: %s' % e)
        return

    if not samples:
        print('There is no data that could be interested.')
        return

    # Iterate over the templates (not the dict) to keep the requested order.
    for name in templates:
        if name in samples:
            _print_template_report(name, samples[name])
# Run the report with the default log path and templates when executed as a script.
if __name__ == '__main__':
    print_statistics()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment