Last active
November 18, 2016 12:25
-
-
Save adimania/0454b0d820e5f37178d040db312c6e16 to your computer and use it in GitHub Desktop.
Get the median response size for each HTTP status code in an Apache httpd access log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ast
import math
import re
import zlib

import redis
from flask import Flask

# Flask application object; the URL routes below are registered on it.
app = Flask(__name__)
# Path of the access log whose response sizes are analysed.
LOG_FILE = '/var/log/access.log'
# Redis client created with default connection settings; it stores the
# computed medians keyed by a checksum of the log file.
red = redis.Redis()
# host ident user [timestamp] "request" status size
# The pattern is anchored only at the start of the line, so extra trailing
# fields (e.g. referer and user agent) are ignored. A size of "-" is accepted
# and handled below.
_LOG_LINE = re.compile(
    r'\S+ \S+ \S+ \[.*?\] ".*?" (?P<status>\d+) (?P<size>\d+|-)'
)


def get_sizes(log_file=None):
    """Group the response sizes found in an access log by status code.

    Args:
        log_file: Path of the log to read. When omitted, the module-level
            ``LOG_FILE`` is used.

    Returns:
        A dict mapping each status code (as a string such as ``'200'``) to
        the list of response sizes for that code, in file order.
    """
    if log_file is None:
        log_file = LOG_FILE
    sizes = {}
    with open(log_file) as f:
        for line in f:
            match = _LOG_LINE.match(line)
            if match is None:
                # Blank or malformed line: skip it rather than abort the
                # whole scan.
                continue
            size = match.group('size')
            # A request logged with "-" in place of the size counts as 0.
            sizes.setdefault(match.group('status'), []).append(
                0 if size == '-' else int(size)
            )
    return sizes
def _median(values):
    """Return the median of a non-empty list of numbers as a float."""
    ordered = sorted(values)
    mid = len(ordered) // 2
    if len(ordered) % 2 == 0:
        # Even count: average the two middle values (indices start at 0).
        return (ordered[mid - 1] + ordered[mid]) / 2.0
    return float(ordered[mid])


def get_median(resp_code):
    """Return the median response size for one status code.

    https://en.wikipedia.org/wiki/Median
    The median is the value separating the higher half of a data sample, a
    population, or a probability distribution, from the lower half. In simple
    terms, it may be thought of as the "middle" value of a data set.

    The medians for every status code in the log are computed together and
    stored in Redis under the Adler-32 checksum of the log file's contents,
    so they are only recomputed when that checksum changes.

    Args:
        resp_code: Status code to look up, as a string (e.g. ``'200'``).

    Returns:
        The median response size for ``resp_code`` as a float.

    Raises:
        KeyError: If no log entry has the status code ``resp_code``.
    """
    # Read as bytes: adler32 operates on bytes, and the handle is closed
    # as soon as the checksum has been taken.
    with open(LOG_FILE, 'rb') as f:
        adler = zlib.adler32(f.read())
    cached = red.get(adler)
    if cached:
        # The Redis client may hand the stored value back as bytes, which
        # ast.literal_eval does not accept on Python 3.
        if isinstance(cached, bytes):
            cached = cached.decode('utf-8')
        median_dict = ast.literal_eval(cached)
    else:
        # Each code's median is computed from that code's own sizes.
        median_dict = {
            code: _median(values) for code, values in get_sizes().items()
        }
        red.set(adler, str(median_dict))
    return median_dict[resp_code]
@app.route('/<resp_code>', methods=["GET"])
def get_response(resp_code):
    """Serve the median response size for the status code given in the URL.

    Args:
        resp_code: Status code taken from the URL path, as a string.

    Returns:
        The median as text, or a 404 response when the log has no entry
        with that status code.
    """
    try:
        return str(get_median(resp_code))
    except KeyError:
        # An unknown code is a client-side miss, not a server error. The
        # message deliberately does not echo the user-supplied path segment.
        return 'No log entries for the requested status code', 404
if __name__ == '__main__':
    # When run as a script, start the app on all interfaces, port 8888.
    app.run(host="0.0.0.0", port=8888)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment