Last active
February 26, 2022 00:34
-
-
Save bunnylab/568336d90f4129f5601255629eaa26e0 to your computer and use it in GitHub Desktop.
script for simple web analytics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
'''
NGINX TRACKING PIXEL VIEW COUNT
This is a pretty simple python script to parse nginx access logs for hits on a
set of image files corresponding to some sites we want to keep track of views on.
Writes out updated view counts to a json file at /var/www/tp/views.json and
remembers the timestamp of the last processed log entry in
/usr/local/bin/access-parse/last-run.conf so later runs only count newer hits.
'''
import re, json
from datetime import datetime, timedelta

# Files read and written by the script.
VIEWS_PATH = "/var/www/tp/views.json"
LAST_RUN_PATH = "/usr/local/bin/access-parse/last-run.conf"
ACCESS_LOG_PATH = "/var/log/nginx/tracking.access.log"

# strptime format of the timestamp captured by the "dateandtime" group.
TIME_FORMAT = "%d/%b/%Y:%H:%M:%S %z"

# IP addresses whose hits must not be counted. The script referenced this name
# without ever defining it (NameError on the first matching line); it defaults
# to "exclude nobody" -- add your own addresses here.
exclude_ips = set()

# nginx access log regex with named groups. Raw strings keep the regex escapes
# (\d, \/, \[ ...) from being treated as invalid Python string escapes.
# NOTE(review): only "HTTP/1.1" GET/POST requests match, and the greedy "url"
# group keeps the space that precedes "HTTP/1.1", so counted pixel names end
# with a trailing space. Left as is because existing keys in the views file
# were produced that way -- confirm before changing.
pattern = (
    r'''(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - '''
    r'''\[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] '''
    r'''((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) '''
    r'''(?P<statuscode>\d{3}) (?P<bytessent>\d+) '''
    r'''(["](?P<refferer>(\-)|(.+))["]) '''
    r'''(["](?P<useragent>.+)["])'''
)
cpattern = re.compile(pattern, re.IGNORECASE)


def load_view_counts(path=VIEWS_PATH):
    '''Return the saved {pixel: count} dict, or an empty dict if the file is missing.'''
    try:
        with open(path, "r") as view_file:
            return json.load(view_file)
    except FileNotFoundError:
        return {}


def load_last_run(path=LAST_RUN_PATH):
    '''
    Return the datetime stored by the previous run, or None.

    None is returned when the file does not exist yet (first run) or when its
    content is not an ISO-8601 timestamp. Surrounding whitespace is stripped so
    a trailing newline does not invalidate the stored value.
    '''
    try:
        with open(path, "r") as lr_file:
            return datetime.fromisoformat(lr_file.read().strip())
    except (FileNotFoundError, ValueError):
        return None


def tally_views(lines, view_counts, last_run=None, excluded=None):
    '''
    Count tracking pixel hits found in an iterable of access log lines.

    lines       -- iterable of raw log lines (e.g. an open file)
    view_counts -- dict of pixel name -> count, updated in place
    last_run    -- entries with a timestamp not newer than this are skipped;
                   None means every entry is considered
    excluded    -- collection of IP addresses to ignore (default: exclude_ips)

    Returns the timestamp of the last line that matched the log pattern, or
    None when no line matched (empty or unparsable log).
    '''
    if excluded is None:
        excluded = exclude_ips
    latest = None
    for line in lines:
        rsearch = cpattern.search(line)
        if not rsearch:
            continue
        access = rsearch.groupdict()
        print(access)
        latest = datetime.strptime(access["dateandtime"], TIME_FORMAT)
        print(excluded)
        # only accesses newer than the previous run are counted
        if last_run is not None and latest <= last_run:
            continue
        print(access['ipaddress'])
        if access['url'].startswith('/img/') and access['ipaddress'] not in excluded:
            # the pixel name is whatever follows the "/img/" prefix
            pixel = access['url'][5:]
            view_counts[pixel] = view_counts.get(pixel, 0) + 1
            print("new access")
        else:
            print("no new access")
    return latest


def main():
    '''Update the view counts file from the nginx tracking access log.'''
    view_counts = load_view_counts()
    last_run = load_last_run()

    # iterate through log and parse all access events
    with open(ACCESS_LOG_PATH) as log:
        latest = tally_views(log, view_counts, last_run)

    # finished parsing log: remember the last entry seen, if there was one
    if latest is not None:
        last_run = latest

    # write out new values; with no previous run and no parsable entries there
    # is no timestamp to store, so the last-run file is left untouched
    if last_run is not None:
        with open(LAST_RUN_PATH, "w") as lr_file:
            lr_file.write(last_run.isoformat())
    with open(VIEWS_PATH, "w") as view_file:
        json.dump(view_counts, view_file)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment