Skip to content

Instantly share code, notes, and snippets.

@bunnylab
Last active February 26, 2022 00:34
Show Gist options
  • Save bunnylab/568336d90f4129f5601255629eaa26e0 to your computer and use it in GitHub Desktop.
Save bunnylab/568336d90f4129f5601255629eaa26e0 to your computer and use it in GitHub Desktop.
script for simple web analytics
#!/usr/bin/env python3
'''
NGINX TRACKING PIXEL VIEW COUNT
This is a pretty simple python script to parse nginx access logs for hits on a
set of image files corresponding to some sites we want to keep track of views on.
Writes out updated view counts to a JSON file at /var/www/tp/views.json
'''
import re, json
from datetime import datetime, timedelta
# nginx access log regex with named groups. Kept as a raw string so regex
# escapes such as \d are not read as (invalid) Python string escapes.
pattern = r'''(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])'''
cpattern = re.compile(pattern, re.IGNORECASE)

# files read and written by the script
VIEWS_PATH = "/var/www/tp/views.json"
LAST_RUN_PATH = "/usr/local/bin/access-parse/last-run.conf"
ACCESS_LOG_PATH = "/var/log/nginx/tracking.access.log"

# layout of the timestamp captured by the "dateandtime" group
LOG_TIME_FORMAT = "%d/%b/%Y:%H:%M:%S %z"

# client addresses whose hits are never counted. The name is used by the
# counting logic but had no definition in this script; it defaults to empty.
# TODO: list the addresses to ignore here.
exclude_ips = set()


def load_view_counts(path=VIEWS_PATH):
    '''Return the stored view counts, or an empty dict if the file is missing.'''
    try:
        with open(path, "r") as view_file:
            return json.load(view_file)
    except FileNotFoundError:
        return {}


def load_last_run(path=LAST_RUN_PATH):
    '''Return the timestamp saved by the previous run, or None.

    None is returned when the file does not exist yet (first run) or when its
    content is not an ISO 8601 timestamp.
    '''
    try:
        with open(path, "r") as lr_file:
            # strip() so a trailing newline does not invalidate the timestamp
            return datetime.fromisoformat(lr_file.read().strip())
    except (FileNotFoundError, ValueError):
        return None


def count_new_views(lines, view_counts, last_run=None, excluded_ips=()):
    '''Add tracking pixel hits found in lines to view_counts.

    lines        -- iterable of access log lines
    view_counts  -- dict mapping pixel name to hit count, updated in place
    last_run     -- hits stamped at or before this datetime are skipped;
                    None counts every hit
    excluded_ips -- container of client addresses that are not counted

    Returns the timestamp of the last line that matched the log pattern, or
    None when no line matched.
    '''
    last_seen = None
    for line in lines:
        found = cpattern.search(line)
        if not found:
            continue
        access = found.groupdict()
        last_seen = datetime.strptime(access["dateandtime"], LOG_TIME_FORMAT)
        # only accesses newer than the previous run are counted
        if last_run is not None and last_seen <= last_run:
            continue
        if access['url'].startswith('/img/') and access['ipaddress'] not in excluded_ips:
            # NOTE(review): the "url" group also captures the space in front
            # of "HTTP/1.1", so pixel names end with a blank. Left unchanged
            # to stay compatible with counts already stored -- confirm before
            # stripping it.
            pixel = access['url'][5:]
            view_counts[pixel] = view_counts.get(pixel, 0) + 1
    return last_seen


def main():
    '''Count new tracking pixel hits and persist the updated state.'''
    view_counts = load_view_counts()
    last_run = load_last_run()

    # iterate through the log and parse all access events
    with open(ACCESS_LOG_PATH) as log:
        last_seen = count_new_views(log, view_counts, last_run, exclude_ips)

    # an empty or unparsable log must not wipe the previous marker
    if last_seen is not None:
        last_run = last_seen

    # write out new values
    if last_run is not None:
        with open(LAST_RUN_PATH, "w") as lr_file:
            lr_file.write(last_run.isoformat())
    with open(VIEWS_PATH, "w") as view_file:
        json.dump(view_counts, view_file)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment