Skip to content

Instantly share code, notes, and snippets.

@karenc
Created September 9, 2019 20:01
Show Gist options
  • Save karenc/8f335da24607c84ec7ac50af03397ca8 to your computer and use it in GitHub Desktop.
Save karenc/8f335da24607c84ec7ac50af03397ca8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# https://github.com/openstax/cnx/issues/669
# Oops error when navigating to the Psychology book through the CNX homepage
# Get the varnish files here:
# scp homedirs1.cnx.org:/home/pumazi/p0{1,2}_905_varn.out .
# Run this script by doing:
# ./parse_varnish_logs.py p01_905_varn.out p02_905_varn.out
# example request:
#
# * << BeReq >> 4292886
# - Begin bereq 4292885 fetch
# - Timestamp Start: 1567685158.431862 0.000000 0.000000
# - BereqMethod GET
# - BereqURL /extras/8d04a686-d5e8-4798-a27d-c608e4d0e187@26.19:72169e9d-cba1-4d43-923c-41ae4d01b3cc@11
# - BereqProtocol HTTP/1.1
# - BereqHeader Host: archive.cnx.org
# - BereqHeader Accept: application/json, text/javascript, */*; q=0.01
# - BereqHeader User-Agent: Mozilla/5.0 (Linux; Android 8.1.0; SM-T580) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36
# - BereqHeader Sec-Fetch-Mode: cors
# - BereqHeader Origin: https://cnx.org
# - BereqHeader Sec-Fetch-Site: same-site
# - BereqHeader Referer: https://cnx.org/contents/jQSmhtXo@26.19:chaencuh@11/2-1-Displacement
# - BereqHeader Accept-Language: en-US,en;q=0.9,lt;q=0.8
# - BereqHeader X-Secure: true
# - BereqHeader X-Forwarded-Proto: https
# - BereqHeader X-Forwarded-For: 69.67.85.194, 128.42.169.27
# - BereqHeader Accept-Encoding: gzip
# - BereqHeader If-None-Match: "hIazt05ahVXndnutCKlE9Q"
# - BereqHeader X-Varnish: 4292886
# - VCL_call BACKEND_FETCH
# - VCL_return fetch
# - BackendOpen 52 15f4883d-5d93-4101-b6b7-25ad0022d42e.prod08_archive0 128.42.169.78 6500 128.42.169.89 56142
# - Timestamp Bereq: 1567685158.434152 0.002290 0.002290
# - Timestamp Beresp: 1567685159.549903 1.118041 1.115751
# - BerespProtocol HTTP/1.1
# - BerespStatus 500
# - BerespReason Internal Server Error
# - BerespHeader Content-Length: 110
# - BerespHeader Content-Type: text/plain
# - BerespHeader Date: Thu, 05 Sep 2019 11:38:35 GMT
# - BerespHeader Server: waitress
# - TTL RFC -1 10 -1 1567685160 1567685160 1567683515 0 0
# - VCL_call BACKEND_RESPONSE
# - TTL VCL 0 10 0 1567685160
# - BerespHeader X-Varnish-Status: uncacheable - status code >= 500
# - BerespHeader X-Varnish-Backend: prod08_archive0
# - BerespHeader X-Varnish-Ttl: 0.000
# - VCL_return deliver
# - Storage malloc Transient
# - ObjProtocol HTTP/1.1
# - ObjStatus 500
# - ObjReason Internal Server Error
# - ObjHeader Content-Length: 110
# - ObjHeader Content-Type: text/plain
# - ObjHeader Date: Thu, 05 Sep 2019 11:38:35 GMT
# - ObjHeader Server: waitress
# - ObjHeader X-Varnish-Status: uncacheable - status code >= 500
# - ObjHeader X-Varnish-Backend: prod08_archive0
# - ObjHeader X-Varnish-Ttl: 0.000
# - Fetch_Body 3 length stream
# - BackendReuse 52 15f4883d-5d93-4101-b6b7-25ad0022d42e.prod08_archive0
# - Timestamp BerespBody: 1567685159.550290 1.118428 0.000387
# - Length 110
# - BereqAcct 684 0 684 140 110 250
# - End
import json
import re
import sys
class Request:
    """One varnishlog record, parsed into a tag -> value mapping.

    ``lines`` is the list of text lines that make up a single record: the
    first line is the ``* << BeReq >> <id>`` banner and every following line
    has the shape ``<marker> <Tag> <payload>``.

    After construction:

    * ``bereq`` holds the id taken from the banner line.
    * ``fields`` maps each tag to its payload:
        - tags containing ``Header`` (BereqHeader, BerespHeader, ObjHeader,
          ...) map to a dict of header name -> value, stored both under the
          original spelling and under the lower-cased name;
        - tags that occur several times (e.g. Timestamp) map to a list of
          payloads in file order;
        - every other tag maps to its payload string.

    Any tag can also be read as an attribute, e.g. ``req.BerespStatus``.
    """

    # Separates the leading marker, the tag and the payload of a log line.
    _WHITESPACE = re.compile(r'\s+')

    def __init__(self, lines):
        # Everything after the last '>>' on the banner line is the id.
        self.bereq = lines[0].split('>>')[-1].strip() if lines else ''
        self.fields = {}
        for line in lines[1:]:
            parts = self._WHITESPACE.split(line.strip(), 2)
            if len(parts) < 3:
                # Payload-less lines such as the closing "- End" marker (or
                # blank/truncated lines) carry no value to record.
                continue
            _, header, value = parts
            if 'Header' in header:
                # partition() tolerates a header line without a colon,
                # yielding an empty value instead of raising ValueError.
                name, _, content = value.partition(':')
                content = content.strip()
                value = {name: content, name.lower(): content}
            if header not in self.fields:
                self.fields[header] = value
            elif isinstance(self.fields[header], dict):
                self.fields[header].update(value)
            elif isinstance(self.fields[header], list):
                self.fields[header].append(value)
            else:
                # Second occurrence of a plain tag: promote to a list.
                self.fields[header] = [self.fields[header], value]

    def __getattr__(self, attr):
        """Expose parsed tags as attributes.

        Raises AttributeError when the tag was not present in the record.
        """
        # Go through __dict__ directly: touching ``self.fields`` here would
        # re-enter __getattr__ and recurse forever whenever ``fields`` has
        # not been set yet (e.g. during copy/pickle or after __new__).
        try:
            return self.__dict__['fields'][attr]
        except KeyError:
            raise AttributeError(
                f'{type(self).__name__!r} object has no attribute {attr!r}'
            ) from None

    def __str__(self):
        # .get() keeps the summary printable for records that lack a URL or
        # a backend status (they show up as None).
        url = self.fields.get('BereqURL')
        status = self.fields.get('BerespStatus')
        return f'<Request BeReq={self.bereq} BereqURL={url} BerespStatus={status}>'
def iter_batches(lines):
    """Group an iterable of text lines into blank-line-separated records.

    Each line is stripped; runs of non-blank lines are yielded as one list.
    Leading, trailing and repeated blank lines never produce empty records
    or empty entries, and a final record is yielded even when the input does
    not end with a blank line.
    """
    batch = []
    for line in lines:
        stripped = line.strip()
        if stripped:
            batch.append(stripped)
        elif batch:
            yield batch
            batch = []
    if batch:
        # Flush the last record when the input lacks a trailing blank line.
        yield batch


if __name__ == '__main__':
    # Parse every log file named on the command line into Request objects.
    requests = []
    for filename in sys.argv[1:]:
        with open(filename) as f:
            # Iterate the file lazily instead of loading it with readlines().
            requests.extend(Request(batch) for batch in iter_batches(f))
    for req in requests:
        # For header fields like BereqHeader, ObjHeader, BerespHeader, you can do:
        # req.BereqHeader['user-agent'] (<- lower case header for consistency)
        # For fields that appear multiple times like Timestamp
        # req.Timestamp[0]
        # For other fields, just the field name should work
        # req.BerespStatus
        # Records without a backend status print None rather than aborting
        # the run before the JSON dump below is written.
        print(getattr(req, 'BerespStatus', None))
    # Dump the parsed fields of every request for further analysis.
    with open('prod_varnish_requests.json', 'w') as f:
        json.dump([r.fields for r in requests], f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment