Skip to content

Instantly share code, notes, and snippets.



Created Mar 16, 2021
What would you like to do?
IBM Web Server Log Parse
from hashlib import md5 as xx
from collections import namedtuple
import os
import re
# Environment parameters
# SEPARATOR holds the Unicode code point (an integer) of the column delimiter.
# os.getenv returns a string when the variable is set, so convert to int
# before handing the value to chr().
COLUMN_SEPARATOR = chr(int(os.getenv('SEPARATOR', 449)))
FILE_NAME = os.getenv('FILE', 'hashed.csv')
# Regular expression to capture JSESSIONID
JSESSION_PATTERN = r'JSESSIONID=([a-zA-Z0-9_\-]+)?[;\s]?'
# Data Structure for log entries
AccessLogRow = namedtuple('AccessLogRow', 'ip date method code url referer agent cookie version id')


def extract_jsessionid(cookie):
    """Return the JSESSIONID value found in *cookie*, or ``'EMPTY'``.

    ``'EMPTY'`` is returned when the cookie carries no ``JSESSIONID=``
    pair, or when the pair is present but its value is empty, so the
    caller always receives a string it can write to the output file.
    """
    match = re.search(JSESSION_PATTERN, cookie)
    # The capture group is optional in the pattern, so group(1) can be None
    # even when the pattern itself matched.
    if match is None or not match.group(1):
        return 'EMPTY'
    return match.group(1)


def anonymize_line(line, separator=COLUMN_SEPARATOR):
    """Convert one raw log line into its anonymized output line.

    The line is split on *separator* into the ten ``AccessLogRow`` fields.
    The client IP is replaced by its MD5 hex digest and the cookie column
    by the extracted JSESSIONID (or ``'EMPTY'``); every other column is
    passed through unchanged.  The line terminator, if any, stays attached
    to the last column, so the result can be written out as-is.

    Raises:
        TypeError: if the line does not split into exactly ten fields.
    """
    log_entry = AccessLogRow(*line.split(separator))
    # Hashing Fields
    haship = xx(log_entry.ip.encode('utf-8')).hexdigest()
    jsessionid = extract_jsessionid(log_entry.cookie)
    # NOTE(review): the second and last output columns were lost from the
    # original expression; `date` and `id` are assumed here because they are
    # the only AccessLogRow fields left unaccounted for -- confirm.
    return separator.join([
        haship,
        log_entry.date,
        log_entry.method,
        log_entry.code,
        log_entry.url,
        log_entry.referer,
        log_entry.agent,
        jsessionid,
        log_entry.version,
        log_entry.id,
    ])


def main():
    """Read ``logs.csv`` and write the anonymized rows to ``FILE_NAME``."""
    with open("logs.csv") as reader, open(FILE_NAME, "w") as writer:
        for line in reader:
            writer.write(anonymize_line(line))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment