# IBM Web Server Log Parser
from hashlib import md5 as xx
from collections import namedtuple
import os
import re
# Environment parameters
# SEPARATOR holds the Unicode code point of the column delimiter.
# os.getenv returns a str whenever the variable is set, while the fallback
# is an int, so normalise through int() before chr() (chr() rejects str).
COLUMN_SEPARATOR = chr(int(os.getenv('SEPARATOR', 449)))
# FILE is the path of the output file; defaults to 'hashed.csv'.
FILE_NAME = os.getenv('FILE', 'hashed.csv')
# Regular expression to capture JSESSIONID
# The capture group is optional, so group(1) is None when "JSESSIONID=" is
# present with no value after it.
JSESSION_PATTERN = r'JSESSIONID=([a-zA-Z0-9_\-]+)?[;\s]?'
# Data Structure for log entries
# One field per column of an input line, in column order.
AccessLogRow = namedtuple('AccessLogRow', 'ip date method code url referer agent cookie version id')
with open("logs.csv") as reader, open(FILE_NAME, "w") as writer:
for line in reader:
# Convert log entry to data structure
log_entry = AccessLogRow(*line.split(COLUMN_SEPARATOR))
# Hashing Fields
haship = xx(log_entry.ip.encode('utf-8')).hexdigest()
# Default value for jsession or pattern matched
jsessionid = 'EMPTY'
if "JSESSIONID" in log_entry.cookie:
jsessionid =, log_entry.cookie).group(1)
[haship] +
[] +
[log_entry.method] +
[log_entry.code] +
[log_entry.url] +
[log_entry.referer] +
[log_entry.agent] +
[jsessionid] +
[log_entry.version] +
