Skip to content

Instantly share code, notes, and snippets.

@canimus

canimus/parse.py

Created Mar 16, 2021
Embed
What would you like to do?
IBM Web Server Log Parse
from hashlib import md5 as xx
from collections import namedtuple
import os
import re
# Environment parameters
# SEPARATOR is the decimal Unicode code point of the column delimiter.
# Environment values always arrive as strings, so the value must be converted
# with int() before chr(); the default 449 is U+01C1.
COLUMN_SEPARATOR = chr(int(os.getenv('SEPARATOR', '449')))
# FILE overrides the default output file name.
FILE_NAME = os.getenv('FILE', 'hashed.csv')
# Regular expression to capture JSESSIONID
# NOTE: the capture group is optional, so group(1) is None when the cookie
# carries an empty value such as "JSESSIONID=;".
JSESSION_PATTERN = r'JSESSIONID=([a-zA-Z0-9_\-]+)?[;\s]?'
# Data Structure for log entries (one attribute per column, in column order)
AccessLogRow = namedtuple('AccessLogRow', 'ip date method code url referer agent cookie version id')
# Rewrite logs.csv into FILE_NAME: the client IP column is replaced by its MD5
# hex digest and the cookie column is reduced to the JSESSIONID value (or the
# literal 'EMPTY' when none can be extracted). All other columns pass through.
# NOTE(review): both files are opened with the platform default encoding;
# confirm it can represent the configured separator character.
with open("logs.csv") as reader, open(FILE_NAME, "w") as writer:
    expected_fields = len(AccessLogRow._fields)
    for line_number, line in enumerate(reader, start=1):
        # Strip the line terminator so it does not end up inside the last
        # column; it is written back explicitly below.
        line = line.rstrip('\n')
        if not line:
            # Blank lines (e.g. a trailing empty line) carry no record.
            continue
        fields = line.split(COLUMN_SEPARATOR)
        if len(fields) != expected_fields:
            raise ValueError(
                'logs.csv line %d: expected %d fields, found %d'
                % (line_number, expected_fields, len(fields)))
        # Convert log entry to data structure
        log_entry = AccessLogRow(*fields)
        # Hashing Fields
        # NOTE(review): an unsalted MD5 of an IP address can be reversed by
        # enumerating the address space; treat the output as pseudonymised,
        # not anonymous.
        haship = xx(log_entry.ip.encode('utf-8')).hexdigest()
        # Default value for jsession or pattern matched. Both "no match" and
        # "matched with an empty value" (group 1 is None) fall back to 'EMPTY'
        # instead of raising.
        match = re.search(JSESSION_PATTERN, log_entry.cookie)
        jsessionid = (match.group(1) if match else None) or 'EMPTY'
        # _replace keeps the original column order, swapping only ip/cookie.
        hashed_entry = log_entry._replace(ip=haship, cookie=jsessionid)
        writer.write(COLUMN_SEPARATOR.join(hashed_entry) + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment