Skip to content

Instantly share code, notes, and snippets.

@jweyrich
Last active June 11, 2024 14:39
Show Gist options
  • Save jweyrich/8d53a7bf5bad7b5958423cb4e538ab20 to your computer and use it in GitHub Desktop.
Save jweyrich/8d53a7bf5bad7b5958423cb4e538ab20 to your computer and use it in GitHub Desktop.
AWS ALB Log Parser written in Python
#!/usr/bin/env python3
# coding=utf8
#
# AUTHOR: Jardel Weyrich <jweyrich at gmail dot com>
#
from __future__ import print_function
import re, sys
# Output field names, in capture-group order: regex group N+1 is reported
# under _ALB_FIELDS[N], so this tuple must stay in lockstep with _ALB_REGEX.
_ALB_FIELDS = (
    "type",
    "timestamp",
    "alb",
    "client_ip",
    "client_port",
    "backend_ip",
    "backend_port",
    "request_processing_time",
    "backend_processing_time",
    "response_processing_time",
    "alb_status_code",
    "backend_status_code",
    "received_bytes",
    "sent_bytes",
    "request_verb",
    "request_url",
    "request_proto",
    "user_agent",
    "ssl_cipher",
    "ssl_protocol",
    "target_group_arn",
    "trace_id",
    "domain_name",
    "chosen_cert_arn",
    "matched_rule_priority",
    "request_creation_time",
    "actions_executed",
    "redirect_url",
    "new_field",
)

# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# REFERENCE: https://docs.aws.amazon.com/athena/latest/ug/application-load-balancer-logs.html#create-alb-table
# Compiled once at import time instead of being looked up for every line.
# The last two groups are special: "redirect_url" keeps its surrounding double
# quotes (they are inside the group), and "new_field" is a catch-all for
# whatever text remains on the line after it.
_ALB_REGEX = re.compile(r"([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-\_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" ($|\"[^ ]*\")(.*)")


def _parse_alb_line(line):
    """Split one log line into ordered (field, value) pairs.

    Returns a list of ``(field_name, captured_text)`` tuples in
    ``_ALB_FIELDS`` order, or ``None`` when the line does not match the
    ALB pattern.
    """
    matches = _ALB_REGEX.search(line)
    if matches is None:
        return None
    return list(zip(_ALB_FIELDS, matches.groups()))


def parse_alb_log_file(file_path):
    """Print every parseable entry of an ALB access log as key="value" pairs.

    Each matching line of the file at ``file_path`` is written to stdout as
    a single line of the form ``field="value", field="value", ...``.
    Lines that do not match the pattern are skipped without any output.

    Args:
        file_path: Path of the log file to read (opened in text mode).

    Returns:
        None. The result is the text written to stdout.
    """
    with open(file_path, 'r') as log_file:
        for line in log_file:
            pairs = _parse_alb_line(line)
            if pairs is None:
                continue
            # One write per log line; the output is identical to printing
            # the fields one by one with ", " separators and a final newline.
            print(", ".join("%s=\"%s\"" % pair for pair in pairs))
if __name__ == '__main__':
    # Script entry point: exactly one argument, the log file path, is required.
    script_name = sys.argv[0]
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("usage: %s <log_file_path>" % script_name)
    parse_alb_log_file(cli_args[0])
@Shubhamnegi
Copy link

Shubhamnegi commented Nov 4, 2023

@jweyrich Using this regex, I have built a utility that pulls logs from S3 and pushes the parsed logs to different destinations, such as Elasticsearch or InfluxDB, for further analysis.
Parsing can be triggered in three ways:

  • A CLI option that takes a log directory on the local system
  • A Lambda handler that is triggered by S3 notifications (link the S3 notifications to the Lambda function)
  • An SQS consumer that reads S3 notifications from an SQS queue (link the S3 notifications to SQS)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment