-
-
Save kristapsk/358c1800d7b7d0a7a96b685057b12b1e to your computer and use it in GitHub Desktop.
Convert Apache/Nginx Unified Log Format to CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# accesslog2csv: Convert default, unified access log from Apache, Nginx
# servers to CSV format.
#
# Original source by Maja Kraljic, July 18, 2017
# Modified by Joshua Wright to parse all elements in the HTTP request as
# different columns, December 16, 2019
# Modified by Kristaps Kaupe to support (ignore) additional fields at the end
# of each log line and to add a shebang, March 2023
import csv
import re
import sys

# One line of the default "combined" access log. Anything after the quoted
# user agent (extra fields some server configurations append) is matched by
# the trailing ``.*`` and discarded.
LOG_PATTERN = re.compile(r'(?P<ip>.*?) (?P<remote_log_name>.*?) (?P<userid>.*?) \[(?P<date>.*?)(?= ) (?P<timezone>.*?)\] \"(?P<request_method>.*?) (?P<path>.*?)(?P<request_version> HTTP/.*)?\" (?P<status>.*?) (?P<length>.*?) \"(?P<referrer>.*?)\" \"(?P<user_agent>.*?)\".*')

# Exactly one column name per capture group in LOG_PATTERN, in group order.
# 'timezone' was missing from the original header, which shifted every
# column after 'time' one position to the left.
CSV_HEADER = ['host', 'ident', 'user', 'time', 'timezone', 'verb', 'url',
              'httpver', 'status', 'size', 'referer', 'useragent']

USAGE = "Usage: %s <access.log> <accesslog.csv>\n"


def parse_line(line):
    """Return the fields of one access-log line, or None if it does not match.

    The result is a tuple with one entry per capture group of LOG_PATTERN
    (same order as CSV_HEADER). The HTTP version entry is None when the
    request carries no `` HTTP/...`` suffix; when present it keeps the
    leading space captured by the pattern.
    """
    match = LOG_PATTERN.match(line)
    if match is None:
        return None
    return match.groups()


def convert(log_file_name, csv_file_name):
    """Convert the access log at log_file_name into a CSV at csv_file_name.

    Writes CSV_HEADER followed by one row per matching log line. Blank lines
    are ignored; any other line that does not match LOG_PATTERN is reported
    on stderr and skipped instead of aborting the whole conversion.

    Returns the number of skipped (non-blank, non-matching) lines.
    Raises OSError if either file cannot be opened.
    """
    skipped = 0
    # The log is opened first so a missing input does not create or truncate
    # the output file. errors='replace' keeps stray undecodable bytes from
    # aborting the run. newline='' is what the csv module requires so that
    # it controls line endings itself.
    with open(log_file_name, errors='replace') as log, \
            open(csv_file_name, 'w', newline='') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(CSV_HEADER)
        for line_number, line in enumerate(log, start=1):
            if not line.strip():
                continue
            fields = parse_line(line)
            if fields is None:
                skipped += 1
                sys.stderr.write("%s:%d: skipping unparseable line\n"
                                 % (log_file_name, line_number))
                continue
            csv_out.writerow(fields)
    return skipped


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    argv defaults to sys.argv. With no arguments the usage text is printed
    and 0 is returned, as before. With only one argument the usage text is
    printed and 2 is returned (previously this crashed with IndexError).
    Arguments beyond the first two are ignored.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        program = argv[0] if argv else 'accesslog2csv'
        sys.stdout.write(USAGE % program)
        return 0 if len(argv) <= 1 else 2
    convert(argv[1], argv[2])
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment