Created
November 16, 2022 11:10
Revisions
-
joswr1ght created this gist
Nov 16, 2022 .There are no files selected for viewing
#!/usr/bin/env python3
# Anonymize the first column (delimited by space) IPv4 address in an ASCII file with a
# consistent IP address that excludes RFC1918 and other internal network IP addresses.
# You can use this to take an Apache (or Nginx or probably other log files as well) file
# and change each of the source IP addresses to another value that is preserved consistently
# throughout the output log file.
# Changes to the log file are written to STDOUT, so run this as
# `apacheanon.py access.log > new-access.log`.
#
# 2022-11-16 Joshua Wright

import sys
import random
from collections import defaultdict

# First-octet values for reserved networks.
# This eliminates more than just the reserved portions of these networks, but
# :man-shrugging:
RESERVED_FIRST_OCTETS = (10, 127, 169, 172, 192)

# First octets 224-255 are multicast / reserved address space, so the pool of
# candidate first octets stops at 223.
octet1list = [o for o in range(1, 224) if o not in RESERVED_FIRST_OCTETS]
# Middle octets may take any value 0-255.
octet2list = list(range(0, 256))
octet3list = octet2list
# Avoid .0 and .255 in the last octet (network / broadcast look-alikes).
octet4list = list(range(1, 255))


def randip():
    """Return a random dotted-quad IPv4 address built from the octet pools."""
    return f'{random.choice(octet1list)}.' \
           f'{random.choice(octet2list)}.' \
           f'{random.choice(octet3list)}.' \
           f'{random.choice(octet4list)}'


def unique_randip_factory():
    """Return a zero-argument callable that never yields the same IP twice.

    A plain ``defaultdict(randip)`` could assign one random address to two
    different source addresses, merging distinct clients in the output.
    """
    issued = set()

    def factory():
        while True:
            candidate = randip()
            if candidate not in issued:
                issued.add(candidate)
                return candidate

    return factory


def anonymize_log(lines, out, substituteips=None):
    """Write *lines* to *out* with the first space-delimited column replaced.

    Args:
        lines: iterable of log lines (an open text file works).
        out: object with a ``write(str)`` method that receives the new lines.
        substituteips: optional mapping from original first-column value to
            its replacement; missing keys must be created on lookup. A fresh
            collision-free mapping is used when omitted.

    Returns:
        The number of lines in which a substitution was made.
    """
    if substituteips is None:
        substituteips = defaultdict(unique_randip_factory())

    cnt = 0
    for logline in lines:
        ip, sep, restoflog = logline.partition(' ')
        if not sep:
            # No space on this line: treat everything before the line ending
            # as the first column so a bare address is still anonymized.
            ip = logline.rstrip('\r\n')
            restoflog = logline[len(ip):]
            if not ip:
                # Blank line: nothing to replace, pass it through untouched.
                out.write(logline)
                continue
        out.write(f'{substituteips[ip]}{sep}{restoflog}')
        cnt += 1
    return cnt


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    Args:
        argv: argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 2 when the arguments are wrong (usage is printed to
        STDERR).
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        prog = argv[0] if argv else 'apacheanon.py'
        sys.stderr.write('Consistently replace the IP addresses in the first column of a log file with a random IP.\n')
        sys.stderr.write('Writes new log data on STDOUT.\n\n')
        sys.stderr.write(f'Usage: {prog} <access.log>\n')
        # A usage error is a failure; report it with a non-zero status.
        return 2

    with open(argv[1], 'r') as logfile:
        cnt = anonymize_log(logfile, sys.stdout)

    sys.stderr.write(f'Substituted {cnt} IP addresses.\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())