Skip to content

Instantly share code, notes, and snippets.

@joswr1ght
Created November 16, 2022 11:10

Revisions

  1. joswr1ght created this gist Nov 16, 2022.
    58 changes: 58 additions & 0 deletions apacheanon.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,58 @@
    #!/usr/bin/env python3
    # Anonymize the IPv4 address in the first space-delimited column of an ASCII file,
    # replacing it with a consistent IP address that excludes RFC1918 and other internal
    # network IP addresses.
    # You can use this to take an Apache (or Nginx, or probably other) log file
    # and change each of the source IP addresses to another value that is preserved
    # consistently throughout the output log file.
    # The modified log data is written to STDOUT, so run this as
    # `apacheanon.py access.log > new-access.log`.
    #
    # 2022-11-16 Joshua Wright

    import sys
    import random
    from collections import defaultdict

    # Candidate values for each octet of a generated address.

    # First octet: 1-254, skipping the first-octet values of reserved networks.
    # Dropping a whole first octet removes more than just the reserved portion
    # of each of these networks; that over-exclusion is accepted for simplicity.
    octet1list = [n for n in range(1, 255) if n not in (10, 127, 169, 172, 192)]

    # Second and third octets: 0-254. Both names refer to the same list object.
    octet2list = octet3list = list(range(255))

    # Fourth octet: 1-254, so a generated address never ends in .0 or .255.
    octet4list = list(range(1, 255))


    def randip():
        """Return a random dotted-quad IPv4 address string.

        One value is drawn with ``random.choice`` from each of the module-level
        octet lists, in order from the first octet to the fourth, and the four
        values are joined with dots.
        """
        pools = (octet1list, octet2list, octet3list, octet4list)
        return '.'.join(str(random.choice(pool)) for pool in pools)


    # Require exactly one command-line argument: the path of the log file to read.
    if len(sys.argv) != 2:
        sys.stderr.write('Consistently replace the IP addresses in the first column of a log file with a random IP.\n')
        sys.stderr.write('Writes new log data on STDOUT.\n\n')
        sys.stderr.write(f'Usage: {sys.argv[0]} <access.log>\n')
        # Exit with a non-zero status (2 = command-line usage error) so a shell
        # pipeline or calling script can tell that no log data was produced.
        sys.exit(2)


    # Replacement addresses already handed out. Used to guarantee that two
    # different source values never receive the same anonymized address.
    usedips = set()


    def _unusedrandip():
        """Return a randip() address that has not been assigned to any source yet."""
        while True:
            candidate = randip()
            if candidate not in usedips:
                usedips.add(candidate)
                return candidate


    # Maps each original first-column value to its replacement. The replacement
    # is generated on first lookup and reused for every later line that starts
    # with the same value, which keeps the substitution consistent.
    substituteips = defaultdict(_unusedrandip)
    cnt = 0

    with open(sys.argv[1], 'r') as logfile:
        for logline in logfile:
            # Separate the line ending so that a line holding only an address
            # (no space at all) is still anonymized rather than crashing.
            content = logline.rstrip('\n')
            ending = logline[len(content):]

            if not content:
                # Blank line: nothing to substitute, copy it through unchanged.
                sys.stdout.write(logline)
                continue

            # partition() never raises when the delimiter is missing, unlike
            # unpacking the result of split(' ', 1).
            ip, sep, restoflog = content.partition(' ')
            sys.stdout.write(f'{substituteips[ip]}{sep}{restoflog}{ending}')
            cnt += 1

    sys.stderr.write(f'Substituted {cnt} IP addresses.\n')