Skip to content

Instantly share code, notes, and snippets.

@keiichishima
Last active August 29, 2015 14:24
Show Gist options
  • Save keiichishima/47a4fbe760be90abf92f to your computer and use it in GitHub Desktop.
Save keiichishima/47a4fbe760be90abf92f to your computer and use it in GitHub Desktop.
Log anonymizer
#!/usr/bin/env python
import argparse
import hashlib
import random
import re
import sys
from netaddr import EUI
from yacryptopan import CryptoPAn
# Patterns for the identifiers that get anonymized. They are written as raw
# strings so that regex escapes such as \d and \. are not interpreted (and
# warned about) as string escapes.

# MAC address: six two-digit hex groups separated by ':' or '-'.
mac_re = re.compile(r'(([0-9a-fA-F][0-9a-fA-F][:-]){5})[0-9a-fA-F][0-9a-fA-F]')

# Dotted-quad IPv4 address with every octet limited to 0-255. Named groups:
#   prefix  - the first two octets, including their trailing dots
#   host    - the last two octets
#   trailer - the single non-digit character that follows the address, or the
#             empty string when the address is the last thing in the input
#             (so an address on a final line without a newline still matches).
ipv4_re = re.compile(r'(?P<prefix>(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])[.](\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])[.])(?P<host>(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])[.](\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))(?P<trailer>\D|$)')

# IPv6 address in full, '::'-compressed, or embedded-IPv4 form, optionally
# followed by a '%...' suffix.
# NOTE(review): only lowercase hex digits are accepted (no re.IGNORECASE), and
# the optional '%.+' suffix is greedy, so it swallows the rest of the line
# after a '%'. Both are kept as-is here; confirm whether that is intended.
ipv6_re = re.compile(r'((([0-9a-f]{1,4}:){7}([0-9a-f]{1,4}|:))|(([0-9a-f]{1,4}:){6}(:[0-9a-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9a-f]{1,4}:){5}(((:[0-9a-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9a-f]{1,4}:){4}(((:[0-9a-f]{1,4}){1,3})|((:[0-9a-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9a-f]{1,4}:){3}(((:[0-9a-f]{1,4}){1,4})|((:[0-9a-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9a-f]{1,4}:){2}(((:[0-9a-f]{1,4}){1,5})|((:[0-9a-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9a-f]{1,4}:){1}(((:[0-9a-f]{1,4}){1,6})|((:[0-9a-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9a-f]{1,4}){1,7})|((:[0-9a-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?')

# Host name: one or more dot-terminated labels (alphanumerics with interior
# hyphens) followed by a purely alphabetic final label.
hostname_re = re.compile(r'([a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?[.])+[a-zA-Z]+')
def hash_mac(mac_match):
    """Return an anonymized, upper-case, dash-separated MAC address.

    Intended as a replacement callback for ``mac_re.sub``.

    The replacement is derived from an MD5 digest of the secret key
    (``args.key``) concatenated with address text:

    * with ``args.anonymize_oui`` set, the matched text itself is hashed and
      all six octets are replaced by the first six digest bytes;
    * otherwise the first eight characters of ``str(EUI(mac))`` are kept and
      the remaining three octets are replaced by the first three digest bytes
      of the key plus the EUI's ``ei`` attribute.

    :param mac_match: regex match object whose ``group(0)`` is the MAC text.
    :returns: the anonymized MAC address string.
    """
    mac_orig = mac_match.group(0)

    def _keyed_md5(text):
        # hashlib only accepts bytes; on Python 3 the key and the matched
        # text are str, so encode before hashing. On Python 2 a plain str is
        # already bytes and is passed through unchanged.
        data = args.key + text
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        return hashlib.md5(data).hexdigest()

    if args.anonymize_oui:
        digest = _keyed_md5(mac_orig)
        parts = [digest[i:i + 2] for i in range(0, 12, 2)]
    else:
        mac = EUI(mac_orig)
        digest = _keyed_md5(mac.ei)
        # Keep the leading 'XX-XX-XX' of the normalized address as-is.
        parts = [str(mac)[0:8]] + [digest[i:i + 2] for i in range(0, 6, 2)]
    return '-'.join(parts).upper()
def cpa_ipv4(ipv4_match):
    """Return the anonymized form of a matched IPv4 address.

    Intended as a replacement callback for ``ipv4_re.sub``. The ``prefix``
    and ``host`` groups are joined back into the full address, passed to
    ``cpa.anonymize``, and the ``trailer`` group is appended again so the
    text that followed the address is preserved.

    :param ipv4_match: regex match object with the named groups ``prefix``,
        ``host`` and ``trailer``.
    :returns: the anonymized address followed by the original trailer.
    """
    address = ''.join(ipv4_match.group('prefix', 'host'))
    return cpa.anonymize(address) + ipv4_match.group('trailer')
def cpa_ipv6(ipv6_match):
    """Return the anonymized form of a matched IPv6 address.

    Intended as a replacement callback for ``ipv6_re.sub``: the whole matched
    text is handed to ``cpa.anonymize`` and its result is returned unchanged.

    :param ipv6_match: regex match object covering the address text.
    :returns: whatever ``cpa.anonymize`` produces for the matched text.
    """
    return cpa.anonymize(ipv6_match.group())
def hash_hostname(hostname_match):
    """Return a pseudonymous host name for a matched host name.

    Intended as a replacement callback for ``hostname_re.sub``. The result is
    ``'hostname-<md5 hex digest>.nonexistentdomain'`` where the digest is
    taken over the secret key (``args.key``) concatenated with the matched
    text, so the same name always maps to the same pseudonym for a given key.

    :param hostname_match: regex match object whose ``group(0)`` is the name.
    :returns: the replacement host name string.
    """
    data = args.key + hostname_match.group(0)
    # hashlib only accepts bytes; on Python 3 the key and the matched text
    # are str, so encode before hashing. On Python 2 a plain str is already
    # bytes and is passed through unchanged.
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    return 'hostname-' + hashlib.md5(data).hexdigest() + '.nonexistentdomain'
if __name__ == '__main__':
    # Script entry point: read log lines, replace MAC addresses, IPv4/IPv6
    # addresses and host names with anonymized values, and write the result.
    # `args` and `cpa` are deliberately module-level names: the replacement
    # callbacks defined above read them as globals.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile')
    parser.add_argument('-o', '--outfile')
    parser.add_argument('-k', '--key',
                        default=''.join([chr(x) for x in range(0, 32)]),
                        help='A key used for AES and padding (32bytes)')
    parser.add_argument('--anonymize-oui', action='store_true',
                        help='Anonymize the OUI part')
    args = parser.parse_args()

    # NOTE(review): CryptoPAn is assumed to need a bytes key on Python 3
    # (verify against the yacryptopan documentation). The key is therefore
    # encoded when it is text; on Python 2 a plain str is already bytes and
    # is passed through unchanged. The anonymizer is built before any file
    # is opened so that a bad key does not truncate the output file.
    cpa_key = args.key
    if not isinstance(cpa_key, bytes):
        cpa_key = cpa_key.encode('utf-8')
    cpa = CryptoPAn(cpa_key)

    # Read from stdin unless infile is specified.
    stream_in = open(args.infile, 'r') if args.infile else sys.stdin
    try:
        # Write to stdout unless outfile is specified.
        stream_out = open(args.outfile, 'w') if args.outfile else sys.stdout
        try:
            # readline() is used (rather than iterating the file object) so
            # that lines are processed as soon as they arrive on a pipe.
            for line in iter(stream_in.readline, ''):
                # MAC addresses go first: their colon-separated form is
                # rewritten with dashes before the IPv6 pattern runs.
                line = mac_re.sub(hash_mac, line)
                line = ipv4_re.sub(cpa_ipv4, line)
                line = ipv6_re.sub(cpa_ipv6, line)
                line = hostname_re.sub(hash_hostname, line)
                stream_out.write(line)
        finally:
            # Only close what this script opened; leave stdout alone.
            if stream_out is not sys.stdout:
                stream_out.close()
    finally:
        if stream_in is not sys.stdin:
            stream_in.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment