Last active
August 29, 2015 14:24
-
-
Save keiichishima/47a4fbe760be90abf92f to your computer and use it in GitHub Desktop.
Log anonymizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse | |
import hashlib | |
import random | |
import re | |
import sys | |
from netaddr import EUI | |
from yacryptopan import CryptoPAn | |
# Patterns used to locate identifiers that must be anonymized in a log line.
# All patterns are raw strings so that regex escapes such as \d are passed to
# the regex engine verbatim instead of being treated as (invalid) string
# escapes by the Python parser.

# Six hex octets separated by ':' or '-'.
mac_re = re.compile(r'(([0-9a-fA-F][0-9a-fA-F][:-]){5})[0-9a-fA-F][0-9a-fA-F]')

# Dotted-quad IPv4 address, split into named groups:
#   prefix  - first two octets including their trailing dots
#   host    - last two octets
#   trailer - the single non-digit character following the address, or the
#             empty string when the address is the last thing in the input
#             (so an address at end of input without a newline still matches).
ipv4_re = re.compile(r'(?P<prefix>(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])[.](\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])[.])(?P<host>(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])[.](\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))(?P<trailer>\D|$)')

# IPv6 address in full or '::'-compressed form, optionally ending in an
# embedded dotted-quad, optionally followed by a '%...' suffix. Only
# lower-case hex digits are accepted.
# NOTE(review): the '(%.+)?' suffix is greedy and consumes the rest of the
# line after a '%'; confirm that this is intended.
ipv6_re = re.compile(r'((([0-9a-f]{1,4}:){7}([0-9a-f]{1,4}|:))|(([0-9a-f]{1,4}:){6}(:[0-9a-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9a-f]{1,4}:){5}(((:[0-9a-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9a-f]{1,4}:){4}(((:[0-9a-f]{1,4}){1,3})|((:[0-9a-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9a-f]{1,4}:){3}(((:[0-9a-f]{1,4}){1,4})|((:[0-9a-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9a-f]{1,4}:){2}(((:[0-9a-f]{1,4}){1,5})|((:[0-9a-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9a-f]{1,4}:){1}(((:[0-9a-f]{1,4}){1,6})|((:[0-9a-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9a-f]{1,4}){1,7})|((:[0-9a-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?')

# One or more dot-terminated labels followed by an alphabetic final label.
hostname_re = re.compile(r'([a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?[.])+[a-zA-Z]+')
def hash_mac(mac_match):
    """Replace a matched MAC address with a keyed MD5 pseudonym.

    Written to be used as a ``re.sub`` replacement callback. Reads ``key``
    and ``anonymize_oui`` from the module-level ``args`` namespace.

    Args:
        mac_match: Match object whose group 0 is the MAC address text.

    Returns:
        An upper-cased, dash-separated string. When ``args.anonymize_oui``
        is set, all six octets are taken from the MD5 digest of the key
        followed by the address exactly as it was written. Otherwise the
        first eight characters of ``str(EUI(address))`` are kept and the
        remaining three octets are taken from the MD5 digest of the key
        followed by ``EUI(address).ei``.
    """
    def keyed_md5_hex(text):
        # hashlib only accepts bytes. On Python 3 a str must be encoded
        # first; on Python 2 a str already is bytes and passes through
        # untouched, so digests there are unchanged.
        pieces = (args.key, text)
        payload = b''.join(
            piece if isinstance(piece, bytes) else piece.encode('utf-8')
            for piece in pieces
        )
        return hashlib.md5(payload).hexdigest()

    mac_orig = mac_match.group(0)
    if args.anonymize_oui:
        # NOTE(review): the address is hashed as written, so the same MAC
        # spelled with a different case or separator gets a different
        # pseudonym - confirm whether that is intended.
        digest = keyed_md5_hex(mac_orig)
        hashed_mac = '-'.join(digest[i:i + 2] for i in range(0, 12, 2))
    else:
        mac = EUI(mac_orig)
        digest = keyed_md5_hex(mac.ei)
        hashed_mac = '%s-%s-%s-%s' % (str(mac)[0:8], digest[0:2],
                                      digest[2:4], digest[4:6])
    return hashed_mac.upper()
def cpa_ipv4(ipv4_match):
    """Return the anonymized form of a matched IPv4 address.

    Written to be used as a ``re.sub`` replacement callback for a pattern
    that defines the named groups ``prefix``, ``host`` and ``trailer``.
    The address is rebuilt from ``prefix`` + ``host`` and passed to the
    module-level ``cpa`` object's ``anonymize`` method; the ``trailer``
    text is appended to the result unchanged.

    Args:
        ipv4_match: Match object providing the three named groups.

    Returns:
        The value returned by ``cpa.anonymize`` followed by the trailer.
    """
    prefix, host, trailer = ipv4_match.group('prefix', 'host', 'trailer')
    return cpa.anonymize(prefix + host) + trailer
def cpa_ipv6(ipv6_match):
    """Return the anonymized form of a matched IPv6 address.

    Written to be used as a ``re.sub`` replacement callback. The whole
    matched text (group 0) is handed to the module-level ``cpa`` object's
    ``anonymize`` method and its result is returned as-is.

    Args:
        ipv6_match: Match object whose group 0 is the address text.

    Returns:
        Whatever ``cpa.anonymize`` returns for the matched text.
    """
    return cpa.anonymize(ipv6_match.group(0))
def hash_hostname(hostname_match):
    """Replace a matched hostname with a keyed MD5 pseudonym.

    Written to be used as a ``re.sub`` replacement callback. Reads ``key``
    from the module-level ``args`` namespace.

    Args:
        hostname_match: Match object whose group 0 is the hostname text.

    Returns:
        ``'hostname-<md5 hex digest>.nonexistentdomain'`` where the digest
        is computed over the key followed by the hostname as written.
    """
    hostname_orig = hostname_match.group(0)
    # hashlib only accepts bytes. On Python 3 a str must be encoded first;
    # on Python 2 a str already is bytes and passes through untouched, so
    # digests there are unchanged.
    payload = b''.join(
        piece if isinstance(piece, bytes) else piece.encode('utf-8')
        for piece in (args.key, hostname_orig)
    )
    hashed_hostname = hashlib.md5(payload).hexdigest()
    return 'hostname-' + hashed_hostname + '.nonexistentdomain'
if __name__ == '__main__':
    # Command-line entry point: copy the input to the output line by line,
    # rewriting MAC addresses, IPv4 addresses, IPv6 addresses and hostnames
    # (in that order) through the module-level callbacks. `args` and `cpa`
    # are deliberately module-level names because those callbacks read them.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile')
    parser.add_argument('-o', '--outfile')
    parser.add_argument('-k', '--key',
                        default=''.join([chr(x) for x in range(0,32)]),
                        help='A key used for AES and padding (32bytes)')
    parser.add_argument('--anonymize-oui', action='store_true',
                        help='Anonymize the OUI part')
    args = parser.parse_args()

    # Build the anonymizer before opening any file, so that a failure here
    # cannot leave a freshly truncated output file behind.
    cpa = CryptoPAn(args.key)

    # read from stdin unless infile is specified.
    stream_in = open(args.infile, 'r') if args.infile else sys.stdin
    try:
        # write to stdout unless outfile is specified.
        stream_out = open(args.outfile, 'w') if args.outfile else sys.stdout
        try:
            # iter(readline, '') keeps the original one-line-at-a-time
            # reads and stops when readline returns the empty string.
            for line in iter(stream_in.readline, ''):
                line = mac_re.sub(hash_mac, line)
                line = ipv4_re.sub(cpa_ipv4, line)
                line = ipv6_re.sub(cpa_ipv6, line)
                line = hostname_re.sub(hash_hostname, line)
                stream_out.write(line)
        finally:
            # Close only what this script opened; leave stdout alone.
            if stream_out is not sys.stdout:
                stream_out.close()
    finally:
        # Close only what this script opened; leave stdin alone.
        if stream_in is not sys.stdin:
            stream_in.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment