-
-
Save wanix/473463a291060151bde3640c760e429e to your computer and use it in GitHub Desktop.
A script to anonymize emails in a SQL dump
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# coding: utf-8
import argparse
import re
from uuid import uuid4
from hashlib import sha256

# Random salt generated once at import time (a fresh UUID4 string, encoded to
# bytes). anonymize() mixes it into every hash, so a given address maps to the
# same pseudonym within one run but to a different one on the next run.
SALT = str(uuid4()).encode()
def load_file(file_path):
    """Read a UTF-8 text file and return its whole content as one string.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded content of the file.
    """
    with open(file_path, encoding='utf-8') as f:
        return f.read()
def anonymize(email, domain):
    """Return a pseudonymous address for *email* inside *domain*.

    The local part of the result is the hexadecimal SHA-256 digest of the
    module-level ``SALT`` followed by the lower-cased address, so two
    spellings of one address that differ only in case get the same result.

    Args:
        email: The original e-mail address.
        domain: Domain to put after the ``@`` in the result.

    Returns:
        A string of the form ``<hex digest>@<domain>``.
    """
    digest = sha256()
    digest.update(SALT)
    digest.update(email.lower().encode())
    return '{}@{}'.format(digest.hexdigest(), domain)
def replace_emails(text, fake_domain, allow):
    """Replace every e-mail address found in *text* with an anonymized one.

    Addresses whose domain is listed in *allow* are kept and only
    lower-cased; every other address is replaced by
    ``anonymize(address, fake_domain)``. Each substitution is printed to
    standard output.

    Args:
        text: The text to scan.
        fake_domain: Domain used for the generated replacement addresses.
        allow: Iterable of domains that must not be anonymized; compared
            case-insensitively.

    Returns:
        *text* with the addresses substituted.
    """
    # The matched domain is lower-cased before the lookup, so normalise the
    # allow-list the same way; otherwise "Example.COM" could never match.
    allowed = {domain.lower() for domain in allow}

    def replace(m):
        original = m.group(0)
        if m.group(2).lower() in allowed:
            out = original.lower()
        else:
            out = anonymize(original, fake_domain)
        print('replace {} => {}'.format(original, out))
        return out

    # Local part: '-' is accepted so a hyphenated name is replaced as a whole
    # instead of leaving the text before the hyphen in the output.
    # Domain: may contain dots but must not end with one, so the full stop
    # closing a sentence is neither consumed nor counted as part of the domain.
    e = re.compile(
        r'([a-zA-Z_.0-9+\-]{1,100})@([a-zA-Z_.0-9\-]{0,99}[a-zA-Z_0-9\-])')
    return e.sub(replace, text)
def write_out(text, file_path):
    """Write *text* to *file_path* as UTF-8, replacing any existing content.

    Args:
        text: The string to write.
        file_path: Path of the file to create or overwrite.
    """
    with open(file_path, mode='w', encoding='utf-8') as f:
        f.write(text)
def main(args):
    """Anonymize the input file and write the result to the output file.

    Args:
        args: Parsed command-line namespace providing ``input``, ``output``,
            ``fake_domain`` and ``allow``.
    """
    text = load_file(args.input)
    # argparse leaves an 'append' option as None when it was never given, so
    # fall back to an empty list instead of failing on ``None + [...]``.
    # Entries are lower-cased because replace_emails() compares them against
    # the lower-cased domain of each match.
    allow = {domain.lower() for domain in (args.allow or [])}
    # Addresses already in the fake domain are not hashed a second time.
    allow.add(args.fake_domain.lower())
    text = replace_emails(text, args.fake_domain, allow)
    write_out(text, args.output)
def parse():
    """Parse the command-line arguments of the script.

    Returns:
        The ``argparse.Namespace`` with the attributes ``input``, ``output``,
        ``fake_domain`` and ``allow`` (always a list, possibly empty).
    """
    parser = argparse.ArgumentParser(description="Transform all emails in a"
                                     " text file into fake email address")
    parser.add_argument('-i', '--input', help='Input file', required=True)
    parser.add_argument('-o', '--output', help='Output file', required=True)
    parser.add_argument('-d', '--fake-domain', default='example.com',
                        help='All re-writter emails will be in this domain.')
    # default=[] keeps ``allow`` a list even when -a is never passed; without
    # it argparse yields None and concatenating it with a list raises
    # TypeError.
    parser.add_argument('-a', '--allow', action='append', default=[],
                        help='Do not anonymize those domains.')
    return parser.parse_args()
if __name__ == '__main__':
    # Run the tool only when executed as a script, not when imported.
    args = parse()
    main(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment