Skip to content

Instantly share code, notes, and snippets.

@insin
Created March 7, 2011 14:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save insin/858577 to your computer and use it in GitHub Desktop.
Save insin/858577 to your computer and use it in GitHub Desktop.
Anonymise XML text by XPath
# usage: anon.py input.xml output.xml
import random
import string
import sys
from lxml import etree
# Paths selecting the elements whose text gets anonymised; ``main`` hands each
# entry to ``iterfind`` on the parsed document.
# NOTE(review): these look like placeholder paths - presumably meant to be
# edited for the document being processed; confirm before use.
XPATHS = [
    '/Path/To/Elements/*',
    '/Another/Path/',
]
# Character classes used by ``anonymise``: a character found in one of these
# groups is replaced by a random member of the same group, so lowercase stays
# lowercase, uppercase stays uppercase and digits stay digits.
TEXT_GROUPS = [
    string.ascii_lowercase,
    string.ascii_uppercase,
    string.digits,
]
def anonymise(input, text_groups=None):
    """Return a copy of *input* with each grouped character randomised.

    Every character that belongs to one of the text groups is replaced by a
    random character drawn from that same group; characters that belong to
    no group (punctuation, whitespace, ...) are copied through unchanged.
    The result therefore has the same length as *input*.

    Args:
        input: The text to anonymise.
        text_groups: Optional sequence of character groups to use instead of
            the module-level ``TEXT_GROUPS``. Groups are tried in order and
            the first one containing the character wins.

    Returns:
        The anonymised string.
    """
    groups = TEXT_GROUPS if text_groups is None else text_groups
    output = []
    for char in input:
        for text_group in groups:
            if char in text_group:
                output.append(random.choice(text_group))
                break
        else:
            # for/else: runs only when no group matched (no ``break``), so
            # the character is kept exactly as it was.
            output.append(char)
    return ''.join(output)
def main(input_file, output_file):
    """Anonymise the text of selected elements and write the result.

    Parses *input_file*, visits every element matched by each entry of
    ``XPATHS`` and, when the element has non-empty text, replaces that text
    with its anonymised form. Identical source texts always receive the same
    replacement within one run. The modified tree is written to
    *output_file*.
    """
    tree = etree.parse(input_file)
    # Source text -> replacement, so repeated values stay consistent.
    replacements = {}
    # NOTE(review): lxml's ``iterfind`` takes ElementPath expressions rather
    # than full XPath - confirm the XPATHS entries are valid for it.
    for xpath in XPATHS:
        for element in tree.iterfind(xpath):
            original = element.text
            if not original:
                # Nothing to anonymise on empty or missing text.
                continue
            try:
                replacement = replacements[original]
            except KeyError:
                replacement = replacements[original] = anonymise(original)
            element.text = replacement
    tree.write(output_file)
if __name__ == '__main__':
    # Exactly two arguments are required (input path, output path); exit with
    # a usage message instead of a TypeError traceback from ``main(*...)``.
    if len(sys.argv) != 3:
        sys.exit('usage: anon.py input.xml output.xml')
    main(*sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment