Skip to content

Instantly share code, notes, and snippets.

@maxant
Last active November 6, 2017 20:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maxant/88811ccf2d3a31c7687b2fc19d64592f to your computer and use it in GitHub Desktop.
Save maxant/88811ccf2d3a31c7687b2fc19d64592f to your computer and use it in GitHub Desktop.
# Prerequisite: the "untangle" XML parsing package.
#   usage examples: https://github.com/stchris/untangle/blob/master/examples.py
#   install: C:\temp>c:\Python27\Scripts\pip.exe install untangle
# Run with: C:\temp\read_xml_out_csv>c:\Python27\python.exe read_xml_out_csv.py
import untangle
import os
import time
def now():
    """Return the current wall-clock time in milliseconds, as a float."""
    seconds_since_epoch = time.time()
    return seconds_since_epoch * 1000.0
# Parsed data, nested as: filename -> servlet name -> parameter name -> parameter value.
model = {}
# Every distinct parameter name encountered while parsing.
params = []
# Most recently seen parameter name; the value that follows it is stored under this key.
last_name = ""
# Start of the run, in milliseconds, used for the elapsed-time report at the end.
start = now()
# All XML files in the current working directory.
files = [entry for entry in os.listdir(".") if entry.endswith(".xml")]
# Parse every XML file and collect, per servlet, its parameter name/value pairs.
# The layout read here is <config> containing <servlet> elements, each with a
# <name> child plus <param_name> / <param_value> children; a value belongs to
# the closest <param_name> that precedes it within the same servlet.
for xml_file in files:
    print("parsing file %s" % xml_file)
    servlets = model[xml_file] = {}
    xml = untangle.parse(xml_file)
    for servlet in xml.config.servlet:
        servlet_params = servlets[servlet.name.cdata] = {}
        # Forget the name left over from the previous servlet (or file), so a
        # value that has no preceding name is never filed under an unrelated
        # parameter.
        last_name = None
        for child in servlet.children:
            if child._name == "param_name":
                last_name = child.cdata
                if last_name not in params:
                    params.append(last_name)
            elif child._name == "param_value":
                # A value without a preceding name cannot be attributed to any
                # column, so it is left out.
                if last_name is not None:
                    servlet_params[last_name] = child.cdata
# Columns are emitted in alphabetical order of parameter name.
params.sort()
def _csv_field(value):
    """Return value as a single CSV field, quoted only when it needs to be.

    A field containing a comma, a double quote or a line break is wrapped in
    double quotes with embedded double quotes doubled; any other value is
    returned unchanged.
    """
    for special in (",", '"', "\n", "\r"):
        if special in value:
            return '"' + value.replace('"', '""') + '"'
    return value


# Rows are collected in a list and joined once at the end instead of growing
# one string piece by piece. Every row, the header included, ends with a
# trailing comma (i.e. a final empty column).
rows = []

# build header
header = ["filename", "servlet name"] + params
rows.append(",".join(_csv_field(cell) for cell in header) + ",\n")

# build rows: one per servlet, grouped by file, sorted by servlet name within a file
for xml_file in files:
    servlets = model[xml_file]
    for servlet in sorted(servlets):
        cells = [xml_file, servlet]
        # A parameter the servlet does not define yields an empty cell.
        cells.extend(servlets.get(param, "") for param in params)
        rows.append(",".join(_csv_field(cell) for cell in cells) + ",\n")

lines = "".join(rows)
# write output
# The whole CSV text is written in one call; the with-block closes the file
# even when the write raises.
output = "output.csv"
with open(output, "w") as text_file:
    text_file.write(lines)
print("output written to %s" % output)
# Elapsed time since `start`, in whole milliseconds.
print("finished in %dms" % (now() - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment