Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save VladRassokhin/15962e8e38696c423dc96f126a7b26d4 to your computer and use it in GitHub Desktop.
Save VladRassokhin/15962e8e38696c423dc96f126a7b26d4 to your computer and use it in GitHub Desktop.
Convert Intel ARK Product Specification Comparison XML into CSV
# coding: utf-8
import csv
import datetime
import re
import sys

import xmltodict
# Placeholder written wherever a product has no value for a specification.
MISSING = u'N/A'

# The csv module wants byte strings on Python 2 and text on Python 3.
_PY2 = sys.version_info[0] == 2


def output_path_for(in_file_name):
    """Return the CSV path that corresponds to *in_file_name*.

    A trailing ``.xml`` is replaced by ``.csv``.  Any other name gets
    ``.csv`` appended, so the output can never be the input file itself
    (opening it for writing would truncate the data being converted).
    """
    out_file_name = re.sub(r'\.xml$', '.csv', in_file_name)
    if out_file_name == in_file_name:
        out_file_name = in_file_name + '.csv'
    return out_file_name


def _cell_text(cell):
    """Return the text held by one parsed ``ss:Cell``, or None if it has none."""
    if not cell:
        return None
    if not isinstance(cell, dict):
        # Unexpected shape: report its type so the export can be inspected.
        print(type(cell).__name__)
        return None
    data = cell.get('ss:Data')
    if isinstance(data, dict):
        # Element with attributes: the character data sits under '#text'.
        return data.get('#text')
    # Either a bare string (element without attributes) or None.
    return data


def collect_columns(rows):
    """Transpose the parsed ``ss:Row`` entries into CSV columns.

    Every row whose ``ss:Cell`` is a list becomes one column: the text of
    its first cell is the column header and the remaining cells are the
    values, one per compared item, in cell order.  Cells without text are
    recorded as ``MISSING`` so that later values keep their position.

    Rows whose ``ss:Cell`` is not a list (a single cell, presumably a section
    heading, or no cell at all) are ignored.  If the first multi-cell row has
    no header text its values fill the leading ``Name`` column (presumably
    the product names -- verify against a real export); later rows without
    header text are dropped.

    Args:
        rows: the value found under ``ss:Row``: a list of dicts, a single
            dict when the table has one row, or None.

    Returns:
        A list of ``(header, values)`` tuples in table order, always starting
        with ``('Name', [...])``.  Repeated headers are kept as separate
        columns.
    """
    if isinstance(rows, dict):
        rows = [rows]

    name_values = []
    columns = [(u'Name', name_values)]
    seen_table_row = False

    for row in rows or ():
        cells = row.get('ss:Cell') if isinstance(row, dict) else None
        if not isinstance(cells, list) or not cells:
            continue

        label = _cell_text(cells[0])
        values = []
        for cell in cells[1:]:
            text = _cell_text(cell)
            values.append(MISSING if text is None else text)

        if label is not None:
            columns.append((label, values))
        elif not seen_table_row:
            name_values.extend(values)
        seen_table_row = True

    return columns


def _csv_cells(values):
    """Return *values* in the string type the running csv module accepts."""
    if _PY2:
        return [value.encode('utf-8') for value in values]
    return list(values)


def write_csv(out_file, columns):
    """Write *columns* to *out_file* as CSV, one line per compared item.

    The first line holds the column headers.  Columns shorter than the
    longest one are padded with ``MISSING``.  Quoting is left to the csv
    module, so commas inside headers or values survive intact.

    Args:
        out_file: a writable file object (binary on Python 2, text on
            Python 3).
        columns: ``(header, values)`` tuples as returned by
            ``collect_columns``.

    Returns:
        The number of data lines written (the header is not counted).
    """
    # '\n' keeps the line endings this script has always produced.
    writer = csv.writer(out_file, lineterminator='\n')
    line_count = max([len(values) for _, values in columns] or [0])

    writer.writerow(_csv_cells([header for header, _ in columns]))
    for index in range(line_count):
        line = [
            values[index] if index < len(values) else MISSING
            for _, values in columns
        ]
        writer.writerow(_csv_cells(line))
    return line_count


def _open_csv_for_writing(path):
    """Open *path* the way the csv module of the running Python expects."""
    if _PY2:
        return open(path, 'wb')
    return open(path, 'w', encoding='utf-8', newline='')


def main(argv=None):
    """Convert the XML file named on the command line into a CSV file.

    Args:
        argv: argument vector; defaults to ``sys.argv``.

    Returns:
        The process exit status: 0 on success, 1 when no input file was given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("Usage: {} intel_ark.xml".format(argv[0]))
        return 1

    in_file_name = argv[1]
    out_file_name = output_path_for(in_file_name)

    print("Loading {}...".format(in_file_name))
    # Binary mode lets the XML parser honour the encoding declared in the file.
    with open(in_file_name, 'rb') as in_file:
        intel_dict = xmltodict.parse(in_file)
    rows = intel_dict['ss:Workbook']['ss:Worksheet']['ss:Table']['ss:Row']
    columns = collect_columns(rows)

    print("Saving to {}".format(out_file_name))
    with _open_csv_for_writing(out_file_name) as out_file:
        converted = write_csv(out_file, columns)
    print("Finished, converted {} rows".format(converted))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment