Skip to content

Instantly share code, notes, and snippets.

@minhtc
Last active Mar 10, 2020
Embed
What would you like to do?
Python 3 script to convert XML file rows into csv and HTML files
import sys
import pandas as pd
import xml.etree.ElementTree as et
# Left-justify the column headers of tables rendered by DataFrame.to_html().
pd.set_option('colheader_justify', 'left')

# Page skeleton for the HTML reports; {table} receives the rendered table.
# The charset is declared because the reports are written as UTF-8 below.
html_string = '''<html><meta charset="utf-8"/><link rel="stylesheet" type="text/css" href="style.css"/><body>{table}</body></html>'''

# Column order of the CSV export; the first two are the running number and
# the document's docid attribute, the rest are child tags of the info element.
full_columns = ['No', 'docid', 'ordner', 'hauptordner', 'bemerkung', 'revision', 'dokumentenart', 'bu-monat', 'bu-jahr', 'firma']
# Subset of columns shown in the HTML reports.
compact_columns = ['No', 'docid', 'hauptordner', 'bemerkung', 'dokumentenart', 'bu-monat', 'bu-jahr', 'firma']

# Input file used when no path is given on the command line.
default_filename = "./export.xml"


def field_text(info, tag):
    """Return the text of the first child *tag* of *info*.

    Returns None when the child is missing or has no text, instead of
    raising AttributeError on a missing element.
    """
    node = info.find(tag)
    return None if node is None else node.text


def collect_rows(xroot):
    """Turn the children of *xroot* into table rows.

    Only children carrying a ``docid`` attribute are processed; they are
    numbered 1, 2, ... in document order.

    Returns:
        A tuple ``(rows, trash_rows, export_file_name)``. Both row lists hold
        lists ordered like ``full_columns``; ``trash_rows`` receives the
        documents whose classifyInfo element has ``trashed="true"``.
        ``export_file_name`` is ``<firma>-<bu-jahr><bu-monat>`` taken from the
        first processed document, or ``''`` when there is none.

    Raises:
        ValueError: if a document has no ``classifyInfos/classifyInfo``
            element or that element has no children.
    """
    rows = []
    trash_rows = []
    export_file_name = ''
    number = 0
    for document in xroot:
        if 'docid' not in document.attrib:
            continue
        number += 1
        docid = document.attrib['docid']

        classify_info = document.find('classifyInfos/classifyInfo')
        # Explicit len() check: Element truthiness is deprecated.
        if classify_info is None or len(classify_info) == 0:
            raise ValueError(f"document {docid!r} has no classifyInfo entry")
        info = classify_info[0]

        if not export_file_name:
            # Missing parts become '' so the name can still be assembled.
            export_file_name = (
                (field_text(info, 'firma') or '')
                + '-'
                + (field_text(info, 'bu-jahr') or '')
                + (field_text(info, 'bu-monat') or '')
            )

        row = [number, docid]
        row.extend(field_text(info, tag) for tag in full_columns[2:])

        if classify_info.attrib.get('trashed') == 'true':
            trash_rows.append(row)
        else:
            rows.append(row)
    return rows, trash_rows, export_file_name


def write_report(frame, stem):
    """Write *frame* to ``<stem>.csv`` (all columns) and ``<stem>.html``
    (compact columns, wrapped in ``html_string``)."""
    frame.to_csv(stem + '.csv', index=False)
    table = frame.to_html(index=False, columns=compact_columns)
    with open(stem + '.html', 'w', encoding='utf-8') as f:
        f.write(html_string.format(table=table))


def main(argv=None):
    """Convert the XML export into export-*/trash-* CSV and HTML files.

    *argv* holds the command-line arguments without the program name and
    defaults to ``sys.argv[1:]``; its first item, when present, is the input
    path, otherwise ``default_filename`` is read.
    """
    args = sys.argv[1:] if argv is None else argv
    filename = args[0] if args else default_filename
    print(f"Processing {filename}")

    xroot = et.parse(filename).getroot()
    rows, trash_rows, export_file_name = collect_rows(xroot)

    # Build each frame once from the collected rows; DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    df = pd.DataFrame(rows, columns=full_columns)
    df_trash = pd.DataFrame(trash_rows, columns=full_columns)

    write_report(df, 'export-' + export_file_name)
    write_report(df_trash, 'trash-' + export_file_name)
    print('done')


if __name__ == "__main__":
    main()
@minhtc
Copy link
Author

minhtc commented Oct 27, 2019

Create a file named style.css in the same folder as the generated HTML files, with the following content:

/* Stylesheet for the generated HTML reports, which link to it as style.css. */

/* Base table look: small Arial text, collapsed borders, light green frame. */
table {
  font-size: 14px;
  font-family: Arial;
  border-collapse: collapse;
  border: 1px solid #abd08d;
}

/* Header row: white text on green. */
table thead {
  background-color: #70ad46;
  color: #fff;
}

/* Cell padding; both selectors are scoped to tables like the other rules. */
table td,
table th {
  padding: 5px;
}

/* Smaller text for columns 3, 5, 6 and 7 (hauptordner, dokumentenart,
   bu-monat and bu-jahr in the script's compact column order). */
table th:nth-child(3),
table th:nth-child(5),
table th:nth-child(6),
table th:nth-child(7),
table td:nth-child(3),
table td:nth-child(5),
table td:nth-child(6),
table td:nth-child(7) {
  font-size: 12px;
}

/* Column 4 (bemerkung in the compact order) gets a fixed wider width. */
table td:nth-child(4) {
  width: 300px;
}

/* Column 6 (bu-monat in the compact order) is kept as narrow as possible. */
table td:nth-child(6) {
  width: 10px;
}

/* Zebra striping: light green background on even rows. */
table tr:nth-child(even) {
  background: #e2efdb;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment