Skip to content

Instantly share code, notes, and snippets.

@akshaybabloo
Last active August 10, 2017 23:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akshaybabloo/03e998ec97456bf187b58eff26123e0d to your computer and use it in GitHub Desktop.
Save akshaybabloo/03e998ec97456bf187b58eff26123e0d to your computer and use it in GitHub Desktop.
NeuCube form submission XML parser
import xmltodict
import pandas as pd
import dateutil.parser
def format_date(dt):
    """
    Convert a date/time string into a ``D-M-Y`` date string.

    The input is parsed with ``dateutil.parser.parse``; the day, month and
    year are written as plain integers (no zero padding), joined by hyphens.

    Parameters
    ----------
    dt: str
        String holding a date and time.

    Returns
    -------
    dt: str
        Date formatted as ``day-month-year``, e.g. ``5-3-2017``.
    """
    parsed = dateutil.parser.parse(dt)
    parts = (parsed.day, parsed.month, parsed.year)
    return '-'.join(str(part) for part in parts)
def _as_list(node):
    """Normalise an xmltodict child node to a list: None -> [], single node -> [node]."""
    if node is None:
        return []
    if isinstance(node, list):
        return node
    return [node]


def parse(path, file_name):
    """
    Parses the KEDRI download submission form to relevant data and writes it to a CSV file.

    Every ``<submission>`` under the ``<submissions>`` root becomes one row
    holding its formatted ``time`` attribute plus the text of the ``text_q``
    elements named ``Name``, ``Company/Institute``, ``Email`` and ``Phone``.
    A field that is absent or has no text is stored as ``None``. Rows are
    de-duplicated on ``Name`` (first occurrence kept) and written with a
    1-based index.

    Parameters
    ----------
    path: str
        Path of the XML file.
    file_name: str
        File name to be saved as CSV.

    Raises
    ------
    KeyError
        If the document has no ``submissions`` root or a submission has no
        ``time`` attribute.
    """
    fields = ('Name', 'Company/Institute', 'Email', 'Phone')

    # Read raw bytes so the XML parser honours the document's own encoding
    # declaration instead of the platform's default text encoding; the
    # ``with`` block guarantees the handle is closed even if parsing fails.
    with open(path, 'rb') as file:
        content = xmltodict.parse(file.read())

    # An empty <submissions/> element is parsed as None.
    root = content['submissions'] or {}

    rows = []
    # xmltodict yields a single mapping (not a list) when an element occurs
    # only once, so both levels are normalised to lists before iterating.
    for submission in _as_list(root.get('submission')):
        # Start every row with all fields present so a submission that lacks
        # a question can never shift the columns out of alignment.
        row = dict.fromkeys(fields)
        row['Date'] = format_date(submission['@time'])

        seen = set()
        for text_q in _as_list(submission.get('text_q')):
            # Elements without attributes are parsed as plain strings and
            # carry no name to match on.
            if not isinstance(text_q, dict):
                continue
            name = text_q.get('@name')
            # Only the first occurrence of a field counts, keeping exactly
            # one value per column for each submission.
            if name in fields and name not in seen:
                seen.add(name)
                row[name] = text_q.get('#text')

        rows.append(row)

    df = pd.DataFrame(rows, columns=['Date', 'Name', 'Company/Institute', 'Phone', 'Email'])
    df = df.drop_duplicates(subset=['Name'])
    df.index += 1  # 1-based row labels in the CSV
    df.to_csv(file_name)
if __name__ == '__main__':
    # Script entry point: convert the submission log in the current working
    # directory into the CSV report.
    parse(path='submission_log.xml', file_name='Kedri.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment