Skip to content

Instantly share code, notes, and snippets.

@andreyfedoseev
Created July 12, 2012 14:20
Show Gist options
  • Save andreyfedoseev/3098411 to your computer and use it in GitHub Desktop.
Save andreyfedoseev/3098411 to your computer and use it in GitHub Desktop.
Script to convert ArtsEdge XML data to CSV
import csv
from lxml import etree
# Record fields exported to the CSV. run() writes this tuple as the header row
# and emits each record's values in the same order.
FIELDS = (
    "Ages",
    "Assessments",
    "Audiences",
    "CreatedDate",
    "Description",
    "Duration",
    "Grades",
    "Groupings",
    "Keywords",
    "Languages",
    "Partner",
    "PhysicalSpaces",
    "RelatedURLs",
    "RequiredTechnologies",
    "ResourceTypes",
    "Subjects",
    "TeachingMethods",
    "Title",
    "URL",
    "WorldRegions",
)
def _nested_texts(field):
    """Return the text of every grandchild element of *field*.

    Elements without text are skipped so the result can always be joined
    into a single string.
    """
    texts = []
    for item in field:
        for leaf in item:
            if leaf.text is not None:
                texts.append(leaf.text)
    return texts


def run(xml_path="ArtsEdgeData20120625.xml",
        csv_path="ArtsEdgeData20120625.csv"):
    """Convert the ArtsEdge XML export into a CSV file.

    Every child element of the XML root is treated as one record and becomes
    one CSV row. Columns follow ``FIELDS``; multi-valued fields are joined
    with ``|`` and fields missing from a record are written as empty strings.

    Args:
        xml_path: Path of the XML file to read.
        csv_path: Path of the CSV file to write (overwritten if it exists).
    """
    import sys

    # Fields whose values sit two levels below the field element
    # (field -> item -> text element).
    # "Subjects" is listed in FIELDS but had no branch of its own, while
    # "ResourceTypes" was handled twice; it is read with the same two-level
    # traversal as its siblings.
    # NOTE(review): confirm the <Subjects> layout against the data file.
    list_fields = frozenset((
        "Ages",
        "Assessments",
        "Audiences",
        "Grades",
        "Groupings",
        "Keywords",
        "Languages",
        "PhysicalSpaces",
        "RelatedURLs",
        "RequiredTechnologies",
        "ResourceTypes",
        "Subjects",
        "TeachingMethods",
        "WorldRegions",
    ))
    # Fields whose value is the element's own text.
    text_fields = frozenset((
        "CreatedDate",
        "Description",
        "Duration",
        "Title",
        "URL",
    ))

    # Hand lxml raw bytes so it can apply the document's own encoding
    # declaration instead of a platform-dependent text decoding.
    with open(xml_path, "rb") as xml_file:
        root = etree.XML(xml_file.read())

    data = []
    for record in root:
        record_data = {}
        for field in record:
            tag = field.tag
            if tag in list_fields:
                record_data[tag] = _nested_texts(field)
            elif tag in text_fields:
                record_data[tag] = field.text
            elif tag == "Partner":
                # Partner holds a single entry; only its <Value> child is kept.
                for child in field:
                    if child.tag == "Value":
                        record_data["Partner"] = child.text
        data.append(record_data)

    # Python 2's csv module writes byte strings to a binary file, whereas
    # Python 3's writes text and needs newline="" to avoid doubled line ends.
    is_py2 = sys.version_info[0] < 3
    if is_py2:
        csv_file = open(csv_path, "wb")
    else:
        csv_file = open(csv_path, "w", newline="", encoding="utf-8")

    with csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(FIELDS)
        for record_data in data:
            row = []
            for name in FIELDS:
                value = record_data.get(name) or u""
                if isinstance(value, list):
                    value = u"|".join(value)
                if is_py2:
                    value = value.encode("utf-8")
                row.append(value)
            writer.writerow(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment