Skip to content

Instantly share code, notes, and snippets.

@justinvw
Created April 19, 2011 09:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save justinvw/927047 to your computer and use it in GitHub Desktop.
Save justinvw/927047 to your computer and use it in GitHub Desktop.
Simple script to convert a CSV file to XML
#!/usr/bin/env python
# encoding: utf-8
"""
csv2xml.py
Created by Justin van Wees on 2011-04-18.
"""
import sys
import os
import string
import re
import csv
import libxml2
VERSION = '0.1 (2011-04-18)'
class ConvertToXML(object):
    """Convert a CSV file into an XML document.

    The first CSV row is used as the header row: each header is normalised
    by ``parse_headers`` and becomes the element name for the matching
    column. Every following row becomes one record element under the root.

    Constructing an instance performs the whole conversion. The result is
    kept in ``self.xml`` and is either written to ``dest_file`` or printed
    to standard output when no destination is given.
    """

    def __init__(self, options, source_csv, dest_file=None):
        """Run the conversion.

        Args:
            options: Object exposing ``delimiter``, ``quotechar``,
                ``xml_root`` and ``xml_record`` attributes.
            source_csv: Path of the CSV file to read.
            dest_file: Optional path of the XML file to write. When it is
                falsy the XML is printed instead.
        """
        self.csv = self.parse_csv(filename=source_csv,
                                  delimiter=options.delimiter,
                                  quotechar=options.quotechar)
        # An empty CSV file has no header row; fall back to "no columns"
        # instead of failing with an IndexError.
        header_row = self.csv[0] if self.csv else []
        self.headers = self.parse_headers(header_row)
        self.xml = self.create_xml(root_element=options.xml_root,
                                   record_element=options.xml_record,
                                   headers=self.headers, csv=self.csv)
        if dest_file:
            self.save(dest_file, self.xml)
        else:
            print(self.xml)

    def parse_csv(self, filename, delimiter, quotechar):
        """Read ``filename`` and return its rows as a list of lists.

        The ``delimiter`` and ``quotechar`` are registered as the csv
        dialect named ``'custom'``, and that dialect is the one used to
        parse the file.
        """
        csv.register_dialect('custom', delimiter=delimiter,
                             quotechar=quotechar)
        # The dialect must be handed to the reader explicitly; otherwise the
        # default 'excel' dialect is used and the options have no effect.
        with open(filename, mode='r') as csv_file:
            return list(csv.reader(csv_file, dialect='custom'))

    def parse_headers(self, headers):
        """Turn raw header cells into lower-case, underscore-separated names.

        All characters in ``string.punctuation`` are removed (this includes
        underscores already present in the header), surrounding whitespace
        is stripped, the text is lower-cased and remaining spaces are
        replaced by underscores.
        """
        punct = set(string.punctuation)
        parsed_headers = []
        for head in headers:
            head = ''.join(ch for ch in head if ch not in punct)
            head = head.strip().lower()
            parsed_headers.append(head.replace(' ', '_'))
        return parsed_headers

    @staticmethod
    def _field(record, index):
        """Return ``record[index]``, or ``''`` when the row is too short."""
        return record[index] if index < len(record) else ''

    @staticmethod
    def _to_text(value):
        """Decode UTF-8 byte strings; return any other value unchanged."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def create_xml(self, root_element, record_element, headers, csv):
        """Build the XML document with libxml2 and return it serialized.

        Args:
            root_element: Name of the document's root element.
            record_element: Name of the element created for each data row.
            headers: Element names, one per column.
            csv: Parsed rows; the first row (the headers) is skipped.

        Returns:
            The document serialized by libxml2 with UTF-8 encoding and
            pretty-printing enabled.
        """
        doc = libxml2.newDoc(version='1.0')
        try:
            root = doc.newChild(None, root_element, None)
            for record in csv[1:]:
                this_record = root.newChild(None, record_element, None)
                for index, header in enumerate(headers):
                    value = self._field(record, index)
                    if value:
                        # newTextChild escapes XML special characters;
                        # newChild would interpret '&' in the cell text as
                        # the start of an entity reference.
                        this_record.newTextChild(None, header, value)
                    else:
                        this_record.newChild(None, header, None)
            return doc.serialize(encoding='utf-8', format=1)
        finally:
            # Release the underlying libxml2 document explicitly.
            doc.freeDoc()

    def create_xml2(self, root_element, record_element, headers, csv):
        """Alternative builder based on ``xml.dom.minidom``.

        Unlike ``create_xml`` this method prints the pretty-printed
        document instead of returning it. It is not called by ``__init__``.
        """
        # Imported here because the module does not import minidom itself.
        from xml.dom.minidom import Document

        doc = Document()
        root = doc.createElement(root_element)
        doc.appendChild(root)
        for record in csv[1:]:
            this_record = doc.createElement(record_element)
            for index, header in enumerate(headers):
                this_item = doc.createElement(self._to_text(header))
                value = self._field(record, index)
                if value:
                    this_item.appendChild(
                        doc.createTextNode(self._to_text(value)))
                this_record.appendChild(this_item)
            root.appendChild(this_record)
        print(doc.toprettyxml(encoding="UTF-8"))

    def save(self, filename, xml):
        """Write the ``xml`` string to ``filename``, replacing its contents."""
        with open(filename, 'w') as xml_file:
            xml_file.write(xml)
if __name__ == '__main__':
    # Command-line entry point: parse the options, then let ConvertToXML do
    # the conversion. The class writes DEST_XML itself when it is given and
    # prints the XML to standard output otherwise.
    from optparse import OptionParser

    parser = OptionParser(version="%prog " + VERSION,
                          usage='%prog [options] SOURCE_CSV DEST_XML')
    parser.disable_interspersed_args()
    parser.add_option('-d', '--delimiter', dest='delimiter', type='str', default=',',
                      help="One-char string used to separate fields in the CSV file")
    parser.add_option('-q', '--quote-char', dest='quotechar', type='str',
                      default='"', help="One-char string used to quote fields that contain 'special' chars")
    parser.add_option('-r', '--root-element', dest="xml_root", type='str', default='root',
                      help="Name of the root element")
    parser.add_option('-i', '--record-element', dest="xml_record", type='str',
                      default='record', help="Name of the record elements")
    (options, args) = parser.parse_args()

    if not args:
        # Without this check a missing SOURCE_CSV surfaces as an IndexError.
        parser.error('no SOURCE_CSV file given')

    if len(args) > 1:
        ConvertToXML(options, args[0], args[1])
    else:
        # The constructor already prints the XML; printing the instance as
        # well would append the object's repr to the output.
        ConvertToXML(options, args[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment