Skip to content

Instantly share code, notes, and snippets.

@pilgrim2go
Forked from KarolBedkowski/etlutils.py
Created November 9, 2016 10:33
Show Gist options
  • Save pilgrim2go/6bcd57cf484a1c5917bdfea8a9be2847 to your computer and use it in GitHub Desktop.
Save pilgrim2go/6bcd57cf484a1c5917bdfea8a9be2847 to your computer and use it in GitHub Desktop.
petl utilities: Unicode-aware CSV read/write helpers and an XML record loader
import csv
from xml.etree import ElementTree
from petl import tocsv, fromcsv, convertall
def tocsvunicode(table, filename, encoding='utf-8', **kwds):
    '''Write ``table`` to a CSV file with every cell encoded to a byte string.

    Every cell of every row yielded by ``table`` is converted with
    ``unicode(cell)`` and then encoded with ``encoding``; the encoded rows
    are handed to petl's ``tocsv``.

    :param table: iterable of rows, each row an iterable of cell values
    :param filename: destination, passed through unchanged to ``tocsv``
    :param encoding: codec name used to encode each cell (default 'utf-8')
    :param kwds: extra keyword arguments forwarded unchanged to ``tocsv``

    NOTE(review): ``unicode`` is the Python 2 builtin, so this helper only
    runs on Python 2. ``unicode(cell)`` decodes byte strings with the
    interpreter's default codec, so a non-ASCII ``str`` cell is expected to
    raise ``UnicodeDecodeError``, and a ``None`` cell is written as the text
    ``None`` -- confirm callers only pass data for which that is acceptable.
    '''
    # The outer generator keeps row production lazy; each individual row is
    # materialised as a list of encoded byte strings.
    encoded_rows = (
        [unicode(cell).encode(encoding) for cell in row]
        for row in table
    )
    tocsv(encoded_rows, filename, **kwds)
def fromcsvunicode(source=None, dialect=csv.excel, encoding='utf-8', **kwargs):
    '''Load a CSV source through petl and decode every cell to ``unicode``.

    The table is opened with petl's ``fromcsv`` and wrapped with
    ``convertall`` so that each value is passed through
    ``unicode(value, encoding)``.

    :param source: CSV source, passed through unchanged to ``fromcsv``
    :param dialect: csv dialect forwarded to ``fromcsv`` (default csv.excel)
    :param encoding: codec name used to decode each cell (default 'utf-8')
    :param kwargs: extra keyword arguments forwarded unchanged to ``fromcsv``
    :returns: whatever ``convertall`` returns for the decoded table

    NOTE(review): ``unicode`` is the Python 2 builtin, so this helper only
    runs on Python 2 and presumably expects ``fromcsv`` to yield byte
    strings -- confirm against the installed petl version.
    '''
    # ``dialect`` is passed by keyword so it can never be bound to a
    # different positional parameter of ``fromcsv``; the parameter order
    # after ``source`` is not the same in every petl release (TODO confirm
    # against the version in use).
    table = fromcsv(source, dialect=dialect, **kwargs)

    def decode_cell(value):
        # Decode a single cell using the caller-supplied codec.
        return unicode(value, encoding)

    return convertall(table, decode_cell)
def fromxml_record(filename, row_tag, col_tag, col_key_attr):
    '''Yield one dict per record element found in an XML document.

    The document is parsed in full with ``ElementTree.parse``; ``row_tag``
    is then searched for starting at the document root, and ``col_tag`` is
    searched for inside each matching row. Expected layout::

        <root>
            <row_tag>
                <col_tag col_key_attr="column">value</col_tag>
                <col_tag col_key_attr="column">value</col_tag>
                ...
            </row_tag>
            <row_tag>
                ...
            </row_tag>
            ...
        </root>

    Example::

        look(fromdicts(list(fromxml_record(...))))

    :param filename: file name or file object accepted by
        ``ElementTree.parse``
    :param row_tag: tag (or ElementTree path) selecting the record elements
    :param col_tag: tag (or ElementTree path) selecting the column elements
        within a record
    :param col_key_attr: name of the attribute on each column element whose
        value becomes the dict key
    :returns: generator of dicts mapping the attribute value to the
        element's text (``None`` when the element has no text)
    :raises KeyError: when a column element lacks ``col_key_attr``
    '''
    tree = ElementTree.parse(filename)
    for row in tree.iterfind(row_tag):
        # If two columns share the same key, the later one wins.
        yield {
            col.attrib[col_key_attr]: col.text
            for col in row.findall(col_tag)
        }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment