Skip to content

Instantly share code, notes, and snippets.

@ahlusar1989
Created August 31, 2015 16:43
Show Gist options
  • Save ahlusar1989/103afc6a435daa2b6e91 to your computer and use it in GitHub Desktop.
Save ahlusar1989/103afc6a435daa2b6e91 to your computer and use it in GitHub Desktop.
from glob import iglob
import csv
from collections import *
import os, sys
import csv
import itertools
from csv import reader
import xml.etree.cElementTree as ElementTree
from xml.etree.ElementTree import XMLParser
##### Heavy Lifting
def flatten_list(aList, prefix=''):
for i, element in enumerate(aList, 1):
eprefix = "{}{}".format(prefix, i)
if element:
# treat like dict
if len(element) == 1 or element[0].tag != element[1].tag:
yield from flatten_dict(element, eprefix)
# treat like list
elif element[0].tag == element[1].tag:
yield from flatten_list(element, eprefix)
elif element.text:
text = element.text.strip()
if text:
yield eprefix[:].rstrip('.'), element.text
def flatten_dict(parent_element, prefix=''):
prefix = prefix + parent_element.tag
if parent_element.items():
for k, v in parent_element.items():
yield prefix + k, v
for element in parent_element:
eprefix = element.tag
if element:
# treat like dict - we assume that if the first two tags
# in a series are different, then they are all different.
if len(element) == 1 or element[0].tag != element[1].tag:
yield from flatten_dict(element, prefix=prefix)
# treat like list - we assume that if the first two tags
# in a series are the same, then the rest are the same.
else:
# here, we put the list in dictionary; the key is the
# tag name the list elements all share in common, and
# the value is the list itself
yield from flatten_list(element, prefix=eprefix)
# if the tag has attributes, add those to the dict
if element.items():
for k, v in element.items():
yield eprefix+k
# this assumes that if you've got an attribute in a tag,
# you won't be having any text. This may or may not be a
# good idea -- time will tell. It works for the way we are
# currently doing XML configuration files...
elif element.items():
for k, v in element.items():
yield eprefix+k
# finally, if there are no child tags and no attributes, extract
# the text
else:
yield eprefix, element.text
def makerows(pairs):
headers = []
columns = {}
for k, v in pairs:
if k in columns:
columns[k].extend((v,))
else:
headers.append(k)
columns[k] = [k, v]
m = max(len(c) for c in columns.values())
for c in columns.values():
c.extend('' for i in range(len(c), m))
L = [columns[k] for k in headers]
rows = list(zip(*L))
return rows
def pairs_from_root(element):
for k, v in flatten_dict(element):
kk = k.rsplit('.', 1)[-1]
yield kk, v
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment