Created
August 31, 2015 16:43
-
-
Save ahlusar1989/103afc6a435daa2b6e91 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from glob import iglob | |
import csv | |
from collections import * | |
import os, sys | |
import csv | |
import itertools | |
from csv import reader | |
import xml.etree.cElementTree as ElementTree | |
from xml.etree.ElementTree import XMLParser | |
##### Heavy Lifting
def flatten_list(aList, prefix=''):
    """Yield ``(key, value)`` pairs for a sequence of sibling elements.

    Each item of ``aList`` is numbered starting at 1 and that number is
    appended to ``prefix`` to build the item's key.

    * An item with children is expanded recursively: through
      ``flatten_dict`` when it has a single child or its first two children
      have different tags, otherwise through ``flatten_list``.
    * An item without children is emitted as ``(key, item.text)`` provided
      its text is not empty or whitespace-only. The text is yielded exactly
      as stored; stripping is only used to detect blank text.

    Args:
        aList: An iterable of ``xml.etree.ElementTree.Element``-like objects
            (typically a parent element whose children share one tag).
        prefix: String prepended to every generated key.

    Yields:
        ``(key, value)`` tuples.
    """
    for index, element in enumerate(aList, 1):
        eprefix = "{}{}".format(prefix, index)
        # Count children explicitly: testing the truth value of an Element
        # is deprecated in xml.etree.ElementTree.
        if len(element):
            if len(element) == 1 or element[0].tag != element[1].tag:
                # First two child tags differ (or only one child): treat
                # the item like a dict.
                yield from flatten_dict(element, eprefix)
            else:
                # First two child tags match: treat the item like a list.
                yield from flatten_list(element, eprefix)
        elif element.text and element.text.strip():
            yield eprefix, element.text
def flatten_dict(parent_element, prefix=''):
    """Yield ``(key, value)`` pairs for an element's attributes and children.

    The parent's tag is appended to ``prefix``; each attribute of the parent
    is emitted as ``(prefix + tag + attribute_name, attribute_value)``.
    Every direct child is then handled as follows:

    * A child with children of its own is expanded recursively: through
      ``flatten_dict`` when it has a single child or its first two children
      have different tags (it is assumed that all of them then differ),
      otherwise through ``flatten_list`` (it is assumed that all of them
      then share the tag). Afterwards the child's own attributes are
      emitted as ``(child_tag + attribute_name, attribute_value)``.
    * A childless child with attributes emits only those attributes; its
      text is ignored, on the assumption that a tag carrying attributes
      has no text.
    * A childless child without attributes emits ``(child_tag, child.text)``.

    Args:
        parent_element: An ``xml.etree.ElementTree.Element``-like object.
        prefix: String prepended to the parent's tag when building the keys
            of the parent's attributes and of recursively flattened
            dict-like children.

    Yields:
        ``(key, value)`` tuples.
    """
    prefix = prefix + parent_element.tag
    for name, value in parent_element.items():
        yield prefix + name, value

    for element in parent_element:
        eprefix = element.tag
        attributes = element.items()
        # Count children explicitly: testing the truth value of an Element
        # is deprecated in xml.etree.ElementTree.
        if len(element):
            if len(element) == 1 or element[0].tag != element[1].tag:
                yield from flatten_dict(element, prefix=prefix)
            else:
                # The key prefix is the tag of the element holding the list.
                yield from flatten_list(element, prefix=eprefix)
            # Attributes are emitted as (key, value) pairs; yielding the
            # bare key would break consumers that unpack two items. In the
            # dict-like case the recursive call has already emitted them
            # under the fully prefixed key; the short-key emission is kept.
            for name, value in attributes:
                yield eprefix + name, value
        elif attributes:
            for name, value in attributes:
                yield eprefix + name, value
        else:
            yield eprefix, element.text
def makerows(pairs):
    """Pivot ``(key, value)`` pairs into header-first rows.

    Values are grouped into one column per distinct key, in order of each
    key's first appearance. Every column starts with the key itself, so the
    first returned row is the header row. Columns shorter than the longest
    one are padded with empty strings.

    Args:
        pairs: An iterable of ``(key, value)`` tuples; keys must be hashable.

    Returns:
        A list of tuples, one per row. An empty list when ``pairs`` yields
        nothing.
    """
    headers = []
    columns = {}
    for key, value in pairs:
        if key in columns:
            columns[key].append(value)
        else:
            headers.append(key)
            columns[key] = [key, value]

    # No pairs means no columns; max() on an empty sequence would raise.
    if not columns:
        return []

    height = max(len(column) for column in columns.values())
    for column in columns.values():
        column.extend([''] * (height - len(column)))

    # Transpose the columns into rows, preserving first-seen key order.
    return list(zip(*(columns[key] for key in headers)))
def pairs_from_root(element):
    """Yield the pairs of ``flatten_dict(element)`` with shortened keys.

    Each key is cut down to the text after its last ``'.'``; a key with no
    ``'.'`` is passed through unchanged. Values are not modified.
    """
    yield from (
        (full_key.rpartition('.')[2], value)
        for full_key, value in flatten_dict(element)
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment