Created
March 2, 2019 17:52
-
-
Save RaidAndFade/a6c9a6069db47279ad1574de86037b2d to your computer and use it in GitHub Desktop.
Freelancer Pani7
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Build a keyword index from a shops XML file.

Usage: pass the path of the shops XML file as the only argument.  The index
is written next to the input, with the input's extension replaced by
``.index.xml``.
"""
import os
import sys


def index_path_for(infile):
    """Return the output path: *infile* with its extension replaced by ``.index.xml``."""
    return os.path.splitext(infile)[0] + ".index.xml"


def read_shops(root):
    """Yield ``(number, address_text)`` for each child of *root* with an ``<address>``.

    Children without an ``<address>`` element are skipped instead of raising
    ``AttributeError``.  ``number`` is the child's ``number`` attribute and is
    ``None`` when the attribute is absent.
    """
    for shop in root:
        address = shop.find("address")
        if address is None:
            continue
        yield shop.get("number"), address.text


def collect_keywords(shops):
    """Map every lower-cased address word to the shop numbers containing it.

    *shops* is an iterable of ``(number, address)`` pairs.  Pairs whose
    address is ``None`` or empty contribute nothing.  Words are split on any
    run of whitespace, so repeated spaces never produce an empty keyword.
    A number is appended once per occurrence of the word, in input order.
    """
    index = {}
    for number, address in shops:
        if not address:
            continue
        for word in address.lower().split():
            index.setdefault(word, []).append(number)
    return index


def write_index(index, outfile):
    """Serialize *index* as an ``<index>`` document and write it to *outfile*.

    Each keyword becomes an ``<entry>`` holding a ``<keyword>`` and an
    ``<accounts>`` element with one ``<number>`` child per shop number.
    """
    # Imported here so the pure helpers above stay importable without lxml.
    from lxml import etree

    root = etree.Element("index")
    for keyword, numbers in index.items():
        entry = etree.SubElement(root, "entry")
        etree.SubElement(entry, "keyword").text = keyword
        accounts = etree.SubElement(entry, "accounts")
        for number in numbers:
            etree.SubElement(accounts, "number").text = number

    # tostring() returns bytes, so the file must be opened in binary mode;
    # the context manager guarantees the handle is flushed and closed.
    with open(outfile, "wb") as handle:
        handle.write(etree.tostring(etree.ElementTree(root), pretty_print=True))


def main(argv=None):
    """Parse the shops XML named in *argv* and write its keyword index.

    *argv* defaults to ``sys.argv``.  Exits with a usage message unless
    exactly one path argument is supplied.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        sys.exit("Make sure to put the path of the file as an argument. Or to drag the file into the script.")

    # Imported here so the pure helpers above stay importable without lxml.
    from lxml import etree

    infile = argv[1]
    shops_doc = etree.parse(infile)
    index = collect_keywords(read_shops(shops_doc.getroot()))
    write_index(index, index_path_for(infile))


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Convert a JSON-lines shop dump into an XML file.

Usage: pass the path of the JSON-lines file as the only argument.  The XML
is written next to the input, with the input's extension replaced by
``.xml``.
"""
import json
import os
import sys

# Child elements emitted for every <shop>, in output order.
SHOP_FIELDS = ("address", "email", "city", "state")


def xml_path_for(infile):
    """Return the output path: *infile* with its extension replaced by ``.xml``."""
    return os.path.splitext(infile)[0] + ".xml"


def load_shops(lines):
    """Parse JSON lines into a dict of shop records keyed by shop number.

    Blank lines are ignored, and records containing an ``"index"`` key
    (the ``{"index": {...}}`` lines) are skipped.  Keys are
    ``str(record["shop_number"])``; a later record with the same number
    replaces an earlier one.  Raises ``KeyError`` if a kept record has no
    ``shop_number``.
    """
    shops = {}
    for raw in lines:
        if not raw.strip():
            continue
        record = json.loads(raw)
        if "index" in record:
            continue
        shops[str(record["shop_number"])] = record
    return shops


def shop_sort_key(shop_number):
    """Return a sort key that orders numeric shop numbers by value.

    Keys are strings, so a plain sort would put ``"10"`` before ``"2"``.
    Numeric keys sort first by integer value; non-numeric keys follow,
    ordered as text.
    """
    try:
        return (0, int(shop_number), shop_number)
    except ValueError:
        return (1, 0, shop_number)


def ordered_shop_numbers(shops):
    """Return the keys of *shops* ordered from least to greatest shop number."""
    return sorted(shops, key=shop_sort_key)


def write_shops(shops, outfile):
    """Serialize *shops* as a ``<shop_numbers>`` document and write it to *outfile*.

    Every shop becomes ``<shop number="...">`` with one child per entry of
    ``SHOP_FIELDS``.  Raises ``KeyError`` if a record lacks one of them.
    """
    # Imported here so the pure helpers above stay importable without lxml.
    from lxml import etree

    root = etree.Element("shop_numbers")
    for shop_number in ordered_shop_numbers(shops):
        shop = shops[shop_number]
        shop_el = etree.SubElement(root, "shop", number=shop_number)
        for field in SHOP_FIELDS:
            etree.SubElement(shop_el, field).text = shop[field]

    # tostring() returns bytes, so the file must be opened in binary mode;
    # the context manager guarantees the handle is flushed and closed.
    with open(outfile, "wb") as handle:
        handle.write(etree.tostring(etree.ElementTree(root), pretty_print=True))


def main(argv=None):
    """Read the JSON-lines file named in *argv* and write the shops XML.

    *argv* defaults to ``sys.argv``.  Exits with a usage message unless
    exactly one path argument is supplied.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        sys.exit("Make sure to put the path of the file as an argument. Or to drag the file into the script.")

    infile = argv[1]
    with open(infile, "r") as source:
        shops = load_shops(source)
    write_shops(shops, xml_path_for(infile))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment