Skip to content

Instantly share code, notes, and snippets.

@RaidAndFade
Created March 2, 2019 17:52
Show Gist options
  • Save RaidAndFade/a6c9a6069db47279ad1574de86037b2d to your computer and use it in GitHub Desktop.
Save RaidAndFade/a6c9a6069db47279ad1574de86037b2d to your computer and use it in GitHub Desktop.
Freelancer Pani7
from lxml import etree
import sys
import os
def tokenize_address(address):
    """Return the distinct lower-cased words of *address*, in first-seen order.

    Words are separated by any run of whitespace, so repeated spaces never
    produce an empty keyword.  A missing or empty address (``None`` or ``""``)
    yields an empty list.  Each word is reported once, so a shop is not listed
    twice under a keyword that occurs twice in its address.
    """
    if not address:
        return []
    seen = set()
    words = []
    for word in address.lower().split():
        if word not in seen:
            seen.add(word)
            words.append(word)
    return words


def build_keyword_index(shops):
    """Map every address keyword to the shop numbers whose address contains it.

    *shops* is an iterable of ``(number, address)`` pairs.  The result is a
    dict of ``keyword -> [number, ...]`` with the numbers in input order.
    """
    index = {}
    for number, address in shops:
        for word in tokenize_address(address):
            index.setdefault(word, []).append(number)
    return index


def main():
    """Build ``<input>.index.xml``, a keyword index of the shops in the XML input."""
    if len(sys.argv) != 2:
        sys.exit("Make sure to put the path of the file as an argument. Or to drag the file into the script.")

    # The input is the single command-line argument; the output is written next
    # to it, with ".index.xml" replacing the input's current extension.
    infile = sys.argv[1]
    outfile = os.path.splitext(infile)[0] + ".index.xml"

    source = etree.parse(infile)
    # findtext() returns None/"" instead of raising when a shop has no usable
    # <address>; such shops simply contribute no keywords.
    index = build_keyword_index(
        (shop.get("number"), shop.findtext("address"))
        for shop in source.getroot()
    )

    # Create the index XML with lxml.  Keywords are emitted in sorted order so
    # the output is the same on every run.
    root = etree.Element("index")
    for keyword in sorted(index):
        entry = etree.SubElement(root, "entry")
        etree.SubElement(entry, "keyword").text = keyword
        accounts = etree.SubElement(entry, "accounts")
        for number in index[keyword]:
            etree.SubElement(accounts, "number").text = number

    # etree.tostring() returns bytes by default, so the file is opened in
    # binary mode; the with-block guarantees it is flushed and closed.
    with open(outfile, "wb") as handle:
        handle.write(etree.tostring(etree.ElementTree(root), pretty_print=True))


if __name__ == "__main__":
    main()
from lxml import etree
import json
import sys
import os
def shop_sort_key(shop_number):
    """Return a sort key that orders shop numbers numerically.

    *shop_number* is the string form of a shop number.  Values that parse as
    integers sort first, by numeric value ("2" before "10"); anything else
    sorts after them, alphabetically.  The original string is the final
    tie-breaker so the ordering is total.
    """
    try:
        return (0, int(shop_number), shop_number)
    except ValueError:
        return (1, 0, shop_number)


def load_shops(json_lines):
    """Parse JSON-lines text into a dict of shops keyed by shop number string.

    *json_lines* is an iterable of strings, one JSON document per line.  Blank
    lines are ignored, and so are records containing an "index" key (the
    ``{"index": {...}}`` lines).  If a shop number occurs more than once, the
    last record wins.
    """
    shops = {}
    for json_line in json_lines:
        if not json_line.strip():
            continue
        record = json.loads(json_line)
        if "index" in record:  # skip the ones that are {"index":{...}}
            continue
        # Store the shop in the dictionary, keyed by its number as a string.
        shops[str(record["shop_number"])] = record
    return shops


def main():
    """Convert the JSON-lines shop file given on the command line to XML."""
    if len(sys.argv) != 2:
        sys.exit("Make sure to put the path of the file as an argument. Or to drag the file into the script.")

    # The input is the single command-line argument; the output is that same
    # file, in the same location, with ".xml" instead of its current extension.
    infile = sys.argv[1]
    outfile = os.path.splitext(infile)[0] + ".xml"

    with open(infile, "r") as source:
        shops = load_shops(source)

    # Create the XML document and insert the shops, ordered by number from
    # least to greatest.
    root = etree.Element("shop_numbers")
    for shop_number in sorted(shops, key=shop_sort_key):
        shop = shops[shop_number]
        shop_el = etree.SubElement(root, "shop", number=shop_number)
        for field in ("address", "email", "city", "state"):
            etree.SubElement(shop_el, field).text = shop[field]

    # etree.tostring() returns bytes by default, so the file is opened in
    # binary mode; the with-block guarantees it is flushed and closed.
    with open(outfile, "wb") as handle:
        handle.write(etree.tostring(etree.ElementTree(root), pretty_print=True))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment