Skip to content

Instantly share code, notes, and snippets.

@FreaKzero
Last active October 30, 2020 16:25
Show Gist options
  • Save FreaKzero/70be69c9aa90fdecf6262be4f308a952 to your computer and use it in GitHub Desktop.
Save FreaKzero/70be69c9aa90fdecf6262be4f308a952 to your computer and use it in GitHub Desktop.
Python 3 script to sanitize categories and player strings in Skyscraper cache XMLs - especially needed for OpenRetro data
import xml.etree.ElementTree as ET
import re
import os
import sys
import argparse
# The script targets Python 3 only; refuse to run on anything older.
if sys.version_info[0] < 3:
    print("Must be using Python 3")
    sys.exit(1)

# Give a short hint instead of the full argparse usage text when no
# argument was supplied at all.
if len(sys.argv) < 2:
    print("Please provide a path")
    sys.exit(1)

parser = argparse.ArgumentParser()
parser.add_argument("path", help="Path")
# nargs="?" lets "--dry" work as a plain switch (const=True) while still
# accepting the older "--dry <value>" form. Because the value is optional,
# a bare "--dry" should be placed after the path, otherwise the path would
# be consumed as its value.
parser.add_argument("--dry", nargs="?", const=True,
                    help="Dry run without writing")
parser.add_argument("--player", help="DEFAULT or MAXPLAYER")
parser.add_argument("--nocat", help="String for categories which cant be resolved")
args = parser.parse_args()

# When true, nothing is renamed, written or deleted; actions are only printed.
DRY_TEST = bool(args.dry)

# MAXPLAYER => keep only the maximum player number
# Everything else => n-n (example: 1-12)
PLAYERFORMAT = args.player if args.player else "DEFAULT"

# None => use the first grabbed tag as category
# String => use the given string as category
NO_CAT_STRING = args.nocat if args.nocat else None

# Collected while cleaning and printed at the end of the run:
#   "nocat"   - formatted report entries for games that got a fallback category
#   "players" - dicts of the form {"players": <string>, "count": <int>}
LOGGER = {
    "nocat": [],
    "players": [],
}
# Known categories in priority order: when a game carries several of these
# tags, the one listed earliest here becomes its category.
# The first item is the most important, the last item the least.
defined = [
    "disney", "educational", "sports", "shooter",
    "rpg", "puzzle", "shootemup", "racing",
    "beatemup", "cards", "quiz", "topdown",
    "strategy", "platform", "adventure", "reaction",
    "arcade", "simulation", "action", "maze",
    "pinball", "boardgame", "movie", "creativity",
]
# Tag aliases: maps user typos and near-synonyms (keys) onto the canonical
# category name (values) before the priority lookup happens.
rep = {
    # -> beatemup
    "fighting":        "beatemup",
    "fighter":         "beatemup",
    "fight":           "beatemup",
    "beatempup":       "beatemup",
    # -> racing
    "car":             "racing",
    # -> adventure
    "actionadventure": "adventure",
    "actionadvenure":  "adventure",
    # -> platform
    "jumper":          "platform",
    "jumponthings":    "platform",
    # -> rpg
    "wanderer":        "rpg",
    # -> puzzle
    "puzzlesolve":     "puzzle",
    # -> cards
    "blackjack":       "cards",
}
def writeFile(file):
    """Write the module-level XML tree ``root`` back to *file*.

    In dry-run mode (``DRY_TEST``) only a message is printed. Otherwise the
    existing file is first renamed to ``<file>.orig`` as a backup and the
    serialized tree is written to the original name.

    Filesystem errors are reported on stdout and not re-raised, so one
    unwritable database does not abort the whole run.

    :param file: path of the database file to replace
    """
    if DRY_TEST:
        print("[DRY] Write: {}".format(file))
        return

    try:
        os.rename(file, file + ".orig")
        # ET.tostring(..., encoding='unicode') emits no XML declaration, so
        # readers will assume UTF-8; write it as UTF-8 instead of relying on
        # the platform's locale encoding.
        with open(file, "w", encoding="utf-8") as f:
            f.write(ET.tostring(root, encoding='unicode'))
        print("Writing {}".format(file))
    except OSError:
        # Only filesystem problems are expected here; anything else
        # (including Ctrl-C) should surface instead of being swallowed.
        print("Cant write File {}".format(file))
def outputlog(what):
    """Print one of the reports collected in ``LOGGER``.

    :param what: ``'nocat'`` prints the games that received a fallback
        category (nothing is printed when there are none); any other value
        prints the player-string statistics.
    """
    if what == 'nocat':
        fallbacks = LOGGER["nocat"]
        if fallbacks:
            print("* Used Fallback Categories for {} Games".format(len(fallbacks)))
            print("\n".join(fallbacks))
        # Return unconditionally: an empty fallback list must not fall
        # through to the player report, which would then be printed twice
        # over the course of a run.
        return

    print("* Player sanitization statistics")
    for item in LOGGER['players']:
        print("{}\tx\t{}".format(item["players"], item["count"]))
def findFile(dir_path, search):
    """Recursively collect every file named exactly *search* below *dir_path*.

    :param dir_path: directory at which the walk starts
    :param search: file name to match (exact comparison, no wildcards)
    :return: list of paths to the matching files, in ``os.walk`` order;
        empty when nothing matches or *dir_path* does not exist
    """
    # os.path.join uses the platform's separator and avoids a doubled
    # separator when dir_path already ends with one.
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(dir_path)
        for name in names
        if name == search
    ]
def findDict(lst, key, value):
    """Locate the first dict in *lst* whose entry under *key* equals *value*.

    :param lst: sequence of dicts, each of which must contain *key*
    :param key: dictionary key to inspect
    :param value: value to look for
    :return: index of the first match, or -1 when no dict matches
    """
    matches = (pos for pos, entry in enumerate(lst) if entry[key] == value)
    return next(matches, -1)
def catsort(str):
    """Sort key that ranks a tag by its priority in ``defined``.

    Tags listed earlier in ``defined`` receive a larger number, so sorting
    with ``reverse=True`` puts the most important category first.

    :param str: tag name to rank
    :return: position of the tag in the reversed ``defined`` list, or -1
        when the tag is not a known category
    """
    if str not in defined:
        return -1
    by_ascending_priority = list(reversed(defined))
    return by_ascending_priority.index(str)
def replacetag(str):
    """Translate a tag through the ``rep`` alias table.

    :param str: tag name as found in the database
    :return: the canonical name from ``rep`` when the tag is listed there,
        otherwise the tag unchanged
    """
    return rep.get(str, str)
def tagclean(child, root):
    """Reduce the ", "-separated tag list of a resource to one category.

    Each tag is passed through ``replacetag``. If at least one resulting tag
    is listed in ``defined``, the one with the highest priority is returned.
    Otherwise a fallback is used: ``NO_CAT_STRING`` when it is a string,
    else the first tag of the original list. Every fallback is recorded in
    ``LOGGER["nocat"]`` together with the game's title and platform.

    :param child: the ``tags`` resource element; needs an ``id`` attribute
    :param root: root element used to look up the title/platform resources
        that share the same ``id``
    :return: the chosen category string
    """
    raw = child.text or ""  # an empty element has text None
    # dict.fromkeys de-duplicates while keeping first-seen order (a set
    # would randomise it, making the "first tag" fallback unpredictable).
    tags = list(dict.fromkeys(replacetag(tag) for tag in raw.split(', ')))
    # The sort is stable, so unknown tags (all ranked -1) keep their
    # original relative order behind the known categories.
    tags.sort(key=catsort, reverse=True)

    best = tags[0]  # split() always yields at least one element
    if best in defined:
        return best

    def resource_text(kind):
        # Text of the sibling resource of the given type, or None if absent.
        node = root.find(
            "./resource[@id='{}'][@type='{}']".format(child.attrib["id"], kind)
        )
        return None if node is None else node.text

    write = NO_CAT_STRING if isinstance(NO_CAT_STRING, str) else best
    # Report fields: [T]itle, [P]latform, [O]riginal tags, [W]ritten category.
    LOGGER["nocat"].append(
        "[T]: {}\n[P]: {}\n[O]: {}\n[W]: {} \n".format(
            resource_text("title"), resource_text("platform"), child.text, write
        )
    )
    return write
def playerclean(child):
    """Normalise the player-count text of a resource element.

    Everything from the first opening parenthesis onward (plus the
    whitespace before it) is dropped and all spaces are removed, e.g.
    ``"1 - 4 (simultaneous)"`` becomes ``"1-4"``. With
    ``PLAYERFORMAT == "MAXPLAYER"`` a range ``n-m`` is reduced to ``m``.
    A value without a dash that is longer than two characters is replaced
    by ``"1"`` (presumably free text such as "Single player").

    The result is also counted in ``LOGGER["players"]``.

    :param child: the ``players`` resource element
    :return: the sanitised player string
    """
    text = child.text or ""  # an empty element has text None
    cleaned = re.sub(r"\s*\(.*.?\s*", "", text).replace(" ", "")
    plstr = cleaned

    if PLAYERFORMAT == "MAXPLAYER":
        # \d+ so that ranges with a multi-digit minimum ("10-20") are
        # reduced as well, not only "1-12"-style ones.
        match = re.match(r"\d+-(\d{1,3})", plstr)
        if match is not None:
            plstr = match.group(1)

    if '-' not in cleaned and len(cleaned) > 2:
        plstr = "1"

    # Keep a running tally per distinct result for the final statistics.
    found = findDict(LOGGER["players"], "players", plstr)
    if found > -1:
        LOGGER["players"][found]["count"] += 1
    else:
        LOGGER["players"].append({"players": plstr, "count": 1})
    return plstr
# --- Main flow: locate databases, clear old backups, clean, write, report ---
databases = findFile(args.path, "db.xml")
backups = findFile(args.path, "db.xml.orig")

# Backups from a previous run have to go before the databases are cleaned
# again; ask first, since deleting them is irreversible.
if backups:
    answer = input("Delete current Backups and reclean ? (y/n)")
    # Accept "n", "N", "no", " n " ... as a refusal, not only a bare "n".
    if answer.strip().lower().startswith("n"):
        sys.exit()
    for backdb in backups:
        if DRY_TEST:
            print("[DRY] Remove {}".format(backdb))
        else:
            print("Removing Backup: {}".format(backdb))
            os.remove(backdb)

if not databases:
    print("No Databases found on given path")
    sys.exit()

for db in databases:
    print("Reading/Cleaning {}".format(db))
    # "root" must stay a module-level name: writeFile() serialises it.
    root = ET.parse(db).getroot()
    # Name of the directory that contains the current database.
    CURRENTFOLDER = os.path.split(os.path.dirname(db))[1]
    for child in root:
        # .get() so that elements without a "type" attribute are skipped
        # instead of raising KeyError.
        restype = child.attrib.get("type")
        if restype == "tags":
            child.text = tagclean(child, root)
        elif restype == "players":
            child.text = playerclean(child)
    writeFile(db)

outputlog('nocat')
outputlog('players')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment