Last active
March 25, 2018 13:10
-
-
Save fuzz6001/454e0d1d4e1a627e02a572c88b33f438 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
"""Filter an XML game list ("dat" file) down to the entries for given regions.

Each ``<game>`` element directly under the document root is kept only when
one of the parenthesised groups in its ``name`` attribute (for example
``"Some Title (USA, Europe)"``) contains at least one requested region.
Matching is case-insensitive; every other element is left untouched.

Command line: ``python <script> in.dat out.dat region1 [region2 ...]``
"""
import os
import re
import sys
import xml.etree.ElementTree as ET

# One parenthesised group of a game name, e.g. "(USA, Europe)".  A raw string
# keeps "\(" from being treated as an (invalid) string escape sequence.
PAREN_GROUP = re.compile(r'\((.+?)\)')


def _script_name(path):
    """Return *path* without its file extension.

    ``os.path.splitext`` is used instead of slicing at the last "." so that a
    path with no extension is returned whole (slicing at ``rfind(".") == -1``
    dropped its last character) and dots in directory parts are ignored.
    """
    return os.path.splitext(path)[0]


def _name_regions(name):
    """Return the lower-cased, comma-separated tokens of every "(...)" group."""
    tokens = set()
    for group in PAREN_GROUP.findall(name):
        tokens.update(token.strip().lower() for token in group.split(','))
    return tokens


def filter_games(root, search_regions):
    """Remove every direct ``game`` child of *root* that matches no region.

    root           -- parsed XML root element; modified in place.
    search_regions -- iterable of region names (compared case-insensitively).

    Returns the number of ``game`` elements that remain under *root*.
    """
    wanted = {region.lower() for region in search_regions}
    # findall() returns a list, so removing children while looping is safe.
    for game in root.findall('game'):
        name = game.attrib.get('name', '')
        if wanted.isdisjoint(_name_regions(name)):
            root.remove(game)
    return len(root.findall('game'))


def main(argv=None):
    """Run the command-line tool and return the process exit status.

    argv -- full argument vector including the script path; defaults to
            ``sys.argv``.  Returns 1 when too few arguments are given,
            otherwise 0 after writing the filtered file.
    """
    argv = sys.argv if argv is None else argv
    print(_script_name(argv[0]), 'v0.2')
    if len(argv) < 4:
        print('usage: python', argv[0], 'in.dat out.dat region1 [region2 region3 ...]')
        return 1

    indat = argv[1]
    outdat = argv[2]
    print('[in] ', indat)
    print('[out]', outdat)

    tree = ET.parse(indat)
    print('count =', filter_games(tree.getroot(), argv[3:]))
    tree.write(outdat)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage
Only Europe: python <script>.py in.dat out.dat Europe
USA & Japan: python <script>.py in.dat out.dat USA Japan
History
v0.2
Solved the region extraction issue.
You can now extract all titles!
v0.1
initial release.