Skip to content

Instantly share code, notes, and snippets.

@xixasdev
Last active November 10, 2022 23:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xixasdev/f0632aab83972985adcc7d2e11bdd6fe to your computer and use it in GitHub Desktop.
Save xixasdev/f0632aab83972985adcc7d2e11bdd6fe to your computer and use it in GitHub Desktop.
X4 Foundations Savegame: XML Analyzer to determine deserialization criteria
#!/usr/bin/env python3
# x4_savegame_xmlanalyzer_v1.py
# AUTHOR: xixas | DATE: 2022.01.09 | LICENSE: WTFPL/PDM/CC0... your choice
# DESCRIPTION: Parse an X4 Foundations savegame XML file and report tag statistics
import argparse
import re
import sys
import xml.sax
# Minimum occurrence count for a string value to count toward enum detection
# (compared against each value's 'count' in consolidate_tag_types).
ENUM_THRESHOLD = 2

# Only identifier-like strings are eligible to be reported as enum values.
RE_ENUM = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')

# Attribute value classifiers. They are tried in insertion order with
# re.fullmatch and the first match wins, so 'int' must stay ahead of 'float'.
# Raw strings keep '\{' and '\[' from being treated as (invalid) string
# escape sequences.
RE_TYPES = {
    'int': re.compile(r'-?[0-9]+'),
    # At most one decimal point and at least one digit, so values such as
    # "1.2.3" or "." are not misreported as floats.
    'float': re.compile(r'-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)'),
    'int_pair': re.compile(r'\{-?[0-9]+, ?-?[0-9]+\}'),
    'id': re.compile(r'\[0x[0-9a-zA-Z]+\]'),
    'exponent': re.compile(r'-?[0-9]+e-?[0-9]+'),
}
def parse_args(args):
    """Parse the command-line arguments.

    Args:
        args: list of argument strings (without the program name).

    Returns:
        The argparse namespace; its ``filepath`` attribute holds the single
        positional argument.
    """
    cli = argparse.ArgumentParser(
        description="Parse an XML file and report tag statistics",
    )
    cli.add_argument('filepath', help='path to XML file to be parsed')
    return cli.parse_args(args)
def run(args):
    """Run the analyzer end to end for the given command-line arguments.

    Parses the arguments, collects tag statistics from the XML file,
    consolidates the per-attribute type guesses in place, and prints the
    resulting report.
    """
    filepath = parse_args(args).filepath
    tag_tree = parse_xml(filepath)
    consolidate_tag_types(tag_tree)
    print_tags(tag_tree)
def parse_xml(filepath):
    """Stream-parse the XML file at *filepath* and return its tag statistics.

    Args:
        filepath: path to the XML file to analyze.

    Returns:
        The nested ``tags`` dictionary built by ``XmlHandler`` while the
        document was parsed.
    """
    handler = XmlHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    # Open in binary mode so the XML parser decodes the bytes according to the
    # document's own encoding declaration; a text-mode handle would decode
    # with the platform's default encoding first, which can fail or garble
    # non-ASCII content.
    with open(filepath, 'rb') as fd:
        parser.parse(fd)
    return handler.tags
def consolidate_tag_types(tags, _tag_stack=None):
    """Collapse each attribute's detected types into a single best guess.

    The ``tags`` tree is modified in place, recursively:

    * If an attribute was seen as ``string`` at least once, every other
      detected type is folded into ``string``.
    * Otherwise, if both ``float`` and ``int`` were seen, ``int`` is folded
      into ``float``.
    * An attribute left with only ``string`` is relabelled ``enum`` when all
      of its distinct values look like identifiers (``RE_ENUM``) and at least
      half of them occur ``ENUM_THRESHOLD`` times or more.

    Args:
        tags: mapping of tag name to tag info (``count``, ``attrs``,
            ``children``) as built by ``XmlHandler``.
        _tag_stack: internal; names of the ancestor tags during recursion.

    Returns:
        None.
    """
    # Avoid a shared mutable default; a fresh list is created per top-level call.
    tag_stack = [] if _tag_stack is None else _tag_stack
    for tag_name, tag_info in tags.items():
        for attr_info in tag_info['attrs'].values():
            types = attr_info['types']
            if len(types) > 1:
                if 'string' in types:
                    # Snapshot the names first: entries are removed while merging.
                    for type_name in [t for t in types if t != 'string']:
                        types['string']['count'] += types.pop(type_name)['count']
                elif 'float' in types and 'int' in types:
                    types['float']['count'] += types.pop('int')['count']

            # Determine enums
            if len(types) == 1 and 'string' in types:
                values = attr_info['values']
                if all(re.fullmatch(RE_ENUM, value) for value in values):
                    frequent = sum(
                        1 for info in values.values()
                        if info['count'] >= ENUM_THRESHOLD
                    )
                    if frequent >= len(values) / 2:
                        types['enum'] = {
                            'count': types['string']['count'],
                            'values': list(values),
                        }
                        types.pop('string')

        children = tag_info['children']
        if children:
            consolidate_tag_types(children, tag_stack + [tag_name])
def print_tags(tags_dict, indent=''):
    """Print the tag tree as an indented report on standard output.

    Tags are listed alphabetically with their occurrence count. Below each
    tag come its attributes (also alphabetical), each followed by its types
    ordered from most to least frequent, plus the observed values in curly
    braces when the attribute is typed as ``enum``. Child tags are then
    printed recursively with a deeper indent.

    Args:
        tags_dict: mapping of tag name to tag info.
        indent: prefix prepended to every line printed at this level.
    """
    child_indent = f"{indent} "
    for tag_name, tag_info in sorted(tags_dict.items()):
        print(f"{indent}{tag_name} ({tag_info['count']})")

        for attr_name, attr_info in sorted(tag_info['attrs'].items()):
            attr_types = attr_info['types']
            by_frequency = sorted(
                attr_types.items(),
                key=lambda item: item[1]['count'],
                reverse=True,
            )
            types = [f"{label} ({info['count']})" for label, info in by_frequency]
            if 'enum' in attr_types:
                enum_values = ' {' + ', '.join(attr_info['values'].keys()) + '}'
            else:
                enum_values = ''
            print(f"{indent} : {attr_name} ({attr_info['count']}): {', '.join(types)}{enum_values}")

        if tag_info['children']:
            print_tags(tag_info['children'], child_indent)
class XmlHandler(xml.sax.ContentHandler):
    """SAX content handler that accumulates per-tag and per-attribute statistics.

    ``tags`` is a nested dictionary keyed by tag name. Each entry holds the
    tag's ``count``, its ``attrs`` (per-attribute ``count``, detected
    ``types`` and distinct ``values``, each with their own counts) and its
    ``children``, which has the same shape. Tags with the same name under
    different parent chains are tracked separately.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.tags = {}
        # Names of the currently open elements, outermost first.
        self.tag_stack = []

    def startElement(self, name, attrs):
        """Record one occurrence of element *name* and of each of its attributes."""
        # Walk down from the root to the children dict of the current parent.
        siblings = self.tags
        for ancestor in self.tag_stack:
            siblings = siblings[ancestor]['children']
        self.tag_stack.append(name)

        tag = siblings.setdefault(name, {'count': 0, 'attrs': {}, 'children': {}})
        tag['count'] += 1

        for attr_name, attr_value in attrs.items():
            attr = tag['attrs'].setdefault(
                attr_name, {'count': 0, 'types': {}, 'values': {}}
            )
            attr['count'] += 1

            seen_types = attr['types']
            detected = 'string'
            # Once an attribute has been seen as a string, pattern matching
            # is skipped and every later value is counted as a string.
            if 'string' not in seen_types:
                for label, pattern in RE_TYPES.items():
                    if re.fullmatch(pattern, attr_value):
                        detected = label
                        break
            seen_types.setdefault(detected, {'count': 0})['count'] += 1

            # Store value count
            attr['values'].setdefault(attr_value, {'count': 0})['count'] += 1

    def endElement(self, name):
        """Leave the current element."""
        self.tag_stack.pop()
# Script entry point: forward the command-line arguments (minus the program
# name) to run().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    run(cli_args)
@xixasdev
Copy link
Author

xixasdev commented Jan 12, 2022

Update: See version 2 here with a number of improved features.

X4: Foundations Savegame XML Analyzer

This is an initial attempt at breaking down X4's savegame files to help determine potential deserialization strategies for the purpose of discussing ways to improve save/load performance.

It lists all encountered tags and attributes and makes a best guess at each attribute's deserialization type. If it believes a value can be stored as an enumerated type, it also lists the enum names.

This first version is a little too aggressive with nesting relationships: it treats tags with the same name as different tags whenever their parent hierarchies differ. This is particularly evident with cue tags. I'll loosen this up for v2.

Use:

Extract an X4 savegame to its base xml file.
Save this script to the same directory.
Run the script with the save file as an argument -- e.g.:

python3 x4_savegame_xmlanalyzer_v1.py save_009.xml

Notations:

  • () Numbers in parentheses are counts for encountered tags/attributes/types
  • : Attributes are prefixed with a colon below their parent tag - they are followed by encountered type(s)
  • "id" typed attributes are attributes that reference other objects
  • {} Possible enum values are wrapped in curly braces

Runtime:

Expect 2 to 4 minutes, depending on hardware and savegame size.
This is Python -- quick to write, slow to run.

Sample Output

I'm cutting this short (note the ... in the middle).
My test savegame for this run was 714 MB (uncompressed), ~14.7 million lines.
Sample output was 4150 lines... and I'm not pasting all that here :)

savegame (1)
  aidirector (1)
    entity (21090)
      : id (21090): id (21090)
      script (34029)
        : attention (112): enum (112) {visible}
        : id (34029): int (34029)
        : index (34029): int (34029)
        : label (33655): string (33655)
        : name (34029): string (34029)
        : order (7791): id (7791)
        : time (34029): float (34029)
        command (15288)
          : type (15288): enum (15288) {attackenemies, repair, freetrade, trade, dockat, explore, resupply, wait, patrol, escort, freemining, searchresources, buildstation, mining, support, investigate, attackobject, withdrawbattle, movetozone, protect, recon, follow, recycle, collect}
          param (5288)
            : type (5288): enum (5288) {component}
            : value (5288): id (5288)
        commandaction (21091)
          : type (21091): enum (21091) {standingby, repairchecking, searchingtrades, repairingto, attacking, flying, calculating, undocking, docking, searchingresources, buildingto, executingtrade, waitingdrones, waitingtodock, investigating, attackingto}
          param (190)
            : type (190): enum (190) {component}
            : value (190): id (190)
        counters (308)
          counter (389)
            : current (264): int (264)
            : max (384): int (384)
            : type (5): enum (5) {list}
            : value (5): int (5)
        vars (34021)
          value (584289)
            : name (584289): string (584289)
            : type (530717): enum (530717) {component, integer, list, build, group, time, table, float, hitpoints, macro, length, money, largeint, quadrant, order, position, rotation, dronemode, blacklistgroup, ware, xmlkeyword, flightbehaviour, keyword, string, trade, class, formationshape, assignment, attention, faction, angle, largefloat}
            : value (404816): string (404816)
    patch (1)
      script (82)
        : name (82): string (82)
        : version (82): int (82)
  economylog (1)
    entries (7)
      : condensed (3): int (3)
      : type (7): enum (7) {cargo, tradeoffer, trade, money}

...
...
...

          subordinate (276)
            : commander (276): id (276)
            : job (276): enum (276) {teladi_fighter_escort_s_patrol, teladi_fighter_escort_s_miningfleet, ministry_frigate_escort_m, holyorder_fighter_escort_s_patrol, xenon_fighter_escort_s_patrol, zyarth_frigate_escort_m, argon_frigate_escort_m, argon_fighter_escort_s_frigate, antigone_fighter_escort_s_frigate, terran_corvette_escort_m, xenon_frigate_escort_m, antigone_fighter_escort_s_miningfleet, holyorder_corvette_escort_m, holyorder_fighter_escort_s_miningfleet, holyorder_fighter_escort_s_frigate, argon_destroyer_escort_l, teladi_fighter_escort_s_frigate, fallensplit_fighter_escort_s, zyarth_destroyer_escort_l_noescort, teladi_frigate_escort_m, antigone_frigate_escort_m, zyarth_fighter_escort_s_carrier_wing, ministry_fighter_escort_s_frigate, zyarth_fighter_escort_s_patrol, zyarth_corvette_escort_m, zyarth_fighter_escort_s_frigate, xenon_fighter_escort_s_frigate, paranid_fighter_escort_s_patrol, ministry_destroyer_escort_l_reinforced, teladi_destroyer_escort_l, argon_fighter_escort_s_patrol, antigone_destroyer_escort_l, fallensplit_fighter_escort_s_escort, holyorder_destroyer_escort_l, antigone_fighter_escort_s_patrol}
            : subordinate (276): id (276)
    physics (1)
      filters (1)
        filter (277)
          : id (277): id (277)
          filtered (768)
            : id (768): id (768)
    traderules (1)
      traderule (2)
        : allow (2): int (2)
        : factions (2): string (2)
        : id (2): int (2)
        : name (2): string (2)
        : owner (2): enum (2) {player}
    uianchorhelper (1)
      : uianchorhelper (1): id (1)
    uianchorhelper_cutscene (1)
      : uianchorhelper_cutscene (1): id (1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment