Skip to content

Instantly share code, notes, and snippets.

@ZephyrBlu
Last active March 18, 2020 19:19
Show Gist options
  • Save ZephyrBlu/7636b5d79fc60ba16210322b341d7d71 to your computer and use it in GitHub Desktop.
Script for SC2 Tournament Analysis
import uuid
import json
from pathlib import Path, PurePath
from sc2_tournament_analysis import recursive_parse, json_to_csv
from multiprocessing import Pool
from fuzzywuzzy import fuzz
from zephyrus_sc2_parser import parse_replay
from sc2_tournament_analysis.defaults import (
standard_ignore_units, standard_merge_units, standard_player_match
)
"""
data schema:
GameID: uuid
Map: str
Duration: float (minutes)
PlayerName: str
IsWinner: bool
Race: str ('Protoss', 'Terran', 'Zerg')
UnitName: str
BirthTime: float (minutes)
DeathTime: float (minutes)
"""
def parse_data(players, timeline, stats, metadata, **kwargs):
    """
    Data function for each replay that is parsed.

    All replay data is passed into this function and can be manipulated at
    will. Builds one record dict per non-ignored unit in the game and returns
    them as a list.

    Required kwargs:
        name_id_matches: dict mapping player id -> tournament-listing name
            (empty dict disables the name substitution)
        identifiers: list of (key, value) event/stage identifier pairs
        ignore_units: collection of unit names to skip entirely
        merge_units: dict mapping alternate unit-mode names -> canonical name
    """
    # regex patterns for parsing dir names
    name_id_matches = kwargs['name_id_matches']
    # event/stage identifiers
    identifiers = kwargs['identifiers']
    # lists of units to ignore and units with multiple modes to merge
    ignore_units = kwargs['ignore_units']
    merge_units = kwargs['merge_units']

    def check_winner(p_id):
        # metadata['winner'] holds the winning player's id
        return metadata['winner'] == p_id

    match_info = []
    # NOTE(review): never populated anywhere in this file, so the final
    # merge loop below is currently a no-op — kept to preserve behavior
    current_merged_units = {}
    game_id = str(uuid.uuid4())

    # search through each player's units and parse unit data
    for p_id, player in players.items():
        for obj in player.objects.values():
            if 'unit' not in obj.type or obj.name in ignore_units:
                continue

            # merge instances of different unit modes into one unit for
            # analysis purposes: names appearing as keys in merge_units map to
            # their canonical name, everything else keeps its own name
            # (replaces the original redundant keys()/values() branch chain)
            unit_name = merge_units.get(obj.name, obj.name)

            # prefer the matched tournament-listing name when one exists
            if name_id_matches:
                player_name = name_id_matches[p_id]
            else:
                player_name = player.name

            current_unit_info = {
                'game_id': game_id,
                'map': metadata['map'],
                # game_length is in seconds; store minutes
                'duration': round(metadata['game_length'] / 60, 2),
                'player_name': player_name,
                'is_winner': check_winner(p_id),
                'race': players[p_id].race,
                'unit_name': unit_name,
                # times are in game loops; 22.4 loops/sec -> minutes
                'birth_time': round(obj.birth_time / 22.4 / 60, 2) if obj.birth_time else None,
                'death_time': round(obj.death_time / 22.4 / 60, 2) if obj.death_time else None,
                'morph_time': round(obj.morph_time / 22.4 / 60, 2) if obj.morph_time else None,
            }

            # sets event/stage identifiers as k:v pairs
            # ex: { 'event': 'Cheeseadephia' }
            for key, value in identifiers:
                current_unit_info[key] = value
            match_info.append(current_unit_info)

    # add merged units to match info (no-op while current_merged_units is empty)
    for unit, info in current_merged_units.items():
        match_info.append(info)
    return match_info
def handle_replay(path, player_names, identifiers):
    """
    Handles assigning player names to in-game names before data processing
    """
    players, timeline, stats, metadata = parse_replay(
        path, local=True, detailed=True
    )

    # fuzzy player-name matching is disabled for specific events
    player_match = standard_player_match
    for name, value in identifiers:
        if name == 'event' and value in ('Nation Wars', 'Cheeseadelphia'):
            player_match = False

    name_id_matches = {}
    if player_match:
        # partial_ratio fuzzy matches substrings instead of an exact match
        match_ratios = [
            (p.player_id, p.name, fuzz.partial_ratio(p.name, player_names[0]))
            for p in players.values()
        ]
        best_match = max(match_ratios, key=lambda entry: entry[2])

        # linking matched names to in game names: the best match gets the
        # first listed name, the remaining player id gets the second
        name_id_matches[best_match[0]] = player_names[0]
        other_id = 2 if best_match[0] == 1 else 1
        name_id_matches[other_id] = player_names[1]

    return parse_data(
        players,
        timeline,
        stats,
        metadata,
        name_id_matches=name_id_matches,
        identifiers=identifiers,
        ignore_units=standard_ignore_units,
        merge_units=standard_merge_units,
    )
# regex patterns for each event: each entry is a list of
# (identifier_name, pattern) pairs applied to replay dir names
event_patterns = {
    'ASUS ROG': [
        ('event', 'ASUS ROG'),
        ('stage', r'Group Stage \w{1}(?: *$)'),
        ('group', r'(?<=Group )\w{1}(?: *$)'),
    ],
    'Blizzcon': [
        ('event', 'Blizzcon'),
        ('stage', r'[r,R][o,O]\d|Final|Semifinal'),
    ],
    'Cheeseadelphia': [
        ('event', 'Cheeseadelphia'),
    ],
    'HSC XX': [
        ('event', 'HSC XX'),
        ('group', r'(?<=Group )\w{1}(?: *$)'),
    ],
    'Nation Wars': [
        ('event', 'Nation Wars'),
    ],
    'QLASH Invitational': [
        ('event', 'QLASH Invitational'),
    ],
    'WCS Fall': [
        ('event', 'WCS Fall'),
        ('stage', r'Knockout|Group Stage \w{1}(?: *$)|[r,R][o,O]\d|Final'),
        ('group', r'(?<=Group )\w{1}(?: *$)'),
    ],
}
# required for multiprocessing
# required for multiprocessing
if __name__ == '__main__':
    replay_root = Path().absolute() / 'replays'
    match_info = []
    for event_dir in replay_root.iterdir():
        event_name = PurePath(event_dir).name

        parse_kwargs = {
            'sub_dir': event_dir,
            'data_function': parse_data,
            'identifier_rules': event_patterns[event_name],
            'multi': True,
        }
        # no player match for Nation Wars and Cheeseadelphia
        if event_name in ('Nation Wars', 'Cheeseadelphia'):
            parse_kwargs['player_match'] = False
        match_info_list = recursive_parse(**parse_kwargs)

        # Pool multiprocessing for better throughput of parsing
        with Pool(10) as pool:
            results = pool.starmap(handle_replay, match_info_list)
        for replay_records in results:
            match_info.extend(replay_records)

    # generates JSON file of raw data
    with open('match_info.json', 'w', encoding='utf-8') as output:
        json.dump({'match_info': match_info}, output)
# CSV column headers, in the order transform() emits values
csv_rows = [
    'GameID', 'Map', 'Duration', 'PlayerName', 'IsWinner',
    'Race', 'UnitName', 'BirthTime', 'DeathTime', 'MorphTime',
    'Event', 'Stage', 'Group',
]
def transform(record):
    """
    Transforms raw JSON data into CSV data
    """
    all_values = list(record.values())
    if record['event'] != 'HSC XX':
        return tuple(all_values)
    # HSC XX needs identifiers to be re-ordered due to
    # missing one in between the 2 it uses: first 10 base fields,
    # then event, an empty stage column, and group when present
    reordered = all_values[:10] + [record['event'], '']
    if 'group' in record:
        reordered.append(record['group'])
    return tuple(reordered)
json_to_csv(csv_rows, data_function=transform)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment