Last active
March 18, 2020 19:19
-
-
Save ZephyrBlu/7636b5d79fc60ba16210322b341d7d71 to your computer and use it in GitHub Desktop.
Script for SC2 Tournament Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import uuid | |
import json | |
from pathlib import Path, PurePath | |
from sc2_tournament_analysis import recursive_parse, json_to_csv | |
from multiprocessing import Pool | |
from fuzzywuzzy import fuzz | |
from zephyrus_sc2_parser import parse_replay | |
from sc2_tournament_analysis.defaults import ( | |
standard_ignore_units, standard_merge_units, standard_player_match | |
) | |
""" | |
data schema: | |
GameID: uuid | |
Map: str | |
Duration: float (minutes) | |
PlayerName: str | |
IsWinner: bool | |
Race: str ('Protoss', 'Terran', 'Zerg') | |
UnitName: str | |
BirthTime: float (minutes) | |
DeathTime: float (minutes) | |
""" | |
def parse_data(players, timeline, stats, metadata, **kwargs):
    """
    Data function called for each replay that is parsed.

    Flattens per-unit lifetime data for every player into a list of flat
    records matching the data schema documented at the top of the file.

    Args:
        players: dict mapping player id -> player object; each player
            exposes .name, .race and .objects (dict of game objects).
        timeline: per-replay timeline data (unused here).
        stats: per-replay stats data (unused here).
        metadata: dict with at least 'winner' (player id), 'map' and
            'game_length' (seconds).

    Keyword Args:
        name_id_matches: dict mapping player id -> canonical player name;
            falsy to fall back to in-game names.
        identifiers: list of (key, value) pairs copied onto every record,
            e.g. ('event', 'Cheeseadelphia').
        ignore_units: container of unit names to skip entirely.
        merge_units: dict mapping a unit's alternate-mode name to the
            canonical name its records should be merged under.

    Returns:
        list of dicts, one per unit, ready for JSON/CSV serialization.
    """
    # regex patterns for parsing dir names
    name_id_matches = kwargs['name_id_matches']
    # event/stage identifiers
    identifiers = kwargs['identifiers']
    # lists of units to ignore and units with multiple modes to merge
    ignore_units = kwargs['ignore_units']
    merge_units = kwargs['merge_units']

    match_info = []
    # one shared id so all unit records from this replay can be grouped
    game_id = str(uuid.uuid4())

    # search through each player's units and parse unit data
    for p_id, player in players.items():
        for obj in player.objects.values():
            # only track actual units that aren't explicitly ignored
            if 'unit' not in obj.type or obj.name in ignore_units:
                continue

            # merge instances of different unit modes into one unit for
            # analysis purposes: alternate-mode names map to their
            # canonical name, everything else keeps its own name.
            # (The original three-way branch and the never-populated
            # `current_merged_units` dict reduced to exactly this.)
            unit_name = merge_units.get(obj.name, obj.name)

            # prefer externally matched names (from dir parsing) over
            # in-game names when available
            if name_id_matches:
                player_name = name_id_matches[p_id]
            else:
                player_name = player.name

            current_unit_info = {
                'game_id': game_id,
                'map': metadata['map'],
                # game_length is in seconds; convert to minutes
                'duration': round(metadata['game_length'] / 60, 2),
                'player_name': player_name,
                'is_winner': metadata['winner'] == p_id,
                'race': player.race,
                'unit_name': unit_name,
                # times are in game loops: 22.4 loops/sec, then to minutes;
                # a falsy (0/None) time is recorded as None
                'birth_time': round(obj.birth_time / 22.4 / 60, 2) if obj.birth_time else None,
                'death_time': round(obj.death_time / 22.4 / 60, 2) if obj.death_time else None,
                'morph_time': round(obj.morph_time / 22.4 / 60, 2) if obj.morph_time else None,
            }

            # sets event/stage identifiers as k:v pairs
            # ex: { 'event': 'Cheeseadephia' }
            for key, value in identifiers:
                current_unit_info[key] = value

            match_info.append(current_unit_info)
    return match_info
def handle_replay(path, player_names, identifiers):
    """
    Handles assigning player names to in-game names before data processing.

    Parses the replay at `path`, optionally fuzzy-matches the externally
    supplied `player_names` against in-game names, then delegates record
    building to parse_data.
    """
    players, timeline, stats, metadata = parse_replay(
        path, local=True, detailed=True
    )

    # Nation Wars and Cheeseadelphia events skip name matching entirely
    player_match = standard_player_match
    for key, value in identifiers:
        if key == 'event' and value in ('Nation Wars', 'Cheeseadelphia'):
            player_match = False

    name_id_matches = {}
    if player_match:
        # partial_ratio fuzzy matches substrings instead of an exact match
        scored = [
            (p.player_id, p.name, fuzz.partial_ratio(p.name, player_names[0]))
            for p in players.values()
        ]
        best = max(scored, key=lambda entry: entry[2])

        # link the best-matching in-game id to the first supplied name;
        # the remaining player id gets the second name
        best_id = best[0]
        name_id_matches[best_id] = player_names[0]
        remaining_id = 2 if best_id == 1 else 1
        name_id_matches[remaining_id] = player_names[1]

    return parse_data(
        players,
        timeline,
        stats,
        metadata,
        name_id_matches=name_id_matches,
        identifiers=identifiers,
        ignore_units=standard_ignore_units,
        merge_units=standard_merge_units,
    )
# regex patterns for each event, used to pull event/stage/group
# identifiers out of replay directory names.
# NOTE: the round-of patterns use [rR][oO] — the previous form [r,R][o,O]
# placed a literal comma inside the character class, so it also matched
# strings like ",o8"
event_patterns = {
    'ASUS ROG': [
        ('event', 'ASUS ROG'),
        ('stage', 'Group Stage \\w{1}(?: *$)'),
        ('group', '(?<=Group )\\w{1}(?: *$)'),
    ],
    'Blizzcon': [
        ('event', 'Blizzcon'),
        ('stage', '[rR][oO]\\d|Final|Semifinal'),
    ],
    'Cheeseadelphia': [
        ('event', 'Cheeseadelphia'),
    ],
    'HSC XX': [
        ('event', 'HSC XX'),
        ('group', '(?<=Group )\\w{1}(?: *$)'),
    ],
    'Nation Wars': [
        ('event', 'Nation Wars'),
    ],
    'QLASH Invitational': [
        ('event', 'QLASH Invitational'),
    ],
    'WCS Fall': [
        ('event', 'WCS Fall'),
        ('stage', 'Knockout|Group Stage \\w{1}(?: *$)|[rR][oO]\\d|Final'),
        ('group', '(?<=Group )\\w{1}(?: *$)'),
    ],
}
# required for multiprocessing: workers re-import this module, so all
# side-effecting script code must live under the __main__ guard
if __name__ == '__main__':
    path = Path().absolute() / 'replays'
    match_info = []

    # each subdirectory of 'replays' holds one event's replays
    for item in path.iterdir():
        item_name = PurePath(item).name
        # no player match for Nation Wars and Cheeseadelphia
        if item_name in ('Nation Wars', 'Cheeseadelphia'):
            match_info_list = recursive_parse(
                sub_dir=item,
                player_match=False,
                data_function=parse_data,
                identifier_rules=event_patterns[item_name],
                multi=True,
            )
        else:
            match_info_list = recursive_parse(
                sub_dir=item,
                data_function=parse_data,
                identifier_rules=event_patterns[item_name],
                multi=True,
            )

        # Pool multiprocessing for better throughput of parsing
        with Pool(10) as p:
            results = p.starmap(handle_replay, match_info_list)

        # flatten the per-replay record lists into one list
        # (was: list(map(match_info.extend, results)) — map abused for
        # side effects; an explicit loop says what it does)
        for replay_records in results:
            match_info.extend(replay_records)

    # generates JSON file of raw data
    with open('match_info.json', 'w', encoding='utf-8') as output:
        json.dump({'match_info': match_info}, output)

    # CSV header: 10 schema columns followed by the identifier columns
    csv_rows = [
        'GameID',
        'Map',
        'Duration',
        'PlayerName',
        'IsWinner',
        'Race',
        'UnitName',
        'BirthTime',
        'DeathTime',
        'MorphTime',
        'Event',
        'Stage',
        'Group',
    ]

    def transform(record):
        """
        Transforms a raw JSON record (dict) into a CSV row (tuple).
        """
        # HSC XX needs identifiers to be re-ordered due to
        # missing one (Stage) in between the 2 it uses, so pad an
        # empty Stage column between Event and Group
        if record['event'] == 'HSC XX':
            result = list(record.values())[:10]
            result.append(record['event'])
            result.append('')
            if 'group' in record:
                result.append(record['group'])
            return tuple(result)
        # other events: record insertion order already matches csv_rows
        return tuple(record.values())

    json_to_csv(csv_rows, data_function=transform)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment