Skip to content

Instantly share code, notes, and snippets.

@redacted
Created April 5, 2012 15:27
Show Gist options
  • Save redacted/2311951 to your computer and use it in GitHub Desktop.
Save redacted/2311951 to your computer and use it in GitHub Desktop.
Checks balance of StarCraft 2 races based on sc2-replays.net results
import urllib2
from collections import defaultdict, namedtuple
import BeautifulSoup
import sys
import time
import socket
# Race names, spelled the way they are searched for in the lower-cased
# replay page markup.
Factions = [
    "protoss",
    "zerg",
    "terran",
]

# One parsed replay: the matchup string (e.g. "protoss-zerg"), the winning
# faction, the map name and the game length in seconds.
Match = namedtuple("Match", "factions winner map duration")
def remove_non_ascii(s):
    """Return *s* with every character whose code point is 128 or above removed.

    Some player names contain non-ASCII characters; they interfere with the
    substring matching done on the page and can safely be discarded.
    """
    ascii_only = [ch for ch in s if ord(ch) <= 0x7F]
    return "".join(ascii_only)
def get_player_factions(response, players):
    """Map each player name to the faction they played.

    response -- the replay page markup as one string
    players  -- the player names to look for

    The last line of *response* that contains both "rep_oppo" and a faction
    name is taken as the faction line. It is cut at every "<img src=" and
    each piece after the first is searched for a player name and a faction
    name; the last hit of each kind in a piece wins.

    Raises UnboundLocalError when no faction line exists, or when the first
    piece contains no known player or faction. A later piece that lacks one
    silently reuses the value found in the previous piece.
    """
    factions_by_player = {}

    for row in response.split("\n"):
        if "rep_oppo" in row and any(name in row for name in Factions):
            faction_line = row

    # Deliberately no default for faction_line / player / faction: callers
    # catch the resulting UnboundLocalError to skip pages that do not parse.
    for piece in faction_line.split("<img src=")[1:]:
        for candidate in players:
            if candidate in piece:
                player = candidate
        for name in Factions:
            if name in piece:
                faction = name
        factions_by_player[player] = faction

    return factions_by_player
def get_match_details(soup):
    """Extract the two player names and the map name from the page title.

    Only the text after the last ":" in the <title> is used. It must contain
    exactly one "(": the part before it holds the players, the part after it
    the map. Raises ValueError otherwise.

    Returns ([first_token, last_token], map_name), where the tokens are the
    first and last whitespace-separated words of the players part, and
    map_name is the text up to the first comma with spaces removed
    ("none" when that is empty).
    """
    title_text = soup.find('title').contents[0]
    matchup = title_text.split(":")[-1].strip()

    players_part, map_part = matchup.split("(")
    names = players_part.split()

    map_name = map_part.split(',')[0].replace(" ", "") or "none"
    return [names[0], names[-1]], map_name
def get_match_duration(response):
    """Return the match length in seconds, or None if it cannot be found.

    Each line containing "length:" is examined in order. The text after its
    last "</div>" (with "<br/>" removed) is split on whitespace and read as
    alternating number/unit tokens:

        2 tokens -> seconds only
        4 tokens -> minutes, seconds
        6 tokens -> hours, minutes, seconds

    A "length:" line with any other token count is skipped. Raises
    ValueError when a number token is not an integer.
    """
    # token count -> seconds represented by each successive number token
    seconds_per_unit = {
        2: (1,),
        4: (60, 1),
        6: (3600, 60, 1),
    }

    for row in response.split("\n"):
        if "length:" not in row:
            continue
        tokens = row.split("</div>")[-1].replace("<br/>", "").strip().split()
        weights = seconds_per_unit.get(len(tokens))
        if weights is not None:
            # Numbers sit at the even positions; units are ignored.
            return sum(int(number) * weight
                       for number, weight in zip(tokens[::2], weights))
    return None
def get_winner(soup):
    """Return the first child of the <span> inside the div with id "winner".

    Raises AttributeError if either lookup on the parsed page returns None.
    """
    winner_box = soup.find('div', {'id': 'winner'})
    name_span = winner_box.find('span')
    return name_span.contents[0]
def build_match_url_list(n_requested):
    """Return up to *n_requested* distinct replay URLs.

    Walks the "all replays" listing page by page (via get_matches) until
    enough URLs have been collected. Collection stops early, keeping what
    was gathered so far, when

    * a page contributes no URL that has not been seen already (the end of
      the listing was reached), or
    * a network error occurs while fetching a follow-up page.

    An error on the very first page is not handled here and propagates.
    """
    base_url = "http://www.sc2-replays.net/en/replays/&sort=time&time=&rel=0"
    page_param = "&page="  # follow-up pages start at &page=2

    matches = []
    seen = set()

    def add_unseen(urls):
        """Append URLs not collected yet; return how many were added."""
        added = 0
        for match_url in urls:
            if match_url not in seen:
                seen.add(match_url)
                matches.append(match_url)
                added += 1
        return added

    add_unseen(get_matches(base_url))
    page_idx = 2
    try:
        while len(matches) < n_requested:
            print("Got {0} matches".format(len(matches)))
            new_url = base_url + page_param + str(page_idx)
            if not add_unseen(get_matches(new_url)):
                # Nothing new on this page: without this check a short
                # listing would make the loop spin forever.
                break
            page_idx += 1
            time.sleep(0.2)  # be polite to the server
    except (socket.error, urllib2.URLError) as err:
        # Best effort: keep what we have, but say why we stopped.
        # urlopen() reports connection and HTTP failures as URLError.
        print("Stopped fetching match list early: {0}".format(err))

    # A page usually overshoots the target; honour the requested count.
    matches = matches[:max(n_requested, 0)]
    print("Got {0} matches (final)".format(len(matches)))
    return matches
def get_matches(url):
    """Return the distinct replay URLs found on the listing page at *url*.

    The page is not parsed as HTML: every line is cut at double quotes and a
    piece counts as a replay URL when it contains both
    "www.sc2-replays.net/en/replays/" and "-vs-". Slightly hackish, but it
    works well. The order of the returned list is unspecified.
    """
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    page_matches = set()
    for line in html.split("\n"):
        for chunk in line.split('"'):
            if "www.sc2-replays.net/en/replays/" in chunk and "-vs-" in chunk:
                page_matches.add(chunk)
    return list(page_matches)
def get_match_results(url):
    """Fetch and parse one replay page.

    Returns a Match namedtuple, or None for a mirror match (every player on
    the same faction), which carries no balance information.

    Raises whatever the page helpers raise on unexpected markup, notably
    UnboundLocalError from get_player_factions and KeyError when the winner
    is not one of the players that were matched to a faction.
    """
    response = urllib2.urlopen(url)
    try:
        raw_page = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    # Lower-case ASCII keeps the substring matching in the helpers simple.
    page = remove_non_ascii(raw_page).lower()
    soup = BeautifulSoup.BeautifulSoup(page)

    players, mapname = get_match_details(soup)
    player_factions = get_player_factions(page, players)
    win_faction = player_factions[get_winner(soup)]

    if len(set(player_factions.values())) <= 1:
        return None

    factions = "-".join(sorted(player_factions.values()))
    # Use the module-level Match type rather than building a new namedtuple
    # class on every call.
    return Match(factions, win_faction, mapname, get_match_duration(page))
def calculate_results(n):
    """Collect and parse replays; the main logic of the program.

    Builds the replay URL list with build_match_url_list(n), parses every
    URL with get_match_results and returns the list of Match namedtuples
    that could be parsed. Pages that yield None or raise UnboundLocalError
    are left out.
    """
    match_urls = build_match_url_list(n)
    total = len(match_urls)
    match_results = []

    for idx, match_url in enumerate(match_urls):
        print("({0}/{1}) Working on {2}".format(idx + 1, total, match_url))
        try:
            mr = get_match_results(match_url)
        except UnboundLocalError as err:
            # FIXME: unicode names cause issues -- the page helpers end up
            # with an unassigned local when a player cannot be located.
            print("  skipped, could not parse page: {0}".format(err))
            continue
        if mr is not None:
            match_results.append(mr)

    return match_results
def wins_by_faction(mr):
    """Print how often each faction won, grouped by matchup.

    mr -- iterable of records with ``factions`` and ``winner`` attributes

    For every matchup one line is printed: the matchup followed by a list of
    (faction, wins, share) tuples, share being wins divided by the number of
    games in that matchup, rounded to three decimals. Nothing is returned.
    """
    winners_by_matchup = defaultdict(list)
    for match in mr:
        winners_by_matchup[match.factions].append(match.winner)

    # Single-argument print(...) gives the same output whether print is a
    # statement or a function.
    print("\t Wins by faction \t")
    for matchup, winners in winners_by_matchup.items():
        games = float(len(winners))
        summary = []
        for faction in set(winners):
            wins = winners.count(faction)
            summary.append((faction, wins, round(wins / games, 3)))
        print("{0} {1}".format(matchup, summary))
    print("\n")
def wins_by_time(mr):
    """Print how often each faction won, grouped by match length.

    mr -- iterable of records with ``duration`` and ``winner`` attributes

    Durations are bucketed with round(duration, -2), i.e. to the nearest
    hundred seconds. Buckets are printed in ascending order, one line each:
    the bucket followed by a list of (faction, wins, share) tuples, share
    being wins divided by the number of games in the bucket, rounded to
    three decimals. Nothing is returned.
    """
    winners_by_bucket = defaultdict(list)
    for match in mr:
        bucket = round(match.duration, -2)
        winners_by_bucket[bucket].append(match.winner)

    # Single-argument print(...) gives the same output whether print is a
    # statement or a function.
    print("\t Wins by time \t")
    for bucket in sorted(winners_by_bucket):
        winners = winners_by_bucket[bucket]
        games = float(len(winners))
        summary = []
        for faction in set(winners):
            wins = winners.count(faction)
            summary.append((faction, wins, round(wins / games, 3)))
        print("{0} {1}".format(bucket, summary))
    print("\n")
def save_to_file(path, mr_list):
    """Save matches to a text file for later analysis.

    Writes one line per record to *path* (overwriting it): factions, winner,
    map and duration separated by single spaces.
    """
    row_format = "{0} {1} {2} {3}\n"
    with open(path, "w") as out:
        out.writelines(
            row_format.format(rec.factions, rec.winner, rec.map, rec.duration)
            for rec in mr_list
        )
def load_saved_file(path):
    """Load a match list previously written by save_to_file.

    Each non-blank line is split on whitespace into factions, winner, map
    and duration; further fields are ignored. Returns a list of Match
    namedtuples. The duration is an int, or None when the file holds the
    text "None" (which save_to_file writes for a match whose length could
    not be parsed).

    Raises IndexError for a line with fewer than four fields and ValueError
    for any other non-integer duration.
    """
    loaded = []
    with open(path) as fin:
        for raw_line in fin:
            fields = raw_line.split()
            if not fields:
                # Tolerate blank lines, e.g. a trailing newline added by hand.
                continue
            duration_text = fields[3]
            duration = None if duration_text == "None" else int(duration_text)
            loaded.append(Match(fields[0], fields[1], fields[2], duration))
    return loaded
if __name__ == '__main__':
    # Usage: script [N] -- N is the number of replays to analyse.
    try:
        n_r = int(sys.argv[1])
    except (IndexError, ValueError):
        # No argument given, or not a number: fall back to the default.
        # Anything else (e.g. Ctrl-C) is no longer swallowed.
        n_r = 25
    print("\t Getting {0} matches...".format(n_r))
    match_results = calculate_results(n=n_r)
    wins_by_faction(match_results)
    wins_by_time(match_results)
    # Keep the parsed matches so they can be re-analysed without re-fetching.
    save_to_file("matches.txt", match_results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment