Skip to content

Instantly share code, notes, and snippets.

@edran
Forked from ebetica/check_rep.py
Created February 16, 2017 18:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save edran/4543182608ba3a6e3feb871f90bb1be7 to your computer and use it in GitHub Desktop.
Save edran/4543182608ba3a6e3feb871f90bb1be7 to your computer and use it in GitHub Desktop.
Runs through a directory of StarCraft replays and prints the path of every corrupt one
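# This script makes a best-effort attempt to filter out bad replays.
# Pass it a directory as the first argument: it walks the tree, reads every
# '.rep' file, and prints to stdout the path of each replay that crashes the
# parser, is dated before the 1.16 release, or was not recorded with Brood War.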
from __future__ import print_function
from pyreplib import replay # https://github.com/HearthSim/pyreplib/
from itertools import repeat
from multiprocessing import Pool, Process, Pipe
from multiprocessing.pool import ThreadPool
import os
import sys
import datetime
# Cut-off date: analyze() reports any replay whose date is earlier than this.
release = datetime.datetime(2008, 11, 25) # release date of 1.16
def analyze(repname, conn):
    """Parse one replay and report over ``conn`` whether it is acceptable.

    Used as the target of a child process. The replay's path is sent when the
    replay is dated before ``release`` or its engine is not Brood War; a
    trailing ``None`` is always sent so the receiver gets a message whenever
    parsing finishes.

    Args:
        repname: Path of the replay file to parse.
        conn: Connection object the verdict is sent through.
    """
    parsed = replay.Replay(repname)
    too_old = parsed.date < release
    # The engine check is only evaluated when the date check passes.
    if too_old or parsed.engine_name.lower() != "broodwar":
        conn.send(repname)
    conn.send(None)
def filterfiles(args):
    """Return the full path of a replay file, or None for anything else.

    Args:
        args: A ``(root, fname)`` pair -- a directory and a file name inside
            it -- packed into one tuple so the function can be passed to
            ``Pool.map``.

    Returns:
        ``os.path.join(root, fname)`` when ``fname`` ends with ``.rep`` and
        does not contain ``.lock``; otherwise ``None``.
    """
    root, fname = args
    # Match the extension rather than a substring so that names such as
    # "notes.report" or "game.rep.bak" are not mistaken for replays.
    if fname.endswith('.rep') and '.lock' not in fname:
        return os.path.join(root, fname)
    return None
# The directory to scan is the first command-line argument; fail with a
# readable message on stderr instead of an IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s <replay-directory>" % sys.argv[0])

# Collect the full path of every replay file below the given directory.
pool = Pool()
flst = []
try:
    for root, _dirs, files in os.walk(sys.argv[1]):
        flst += [f for f in pool.map(filterfiles, zip(repeat(root), files))
                 if f is not None]
finally:
    # The worker processes are only needed for file discovery; shut them
    # down so they are not left idle while the replays are analyzed.
    pool.close()
    pool.join()
# analyze sometimes segfaults, so a Pool will break
# Instead, just start a new process for each replay
def tpfunc(repname):
    """Check one replay in a throwaway child process and print it if bad.

    ``analyze`` runs in its own ``Process`` so that a crash inside the replay
    parser only kills that child. The replay path is printed when the child
    reports it, or when the child ends without reporting anything.

    Args:
        repname: Path of the replay file to check.
    """
    conn, send = Pipe()
    t = Process(target=analyze, args=(repname, send))
    t.start()
    # The child holds its own copy of the sending end. Closing ours lets the
    # pipe reach EOF once the child is gone, so a crashed child is noticed
    # right away instead of only after the full poll timeout.
    send.close()
    t.join()
    try:
        if conn.poll(5):
            res = conn.recv()
            if res is not None:
                print(res)
        else:
            print(repname)
    except EOFError:
        # The child exited without sending a verdict (e.g. it crashed).
        print(repname)
    finally:
        conn.close()
# Threadpool makes sure we don't accidentally forkbomb ourselves: each worker
# thread runs one tpfunc call, and therefore one child process, at a time.
tp = ThreadPool()
try:
    tp.map(tpfunc, flst)
finally:
    # Release the pool's worker threads explicitly rather than relying on
    # interpreter shutdown.
    tp.close()
    tp.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment