# edran/check_rep.py

## check_rep.py
# This script does its best to filter out bad replays.
#   Pass it a directory: it walks it recursively, checks every '.rep' file,
#   and prints the paths of the corrupt (or otherwise rejected) ones to stdout.
from __future__ import print_function
from pyreplib import replay  # https://github.com/HearthSim/pyreplib/
from itertools import repeat
from multiprocessing import Pool, Process, Pipe
from multiprocessing.pool import ThreadPool
import os
import sys
import datetime
release = datetime.datetime(2008, 11, 25)  # release date of 1.16; analyze() rejects replays dated earlier


def analyze(repname, conn):
    """Parse one replay and report over ``conn`` whether it is rejected.

    A replay is rejected when its date is earlier than ``release`` or when
    its engine name, lower-cased, is not "broodwar".  For a rejected replay
    ``repname`` is sent first; ``None`` is always sent last.

    Args:
        repname: path of the replay file, handed to ``replay.Replay``.
        conn: connection object whose ``send`` method receives the messages.
    """
    parsed = replay.Replay(repname)
    # The engine name is only inspected when the date check passes.
    rejected = parsed.date < release
    if not rejected:
        rejected = parsed.engine_name.lower() != "broodwar"
    if rejected:
        conn.send(repname)
    conn.send(None)


def filterfiles(args):
    """Return the joined path for a replay file name, or None otherwise.

    Args:
        args: a ``(root, fname)`` pair, packed into a single argument so the
            function can be handed straight to ``map``.

    Returns:
        ``os.path.join(root, fname)`` when ``fname`` ends in ``.rep`` and
        does not contain ``.lock``; ``None`` for every other name.
    """
    root, fname = args
    # Match the extension rather than any '.rep' substring, so names such as
    # 'notes.report.txt' or 'game.rep.bak' are not mistaken for replays.
    if fname.endswith('.rep') and '.lock' not in fname:
        return os.path.join(root, fname)
    return None


# Paths of every replay file found under the directory given on the command
# line.  Kept at module level (and empty when the module is merely imported)
# so later code can always refer to it.
flst = []
if __name__ == "__main__":
    # Guarded: child processes started with the spawn/forkserver start
    # methods re-import this module and must not build their own Pool or
    # repeat the directory walk.
    pool = Pool()
    try:
        for root, dirs, files in os.walk(sys.argv[1]):
            flst += [f for f in pool.map(filterfiles, zip(repeat(root), files))
                     if f is not None]
    finally:
        # Release the worker processes as soon as the walk is finished.
        pool.close()
        pool.join()


# analyze sometimes segfaults, so a Pool will break
# Instead, just start a new process for each replay
def tpfunc(repname):
    """Check one replay in a throwaway child process; print it if it is bad.

    ``analyze`` runs in its own ``Process`` so that a crash while parsing
    only takes down that child.  The path is printed when the child sends
    back a non-``None`` message, or when the child exits without sending
    anything at all.

    Args:
        repname: path of the replay file to check.
    """
    conn, send = Pipe()
    try:
        t = Process(target=analyze, args=(repname, send))
        t.start()
        t.join()
        # join() has returned, so the child is gone and anything it sent is
        # already buffered in the pipe.  A non-blocking poll is enough;
        # waiting longer only stalls this worker for every crashed child.
        if conn.poll():
            res = conn.recv()
            if res is not None:
                print(res)
        else:
            print(repname)
    finally:
        # Close both ends explicitly rather than leaving two descriptors per
        # replay for the garbage collector.
        conn.close()
        send.close()


# Threadpool makes sure we don't accidentally forkbomb ourselves: each worker
# thread runs one tpfunc call at a time, and tpfunc joins its child process
# before returning.
if __name__ == "__main__":
    # Guarded so that child processes which re-import this module do not
    # start checking replays themselves.
    tp = ThreadPool()
    try:
        tp.map(tpfunc, flst)
    finally:
        tp.close()
        tp.join()
	# This script does its best to filter out bad replays.
	# Pass it a directory: it walks it recursively, checks every '.rep' file,
	# and prints the paths of the corrupt (or otherwise rejected) ones to stdout.
	from __future__ import print_function
	from pyreplib import replay # https://github.com/HearthSim/pyreplib/
	from itertools import repeat
	from multiprocessing import Pool, Process, Pipe
	from multiprocessing.pool import ThreadPool
	import os
	import sys
	import datetime
	release = datetime.datetime(2008, 11, 25) # release date of 1.16; analyze() rejects replays dated earlier


	def analyze(repname, conn):
	    """Parse one replay and report over ``conn`` whether it is rejected.

	    A replay is rejected when its date is earlier than ``release`` or when
	    its engine name, lower-cased, is not "broodwar".  For a rejected replay
	    ``repname`` is sent first; ``None`` is always sent last.

	    Args:
	        repname: path of the replay file, handed to ``replay.Replay``.
	        conn: connection object whose ``send`` method receives the messages.
	    """
	    parsed = replay.Replay(repname)
	    # The engine name is only inspected when the date check passes.
	    rejected = parsed.date < release
	    if not rejected:
	        rejected = parsed.engine_name.lower() != "broodwar"
	    if rejected:
	        conn.send(repname)
	    conn.send(None)


	def filterfiles(args):
	    """Return the joined path for a replay file name, or None otherwise.

	    Args:
	        args: a ``(root, fname)`` pair, packed into a single argument so the
	            function can be handed straight to ``map``.

	    Returns:
	        ``os.path.join(root, fname)`` when ``fname`` ends in ``.rep`` and
	        does not contain ``.lock``; ``None`` for every other name.
	    """
	    root, fname = args
	    # Match the extension rather than any '.rep' substring, so names such as
	    # 'notes.report.txt' or 'game.rep.bak' are not mistaken for replays.
	    if fname.endswith('.rep') and '.lock' not in fname:
	        return os.path.join(root, fname)
	    return None


	# Paths of every replay file found under the directory given on the command
	# line.  Kept at module level (and empty when the module is merely imported)
	# so later code can always refer to it.
	flst = []
	if __name__ == "__main__":
	    # Guarded: child processes started with the spawn/forkserver start
	    # methods re-import this module and must not build their own Pool or
	    # repeat the directory walk.
	    pool = Pool()
	    try:
	        for root, dirs, files in os.walk(sys.argv[1]):
	            flst += [f for f in pool.map(filterfiles, zip(repeat(root), files))
	                     if f is not None]
	    finally:
	        # Release the worker processes as soon as the walk is finished.
	        pool.close()
	        pool.join()


	# analyze sometimes segfaults, so a Pool will break
	# Instead, just start a new process for each replay
	def tpfunc(repname):
	    """Check one replay in a throwaway child process; print it if it is bad.

	    ``analyze`` runs in its own ``Process`` so that a crash while parsing
	    only takes down that child.  The path is printed when the child sends
	    back a non-``None`` message, or when the child exits without sending
	    anything at all.

	    Args:
	        repname: path of the replay file to check.
	    """
	    conn, send = Pipe()
	    try:
	        t = Process(target=analyze, args=(repname, send))
	        t.start()
	        t.join()
	        # join() has returned, so the child is gone and anything it sent is
	        # already buffered in the pipe.  A non-blocking poll is enough;
	        # waiting longer only stalls this worker for every crashed child.
	        if conn.poll():
	            res = conn.recv()
	            if res is not None:
	                print(res)
	        else:
	            print(repname)
	    finally:
	        # Close both ends explicitly rather than leaving two descriptors per
	        # replay for the garbage collector.
	        conn.close()
	        send.close()


	# Threadpool makes sure we don't accidentally forkbomb ourselves: each worker
	# thread runs one tpfunc call at a time, and tpfunc joins its child process
	# before returning.
	if __name__ == "__main__":
	    # Guarded so that child processes which re-import this module do not
	    # start checking replays themselves.
	    tp = ThreadPool()
	    try:
	        tp.map(tpfunc, flst)
	    finally:
	        tp.close()
	        tp.join()