gcapell/A.py

## A.py
"""
""   Solution: Runs (Google Code Jam 2011 World Finals Problem A)
""   by: aq@okaq.com
""   run: python A.py infile outfile
"""

import sys
import time
import collections

MOD = 1000003

def bc(n, k):
	"""Binomial coefficient"""
	r = 1
	for i in range(1, k+1):
		r = r * (n-i+1) / i
	return r

bcmd = {}
def bcm(*args):
	"""binomial coefficient (memoized)"""
	try:
		return bcmd[args]
	except KeyError:
		bc0 = bc(*args)
		bcmd[args] = bc0
		return bc0

def count_transitions(existing_letters, existing_runs, add_letters, final_runs):
	"""
	Given existing_letters (count) arranged in existing_runs (count),
	in how many ways can we add add_letters to form final_runs?
	"""
	if existing_runs == 0:
		return 1 if final_runs ==1 else 0

	# We can add letters at boundaries (adds one run)
	# or within existing runs (adds two runs).
	return sum (
		(
			# How many ways can we insert at a boundary?
			bcm(existing_runs + 1, at_boundary) *

			# How many ways can we insert within an existing run?
			bcm(existing_letters - existing_runs, in_run) *

			# How many ways can we arrange the letters we are adding
			# within our (at_boundary+in_run) insertion points?
			bcm(add_letters - 1, at_boundary + in_run - 1)
		) for in_run, at_boundary in gen_insertions(final_runs - existing_runs)
	)

def gen_insertions(delta):
	for in_run in range(delta/2 + 1):
		yield in_run, delta - 2 * in_run

def solve(freq, runs_goal):
	"""
	Dynamic programming algo that runs iteratively up to original number of runs.
	Complicated, but fast
	"""
	runs_count = [1] + [0] * (runs_goal-1)
	existing_letters = 0
	for add_letters in freq:
		runs_count_prev = list(runs_count)
		runs_count = [0]

		for r in range(1, runs_goal+1):
			runs_count.append(sum(
				count_transitions(existing_letters, r0, add_letters, r) * runs_count_prev[r0]
				for r0 in range(r)
			) % MOD)
		existing_letters += add_letters
	return runs_count[-1]

def get_words(filename):
	"""Yield (testnum, word) pairs read from input filename"""
	with open(filename) as fp:
		ntests = int(fp.readline())
		for testnum in range(1, ntests+1):
			yield testnum, fp.readline().strip()

def analyse(word):
	"""
	Given word return (freq, runs) pair
	where freq is list of counts of letters,
	runs is number of runs in word.
	"""
	prev = None
	runs = 0
	hist = collections.defaultdict(int)
	for c in word:
		hist[c] += 1
		if c != prev:
			runs += 1
			prev = c
	return hist.values(), runs

class Timer:
	"""Simple split timer"""
	def __init__(self):
		self.start = time.time()
	def split_time(self):
		return "%.3f" % (time.time() - self.start)

def main():
	t = Timer()
	with open(sys.argv[2], 'w') as fout:
		for testnum, word in get_words(sys.argv[1]):
			freq, runs = analyse(word)
			s0 = solve(freq, runs)
			fout.write("Case #%d: %d\n" % (testnum, s0))
			print "split time for case #%d (%d): %s" % (testnum, s0, t.split_time())
	print "total time: %s" % (t.split_time())

if __name__ == '__main__':
	main()
	"""
	"" Solution: Runs (Google Code Jam 2011 World Finals Problem A)
	"" by: aq@okaq.com
	"" run: python A.py infile outfile
	"""

	import sys
	import time
	import collections

	MOD = 1000003

	def bc(n, k):
	"""Binomial coefficient"""
	r = 1
	for i in range(1, k+1):
	r = r * (n-i+1) / i
	return r

	bcmd = {}
	def bcm(*args):
	"""binomial coefficient (memoized)"""
	try:
	return bcmd[args]
	except KeyError:
	bc0 = bc(*args)
	bcmd[args] = bc0
	return bc0

	def count_transitions(existing_letters, existing_runs, add_letters, final_runs):
	"""
	Given existing_letters (count) arranged in existing_runs (count),
	in how many ways can we add add_letters to form final_runs?
	"""
	if existing_runs == 0:
	return 1 if final_runs ==1 else 0

	# We can add letters at boundaries (adds one run)
	# or within existing runs (adds two runs).
	return sum (
	(
	# How many ways can we insert at a boundary?
	bcm(existing_runs + 1, at_boundary) *

	# How many ways can we insert within an existing run?
	bcm(existing_letters - existing_runs, in_run) *

	# How many ways can we arrange the letters we are adding
	# within our (at_boundary+in_run) insertion points?
	bcm(add_letters - 1, at_boundary + in_run - 1)
	) for in_run, at_boundary in gen_insertions(final_runs - existing_runs)
	)

	def gen_insertions(delta):
	for in_run in range(delta/2 + 1):
	yield in_run, delta - 2 * in_run

	def solve(freq, runs_goal):
	"""
	Dynamic programming algo that runs iteratively up to original number of runs.
	Complicated, but fast
	"""
	runs_count = [1] + [0] * (runs_goal-1)
	existing_letters = 0
	for add_letters in freq:
	runs_count_prev = list(runs_count)
	runs_count = [0]

	for r in range(1, runs_goal+1):
	runs_count.append(sum(
	count_transitions(existing_letters, r0, add_letters, r) * runs_count_prev[r0]
	for r0 in range(r)
	) % MOD)
	existing_letters += add_letters
	return runs_count[-1]

	def get_words(filename):
	"""Yield (testnum, word) pairs read from input filename"""
	with open(filename) as fp:
	ntests = int(fp.readline())
	for testnum in range(1, ntests+1):
	yield testnum, fp.readline().strip()

	def analyse(word):
	"""
	Given word return (freq, runs) pair
	where freq is list of counts of letters,
	runs is number of runs in word.
	"""
	prev = None
	runs = 0
	hist = collections.defaultdict(int)
	for c in word:
	hist[c] += 1
	if c != prev:
	runs += 1
	prev = c
	return hist.values(), runs

	class Timer:
	"""Simple split timer"""
	def __init__(self):
	self.start = time.time()
	def split_time(self):
	return "%.3f" % (time.time() - self.start)

	def main():
	t = Timer()
	with open(sys.argv[2], 'w') as fout:
	for testnum, word in get_words(sys.argv[1]):
	freq, runs = analyse(word)
	s0 = solve(freq, runs)
	fout.write("Case #%d: %d\n" % (testnum, s0))
	print "split time for case #%d (%d): %s" % (testnum, s0, t.split_time())
	print "total time: %s" % (t.split_time())

	if __name__ == '__main__':
	main()