seansummers/solution2tsv.py

## solution2tsv.py
#! /usr/bin/env python
from __future__ import print_function
import re
import hashlib
import collections

buffering = 2**29
answer_re = r'^(?P<result>[^,]+),(?P<cpu_time>[\d\.]+),(?P<core>.+)'
core_line_re = r'(?P<facility_id>\d+):(?P<channels>(?:\d+[,;])+)'

signer = hashlib.sha256()
answer_rx = re.compile(answer_re)
core_line_rx = re.compile(core_line_re)
Answer = collections.namedtuple('Answer',  ['result', 'cpu_time', 'core'])

def core_value(core):
	return '{};'.format(';'.join(core))

def core_id(core):
	id = signer.copy()
	discriminator = '{}@{}'.format(len(core), core_value(core))
	id.update(discriminator.encode('utf-8'))
	return id.hexdigest()

class sat_answer(Answer):
	__slots__ = ()
	@property
	def id(self):
		return core_id(self.core)

def channels(channel_line):
	unique_channels = {int(__) for __ in channel_line.strip(' ,;').split(',')}
	sorted_channels = sorted(unique_channels)
	return ','.join(str(__) for __ in sorted_channels)

def candidate(core_line):
	core_prospects = core_line_rx.finditer(core_line)
	unique_prospects = {int(__.group('facility_id')): channels(__.group('channels')) for __ in core_prospects}
	sorted_prospects = (':'.join((str(__), unique_prospects[__])) for __ in sorted(unique_prospects))
	return tuple(sorted_prospects)

def answerfile(filename, buffering = None):
	with open(filename, buffering=buffering) as f:
		for line in f:
			core = sat_answer._make(answer_rx.match(line).groups())
			yield core._replace(core=candidate(core.core))

if __name__ == '__main__':
	results = answerfile('../data/sat.clean', buffering=buffering)
	with open('core_view.psv', 'w', buffering=buffering) as core_psv, \
		open('answer_view.psv', 'w', buffering=buffering) as answer_psv, \
		open('propsect_view.psv', 'w', buffering=buffering) as prospect_psv, \
		open('candidate_view.psv', 'w', buffering=buffering) as candidate_psv:
		for answer in results:
			core_hash = answer.id
			print('|'.join((core_value(answer.core), core_hash)), file=core_psv)
			print('|'.join((core_hash, answer.result, answer.cpu_time)), file=answer_psv)
			for prospect in answer.core:
				print('|'.join((core_hash, prospect)), file=candidate_psv)
				print(prospect, file=prospect_psv)
	#! /usr/bin/env python
	from __future__ import print_function
	import re
	import hashlib
	import collections

	buffering = 2**29
	answer_re = r'^(?P<result>[^,]+),(?P<cpu_time>[\d\.]+),(?P<core>.+)'
	core_line_re = r'(?P<facility_id>\d+):(?P<channels>(?:\d+[,;])+)'

	signer = hashlib.sha256()
	answer_rx = re.compile(answer_re)
	core_line_rx = re.compile(core_line_re)
	Answer = collections.namedtuple('Answer', ['result', 'cpu_time', 'core'])

	def core_value(core):
	return '{};'.format(';'.join(core))

	def core_id(core):
	id = signer.copy()
	discriminator = '{}@{}'.format(len(core), core_value(core))
	id.update(discriminator.encode('utf-8'))
	return id.hexdigest()

	class sat_answer(Answer):
	__slots__ = ()
	@property
	def id(self):
	return core_id(self.core)

	def channels(channel_line):
	unique_channels = {int(__) for __ in channel_line.strip(' ,;').split(',')}
	sorted_channels = sorted(unique_channels)
	return ','.join(str(__) for __ in sorted_channels)

	def candidate(core_line):
	core_prospects = core_line_rx.finditer(core_line)
	unique_prospects = {int(__.group('facility_id')): channels(__.group('channels')) for __ in core_prospects}
	sorted_prospects = (':'.join((str(__), unique_prospects[__])) for __ in sorted(unique_prospects))
	return tuple(sorted_prospects)

	def answerfile(filename, buffering = None):
	with open(filename, buffering=buffering) as f:
	for line in f:
	core = sat_answer._make(answer_rx.match(line).groups())
	yield core._replace(core=candidate(core.core))

	if __name__ == '__main__':
	results = answerfile('../data/sat.clean', buffering=buffering)
	with open('core_view.psv', 'w', buffering=buffering) as core_psv, \
	open('answer_view.psv', 'w', buffering=buffering) as answer_psv, \
	open('propsect_view.psv', 'w', buffering=buffering) as prospect_psv, \
	open('candidate_view.psv', 'w', buffering=buffering) as candidate_psv:
	for answer in results:
	core_hash = answer.id
	print('\|'.join((core_value(answer.core), core_hash)), file=core_psv)
	print('\|'.join((core_hash, answer.result, answer.cpu_time)), file=answer_psv)
	for prospect in answer.core:
	print('\|'.join((core_hash, prospect)), file=candidate_psv)
	print(prospect, file=prospect_psv)