neumond/bsynth.py

## bsynth.py
from math import floor
from collections import defaultdict
from itertools import count


class TooHighLevelException(Exception):
    pass


def make_number(p, level):
    """
    >>> make_number(5, 0)
    '+++++'
    >>> make_number(-5, 0)
    '-----'
    """
    # +++++ 5
    # >+++[<+++>-]<++  3*3+2
    # >>+++++[<++++[<+++>-]>-]<<+ 5*4*3+1
    sgn = '+' if p > 0 else '-'
    if p < 0:
        p = -p
    if level == 0:
        return sgn * p
    a = floor(p ** (1 / (level + 1)))
    if a <= 1:
        raise TooHighLevelException
    al = a ** level
    b = p // al
    if b <= 1:
        raise TooHighLevelException
    q = p % al
    res = ''
    for i in range(0, level, 1):
        res = '[<{}{}>-]'.format((sgn if i == 0 else '+') * a, res)
    res = '{}{}{}{}'.format('>' * level, '+' * b, res, '<' * level)
    if q > 0:
        res += sgn * q
    return res


def optimize(s):
    while True:
        s2 = s.replace('<>', '').replace('><', '').replace('+-', '').replace('-+', '')
        if s2 == s:
            break
        s = s2
    i = max(s.rfind(']'), s.rfind('.'), s.rfind(','))
    return s[:i+1]


def make_number_best(p):
    res = None
    try:
        for lvl in range(20):
            x = make_number(p, lvl)
            if res is None or len(x) < len(res):
                res = x
    except TooHighLevelException:
        pass
    return res


def make_char_dict(uniq):
    """
    Prepare char table, then output chars by
    selecting places in table.
    """
    code = []
    for b in uniq:
        code.append(make_number_best(b))
        code.append('>')
    return code


def split_numbers_by_gaps(nums, maxdiff):
    '''
    >>> split_numbers_by_gaps([1, 2, 8, 9, 15, 45, 46], 1)
    [[1, 2], [8, 9], [15], [45, 46]]
    >>> split_numbers_by_gaps([1, 2, 3, 4, 5], 1)
    [[1, 2, 3, 4, 5]]
    >>> split_numbers_by_gaps([1, 2, 4, 5], 1)
    [[1, 2], [4, 5]]
    >>> split_numbers_by_gaps([1, 2, 4, 5], 2)
    [[1, 2, 4, 5]]
    >>> split_numbers_by_gaps([1, 2, 5, 6], 2)
    [[1, 2], [5, 6]]
    '''
    res = []
    nums = sorted(nums)
    grp = [nums[0]]
    for n in nums[1:]:
        if n - grp[-1] > maxdiff:
            res.append(grp)
            grp = [n]
        else:
            grp.append(n)
    if grp:
        res.append(grp)
    return res


def center_split(grp, maxlen):
    '''
    >>> center_split([1, 2, 3, 4, 5], 3)
    [[1], [2, 3, 4], [5]]
    >>> center_split([1, 2], 3)
    [[1, 2]]
    '''
    if len(grp) <= maxlen:
        return [grp]
    whole_count = len(grp) // maxlen
    edge_left = (len(grp) - whole_count * maxlen) // 2
    edge_right = len(grp) - whole_count * maxlen - edge_left
    res = []
    if edge_left:
        res.append(grp[0:edge_left])
    for i in range(whole_count):
        base = edge_left + i * maxlen
        res.append(grp[base:base+maxlen])
    if edge_right:
        res.append(grp[-edge_right:])
    return res


def find_closest_bisect(needle, haystack):
    if needle <= haystack[0]:
        return haystack[0]
    if needle >= haystack[-1]:
        return haystack[-1]
    a, b = 0, len(haystack)
    while b - a > 1:
        c = (a + b) // 2
        if needle > haystack[c]:
            a = c
        else:
            b = c
    da, db = needle - haystack[a], haystack[b] - needle
    return haystack[a] if da < db else haystack[b]


def brainfuck_debug(code):
    data = [0]
    cur = 0
    ptr = 0
    output = []
    stack = []
    chunk = [0] * 10
    while ptr < len(code):
        ch = code[ptr]
        if ch == '+':
            data[cur] += 1
        elif ch == '-':
            data[cur] -= 1
        elif ch == '.':
            output.append(data[cur])
        elif ch == '[':
            stack.append(ptr + 1)
        elif ch == ']':
            if data[cur] != 0:
                ptr = stack[-1]
            else:
                ptr += 1
                stack.pop()
        elif ch == '>':
            cur += 1
            if cur >= len(data):
                data.extend(chunk)
        elif ch == '<':
            cur -= 1
            if cur < 0:
                data = chunk + data
                cur += len(chunk)
        if ch != ']':
            ptr += 1
        # print(data)
    # print(output)
    return output


class BaseBrainfuckGenerator:
    def __call__(self, line):
        raise NotImplementedError

    def run(self, line):
        return optimize(self(
            list(line.encode('utf-8'))
        ))

    @classmethod
    def run_and_check(cls, line):
        bfuck = cls().run(line)
        output = brainfuck_debug(bfuck)
        output = bytes(output).decode('utf-8')
        assert output == line
        return bfuck


class LinearBrainfuckGenerator(BaseBrainfuckGenerator):
    """
    Make difference code (for zero memory cell) sequentially for every char.
    # TODO: possible improvement: skip correcting zero cell if it's easier to make char from scratch.
    """
    def __call__(self, line):
        lst = 0
        code = []
        for b in line:
            diff = b - lst
            lst = b
            code.append(make_number_best(diff))
            code.append('.')
        return ''.join(code)


class DictedBrainfuckGenerator(BaseBrainfuckGenerator):
    "Simple approach with make_char_dict"
    def __call__(self, line):
        uniq = list(sorted(set(line)))
        code = make_char_dict(uniq)
        code.append('<' * len(uniq))
        for b in line:
            i = uniq.index(b)
            code.append('>' * i)
            code.append('.')
            code.append('<' * i)
        return ''.join(code)


class CombinedBrainfuckGenerator(BaseBrainfuckGenerator):
    def __call__(self, line):
        GAP_LEN = 2
        GAP2_LEN = GAP_LEN * 2 + 1
        # MAX_RATE = 2 * GAP_LEN * (GAP_LEN + 1)

        uniq = list(sorted(set(line)))

        # find adjacent groups of codes
        gaps = split_numbers_by_gaps(uniq, GAP_LEN)
        gaps = list(filter(lambda x: len(x) > 1, gaps))

        # split very long adjacent groups
        splitted_gaps = []
        for gap in gaps:
            splitted_gaps.extend(center_split(gap, GAP2_LEN))

        # add middle points of gaps to char registry
        key_nums = []
        for gap in splitted_gaps:
            key_nums.append(sum(gap) // len(gap))

        # mapping from char codes to gaps
        key_map = {}
        for n in key_nums:
            for i in range(n - GAP_LEN, n + GAP_LEN + 1):
                key_map[i] = find_closest_bisect(i, key_nums)

        code = make_char_dict(key_nums)
        state = {v: v for v in key_nums}

        lst = 0
        for b in line:
            if b in key_map:
                key = key_map[b]
                idx = key_nums.index(key)
                code.append('<' * len(key_nums))
                code.append('>' * idx)

                diff = b - state[key]
                state[key] = b
                code.append(make_number(diff, 0))
                code.append('.')

                code.append('<' * idx)
                code.append('>' * len(key_nums))
            else:
                diff = b - lst
                lst = b
                code.append(make_number_best(diff))
                code.append('.')
        return ''.join(code)


class LazyBrainfuckGenerator(BaseBrainfuckGenerator):
    def pick_action(self, code):
        best_dist, best_cell = None, None
        for cell_i, cell_v in enumerate(self.cell_states):
            d = abs(cell_v - code)
            if best_dist is None or d < best_dist:
                best_dist, best_cell = d, cell_i
        return best_cell

    def plan_actions(self, line):
        self.cell_states = [0]
        actions = []
        for keycode in line:
            cell_index = self.pick_action(keycode)
            diff = keycode - self.cell_states[cell_index]
            actions.append((cell_index, diff))
            self.cell_states[cell_index] = keycode

            if self.cell_states[-1] != 0:
                self.cell_states.append(0)
        del self.cell_states
        return actions

    def cell_levels(self, actions):
        result = defaultdict(lambda: 0)
        for cell_index, diff in actions:
            code = make_number_best(diff)
            level = code.count(']') + 1
            if level > result[cell_index]:
                result[cell_index] = level
        return result

    def cell_indices(self, cell_levels):
        result, ptr = {}, 0
        for cell_i in count():
            if cell_i not in cell_levels:
                break
            result[cell_i] = ptr
            ptr += cell_levels[cell_i]
        return result

    @staticmethod
    def make_shift(diff):
        sym = '>' if diff >= 0 else '<'
        return sym * abs(diff)

    def assemble_code(self, actions, cell_ptrs):
        result = []
        current_position = 0
        for cell_index, diff in actions:
            target_position = cell_ptrs[cell_index]
            result.append(self.make_shift(target_position - current_position))
            current_position = target_position
            result.append(make_number_best(diff))
            result.append('.')
        return result

    def __call__(self, line):
        actions = self.plan_actions(line)
        cell_levels = self.cell_levels(actions)
        cell_ptrs = self.cell_indices(cell_levels)
        return ''.join(self.assemble_code(actions, cell_ptrs))


TEST_LINES = [

]


def test_algorithms():
    winners = defaultdict(lambda: 0)
    for line in TEST_LINES:
        best_cls, best_len = None, None
        print(line[:10])
        for cls in (
            LinearBrainfuckGenerator,
            DictedBrainfuckGenerator,
            CombinedBrainfuckGenerator,
            LazyBrainfuckGenerator,
        ):
            l = len(cls.run_and_check(line))
            print('  {}: {}'.format(cls.__name__, l))
            if best_cls is None or l < best_len:
                best_cls, best_len = cls, l
        winners[best_cls] += 1
        # print(line[:10], best_cls)
    for cls, wins in sorted(winners.items(), key=lambda x: -x[1]):
        print(cls.__name__, wins)


test_algorithms()
# print(LazyBrainfuckGenerator().run('sooqa jopa'))
	from math import floor
	from collections import defaultdict
	from itertools import count


	class TooHighLevelException(Exception):
	pass


	def make_number(p, level):
	"""
	>>> make_number(5, 0)
	'+++++'
	>>> make_number(-5, 0)
	'-----'
	"""
	# +++++ 5
	# >+++[<+++>-]<++ 3*3+2
	# >>+++++[<++++[<+++>-]>-]<<+ 543+1
	sgn = '+' if p > 0 else '-'
	if p < 0:
	p = -p
	if level == 0:
	return sgn * p
	a = floor(p ** (1 / (level + 1)))
	if a <= 1:
	raise TooHighLevelException
	al = a ** level
	b = p // al
	if b <= 1:
	raise TooHighLevelException
	q = p % al
	res = ''
	for i in range(0, level, 1):
	res = '[<{}{}>-]'.format((sgn if i == 0 else '+') * a, res)
	res = '{}{}{}{}'.format('>' * level, '+' * b, res, '<' * level)
	if q > 0:
	res += sgn * q
	return res


	def optimize(s):
	while True:
	s2 = s.replace('<>', '').replace('><', '').replace('+-', '').replace('-+', '')
	if s2 == s:
	break
	s = s2
	i = max(s.rfind(']'), s.rfind('.'), s.rfind(','))
	return s[:i+1]


	def make_number_best(p):
	res = None
	try:
	for lvl in range(20):
	x = make_number(p, lvl)
	if res is None or len(x) < len(res):
	res = x
	except TooHighLevelException:
	pass
	return res


	def make_char_dict(uniq):
	"""
	Prepare char table, then output chars by
	selecting places in table.
	"""
	code = []
	for b in uniq:
	code.append(make_number_best(b))
	code.append('>')
	return code


	def split_numbers_by_gaps(nums, maxdiff):
	'''
	>>> split_numbers_by_gaps([1, 2, 8, 9, 15, 45, 46], 1)
	[[1, 2], [8, 9], [15], [45, 46]]
	>>> split_numbers_by_gaps([1, 2, 3, 4, 5], 1)
	[[1, 2, 3, 4, 5]]
	>>> split_numbers_by_gaps([1, 2, 4, 5], 1)
	[[1, 2], [4, 5]]
	>>> split_numbers_by_gaps([1, 2, 4, 5], 2)
	[[1, 2, 4, 5]]
	>>> split_numbers_by_gaps([1, 2, 5, 6], 2)
	[[1, 2], [5, 6]]
	'''
	res = []
	nums = sorted(nums)
	grp = [nums[0]]
	for n in nums[1:]:
	if n - grp[-1] > maxdiff:
	res.append(grp)
	grp = [n]
	else:
	grp.append(n)
	if grp:
	res.append(grp)
	return res


	def center_split(grp, maxlen):
	'''
	>>> center_split([1, 2, 3, 4, 5], 3)
	[[1], [2, 3, 4], [5]]
	>>> center_split([1, 2], 3)
	[[1, 2]]
	'''
	if len(grp) <= maxlen:
	return [grp]
	whole_count = len(grp) // maxlen
	edge_left = (len(grp) - whole_count * maxlen) // 2
	edge_right = len(grp) - whole_count * maxlen - edge_left
	res = []
	if edge_left:
	res.append(grp[0:edge_left])
	for i in range(whole_count):
	base = edge_left + i * maxlen
	res.append(grp[base:base+maxlen])
	if edge_right:
	res.append(grp[-edge_right:])
	return res


	def find_closest_bisect(needle, haystack):
	if needle <= haystack[0]:
	return haystack[0]
	if needle >= haystack[-1]:
	return haystack[-1]
	a, b = 0, len(haystack)
	while b - a > 1:
	c = (a + b) // 2
	if needle > haystack[c]:
	a = c
	else:
	b = c
	da, db = needle - haystack[a], haystack[b] - needle
	return haystack[a] if da < db else haystack[b]


	def brainfuck_debug(code):
	data = [0]
	cur = 0
	ptr = 0
	output = []
	stack = []
	chunk = [0] * 10
	while ptr < len(code):
	ch = code[ptr]
	if ch == '+':
	data[cur] += 1
	elif ch == '-':
	data[cur] -= 1
	elif ch == '.':
	output.append(data[cur])
	elif ch == '[':
	stack.append(ptr + 1)
	elif ch == ']':
	if data[cur] != 0:
	ptr = stack[-1]
	else:
	ptr += 1
	stack.pop()
	elif ch == '>':
	cur += 1
	if cur >= len(data):
	data.extend(chunk)
	elif ch == '<':
	cur -= 1
	if cur < 0:
	data = chunk + data
	cur += len(chunk)
	if ch != ']':
	ptr += 1
	# print(data)
	# print(output)
	return output


	class BaseBrainfuckGenerator:
	def __call__(self, line):
	raise NotImplementedError

	def run(self, line):
	return optimize(self(
	list(line.encode('utf-8'))
	))

	@classmethod
	def run_and_check(cls, line):
	bfuck = cls().run(line)
	output = brainfuck_debug(bfuck)
	output = bytes(output).decode('utf-8')
	assert output == line
	return bfuck


	class LinearBrainfuckGenerator(BaseBrainfuckGenerator):
	"""
	Make difference code (for zero memory cell) sequentially for every char.
	# TODO: possible improvement: skip correcting zero cell if it's easier to make char from scratch.
	"""
	def __call__(self, line):
	lst = 0
	code = []
	for b in line:
	diff = b - lst
	lst = b
	code.append(make_number_best(diff))
	code.append('.')
	return ''.join(code)


	class DictedBrainfuckGenerator(BaseBrainfuckGenerator):
	"Simple approach with make_char_dict"
	def __call__(self, line):
	uniq = list(sorted(set(line)))
	code = make_char_dict(uniq)
	code.append('<' * len(uniq))
	for b in line:
	i = uniq.index(b)
	code.append('>' * i)
	code.append('.')
	code.append('<' * i)
	return ''.join(code)


	class CombinedBrainfuckGenerator(BaseBrainfuckGenerator):
	def __call__(self, line):
	GAP_LEN = 2
	GAP2_LEN = GAP_LEN * 2 + 1
	# MAX_RATE = 2 * GAP_LEN * (GAP_LEN + 1)

	uniq = list(sorted(set(line)))

	# find adjacent groups of codes
	gaps = split_numbers_by_gaps(uniq, GAP_LEN)
	gaps = list(filter(lambda x: len(x) > 1, gaps))

	# split very long adjacent groups
	splitted_gaps = []
	for gap in gaps:
	splitted_gaps.extend(center_split(gap, GAP2_LEN))

	# add middle points of gaps to char registry
	key_nums = []
	for gap in splitted_gaps:
	key_nums.append(sum(gap) // len(gap))

	# mapping from char codes to gaps
	key_map = {}
	for n in key_nums:
	for i in range(n - GAP_LEN, n + GAP_LEN + 1):
	key_map[i] = find_closest_bisect(i, key_nums)

	code = make_char_dict(key_nums)
	state = {v: v for v in key_nums}

	lst = 0
	for b in line:
	if b in key_map:
	key = key_map[b]
	idx = key_nums.index(key)
	code.append('<' * len(key_nums))
	code.append('>' * idx)

	diff = b - state[key]
	state[key] = b
	code.append(make_number(diff, 0))
	code.append('.')

	code.append('<' * idx)
	code.append('>' * len(key_nums))
	else:
	diff = b - lst
	lst = b
	code.append(make_number_best(diff))
	code.append('.')
	return ''.join(code)


	class LazyBrainfuckGenerator(BaseBrainfuckGenerator):
	def pick_action(self, code):
	best_dist, best_cell = None, None
	for cell_i, cell_v in enumerate(self.cell_states):
	d = abs(cell_v - code)
	if best_dist is None or d < best_dist:
	best_dist, best_cell = d, cell_i
	return best_cell

	def plan_actions(self, line):
	self.cell_states = [0]
	actions = []
	for keycode in line:
	cell_index = self.pick_action(keycode)
	diff = keycode - self.cell_states[cell_index]
	actions.append((cell_index, diff))
	self.cell_states[cell_index] = keycode

	if self.cell_states[-1] != 0:
	self.cell_states.append(0)
	del self.cell_states
	return actions

	def cell_levels(self, actions):
	result = defaultdict(lambda: 0)
	for cell_index, diff in actions:
	code = make_number_best(diff)
	level = code.count(']') + 1
	if level > result[cell_index]:
	result[cell_index] = level
	return result

	def cell_indices(self, cell_levels):
	result, ptr = {}, 0
	for cell_i in count():
	if cell_i not in cell_levels:
	break
	result[cell_i] = ptr
	ptr += cell_levels[cell_i]
	return result

	@staticmethod
	def make_shift(diff):
	sym = '>' if diff >= 0 else '<'
	return sym * abs(diff)

	def assemble_code(self, actions, cell_ptrs):
	result = []
	current_position = 0
	for cell_index, diff in actions:
	target_position = cell_ptrs[cell_index]
	result.append(self.make_shift(target_position - current_position))
	current_position = target_position
	result.append(make_number_best(diff))
	result.append('.')
	return result

	def __call__(self, line):
	actions = self.plan_actions(line)
	cell_levels = self.cell_levels(actions)
	cell_ptrs = self.cell_indices(cell_levels)
	return ''.join(self.assemble_code(actions, cell_ptrs))


	TEST_LINES = [

	]


	def test_algorithms():
	winners = defaultdict(lambda: 0)
	for line in TEST_LINES:
	best_cls, best_len = None, None
	print(line[:10])
	for cls in (
	LinearBrainfuckGenerator,
	DictedBrainfuckGenerator,
	CombinedBrainfuckGenerator,
	LazyBrainfuckGenerator,
	):
	l = len(cls.run_and_check(line))
	print(' {}: {}'.format(cls.__name__, l))
	if best_cls is None or l < best_len:
	best_cls, best_len = cls, l
	winners[best_cls] += 1
	# print(line[:10], best_cls)
	for cls, wins in sorted(winners.items(), key=lambda x: -x[1]):
	print(cls.__name__, wins)


	test_algorithms()
	# print(LazyBrainfuckGenerator().run('sooqa jopa'))