masak/balanced-bracket.p6

## balanced-bracket.p6
# Recursive definition: a balanced-bracket string is always empty or of the form
#
#     [S1]S2
#
# where S1 and S2 are balanced-bracket strings.
#
# Let n(S) be the number of bracket pairs in the balanced-bracket string S. And let
# C(n) be the number of balanced-bracket strings with exactly n bracket pairs.
# C(n) happens to be the Catalan sequence <http://oeis.org/A000108>:
#
# n     0, 1, 2, 3,  4,  5,   6,   7,    8,    9,    10
# C(n)  1, 1, 2, 5, 14, 42, 132, 429, 1430, 4862, 16796
#
# We'll need this sequence, so let's define it. oeis has it as (2n)!/(n!(n+1)!),
# which fits nicely with a caching, incremental solution. The factor between each
# step in the sequence is (2n-1)(2n)/(n(n+1)) = (4n-2)/(n+1).

sub infix:<cstep>($prev, $n) { $prev * (4*$n - 2) / ($n+1) }
constant C = [\cstep] 1, 1..*;

# A way to arrive at the C(n) sequence is to note that, by the definition of
# balanced-bracket strings above,
#
# C(0) = 1
# C(n) = ∑[k <- 0..n-1] C(n-k-1) C(k) when n > 0
#
# That is, the next C(n) in the sequence is had by doing a kind of "convolution
# sum" of all the previous values. Knowing not only C(n) but how it is
# constructed gives us enough information to order the strings using
# structural recursion. I.e. given a number i < n, we can find the ith string,
# without computing all the i-1 strings before it.

# In order to understand what the ith balanced-bracket string is, let's take
# n = 4 as an example. There are 14 such strings, and they can be ordered
# by increasing k = n(S2).
#
# k = 0: 5 x 1 combinations. [[[[]]]] [[[][]]] [[[]][]] [[][[]]] [[][][]]
# k = 1: 2 x 1 combinations. [[[]]][] [[][]][]
# k = 2: 1 x 2 combinations. [[]][[]] [[]][][]
# k = 3: 1 x 5 combinations. [][[[]]] [][[][]] [][[]][] [][][[]] [][][][]
#
# Different amounts of strings end up in different categories. This distribution
# is the main thing we have to compute. The rest is just recursing down
# structurally.

sub bb-string($n) {
    return "" unless $n;

    my $i = (^C[$n]).roll;
    for ^$n -> $n2 {
        my $n1 = $n - $n2 - 1;
        my $combinations = C[$n1] * C[$n2];
        my ($S1, $S2) = bb-string($n1), bb-string($n2)
            and return "[$S1]$S2"
            if $i < $combinations;
        $i -= $combinations;
    }
    die "never reached -- the cases in the for loop are exhaustive";
}

sub MAIN($n = 3) {
    say bb-string($n);
}
	# Recursive definition: a balanced-bracket string is always empty or of the form
	#
	# [S1]S2
	#
	# where S1 and S2 are balanced-bracket strings.
	#
	# Let n(S) be the number of bracket pairs in the balanced-bracket string S. And let
	# C(n) be the number of balanced-bracket strings with exactly n bracket pairs.
	# C(n) happens to be the Catalan sequence <http://oeis.org/A000108>:
	#
	# n 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
	# C(n) 1, 1, 2, 5, 14, 42, 132, 429, 1430, 4862, 16796
	#
	# We'll need this sequence, so let's define it. oeis has it as (2n)!/(n!(n+1)!),
	# which fits nicely with a caching, incremental solution. The factor between each
	# step in the sequence is (2n-1)(2n)/(n(n+1)) = (4n-2)/(n+1).

	sub infix:<cstep>($prev, $n) { $prev * (4*$n - 2) / ($n+1) }
	constant C = [\cstep] 1, 1..*;

	# A way to arrive at the C(n) sequence is to note that, by the definition of
	# balanced-bracket strings above,
	#
	# C(0) = 1
	# C(n) = ∑[k <- 0..n-1] C(n-k-1) C(k) when n > 0
	#
	# That is, the next C(n) in the sequence is had by doing a kind of "convolution
	# sum" of all the previous values. Knowing not only C(n) but how it is
	# constructed gives us enough information to order the strings using
	# structural recursion. I.e. given a number i < n, we can find the ith string,
	# without computing all the i-1 strings before it.

	# In order to understand what the ith balanced-bracket string is, let's take
	# n = 4 as an example. There are 14 such strings, and they can be ordered
	# by increasing k = n(S2).
	#
	# k = 0: 5 x 1 combinations. [[[[]]]] [[[][]]] [[[]][]] [[][[]]] [[][][]]
	# k = 1: 2 x 1 combinations. [[[]]][] [[][]][]
	# k = 2: 1 x 2 combinations. [[]][[]] [[]][][]
	# k = 3: 1 x 5 combinations. [][[[]]] [][[][]] [][[]][] [][][[]] [][][][]
	#
	# Different amounts of strings end up in different categories. This distribution
	# is the main thing we have to compute. The rest is just recursing down
	# structurally.

	sub bb-string($n) {
	return "" unless $n;

	my $i = (^C[$n]).roll;
	for ^$n -> $n2 {
	my $n1 = $n - $n2 - 1;
	my $combinations = C[$n1] * C[$n2];
	my ($S1, $S2) = bb-string($n1), bb-string($n2)
	and return "[$S1]$S2"
	if $i < $combinations;
	$i -= $combinations;
	}
	die "never reached -- the cases in the for loop are exhaustive";
	}

	sub MAIN($n = 3) {
	say bb-string($n);
	}