kulp/tree_macro.pl

## tree_macro.pl
#!/usr/bin/env perl
use strict;
use List::Util qw(max);

my $name = "b";

# for each significant bit position, where the sequence through the bit
# positions is chosen such that each bit chosen divides the remaining
# search space as nearly evenly as possible, construct an array that
# looks like this:
#
# [ B, [ L, R ] ]
#
# where     B is the number of the current bitposition
#           L is either a number or another array like this one
#           R is defined similarly to but independently from L
#
# where L and R are constructed recursively. L is the "left" side of the
# resulting tree structure from this node downward, and R is the "right"
# side. The left side is the side that corresponds to bit position B in
# the input string being zero; the right side corresponds to a one.
#
# For example, consider as input the list of integers (1, 5, 6, 9). These
# numbers expressed in binary are
#
#   3210 (bit position)
#   ____
#   0001
#   0101
#   0110
#   1001
#
# The bit-position that causes the most even split is position 2. The first
# (unelaborated) level of the structure would look like this :
#
# [ 2, [ L, R ] ]
#
# Taking out position 2 leaves us with two lists :
#
#   310 (bit position)
#   ___
#   001 (from 1)
#   101 (from 9)
#   ---
#   001 (from 5)
#   010 (from 6)
#
# Now from the first list, we can divide into two lists based on bit position 3.
# After we do so, there is no further differentiation (all remaining values are
# equal in other bit positions), so the recursion ends. A similar operation
# occurs for the second list, but in bit position 1. The structure looks like
# this :
#
# [ 2, [
#     [ 3, [ 1, 9 ] ]
#     [ 1, [ 5, 6 ] ]
# ] ]
#
# The resulting tree structure can now be converted into a set of nested
# ternary expressions over the bits of an input number. In this case, the
# ternary expression would be (x & 4 ? x & 1 ? 3 : 2 : x & 8 ? 1 : 0). The end
# result is that the input values are hashed without collision into the range
# [0, N) where N is the number of input values, without holes. This is
# particular useful for creating sparse maps using simple arrays in C.

# macro() expresses the name of a hash output
#sub macro { "INDEX($_[0])" }    # wrapped in a macro
sub macro { shift }             # bare

# indexer() expresses access to a particular element of $name
#sub indexer { "$name\[$_[0]\]" }        # an array indexer
#sub indexer { "($name) & 1 << $_[0]" }    # a bitmask indexer
# a clever bitmask indexer that seeks to minimize its length
sub indexer {
    my $expr = "1 << $_[0]";
    my $len  = length $expr;
    my $val  = eval $expr;
    "$name & " . ($len < length $val ? $expr : $val)
}

# Takes a list of input numbers; returns a structure as described above
sub build_structure {
    my ($i, $bits, @nums) = @_;

    my @masked = map {
        my $mask = 1 << $_;
        [ [ grep { !($mask & $_) } @nums ], [ grep { ($mask & $_) } @nums ] ]
    } 0 .. $bits - 1;

    my @spread = map {
        my $mask = 1 << $_;
        [ $_ => [ (scalar @{ $masked[$_][0] }), (scalar @{ $masked[$_][1] }) ] ]
    } 0 .. $bits - 1;

    my @interesting = sort {
        abs($a->[1][0] - $a->[1][1]) - abs($b->[1][0] - $b->[1][1])
    } @spread;
    my ($bitpos, $split) = @{ $interesting[0] };
    my ($left, $right);

    ($i, $left ) = $split->[0] > 1
        ? build_structure($i, $bits, @{ $masked[$bitpos][0] })
        : ($i + 1, macro($i));
    ($i, $right) = $split->[1] > 1
        ? build_structure($i, $bits, @{ $masked[$bitpos][1] })
        : ($i + 1, macro($i));

    my $outer = [ $bitpos, $left, $right ];
    return ($i => $outer);
}

# Takes a structure built by build_structure(); returns a ternary
# expression as a string, using $name
sub build_expr {
    my ($struct) = @_;
    my ($bitpos, $left, $right) = @$struct;
    my $leftstr  = ref $left  ? build_expr($left ) : $left;
    my $rightstr = ref $right ? build_expr($right) : $right;
    # "right" and "left" are swapped here because truth comes before
    # falsehood in a ternary expression, but afterward in the original
    # structure layout
    return indexer($bitpos) . " ? $rightstr : $leftstr";
}

sub get_expr {
    my (@lines) = @_;
    my $bits = int(1 + log(max @lines) / log(2));
    my ($index, $struct) = build_structure(0, $bits, @lines);
    (my $string = build_expr($struct)) =~ s/\s+//g;
    return $string;
}

# If called as a script, print out the expression generated from the lines of
# standard input. Otherwise, don't do anything until called.
if (!caller) {
    my @lines = map { chomp; $_ } <>;
    print get_expr(@lines), "\n";
}

1;
	#!/usr/bin/env perl
	use strict;
	use List::Util qw(max);

	my $name = "b";

	# for each significant bit position, where the sequence through the bit
	# positions is chosen such that each bit chosen divides the remaining
	# search space as nearly evenly as possible, construct an array that
	# looks like this:
	#
	# [ B, [ L, R ] ]
	#
	# where B is the number of the current bitposition
	# L is either a number or another array like this one
	# R is defined similarly to but independently from L
	#
	# where L and R are constructed recursively. L is the "left" side of the
	# resulting tree structure from this node downward, and R is the "right"
	# side. The left side is the side that corresponds to bit position B in
	# the input string being zero; the right side corresponds to a one.
	#
	# For example, consider as input the list of integers (1, 5, 6, 9). These
	# numbers expressed in binary are
	#
	# 3210 (bit position)
	# ____
	# 0001
	# 0101
	# 0110
	# 1001
	#
	# The bit-position that causes the most even split is position 2. The first
	# (unelaborated) level of the structure would look like this :
	#
	# [ 2, [ L, R ] ]
	#
	# Taking out position 2 leaves us with two lists :
	#
	# 310 (bit position)
	# ___
	# 001 (from 1)
	# 101 (from 9)
	# ---
	# 001 (from 5)
	# 010 (from 6)
	#
	# Now from the first list, we can divide into two lists based on bit position 3.
	# After we do so, there is no further differentiation (all remaining values are
	# equal in other bit positions), so the recursion ends. A similar operation
	# occurs for the second list, but in bit position 1. The structure looks like
	# this :
	#
	# [ 2, [
	# [ 3, [ 1, 9 ] ]
	# [ 1, [ 5, 6 ] ]
	# ] ]
	#
	# The resulting tree structure can now be converted into a set of nested
	# ternary expressions over the bits of an input number. In this case, the
	# ternary expression would be (x & 4 ? x & 1 ? 3 : 2 : x & 8 ? 1 : 0). The end
	# result is that the input values are hashed without collision into the range
	# [0, N) where N is the number of input values, without holes. This is
	# particular useful for creating sparse maps using simple arrays in C.

	# macro() expresses the name of a hash output
	#sub macro { "INDEX($_[0])" } # wrapped in a macro
	sub macro { shift } # bare

	# indexer() expresses access to a particular element of $name
	#sub indexer { "$name\[$_[0]\]" } # an array indexer
	#sub indexer { "($name) & 1 << $_[0]" } # a bitmask indexer
	# a clever bitmask indexer that seeks to minimize its length
	sub indexer {
	my $expr = "1 << $_[0]";
	my $len = length $expr;
	my $val = eval $expr;
	"$name & " . ($len < length $val ? $expr : $val)
	}

	# Takes a list of input numbers; returns a structure as described above
	sub build_structure {
	my ($i, $bits, @nums) = @_;

	my @masked = map {
	my $mask = 1 << $_;
	[ [ grep { !($mask & $_) } @nums ], [ grep { ($mask & $_) } @nums ] ]
	} 0 .. $bits - 1;

	my @spread = map {
	my $mask = 1 << $_;
	[ $_ => [ (scalar @{ $masked[$_][0] }), (scalar @{ $masked[$_][1] }) ] ]
	} 0 .. $bits - 1;

	my @interesting = sort {
	abs($a->[1][0] - $a->[1][1]) - abs($b->[1][0] - $b->[1][1])
	} @spread;
	my ($bitpos, $split) = @{ $interesting[0] };
	my ($left, $right);

	($i, $left ) = $split->[0] > 1
	? build_structure($i, $bits, @{ $masked[$bitpos][0] })
	: ($i + 1, macro($i));
	($i, $right) = $split->[1] > 1
	? build_structure($i, $bits, @{ $masked[$bitpos][1] })
	: ($i + 1, macro($i));

	my $outer = [ $bitpos, $left, $right ];
	return ($i => $outer);
	}

	# Takes a structure built by build_structure(); returns a ternary
	# expression as a string, using $name
	sub build_expr {
	my ($struct) = @_;
	my ($bitpos, $left, $right) = @$struct;
	my $leftstr = ref $left ? build_expr($left ) : $left;
	my $rightstr = ref $right ? build_expr($right) : $right;
	# "right" and "left" are swapped here because truth comes before
	# falsehood in a ternary expression, but afterward in the original
	# structure layout
	return indexer($bitpos) . " ? $rightstr : $leftstr";
	}

	sub get_expr {
	my (@lines) = @_;
	my $bits = int(1 + log(max @lines) / log(2));
	my ($index, $struct) = build_structure(0, $bits, @lines);
	(my $string = build_expr($struct)) =~ s/\s+//g;
	return $string;
	}

	# If called as a script, print out the expression generated from the lines of
	# standard input. Otherwise, don't do anything until called.
	if (!caller) {
	my @lines = map { chomp; $_ } <>;
	print get_expr(@lines), "\n";
	}

	1;