Skip to content

Instantly share code, notes, and snippets.

@kulp
Created March 30, 2010 22:54
Show Gist options
  • Save kulp/349716 to your computer and use it in GitHub Desktop.
Save kulp/349716 to your computer and use it in GitHub Desktop.
Generates CPP macros for mapping arbitrary integer sets to small integer sets
#!/usr/bin/env perl
use strict;
use List::Util qw(max);
my $name = "b";
# for each significant bit position, where the sequence through the bit
# positions is chosen such that each bit chosen divides the remaining
# search space as nearly evenly as possible, construct an array that
# looks like this:
#
# [ B, [ L, R ] ]
#
# where B is the number of the current bitposition
# L is either a number or another array like this one
# R is defined similarly to but independently from L
#
# where L and R are constructed recursively. L is the "left" side of the
# resulting tree structure from this node downward, and R is the "right"
# side. The left side is the side that corresponds to bit position B in
# the input string being zero; the right side corresponds to a one.
#
# For example, consider as input the list of integers (1, 5, 6, 9). These
# numbers expressed in binary are
#
# 3210 (bit position)
# ____
# 0001
# 0101
# 0110
# 1001
#
# The bit-position that causes the most even split is position 2. The first
# (unelaborated) level of the structure would look like this :
#
# [ 2, [ L, R ] ]
#
# Taking out position 2 leaves us with two lists :
#
# 310 (bit position)
# ___
# 001 (from 1)
# 101 (from 9)
# ---
# 001 (from 5)
# 010 (from 6)
#
# Now from the first list, we can divide into two lists based on bit position 3.
# After we do so, there is no further differentiation (all remaining values are
# equal in other bit positions), so the recursion ends. A similar operation
# occurs for the second list, but in bit position 1. The structure looks like
# this :
#
# [ 2, [
# [ 3, [ 1, 9 ] ]
# [ 1, [ 5, 6 ] ]
# ] ]
#
# The resulting tree structure can now be converted into a set of nested
# ternary expressions over the bits of an input number. In this case, the
# ternary expression would be (x & 4 ? x & 1 ? 3 : 2 : x & 8 ? 1 : 0). The end
# result is that the input values are hashed without collision into the range
# [0, N) where N is the number of input values, without holes. This is
# particular useful for creating sparse maps using simple arrays in C.
# macro() expresses the name of a hash output
#sub macro { "INDEX($_[0])" } # wrapped in a macro
sub macro { shift } # bare
# indexer() expresses access to a particular element of $name
#sub indexer { "$name\[$_[0]\]" } # an array indexer
#sub indexer { "($name) & 1 << $_[0]" } # a bitmask indexer
# a clever bitmask indexer that seeks to minimize its length
sub indexer {
my $expr = "1 << $_[0]";
my $len = length $expr;
my $val = eval $expr;
"$name & " . ($len < length $val ? $expr : $val)
}
# Takes a list of input numbers; returns a structure as described above
sub build_structure {
my ($i, $bits, @nums) = @_;
my @masked = map {
my $mask = 1 << $_;
[ [ grep { !($mask & $_) } @nums ], [ grep { ($mask & $_) } @nums ] ]
} 0 .. $bits - 1;
my @spread = map {
my $mask = 1 << $_;
[ $_ => [ (scalar @{ $masked[$_][0] }), (scalar @{ $masked[$_][1] }) ] ]
} 0 .. $bits - 1;
my @interesting = sort {
abs($a->[1][0] - $a->[1][1]) - abs($b->[1][0] - $b->[1][1])
} @spread;
my ($bitpos, $split) = @{ $interesting[0] };
my ($left, $right);
($i, $left ) = $split->[0] > 1
? build_structure($i, $bits, @{ $masked[$bitpos][0] })
: ($i + 1, macro($i));
($i, $right) = $split->[1] > 1
? build_structure($i, $bits, @{ $masked[$bitpos][1] })
: ($i + 1, macro($i));
my $outer = [ $bitpos, $left, $right ];
return ($i => $outer);
}
# Takes a structure built by build_structure(); returns a ternary
# expression as a string, using $name
sub build_expr {
my ($struct) = @_;
my ($bitpos, $left, $right) = @$struct;
my $leftstr = ref $left ? build_expr($left ) : $left;
my $rightstr = ref $right ? build_expr($right) : $right;
# "right" and "left" are swapped here because truth comes before
# falsehood in a ternary expression, but afterward in the original
# structure layout
return indexer($bitpos) . " ? $rightstr : $leftstr";
}
sub get_expr {
my (@lines) = @_;
my $bits = int(1 + log(max @lines) / log(2));
my ($index, $struct) = build_structure(0, $bits, @lines);
(my $string = build_expr($struct)) =~ s/\s+//g;
return $string;
}
# If called as a script, print out the expression generated from the lines of
# standard input. Otherwise, don't do anything until called.
if (!caller) {
my @lines = map { chomp; $_ } <>;
print get_expr(@lines), "\n";
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment