-
-
Save Ovid/7c22b76e37b7b7b7e7c01bfbe68709bb to your computer and use it in GitHub Desktop.
# RedBlack tree mockup for Corinna, modeled after https://metacpan.org/pod/Tree::RedBlack | |
# It is not guaranteed to work | |
use feature 'class'; | |
# we really want this to be a private class, but we can't yet | |
class RedBlack::Node { | |
field $key :reader :writer :param { undef }; | |
field $value :reader :writer :param { undef }; | |
field $parent :reader :writer :param { undef }; | |
field $color :reader :writer; # 1 is red, 0 is black | |
field $right :reader :writer; | |
field $left :reader :writer; | |
method successor() { | |
if ($right) { | |
return $right->min; | |
} | |
my $parent = $parent; | |
my $right = $self; | |
# the == relies on the fact that it should use the address of | |
# the nodes. Sloppy. | |
while ( $parent && $right == $parent->right ) { | |
$right = $parent; | |
$parent = $right->parent; | |
} | |
$parent; | |
} | |
method minimum() { | |
while ( $self->left ) { | |
$self = $self->left; | |
} | |
return $self; | |
} | |
method maximum() { | |
while ( $self->right ) { | |
$self = $self->right; | |
} | |
return $self; | |
} | |
method replace_with( $new_node //= RedBlack::Node->new ) { | |
$key = $new_node->key; | |
$key = $new_node->key; | |
$value = $new_node->value; | |
$left = $new_node->left; | |
$right = $new_node->right; | |
$color = $new_node->color; | |
} | |
} | |
class RedBlack { | |
use RedBlack::Node; | |
use Scalar::Util 'blessed'; | |
field $cmp :param { sub ( $key1, $key2 ) { return $key1 eq $key2 } }; | |
field $root :param { undef }; # contains a RedBlack::Node guaranteed to be black | |
method insert( $key, $value=undef ) { | |
my $node = $root; | |
my $parent; | |
while ($node) { | |
$parent = $node; | |
if ( !$cmp->( $key, $node->key ) ) { | |
$node = $node->left; | |
} | |
else { | |
$node = $node->right; | |
} | |
} | |
if ($parent) { | |
# Handle case of inserting node with duplicate key. | |
if ( $cmp->( $parent->key, $key ) ) { | |
my $val = $parent->value; | |
$parent->set_value($value); | |
return $val; | |
} | |
$node = ( ref $parent )->new( | |
key => $key, | |
value => $value, | |
parent => $parent, | |
); | |
if ( !$cmp->( $key, $parent->key ) ) { | |
$parent->set_left($node); | |
} | |
else { | |
$parent->set_right($node); | |
} | |
} | |
else { | |
$root = $node = RedBlack::Node->new( key => $key, value => $value ); | |
} | |
$node->set_color(1); | |
while ( $node != $root && $node->parent->color ) { | |
if ( defined $node->parent->parent->left | |
&& $node->parent == $node->parent->parent->left ) | |
{ | |
my $uncle = $node->parent->parent->right; | |
if ( $uncle && $uncle->color ) { | |
$node->parent->set_color(0); | |
$uncle->set_color(0); | |
$node->parent->parent->set_color(1); | |
$node = $node->parent->parent; | |
} | |
else { | |
if ( $node == $node->parent->right ) { | |
$node = $node->parent; | |
$self->left_rotate($node); | |
} | |
$node->parent->set_color(0); | |
$node->parent->parent->set_color(1); | |
$self->right_rotate( $node->parent->parent ); | |
} | |
} | |
else { | |
my $uncle = $node->parent->parent->left; | |
if ( $uncle && $uncle->color ) { | |
$node->parent->set_color(0); | |
$uncle->set_color(0); | |
$node->parent->parent->set_color(1); | |
$node = $node->parent->parent; | |
} | |
else { | |
if ( defined $node->parent->left | |
&& $node == $node->parent->left ) | |
{ | |
$node = $node->parent; | |
$self->right_rotate($node); | |
} | |
$node->parent->set_color(0); | |
$node->parent->parent->set_color(1); | |
$self->left_rotate( $node->parent->parent ); | |
} | |
} | |
} | |
$root->set_color(0); | |
return; | |
} | |
method minimum() { | |
if ($root) { | |
if ( $root->left ) { | |
return $root->left->minimum; | |
} | |
else { | |
return $root; | |
} | |
} | |
return; | |
} | |
method maximum() { | |
if ($root) { | |
if ( $root->right ) { | |
return $root->right->maximum; | |
} | |
else { | |
return $root; | |
} | |
} | |
return; | |
} | |
method find($key) { | |
my $node = $root; | |
while ($node) { | |
if ( $cmp->( $key, $node->key ) ) { | |
return $node->value; | |
} | |
elsif ( $cmp->( $key, $node->key ) < 0 ) { | |
$node = $node->left; | |
} | |
else { | |
$node = $node->right; | |
} | |
} | |
# Got to the end without finding the node. | |
return; | |
} | |
# this has a bug, faithfully copied from the original, but | |
# since it's only a mockup, I'm not too concerned for now. | |
method delete ($key) { | |
my $node = $self->find_node($key) or return; | |
my ( $successor, $successor_child ); | |
if ( !( $node->left && $node->right ) ) { | |
$successor = $node; | |
} | |
else { | |
$successor = $node->successor; | |
} | |
if ( $successor->left ) { | |
$successor_child = $successor->left; | |
} | |
else { | |
$successor_child = $successor->right || RedBlack::Node->new; | |
} | |
$successor_child->set_parent( $successor->parent ); | |
if ( !$successor_child || !$successor_child->parent ) { | |
$root = $successor_child; | |
} | |
elsif ( $successor == $successor->parent->left ) { | |
$successor->parent->set_left($successor_child); | |
} | |
else { | |
$successor->parent->set_right($successor_child); | |
} | |
if ( $successor != $node ) { | |
$node->set_key( $successor->key ); | |
$node->set_value( $successor->value ); | |
} | |
if ( !$successor->color ) { | |
$self->delete_fixup($successor_child); | |
} | |
if ( !$successor_child->parent ) { | |
$root = undef; | |
} | |
$successor; | |
} | |
# private methods start here | |
method delete_fixup ($x) :private { | |
while ( $x != $root && !$x->color ) { | |
if ( $x == $x->parent->left ) { | |
my $w = $x->parent->right; | |
if ( $w->color ) { | |
$w->set_color(0); | |
$x->parent->set_color(1); | |
$self->left_rotate( $x->parent ); | |
} | |
if ( !$w->left->color && !$w->right->color ) { | |
$w->set_color(1); | |
$x = $x->parent; | |
} | |
else { | |
if ( !$w->right->color ) { | |
$w->left->set_color(0); | |
$w->set_color(1); | |
$self->right_rotate($w); | |
$w = $x->parent->right; | |
} | |
$w->set_color( $x->parent->color ); | |
$x->parent->set_color(0); | |
$w->right->set_color(0); | |
$self->left_rotate( $x->parent ); | |
$x = $root; | |
} | |
} | |
else { | |
my $w = $x->parent->left; | |
if ( $w->color ) { | |
$w->set_color(0); | |
$x->parent->set_color(1); | |
$self->right_rotate( $x->parent ); | |
} | |
if ( !$w->left->color && !$w->right->color ) { | |
$w->set_color(1); | |
$x = $x->parent; | |
} | |
else { | |
if ( !$w->left->color ) { | |
$w->right->set_color(0); | |
$w->set_color(1); | |
$self->left_rotate($w); | |
$w = $x->parent->left; | |
} | |
$w->color( $x->parent->color ); | |
$x->parent->set_color(0); | |
$w->left->set_color(0); | |
$self->right_rotate( $x->parent ); | |
$x = $root; | |
} | |
} | |
} | |
$x->set_color(0); | |
} | |
method find_node($key) :private { | |
my $node = $root; | |
while ($node) { | |
if ( $cmp->( $key, $node->key ) ) { | |
return $node; | |
} | |
elsif ( $cmp->( $key, $node->key ) < 0 ) { | |
$node = $node->left; | |
} | |
else { | |
$node = $node->right; | |
} | |
} | |
# Got to the end without finding the node. | |
return; | |
} | |
method left_rotate($node) :private { | |
my $child = $node->right; | |
$node->set_right( $child->left ); | |
if ( $child->left ) { | |
$child->left->set_parent($node); | |
} | |
$child->set_parent( $node->parent ); | |
if ( $node->parent ) { | |
if ( $node == $node->parent->left ) { | |
$node->parent->set_left($child); | |
} | |
else { | |
$node->parent->set_right($child); | |
} | |
} | |
else { | |
$root = $child; | |
} | |
$child->set_left($node); | |
$node->set_parent($child); | |
} | |
method right_rotate($node) :private { | |
my $child = $node->left; | |
$node->set_left( $child->right ); | |
if ( $child->right ) { | |
$child->right->set_parent($node); | |
} | |
$child->set_parent( $node->parent ); | |
if ( $node->parent ) { | |
if ( $node == $node->parent->right ) { | |
$node->parent->set_right($child); | |
} | |
else { | |
$node->parent->set_left($child); | |
} | |
} | |
else { | |
$root = $child; | |
} | |
$child->set_right($node); | |
$node->set_parent($child); | |
} | |
} |
Unfortunately, this example also showcases the biggest problem with Corinna; speed. A Red/Black tree is maybe one of the best examples of an object (the tree container) which contains smaller internal objects (the nodes) which are dictated by the container and gain nothing by wrapping their fields with accessors. And the whole purpose of a Red/Black tree is speed.
Calling ->parent->set_color(1)
invokes two perl function calls to write a field that should just be directly accessible to the container. With the large number of function calls, it doesn't even matter how well the PAD storage is optimized, it is guaranteed to be slower than any other tree implementation on CPAN.
For comparison, the fastest pure-perl Red/Black tree on CPAN is Tree::RB, and it uses code like $x->[_PARENT][_PARENT][_COLOR] = RED
where the constants are folded at compile time and can take advantage of the new perl optimization that does chain of array fetches in a single opcode.
If corinna could somehow resolve to array access under the hood, and provide inherited or protected access to the field offsets of another class without needing to use the accessors, it would be so much more useful... and could take the place of ugly array implementations like Tree::RB.
Hi @nrdvana . I want to apologize for the length of this reply. This issue has been raised repeatedly about Corinna, so I have a lot of experience thinking and writing about it. If this reply seems antagonistic as a result of its length, I do not mean it like that! I appreciate your excellent feedback.
Yes, the whole purpose of a Red/Black tree is speed. However, the purpose of my gist wasn't about speed, it was to fulfill a specific request from a core P5P developer to show what Corinna would look like for something like this. But if we're going to talk about speed, let's dive in.
First, there is the old saying about software: first make it right, then make it fast. Or we can think of Knuth's famous (and over the top) dictum about "premature optimization."
Second, this example doesn't show a problem with speed because it's a mockup and cannot be run. Therefore it cannot be benchmarked. Therefore, we don't know that this is slow. The purpose was simply to show a more complex example so we could understand how Corinna would look.
Third, my trivial (and incomplete!) benchmarks against the Object::Pad
testbed already shows it's comparable in performance to Moo and that's with no optimizations whatsoever (first make it right, then make it fast). At least one company is using the testbed in production and it reporting that it's noticeably faster and consumes less memory.
Fourth, there are tradeoffs. consider a simple point object:
class Point {
field ($x,$y) :param;
method move($dx, $dy) {
$x += $dx;
$y += $dy;
}
}
There are no method calls there and we're just accessing lexicals. That's one reason why much of the testbed usage is reported as faster than Moo/se: a dereference is always going to be slower than a lexical. This is a structural issue that legacy OO in Perl really can't get around. So for Corinna classes mostly working with instance lexicals and not methods, I don't believe performance will suffer at all. It will depend on your use case.
Fifth, $x->[_PARENT][_PARENT][_COLOR] = RED
is reaching into the internals rather than going through the public interface. As a result, it cannot be subclassed effectively. That's a fair trade-off of flexibility versus performance for many use cases, but it still needs to be recognized as a trade-off and not "X is better than Y."
Sixth, since Tree::DB
reaching into the internals and that effectively prevents subclassing, the class is a "final" class and it should at least document itself as such, and why it's doing that: performance reasons. However, in reading what you've written, there seems to be an assumption that Perl's methods can't be faster. However, SunStar Systems has written code for "Sealed" methods. For final classes, sealed methods are bound at compile-time making them even faster (slightly) than a sub class, not to mention a method call. I've supplied feedback to them on the technique and they've updated their code, so it still runs nicely. So we can have faster method calls in Corinna, but we don't want to go there yet.
Finally, since the testbed is already as fast as (in some cases faster) that Moo/se and has shown to have lower memory consumption on production systems, and given that there's been absolutely no optimization, I'm quite confident that Corinna is going to hit a sweet spot of both developer performance and CPU performance. For now, we need to continue to finalize the design.
First make it right, then make it fast.
I agree with all the points up to 5th. Regarding that one, there are many designs in software development where adding an extra layer of abstraction is useful or might theoretically be useful in the future, but another whole class of problems where the internal data structures have no reason at all to ever be subclassed outside the knowledge of the containing class, and having the containing class use the method abstraction does nothing except waste performance. Red/Black trees are a prime example of that second group of problems, which is the point I was going for above.
"First make it right, then make it fast." ... and perl 6 did exactly that, and is arguably the prime reason it failed.
I would argue for the approach used by Git. "Make it right and fast (and ugly), then make it pretty and convenient". Perl blessed arrays are already right and fast, and just need syntactic sugar and helpful tooling so that they are less of a pain to use. It sure would be nice if Corinna had a provision for that, including protected access of named fields that compiles into array access (or pad access).
I took https://metacpan.org/pod/Tree::RedBlack and I mocked it up as Corinna. I first wrote it in
Object::Pad
and then converted it. It probably doesn't quite work because I simplified some things after I converted it (such as the$cmp
usage).The only public interface are these methods on RedBlack class (the nodes are internal and not exposed):
new
insert
delete
find
maximum
minimum
There are some things to note.
RedBlack::Node
class should be a private class, but that's not yet specified:private
, butObject::Pad
usesmethod $some_method {}
As for the final point, here's an example in Raku (then, Perl 6): https://blogs.perl.org/users/ovid/2013/02/red-black-trees-in-perl-6-explained.html
The Raku example shows just how small this class could be with a combination of multimethods and types. We're really missing out here, but baby steps. And yes, the Raku example is hard to read, but working with complex algorithms can be like that. Thus, so long as the interface is clean, complexities can be hidden.
As mentioned,
Object::Pad
uses a different syntax for private methods:That's not in the Corinna specification and design thought would be required. It's Perlish and also crystal clear that it's not public.
The more I wrote that code, the more I wished we had twigils even though I argued against them. They would make it clear which variables are bound to the class and prevent shadowing. Paul wanted
$:some_var
, but formats use$:
.