Skip to content

Instantly share code, notes, and snippets.

@scottwalters
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scottwalters/9380607 to your computer and use it in GitHub Desktop.
Save scottwalters/9380607 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
package arrays2;
# inspired by http://blog.headius.com/2012/09/avoiding-hash-lookups-in-ruby.html ...
# attempt to use arrays for class instance data in Perl rather than hashes.
# this is done by scanning the bytecode, finding places where the '$self' hash ref
# is accessed, converting the names of the hash subscripts to numbers, changing the
# and hashes to arrays.
# for example, $self->{foo} might be changed to $self->[1].
# bless { foo => ..., bar => ... }, $package is also changed to bless [ ..., ... ], $package.
# this is a prototype. it does not detect when it is in over its head. it doesn't do dataflow
# analysis to figure out where $self is passed to and change uses there. it also does not
#
# XXX todo:
# XXX when helems with non-const keys (eg $self->{$foo}) are found, replace it with a call to run-time field_number(), the name=>number resolver
# XXX aelemfast_lex_ref -- we can't use aelemfast_lex because it doesn't work on refs; need a new op that does
# XXX aelemfast_lex_no_magic -- tie $self in the bless; code not rewritten would fall back on the tied logic; uses we find would be rewritten to use this new op
# XXX method resolution caching could be implemented with a similar hack. I'd love to see the benchmark results from that.
# done:
# detect uses of $self with non-constants and abort operations if found!
# now that we conicalize to the superclass, we either need to do all new methods first or else need to actually reorder list items to bless; re-orders them
# only change blesses on the variable named '$package' or some other heuristic so that we don't muck up factory methods
# when examining a subclass, use the $fields data for superclasses too! -- at least for the simple case of single inheritence
use 5.016;
our $VERSION = '0.1';
#
# starts with the CHECK() routine. that calls one_cv_at_a_time() for each code value,
# which calls walkoptree_slow() after some prep work, which calls look_for_things_to_diddle()
# for each actual bytecode instruction.
#
use B 'OPf_KIDS', 'OPf_WANT_SCALAR', 'OPf_WANT_LIST', 'OPf_WANT', 'OPf_REF', 'OPf_MOD', 'OPf_SPECIAL';
use B::Generate;
use B::Concise 'concise_cv'; # 'walk_topdown'
use B::Deparse; # debugging XXX
# use B::Utils;
use Devel::Peek;
use Data::Dumper 'Dumper';
use strict;
use warnings;
my %knownuniverse; # modules using us
my $curcv; # cv currently being inspected; would be cleaner to pass this around rather than set a quasi-global
my $package; # current package; each package has its own table of instance data names (self subscripts)
my $method; # current function/method name we're looking at
my $fields; # $fields->{$package}->{$self_hash_subscript} = number
my $redo_reverse_indices; # function to recompute $previous for the current CV
my $previous = {}; # previous B::OP object for any B::OP object; opposite of next; inferred from next
my %did_already; # which B::OPs we've modified; arrays were getting ref'd twice because parent info was stale and two rules matched
my $lastpack; my $lastline; my $lastfile; # current place in the source file as taken from the last observed B::COP
sub import {
my $caller = caller;
$knownuniverse{$caller} = 1;
}
CHECK {
# make a hash of code values we've found - memory address of the opcode is mapped to the
# B object encapsulating it. then go through them all, marking them done as we do them.
# this is tricky since more may appear as we go along. for each code value we find, call
# one_cv_at_a_time() on it.
# build initial list of code values from methods/functions in the subs and the main root
# %knowncvs = do { my $x = B::main_cv(); ( $$x => $x ) }; # XXX fix
for my $package (keys %knownuniverse) {
no strict 'refs';
my @methods = grep { defined &{$package.'::'.$_} } keys %{$package.'::'};
if( grep $_ eq 'new', @methods ) {
# move new() to the front of the queue as a hack to hopefully avoid having to re-order the initialization list in the anonhash to bless
@methods = grep $_ ne 'new', @methods;
unshift @methods, 'new';
}
for my $method ( @methods ) {
warn "doing sub $package\::$method\n";
my $cv = B::svref_2object(*{$package.'::'.$method}{CODE});
one_cv_at_a_time($cv, $package, $method);
}
}
# XXX iffy on this... double check it
# foreach my $cv ((B::main_cv->PADLIST->ARRAY)[1]->ARRAY) {
# # print "debug: main pad list: ", ref $cv, "\n";
# next unless ref $cv eq 'B::CV';
# # print "debug: found a cv!\n";
# $knowncvs{$$cv} = $cv;
# }
# XXX resurrect this or find another way to deal with code we find in anoncode ops
# foreach (keys %knowncvs) {
# # we look through the list of code values each time just in case something got added
# # this happens when we encounter anoncode operations
# $curcv = $knowncvs{$_}; goto next_cv if ! $donecvs{$curcv};
# }
}
sub one_cv_at_a_time {
# get ready to recurse through the bytecode tree - build a reverse index, previous, from the next
# links and do any debugging output after we traverse the tree
$curcv = shift; # stuff it into the global
$package = shift or die; # stuff it into the global
$method = shift; # stuff it into the global; might be null if we're looking at an anonymous subroutine
my $leave = $curcv->ROOT;
# since subclasses share instance data fields with superclasses, we need to conicalize $package to the highest level superclass
while( my @isa = do { no strict 'refs'; @{ "$package\::ISA" }; } ) {
die "don't know how to reconcile multiple superclasses" if @isa > 1;
$package = $isa[0];
warn "followed inheritance to package $package";
}
# not currently using this but it's really handy to have; a proper perl implementation of re-threading would be even better; does Code::Splice have one of those?
# $redo_reverse_indices = sub {
# walkoptree_slow($leave, sub {
# my $self = shift; return unless $self and $$self;
# my $next = $self->next; return unless $next and $$next;
# $previous->{$$next} = $self;
# });
# };
# $redo_reverse_indices->();
# XXX
#warn "getting ready to do this sub:\n";
# concise_cv('basic', $curcv->object_2svref, ); # dump the opcode tree of this code value
#warn "/getting ready\n";
walkoptree_slow($leave, \&look_for_things_to_diddle);
# B::main_root()->linklist();
# print $$leave, " basic:\n"; B::Concise::concise_cv_obj('basic', $curcv); # debug
# print $$leave, " exec:\n"; B::Concise::concise_cv_obj('exec', $curcv); # debug
return 1;
}
my @parents = ();
sub walkoptree_slow {
# actually recurse the bytecode tree
# stolen from B.pm, modified
my $op = shift;
my $sub = shift;
my $level = shift;
$level ||= 0;
# warn(sprintf("walkoptree: %d. %s\n", $level, peekop($op))) if $debug;
$sub->($op, $level, \@parents);
if ($op->can('flags') and $op->flags() & OPf_KIDS) {
# print "debug: go: ", ' ' x $level, $op->name(), "\n"; # debug
push @parents, $op;
my $kid = $op->first();
my $next;
next_kid:
# was being changed right out from under us, so pre-compute
$next = 0; $next = $kid->sibling() if $$kid;
walkoptree_slow($kid, $sub, $level + 1);
$kid = $next;
goto next_kid if $kid;
pop @parents;
}
if (B::class($op) eq 'PMOP' && $op->pmreplroot() && ${$op->pmreplroot()}) {
# pattern-match operators
push @parents, $op;
walkoptree_slow($op->pmreplroot(), $sub, $level + 1);
pop @parents;
}
};
sub look_for_things_to_diddle {
my $self = shift; # op object
my $level = shift;
my $parents = shift;
return unless $self and $$self;
return unless exists $parents->[0]; # root op isn't that interesting and we need a parent
my $parent = $parents->[-1];
my $non_null_parent = do { my $i = -1; $i-- until $parents->[$i]->name() ne 'null'; $parents->[$i]; };
my $field_num;
if($self->name() eq 'nextstate') {
# record where we are in the program for any diagnstics
# $lastpack = $self->stash()->PV(); # NAME();
$lastpack = '';
$lastfile = $self->file();
$lastline = $self->line();
}
# recognize constructs of the form $self->{constant}
# a <2> helem sKRM*/2 ->b <-- helem; change to aelem
# 8 <1> rv2hv[t2] sKR/1 ->9 <-- rv2hv; change to rv2av
# 7 <0> padsv[$self:1,2] sM/DREFHV ->8 <-- padsv on '$self'
# 9 <$> const(PV "foo") s/BARE ->a <-- const; change the SV
goto not_instance_data unless $self->name eq 'helem';
goto not_instance_data unless $self->first->name eq 'rv2hv';
goto not_instance_data unless $self->first->first->name eq 'padsv';
# warn "contents of SV referenced by const op: " . (($curcv->PADLIST->ARRAY)[0]->ARRAY)[ $self->first->first->targ ]->PVX; # contents of SV referenced by const op: $self
goto not_instance_data unless (($curcv->PADLIST->ARRAY)[0]->ARRAY)[ $self->first->first->targ ]->PVX eq '$self';
# goto not_instance_data unless $self->first->sibling->name eq 'const';
die "accessing \$self with non-constants is not yet supported, while parsing $lastfile, line $lastline" unless $self->first->sibling->name eq 'const';
$field_num = field_number( $self->first->sibling->sv->PVX, $package );
warn "constant: " . $self->first->sibling->sv->PVX . " field_num: $field_num";
$self->type( B::OP->new('aelem', 0)->type ); # B::OP::new(class, type, flags)
$self->first->type( B::UNOP->new('rv2av', 0, 0)->type ); # B::UNOP::new(class, type, flags, sv_first)
$self->first->sibling->sv( $field_num );
$self->first->sibling->flags( $self->first->sibling->flags );
# see what we came up with
#warn "post op tree hackery:\n";
# concise_cv('basic', $curcv->object_2svref, ); # dump the opcode tree of this code value
#warn "/post op tree hackery\n";
#warn "code post hackery:\n";
# my $dp = B::Deparse->new;
# $dp->{curcv} = $curcv;
# warn $dp->deparse($self, 0), "\n";
#warn "/code post hackery\n";
not_instance_data:
#
#
#
# recognize constructs of the form bless { }, $package
# recognize constructs of the form bless { foo => 1, }, $package
# 9 <@> bless sK/2 ->a
# - <0> ex-pushmark s ->6
# 7 <@> anonhash sK* ->8 <-- change to anonlist
# 6 <0> pushmark s ->7 <-- XXX remove the keys from the key/value pairs and use them to learn fields
# 8 <0> padsv[$package:1,2] s ->9
# a <@> bless vK/2 ->b
# - <0> ex-pushmark s ->5
# 8 <@> anonhash sK*/1 ->9
# 5 <0> pushmark s ->6
# 6 <$> const(PV "foo") s/BARE ->7
# 7 <$> const(IV 1) s ->8
# 9 <0> padsv[$package:1,2] s ->a
goto not_bless unless $self->name eq 'bless';
goto not_bless unless $self->first->sibling->name eq 'anonhash';
goto not_bless unless $self->first->sibling->sibling->name eq 'padsv';
goto not_bless unless grep $_ eq (($curcv->PADLIST->ARRAY)[0]->ARRAY)[ $self->first->sibling->sibling->targ ]->PVX, '$package', '$class';
#warn "pre op tree hackery:\n";
# concise_cv('basic', $curcv->object_2svref, ); # dump the opcode tree of this code value
#warn "/pre op tree hackery\n";
# found it; start changing stuff around
$self->first->sibling->type( B::LISTOP->new('anonlist', 0, 0, 0)->type ); # B::LISTOP::new(class, type, flags, sv_first, sv_last)
do {
# take the keys out of the list of data that initialized the anonhash
# XXX if blessing happens elsewhere than new() and we already have numbers for fields, we will have to re-order the stuff in the list
my $pushmark = $self->first->sibling->first;
$pushmark->name eq 'pushmark' or die;
my $list_item = $pushmark->sibling;
my @initialization_list;
while( $list_item and $$list_item ) {
die unless $list_item->name eq 'const';
my $field_num = field_number( $list_item->sv->PVX, $package );
warn "constant in bless anonhash: " . $list_item->sv->PVX . " field_num: $field_num";
$list_item = $list_item->sibling; # skip to the value
$list_item and $$list_item or die; # make sure there is a value
push @initialization_list, [ $field_num, $list_item ];
# warn "setting next from " . deparse($last_list_item) . " to " . deparse($list_item);
# $last_list_item->next( $list_item ); # splice out the key
# $last_list_item->sibling( $list_item ); # splice out the key
# $last_list_item = $list_item;
$list_item = $list_item->sibling; # skip to the next key, if there is one
}
if( @initialization_list ) {
# re-order values in the init list so that they're in the right slots to be used to initialize use as an array
restart_sort_and_fill_in_initialization_list_gaps:
@initialization_list = sort { $a->[0] <=> $b->[0] } @initialization_list;
# insert undef for any skipped numeric positions
for( my $i = 0; $i < @initialization_list; $i++ ) {
if( $initialization_list[$i][0] != $i ) {
# warn "inserting a new undef const in position $i";
my $const = B::SVOP->new('const', 0, 0 ); # B::SVOP::new(class, type, flags, sv)
# $const->sv( B::sv_undef );
$const->sv( undef );
push @initialization_list, [ $i, $const, ];
goto restart_sort_and_fill_in_initialization_list_gaps;
}
}
# warn Data::Dumper::Dumper \@initialization_list;
# relink the initialization list of const ops
my $last_list_item = $pushmark;
for( my $i = 0; $i < @initialization_list; $i++ ) {
my $cur_list_item = $initialization_list[$i][1];
# warn "next value in list: " . ${ $cur_list_item->sv->object_2svref };
$last_list_item->sibling( $cur_list_item );
$last_list_item->next( $cur_list_item );
$last_list_item = $cur_list_item;
}
$last_list_item->sibling( 0 );
$last_list_item->next( $self->first->sibling ); # that's the 'anonhash', now 'anonlist'
}
};
#warn "post op tree hackery:\n";
# concise_cv('basic', $curcv->object_2svref, ); # dump the opcode tree of this code value
#warn "/post op tree hackery\n";
#warn "code post hackery:\n";
# my $dp = B::Deparse->new;
# $dp->{curcv} = $curcv;
# warn $dp->deparse($self, 0), "\n";
#warn "/code post hackery\n";
not_bless:
return 0;
}
sub deparse {
my $op = shift;
my $dp = B::Deparse->new;
$dp->{curcv} = $curcv;
return $dp->deparse($op, 0);
}
sub field_number {
my $name = shift; # name of some $self->{...} subscript
my $package = shift or die;
# if that field in that package already has a number, return it
return $fields->{$package}->{$name} if exists $fields->{$package}->{$name};
# figure out what the next unused number is and assign it and return that
my $max_observed = -1;
for my $k ( %{ $fields->{$package} } ) {
$max_observed = $fields->{$package}->{$k} if defined $fields->{$package}->{$k} and $fields->{$package}->{$k} > $max_observed;
}
$fields->{$package}->{$name} = $max_observed + 1;
return $fields->{$package}->{$name};
}
1;
package foo;
use strict;
use warnings;
use lib '.';
use arrays2;
sub new {
my $package = shift;
# bless [ ], $package; # XXX translate bless { }
bless { foo => 1, bar => 2, }, $package; # XXX translate bless { }
}
sub foo {
my $self = shift;
$self->{foo} = shift if @_;
return $self->{foo};
}
sub bar {
my $self = shift;
$self->{bar} = shift if @_;
return $self->{bar};
}
package main;
my $foo = foo->new;
$foo->foo(30);
print "foo: ", $foo->foo, "\n";
$foo->foo eq '30' or die;
$foo->bar(15);
print "bar: ", $foo->foo, "\n";
$foo->bar eq '15' or die;
print "foo: ", $foo->foo, "\n";
$foo->foo eq '30' or die;
$foo->foo(45);
print "foo: ", $foo->foo, "\n";
$foo->foo eq '45' or die;
$foo->bar eq '15' or die;
perl -e 'package Foo::Bar; sub yy { 1 }; package main; use Devel::Peek; my Foo::Bar $x; Dump($x);'
SV = NULL(0x0) at 0x805c6e8
REFCNT = 1
FLAGS = (PADMY)
package foo;
use strict;
use warnings;
use lib '.';
# use arrays2;
sub new {
my $package = shift;
# bless [ ], $package; # XXX translate bless { }
bless { foo => 1, bar => 2, }, $package; # XXX translate bless { }
}
sub foo {
my $self = shift;
$self->{foo} = shift if @_;
return $self->{foo};
}
sub bar {
my $self = shift;
$self->{bar} = shift if @_;
return $self->{bar};
}
package main;
use Benchmark qw(:all) ;
timethis(50000, sub {
my $foo = foo->new;
$foo->foo(30);
# print "foo: ", $foo->foo, "\n";
$foo->foo eq '30' or die;
$foo->bar(15);
# print "bar: ", $foo->foo, "\n";
$foo->bar eq '15' or die;
# print "foo: ", $foo->foo, "\n";
$foo->foo eq '30' or die;
$foo->foo(45);
# print "foo: ", $foo->foo, "\n";
$foo->foo eq '45' or die;
$foo->bar eq '15' or die;
});
package foo;
use strict;
use warnings;
use lib '.';
use arrays2;
sub new {
my $package = shift;
# bless [ ], $package; # XXX translate bless { }
bless { foo => 33, bar => 2, }, $package; # XXX translate bless { }
}
sub foo {
my $self = shift;
$self->{foo} = shift if @_;
return $self->{foo};
}
sub bar {
my $self = shift;
$self->{bar} = shift if @_;
return $self->{bar};
}
package quux;
use base 'foo';
use arrays2;
sub baz {
my $self = shift;
# $self->{ int rand 10 } = int rand 20; # this will generate an error
}
sub quux {
my $self = shift;
$self->{quux} = shift if @_;
$self->{foo}++;
warn "foo is now $self->{foo}";
return $self->{quux};
}
package main;
my $foo = foo->new;
$foo->foo(30);
# print "foo: ", $foo->foo, "\n";
$foo->foo eq '30' or die;
$foo->bar(15);
# print "bar: ", $foo->foo, "\n";
$foo->bar eq '15' or die;
# print "foo: ", $foo->foo, "\n";
$foo->foo eq '30' or die;
$foo->foo(45);
# print "foo: ", $foo->foo, "\n";
$foo->foo eq '45' or die;
$foo->bar eq '15' or die;
my $quux = quux->new;
$quux->quux(50);
$quux->quux eq '50' or die;
warn $quux->foo;
$quux->foo eq '35' or die; # incremented twice from 33 to 35 because quux() was called twice
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment