Last active
August 29, 2015 13:57
-
-
Save scottwalters/9380607 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
package arrays2; | |
# inspired by http://blog.headius.com/2012/09/avoiding-hash-lookups-in-ruby.html ... | |
# attempt to use arrays for class instance data in Perl rather than hashes. | |
# this is done by scanning the bytecode, finding places where the '$self' hash ref | |
# is accessed, converting the names of the hash subscripts to numbers, changing the | |
# and hashes to arrays. | |
# for example, $self->{foo} might be changed to $self->[1]. | |
# bless { foo => ..., bar => ... }, $package is also changed to bless [ ..., ... ], $package. | |
# this is a prototype. it does not detect when it is in over its head. it doesn't do dataflow | |
# analysis to figure out where $self is passed to and change uses there. it also does not | |
# | |
# XXX todo: | |
# XXX when helems with non-const keys (eg $self->{$foo}) are found, replace it with a call to run-time field_number(), the name=>number resolver | |
# XXX aelemfast_lex_ref -- we can't use aelemfast_lex because it doesn't work on refs; need a new op that does | |
# XXX aelemfast_lex_no_magic -- tie $self in the bless; code not rewritten would fall back on the tied logic; uses we find would be rewritten to use this new op | |
# XXX method resolution caching could be implemented with a similar hack. I'd love to see the benchmark results from that. | |
# done: | |
# detect uses of $self with non-constants and abort operations if found! | |
# now that we conicalize to the superclass, we either need to do all new methods first or else need to actually reorder list items to bless; re-orders them | |
# only change blesses on the variable named '$package' or some other heuristic so that we don't muck up factory methods | |
# when examining a subclass, use the $fields data for superclasses too! -- at least for the simple case of single inheritence | |
use 5.016; | |
our $VERSION = '0.1'; | |
# | |
# starts with the CHECK() routine. that calls one_cv_at_a_time() for each code value, | |
# which calls walkoptree_slow() after some prep work, which calls look_for_things_to_diddle() | |
# for each actual bytecode instruction. | |
# | |
use B 'OPf_KIDS', 'OPf_WANT_SCALAR', 'OPf_WANT_LIST', 'OPf_WANT', 'OPf_REF', 'OPf_MOD', 'OPf_SPECIAL'; | |
use B::Generate; | |
use B::Concise 'concise_cv'; # 'walk_topdown' | |
use B::Deparse; # debugging XXX | |
# use B::Utils; | |
use Devel::Peek; | |
use Data::Dumper 'Dumper'; | |
use strict; | |
use warnings; | |
my %knownuniverse; # modules using us | |
my $curcv; # cv currently being inspected; would be cleaner to pass this around rather than set a quasi-global | |
my $package; # current package; each package has its own table of instance data names (self subscripts) | |
my $method; # current function/method name we're looking at | |
my $fields; # $fields->{$package}->{$self_hash_subscript} = number | |
my $redo_reverse_indices; # function to recompute $previous for the current CV | |
my $previous = {}; # previous B::OP object for any B::OP object; opposite of next; inferred from next | |
my %did_already; # which B::OPs we've modified; arrays were getting ref'd twice because parent info was stale and two rules matched | |
my $lastpack; my $lastline; my $lastfile; # current place in the source file as taken from the last observed B::COP | |
sub import { | |
my $caller = caller; | |
$knownuniverse{$caller} = 1; | |
} | |
CHECK { | |
# make a hash of code values we've found - memory address of the opcode is mapped to the | |
# B object encapsulating it. then go through them all, marking them done as we do them. | |
# this is tricky since more may appear as we go along. for each code value we find, call | |
# one_cv_at_a_time() on it. | |
# build initial list of code values from methods/functions in the subs and the main root | |
# %knowncvs = do { my $x = B::main_cv(); ( $$x => $x ) }; # XXX fix | |
for my $package (keys %knownuniverse) { | |
no strict 'refs'; | |
my @methods = grep { defined &{$package.'::'.$_} } keys %{$package.'::'}; | |
if( grep $_ eq 'new', @methods ) { | |
# move new() to the front of the queue as a hack to hopefully avoid having to re-order the initialization list in the anonhash to bless | |
@methods = grep $_ ne 'new', @methods; | |
unshift @methods, 'new'; | |
} | |
for my $method ( @methods ) { | |
warn "doing sub $package\::$method\n"; | |
my $cv = B::svref_2object(*{$package.'::'.$method}{CODE}); | |
one_cv_at_a_time($cv, $package, $method); | |
} | |
} | |
# XXX iffy on this... double check it | |
# foreach my $cv ((B::main_cv->PADLIST->ARRAY)[1]->ARRAY) { | |
# # print "debug: main pad list: ", ref $cv, "\n"; | |
# next unless ref $cv eq 'B::CV'; | |
# # print "debug: found a cv!\n"; | |
# $knowncvs{$$cv} = $cv; | |
# } | |
# XXX resurrect this or find another way to deal with code we find in anoncode ops | |
# foreach (keys %knowncvs) { | |
# # we look through the list of code values each time just in case something got added | |
# # this happens when we encounter anoncode operations | |
# $curcv = $knowncvs{$_}; goto next_cv if ! $donecvs{$curcv}; | |
# } | |
} | |
sub one_cv_at_a_time { | |
# get ready to recurse through the bytecode tree - build a reverse index, previous, from the next | |
# links and do any debugging output after we traverse the tree | |
$curcv = shift; # stuff it into the global | |
$package = shift or die; # stuff it into the global | |
$method = shift; # stuff it into the global; might be null if we're looking at an anonymous subroutine | |
my $leave = $curcv->ROOT; | |
# since subclasses share instance data fields with superclasses, we need to conicalize $package to the highest level superclass | |
while( my @isa = do { no strict 'refs'; @{ "$package\::ISA" }; } ) { | |
die "don't know how to reconcile multiple superclasses" if @isa > 1; | |
$package = $isa[0]; | |
warn "followed inheritance to package $package"; | |
} | |
# not currently using this but it's really handy to have; a proper perl implementation of re-threading would be even better; does Code::Splice have one of those? | |
# $redo_reverse_indices = sub { | |
# walkoptree_slow($leave, sub { | |
# my $self = shift; return unless $self and $$self; | |
# my $next = $self->next; return unless $next and $$next; | |
# $previous->{$$next} = $self; | |
# }); | |
# }; | |
# $redo_reverse_indices->(); | |
# XXX | |
#warn "getting ready to do this sub:\n"; | |
# concise_cv('basic', $curcv->object_2svref, ); # dump the opcode tree of this code value | |
#warn "/getting ready\n"; | |
walkoptree_slow($leave, \&look_for_things_to_diddle); | |
# B::main_root()->linklist(); | |
# print $$leave, " basic:\n"; B::Concise::concise_cv_obj('basic', $curcv); # debug | |
# print $$leave, " exec:\n"; B::Concise::concise_cv_obj('exec', $curcv); # debug | |
return 1; | |
} | |
my @parents = (); | |
sub walkoptree_slow { | |
# actually recurse the bytecode tree | |
# stolen from B.pm, modified | |
my $op = shift; | |
my $sub = shift; | |
my $level = shift; | |
$level ||= 0; | |
# warn(sprintf("walkoptree: %d. %s\n", $level, peekop($op))) if $debug; | |
$sub->($op, $level, \@parents); | |
if ($op->can('flags') and $op->flags() & OPf_KIDS) { | |
# print "debug: go: ", ' ' x $level, $op->name(), "\n"; # debug | |
push @parents, $op; | |
my $kid = $op->first(); | |
my $next; | |
next_kid: | |
# was being changed right out from under us, so pre-compute | |
$next = 0; $next = $kid->sibling() if $$kid; | |
walkoptree_slow($kid, $sub, $level + 1); | |
$kid = $next; | |
goto next_kid if $kid; | |
pop @parents; | |
} | |
if (B::class($op) eq 'PMOP' && $op->pmreplroot() && ${$op->pmreplroot()}) { | |
# pattern-match operators | |
push @parents, $op; | |
walkoptree_slow($op->pmreplroot(), $sub, $level + 1); | |
pop @parents; | |
} | |
}; | |
sub look_for_things_to_diddle { | |
my $self = shift; # op object | |
my $level = shift; | |
my $parents = shift; | |
return unless $self and $$self; | |
return unless exists $parents->[0]; # root op isn't that interesting and we need a parent | |
my $parent = $parents->[-1]; | |
my $non_null_parent = do { my $i = -1; $i-- until $parents->[$i]->name() ne 'null'; $parents->[$i]; }; | |
my $field_num; | |
if($self->name() eq 'nextstate') { | |
# record where we are in the program for any diagnstics | |
# $lastpack = $self->stash()->PV(); # NAME(); | |
$lastpack = ''; | |
$lastfile = $self->file(); | |
$lastline = $self->line(); | |
} | |
# recognize constructs of the form $self->{constant} | |
# a <2> helem sKRM*/2 ->b <-- helem; change to aelem | |
# 8 <1> rv2hv[t2] sKR/1 ->9 <-- rv2hv; change to rv2av | |
# 7 <0> padsv[$self:1,2] sM/DREFHV ->8 <-- padsv on '$self' | |
# 9 <$> const(PV "foo") s/BARE ->a <-- const; change the SV | |
goto not_instance_data unless $self->name eq 'helem'; | |
goto not_instance_data unless $self->first->name eq 'rv2hv'; | |
goto not_instance_data unless $self->first->first->name eq 'padsv'; | |
# warn "contents of SV referenced by const op: " . (($curcv->PADLIST->ARRAY)[0]->ARRAY)[ $self->first->first->targ ]->PVX; # contents of SV referenced by const op: $self | |
goto not_instance_data unless (($curcv->PADLIST->ARRAY)[0]->ARRAY)[ $self->first->first->targ ]->PVX eq '$self'; | |
# goto not_instance_data unless $self->first->sibling->name eq 'const'; | |
die "accessing \$self with non-constants is not yet supported, while parsing $lastfile, line $lastline" unless $self->first->sibling->name eq 'const'; | |
$field_num = field_number( $self->first->sibling->sv->PVX, $package ); | |
warn "constant: " . $self->first->sibling->sv->PVX . " field_num: $field_num"; | |
$self->type( B::OP->new('aelem', 0)->type ); # B::OP::new(class, type, flags) | |
$self->first->type( B::UNOP->new('rv2av', 0, 0)->type ); # B::UNOP::new(class, type, flags, sv_first) | |
$self->first->sibling->sv( $field_num ); | |
$self->first->sibling->flags( $self->first->sibling->flags ); | |
# see what we came up with | |
#warn "post op tree hackery:\n"; | |
# concise_cv('basic', $curcv->object_2svref, ); # dump the opcode tree of this code value | |
#warn "/post op tree hackery\n"; | |
#warn "code post hackery:\n"; | |
# my $dp = B::Deparse->new; | |
# $dp->{curcv} = $curcv; | |
# warn $dp->deparse($self, 0), "\n"; | |
#warn "/code post hackery\n"; | |
not_instance_data: | |
# | |
# | |
# | |
# recognize constructs of the form bless { }, $package | |
# recognize constructs of the form bless { foo => 1, }, $package | |
# 9 <@> bless sK/2 ->a | |
# - <0> ex-pushmark s ->6 | |
# 7 <@> anonhash sK* ->8 <-- change to anonlist | |
# 6 <0> pushmark s ->7 <-- XXX remove the keys from the key/value pairs and use them to learn fields | |
# 8 <0> padsv[$package:1,2] s ->9 | |
# a <@> bless vK/2 ->b | |
# - <0> ex-pushmark s ->5 | |
# 8 <@> anonhash sK*/1 ->9 | |
# 5 <0> pushmark s ->6 | |
# 6 <$> const(PV "foo") s/BARE ->7 | |
# 7 <$> const(IV 1) s ->8 | |
# 9 <0> padsv[$package:1,2] s ->a | |
goto not_bless unless $self->name eq 'bless'; | |
goto not_bless unless $self->first->sibling->name eq 'anonhash'; | |
goto not_bless unless $self->first->sibling->sibling->name eq 'padsv'; | |
goto not_bless unless grep $_ eq (($curcv->PADLIST->ARRAY)[0]->ARRAY)[ $self->first->sibling->sibling->targ ]->PVX, '$package', '$class'; | |
#warn "pre op tree hackery:\n"; | |
# concise_cv('basic', $curcv->object_2svref, ); # dump the opcode tree of this code value | |
#warn "/pre op tree hackery\n"; | |
# found it; start changing stuff around | |
$self->first->sibling->type( B::LISTOP->new('anonlist', 0, 0, 0)->type ); # B::LISTOP::new(class, type, flags, sv_first, sv_last) | |
do { | |
# take the keys out of the list of data that initialized the anonhash | |
# XXX if blessing happens elsewhere than new() and we already have numbers for fields, we will have to re-order the stuff in the list | |
my $pushmark = $self->first->sibling->first; | |
$pushmark->name eq 'pushmark' or die; | |
my $list_item = $pushmark->sibling; | |
my @initialization_list; | |
while( $list_item and $$list_item ) { | |
die unless $list_item->name eq 'const'; | |
my $field_num = field_number( $list_item->sv->PVX, $package ); | |
warn "constant in bless anonhash: " . $list_item->sv->PVX . " field_num: $field_num"; | |
$list_item = $list_item->sibling; # skip to the value | |
$list_item and $$list_item or die; # make sure there is a value | |
push @initialization_list, [ $field_num, $list_item ]; | |
# warn "setting next from " . deparse($last_list_item) . " to " . deparse($list_item); | |
# $last_list_item->next( $list_item ); # splice out the key | |
# $last_list_item->sibling( $list_item ); # splice out the key | |
# $last_list_item = $list_item; | |
$list_item = $list_item->sibling; # skip to the next key, if there is one | |
} | |
if( @initialization_list ) { | |
# re-order values in the init list so that they're in the right slots to be used to initialize use as an array | |
restart_sort_and_fill_in_initialization_list_gaps: | |
@initialization_list = sort { $a->[0] <=> $b->[0] } @initialization_list; | |
# insert undef for any skipped numeric positions | |
for( my $i = 0; $i < @initialization_list; $i++ ) { | |
if( $initialization_list[$i][0] != $i ) { | |
# warn "inserting a new undef const in position $i"; | |
my $const = B::SVOP->new('const', 0, 0 ); # B::SVOP::new(class, type, flags, sv) | |
# $const->sv( B::sv_undef ); | |
$const->sv( undef ); | |
push @initialization_list, [ $i, $const, ]; | |
goto restart_sort_and_fill_in_initialization_list_gaps; | |
} | |
} | |
# warn Data::Dumper::Dumper \@initialization_list; | |
# relink the initialization list of const ops | |
my $last_list_item = $pushmark; | |
for( my $i = 0; $i < @initialization_list; $i++ ) { | |
my $cur_list_item = $initialization_list[$i][1]; | |
# warn "next value in list: " . ${ $cur_list_item->sv->object_2svref }; | |
$last_list_item->sibling( $cur_list_item ); | |
$last_list_item->next( $cur_list_item ); | |
$last_list_item = $cur_list_item; | |
} | |
$last_list_item->sibling( 0 ); | |
$last_list_item->next( $self->first->sibling ); # that's the 'anonhash', now 'anonlist' | |
} | |
}; | |
#warn "post op tree hackery:\n"; | |
# concise_cv('basic', $curcv->object_2svref, ); # dump the opcode tree of this code value | |
#warn "/post op tree hackery\n"; | |
#warn "code post hackery:\n"; | |
# my $dp = B::Deparse->new; | |
# $dp->{curcv} = $curcv; | |
# warn $dp->deparse($self, 0), "\n"; | |
#warn "/code post hackery\n"; | |
not_bless: | |
return 0; | |
} | |
sub deparse { | |
my $op = shift; | |
my $dp = B::Deparse->new; | |
$dp->{curcv} = $curcv; | |
return $dp->deparse($op, 0); | |
} | |
sub field_number { | |
my $name = shift; # name of some $self->{...} subscript | |
my $package = shift or die; | |
# if that field in that package already has a number, return it | |
return $fields->{$package}->{$name} if exists $fields->{$package}->{$name}; | |
# figure out what the next unused number is and assign it and return that | |
my $max_observed = -1; | |
for my $k ( %{ $fields->{$package} } ) { | |
$max_observed = $fields->{$package}->{$k} if defined $fields->{$package}->{$k} and $fields->{$package}->{$k} > $max_observed; | |
} | |
$fields->{$package}->{$name} = $max_observed + 1; | |
return $fields->{$package}->{$name}; | |
} | |
1; | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package foo; | |
use strict; | |
use warnings; | |
use lib '.'; | |
use arrays2; | |
sub new { | |
my $package = shift; | |
# bless [ ], $package; # XXX translate bless { } | |
bless { foo => 1, bar => 2, }, $package; # XXX translate bless { } | |
} | |
sub foo { | |
my $self = shift; | |
$self->{foo} = shift if @_; | |
return $self->{foo}; | |
} | |
sub bar { | |
my $self = shift; | |
$self->{bar} = shift if @_; | |
return $self->{bar}; | |
} | |
package main; | |
my $foo = foo->new; | |
$foo->foo(30); | |
print "foo: ", $foo->foo, "\n"; | |
$foo->foo eq '30' or die; | |
$foo->bar(15); | |
print "bar: ", $foo->foo, "\n"; | |
$foo->bar eq '15' or die; | |
print "foo: ", $foo->foo, "\n"; | |
$foo->foo eq '30' or die; | |
$foo->foo(45); | |
print "foo: ", $foo->foo, "\n"; | |
$foo->foo eq '45' or die; | |
$foo->bar eq '15' or die; | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
perl -e 'package Foo::Bar; sub yy { 1 }; package main; use Devel::Peek; my Foo::Bar $x; Dump($x);' | |
SV = NULL(0x0) at 0x805c6e8 | |
REFCNT = 1 | |
FLAGS = (PADMY) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package foo; | |
use strict; | |
use warnings; | |
use lib '.'; | |
# use arrays2; | |
sub new { | |
my $package = shift; | |
# bless [ ], $package; # XXX translate bless { } | |
bless { foo => 1, bar => 2, }, $package; # XXX translate bless { } | |
} | |
sub foo { | |
my $self = shift; | |
$self->{foo} = shift if @_; | |
return $self->{foo}; | |
} | |
sub bar { | |
my $self = shift; | |
$self->{bar} = shift if @_; | |
return $self->{bar}; | |
} | |
package main; | |
use Benchmark qw(:all) ; | |
timethis(50000, sub { | |
my $foo = foo->new; | |
$foo->foo(30); | |
# print "foo: ", $foo->foo, "\n"; | |
$foo->foo eq '30' or die; | |
$foo->bar(15); | |
# print "bar: ", $foo->foo, "\n"; | |
$foo->bar eq '15' or die; | |
# print "foo: ", $foo->foo, "\n"; | |
$foo->foo eq '30' or die; | |
$foo->foo(45); | |
# print "foo: ", $foo->foo, "\n"; | |
$foo->foo eq '45' or die; | |
$foo->bar eq '15' or die; | |
}); | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package foo; | |
use strict; | |
use warnings; | |
use lib '.'; | |
use arrays2; | |
sub new { | |
my $package = shift; | |
# bless [ ], $package; # XXX translate bless { } | |
bless { foo => 33, bar => 2, }, $package; # XXX translate bless { } | |
} | |
sub foo { | |
my $self = shift; | |
$self->{foo} = shift if @_; | |
return $self->{foo}; | |
} | |
sub bar { | |
my $self = shift; | |
$self->{bar} = shift if @_; | |
return $self->{bar}; | |
} | |
package quux; | |
use base 'foo'; | |
use arrays2; | |
sub baz { | |
my $self = shift; | |
# $self->{ int rand 10 } = int rand 20; # this will generate an error | |
} | |
sub quux { | |
my $self = shift; | |
$self->{quux} = shift if @_; | |
$self->{foo}++; | |
warn "foo is now $self->{foo}"; | |
return $self->{quux}; | |
} | |
package main; | |
my $foo = foo->new; | |
$foo->foo(30); | |
# print "foo: ", $foo->foo, "\n"; | |
$foo->foo eq '30' or die; | |
$foo->bar(15); | |
# print "bar: ", $foo->foo, "\n"; | |
$foo->bar eq '15' or die; | |
# print "foo: ", $foo->foo, "\n"; | |
$foo->foo eq '30' or die; | |
$foo->foo(45); | |
# print "foo: ", $foo->foo, "\n"; | |
$foo->foo eq '45' or die; | |
$foo->bar eq '15' or die; | |
my $quux = quux->new; | |
$quux->quux(50); | |
$quux->quux eq '50' or die; | |
warn $quux->foo; | |
$quux->foo eq '35' or die; # incremented twice from 33 to 35 because quux() was called twice | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment