Skip to content

Instantly share code, notes, and snippets.

@niner
Last active July 8, 2020 15:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save niner/63a718023aba72e0dffc39c1ccd84e32 to your computer and use it in GitHub Desktop.
Save niner/63a718023aba72e0dffc39c1ccd84e32 to your computer and use it in GitHub Desktop.
use nqp;
# Sequential reader for fixed-width unsigned integers stored in a blob8.
# Multi-byte values are decoded as little-endian regardless of the host's
# byte order, so the same input yields the same values on every platform.
class ByteReader {
    # Buffer the values are read from.
    has blob8 $.buf;
    # Byte position of the next read; writable so callers can reposition.
    has uint64 $.offset is rw;

    # Byte width of a supported native type (uint32 => 4, uint16 => 2).
    # Dies with "unsupported type: <name>" for anything else.
    # Lexical sub rather than a method so it is usable from dump-struct,
    # which is invoked on the type object.
    my sub width-of(\t) {
        t =:= uint32 ?? 4
            !! t =:= uint16 ?? 2
            !! die "unsupported type: {t.^name}";
    }

    # Read one value of native type t (uint32 or uint16) at the current
    # offset, advance the offset by the value's width and return the value.
    # Dies for unsupported types before touching the buffer or the offset.
    method read(\t) {
        my $width = width-of(t);
        my $res = $width == 4
            ?? $!buf.read-uint32($!offset, LittleEndian)
            !! $!buf.read-uint16($!offset, LittleEndian);
        $!offset += $width;
        $res
    }

    # Instantiate t and fill each of its attributes, in declaration order,
    # with a value read from the buffer according to the attribute's type.
    # Returns the populated object.
    method read-struct(\t) {
        my $obj = t.new;
        for t.^attributes -> $attr {
            $attr.set_value($obj, self.read($attr.type));
        }
        $obj
    }

    # Print one line per attribute of $obj: the attribute's byte position
    # within the struct (hexadecimal), its name and its value.
    # Does not use any instance state, so it may be called on the type object.
    # Returns $obj unchanged.
    method dump-struct($obj) {
        my $pos = 0;
        for $obj.^attributes -> $attr {
            say "$pos.base(16) $attr.name(): $attr.get_value($obj)";
            $pos += width-of($attr.type);
        }
        $obj
    }
}
# Reader for a MoarVM bytecode blob. Holds the raw bytes in $!mbc, parses
# the header and the frame table, and exposes the decoded string heap.
class MoarVM::Bytecode {
    # Raw bytes, starting at the "MOARVM\r\n" marker.
    has $!mbc;
    # Decoded strings; populated by read-header.
    has $.string-heap;
    # The attributes below are declared but not used by the code in this class.
    has $!callsites;
    has $!annotations;
    has $!compunit;
    has @!frames;
    has $!bytecode;
    has @!sc_handle_idxs;
    has @!extop_name_idxs;

    # Header fields that follow the 8-byte marker, in on-disk order.
    my class Header {
        has uint32 $.version;
        has uint32 $.sc-deps-offset;
        has uint32 $.sc-deps-elems;
        has uint32 $.extops-offset;
        has uint32 $.extops-elems;
        has uint32 $.frames-offset;
        has uint32 $.frames-elems;
        has uint32 $.callsites-offset;
        has uint32 $.callsites-elems;
        has uint32 $.strings-offset;
        has uint32 $.strings-elems;
        has uint32 $.sc-offset;
        has uint32 $.sc-bytes;
        has uint32 $.bytecode-offset;
        has uint32 $.bytecode-bytes;
        has uint32 $.annotations-offset;
        has uint32 $.annotations-size;
        has uint32 $.hll-idx;
        has uint32 $.mainline-frame-idx;
        has uint32 $.main-frame-idx;
        has uint32 $.load-frame-idx;
        has uint32 $.deserialize-frame-idx;
    }

    # Fixed-size part of one frame table entry, in on-disk order.
    my class Frame {
        has uint32 $.bytecode-offset;
        has uint32 $.bytecode-length;
        has uint32 $.local-types;
        has uint32 $.lexicals;
        has uint32 $.cuuid-idx;
        has uint32 $.name-idx;
        has uint16 $.outer-idx;
        has uint32 $.annotations-offset;
        has uint32 $.num-annotations;
        has uint32 $.num-handlers;
        has uint16 $.flags;
        has uint16 $.num-static-lex-values;
        has uint32 $.code-obj-sc-dep-idx;
        has uint32 $.code-obj-sc-idx;
        has uint32 $.debug-map;
    }

    # Fixed-size part of one handler entry, in on-disk order.
    my class Handler {
        has uint32 $.start-offset;
        has uint32 $.end-offset;
        has uint32 $.category-mask;
        has uint16 $.action;
        has uint16 $.local;
        has uint32 $.label;
    }

    # Round $size up to the next multiple of 8.
    # Currently not called anywhere in this class.
    sub align_section($size) {
        my uint32 $aligned = ceiling($size / 8) * 8;
        $aligned
    }

    # Bind the :mbc named argument of .new to $!mbc.
    # A submethod so that it is not inherited by subclasses.
    submethod BUILD(:$!mbc) { }

    # Build an instance from the file at $file-name.
    # Scans the file line by line for a line consisting of 'MOARVM' and
    # loads everything from the start of that line onwards as the bytecode.
    # Writes the byte offset of the marker to stderr.
    # Dies if the file contains no such line.
    method from-file($file-name) {
        my $file = $file-name.IO.open;
        my $offset = 0;
        my $found = False;
        loop {
            my $line = $file.get;
            # .get yields an undefined value at end of file; stop instead of
            # looping forever when the marker is missing.
            last unless $line.defined;
            if $line eq 'MOARVM' {
                $found = True;
                last;
            }
            # Remember where the next line starts.
            $offset = $file.tell;
        }
        $file.close;
        die "No MOARVM marker found in $file-name" unless $found;
        note $offset;
        my $bin = $file-name.IO.open(:bin);
        $bin.seek($offset);
        self.new(:mbc($bin.slurp(:bin, :close)));
    }

    # Decode $elems strings starting at byte $offset, using $reader for the
    # length words. Each entry is a uint32 whose lowest bit selects the
    # encoding (1 => utf8, 0 => latin-1) and whose remaining bits give the
    # byte length, followed by the string bytes padded to a multiple of 4.
    # Leaves $reader positioned after the last string. Returns the strings.
    method read-string-heap($reader, $offset, $elems) {
        my @strings;
        $reader.offset = $offset;
        for ^$elems {
            my $bytes = $reader.read(uint32);
            my $utf8 = $bytes +& 1;
            $bytes +>= 1;
            @strings.push: $!mbc.subbuf($reader.offset, $bytes).decode($utf8 ?? 'utf8' !! 'latin-1');
            # Skip the string data plus padding up to the next 4-byte boundary.
            my int $pad = 4 - $bytes % 4;
            $pad = 0 if $pad == 4;
            $reader.offset += $bytes + $pad;
        }
        @strings;
    }

    # Parse the header, populate the string heap and walk the frame table.
    # Prints the main section offsets to stdout and one diagnostic line per
    # frame (plus its debug map position) to stderr.
    # Dies if the marker is not "MOARVM\r\n" or the version is not 7.
    # Returns the parsed Header object.
    method read-header() {
        # 92 bytes
        my $offset = 0;
        my $magic = $!mbc.subbuf($offset, 8).decode;
        die "Could not recognize header marker $magic" if $magic ne "MOARVM\r\n";
        $offset += 8;
        my $reader = ByteReader.new(:buf($!mbc), :$offset);
        my $header = $reader.read-struct(Header);
        my $version = $header.version;
        die "Unknown version $version" if $version != 7;
        say 'mainline-frame: ' ~ $header.mainline-frame-idx;
        say 'sc-deps: ' ~ $header.sc-deps-offset.base(16);
        say 'extops: ' ~ $header.extops-offset.base(16);
        say 'frames: ' ~ $header.frames-offset.base(16);
        say 'callsites: ' ~ $header.callsites-offset.base(16);
        say 'strings: ' ~ $header.strings-offset.base(16);
        say 'sc: ' ~ $header.sc-offset.base(16);
        say 'bytecode: ' ~ $header.bytecode-offset.base(16);
        say 'annotations: ' ~ $header.annotations-offset.base(16);
        $!string-heap = my @strings = self.read-string-heap($reader, $header.strings-offset, $header.strings-elems);
        $reader.offset = $header.frames-offset;
        for ^$header.frames-elems {
            my $start = $reader.offset;
            my $frame = $reader.read-struct(Frame);
            note "$start.base(16): @strings[$frame.name-idx] (cuid: @strings[$frame.cuuid-idx]), bytecode at {($frame.bytecode-offset() + $header.bytecode-offset).base(16)}";
            # Skip the variable-length tables that follow the fixed part:
            # 2 bytes per local type ...
            $reader.offset += $frame.local-types * 2;
            #for $frame.local-types {
            #    my $type = $reader.read(uint16);
            #    my $name = $reader.read(uint32);
            #    say @strings[$name];
            #}
            # ... and 6 bytes per lexical.
            $reader.offset += $frame.lexicals * 6;
            for ^$frame.num-handlers {
                my $handler = $reader.read-struct(Handler);
                # Handlers with bit 4096 set in the category mask carry one
                # extra uint16, which is read and discarded.
                # NOTE(review): presumably the "labeled" category - confirm.
                if $handler.category-mask +& 4096 {
                    $reader.read(uint16);
                }
            }
            # 12 bytes per static lexical value.
            $reader.offset += $frame.num-static-lex-values * 12;
            note(" debug map at $reader.offset().base(16)");
            # 6 bytes per debug map entry.
            $reader.offset += $frame.debug-map * 6;
        }
        $header;
    }
}
# Script entry point: load the bytecode file named by the first command-line
# argument, dump the parsed header fields, then print every heap string.
my $file-name = @*ARGS[0];
unless $file-name.defined {
    # Without an argument the file lookup would fail with an obscure error;
    # report the expected usage instead.
    note "Usage: {$*PROGRAM-NAME} <moarvm-bytecode-file>";
    exit 1;
}
my $bytecode = MoarVM::Bytecode.from-file($file-name);
ByteReader.dump-struct($bytecode.read-header);
for $bytecode.string-heap {
    say $_
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment