Last active
August 28, 2018 14:16
-
-
Save tomsmeding/031905a866edbe60152ab99fb03b32fe to your computer and use it in GitHub Desktop.
Hotpatcher for my Art Attack WASM bots. (https://codegolf.stackexchange.com/a/171243/6689)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdlib> | |
#include "hotpatcher.h" | |
using namespace std; | |
// Rewrites a parsed module in place so that it no longer depends on an
// imported stack pointer:
//   - removes the imported global env.__stack_pointer;
//   - pins every memory import to exactly num_mem_pages pages;
//   - turns the single `get_global __stack_pointer` into an i32.const and
//     every `set_global __stack_pointer` into `drop`;
//   - shifts the index of every higher-numbered global access down by one;
//   - exports function index 2 under the name "entry_fn".
// Progress is reported on stdout. The process exits with status 1 when the
// stack pointer is read more than once.
void hotpatch(Wasm &wasm) {
    using InstrTables::Op;

    static const int num_mem_pages = 15;

    auto &imports = wasm.importSection.imports;

    // Global index of __stack_pointer: the number of global imports that
    // precede it in the import section.
    uint32_t sp_index = 0;
    bool found_sp = false;

    for (size_t i = 0; i < imports.size(); ) {
        auto &info = imports[i];

        if (!found_sp && info.kind == Wasm::Kind::Global) {
            if (info.mod_name == "env" && info.export_name == "__stack_pointer") {
                imports.erase(imports.begin() + i);
                found_sp = true;
                cout << "Removed __stack_pointer import" << endl;
                // Keep scanning (memory imports may follow); slot i now holds
                // the next import, so do not advance.
                continue;
            }
            sp_index++;
        }

        if (info.kind == Wasm::Kind::Memory) {
            cout << "Changed memory import limit from " << info.limit << " to ";
            info.limit.minimum = info.limit.maximum = num_mem_pages;
            cout << info.limit << endl;
        }

        i++;
    }

    // Wasm::TypeSection::Info typeInfo;
    // typeInfo.params.push_back(Wasm::Type::I32);
    // uint32_t print_int_type_index = wasm.typeSection.types.size();
    // wasm.typeSection.types.push_back(move(typeInfo));
    // cout << "Added type: void(int) at index=" << print_int_type_index << endl;

    // Wasm::ImportSection::Info importInfo;
    // importInfo.kind = Wasm::Kind::Function;
    // importInfo.mod_name = "js";
    // importInfo.export_name = "print_int";
    // importInfo.index = print_int_type_index;
    // wasm.importSection.imports.push_back(move(importInfo));
    // cout << "Added import: Function void js.print_int(int)" << endl;

    if (!found_sp) {
        // Without a removed import there is no index to patch or renumber;
        // touching global accesses here would corrupt unrelated globals.
        cout << "No __stack_pointer import found, leaving global accesses untouched" << endl;
    } else {
        bool have_get_global = false;
        for (auto &body : wasm.codeSection.bodies) {
            for (auto &instr : body.instrs) {
                const bool is_get = instr.op == Op::Get_global;
                const bool is_set = instr.op == Op::Set_global;
                if (!is_get && !is_set) continue;

                if (instr.arg32_1 > sp_index) {
                    // The removed import shifted every later global down by one.
                    instr.arg32_1--;
                } else if (instr.arg32_1 == sp_index && is_get) {
                    if (have_get_global) {
                        cout << "Multiple get_global's of the stack pointer found, bailing" << endl;
                        exit(1);
                    }
                    // Initial stack pointer: 1000 bytes below the end of the
                    // pinned memory (65536 bytes per page).
                    instr.op = Op::I32_const;
                    instr.arg32_1 = num_mem_pages * 65536 - 1000;
                    cout << "'get_global __stack_pointer' patched to 'i32_const " << instr.arg32_1 << "'" << endl;
                    have_get_global = true;
                } else if (instr.arg32_1 == sp_index && is_set) {
                    // The value that would have been stored is discarded.
                    instr.op = Op::Drop;
                    cout << "'set_global __stack_pointer' patched to 'drop'" << endl;
                }
            }
        }
    }

    // for (auto &segment : wasm.dataSection.segments) {
    //     cout << "data segment: size=" << segment.data.size() << endl;
    // }

    Wasm::ExportSection::Info exportInfo;
    exportInfo.name = "entry_fn";
    exportInfo.kind = Wasm::Kind::Function;
    exportInfo.index = 2;
    wasm.exportSection.exports.push_back(move(exportInfo));
    cout << "Added export: Function <entry_fn> index=2" << endl;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include "wasm.h" | |
using namespace std; | |
void hotpatch(Wasm &wasm); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "instrtables.h" | |
using namespace InstrTables; | |
// Lookup table from opcode byte to instruction description, with one entry
// per row of INSTRTABLES_OP_XLIST.
#define X(mnemonic, opcode, ...) {opcode, {opcode, Op::mnemonic, __VA_ARGS__}},
unordered_map<uint8_t, InstrInfo> InstrTables::byte_map = {
    INSTRTABLES_OP_XLIST
};
#undef X
// Lookup table from Op enumerator to instruction description, with one entry
// per row of INSTRTABLES_OP_XLIST.
#define X(mnemonic, opcode, ...) {Op::mnemonic, {opcode, Op::mnemonic, __VA_ARGS__}},
unordered_map<Op, InstrInfo> InstrTables::op_map = {
    INSTRTABLES_OP_XLIST
};
#undef X
#define STR_(x) #x | |
#define STR(x) STR_(x) | |
ostream& operator<<(ostream &os, InstrTables::Op op) { | |
switch(op) { | |
#define X(name, byt, ...) case Op::name: return os << STR(name); | |
INSTRTABLES_OP_XLIST | |
#undef X | |
default: | |
return os << "???"; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <iostream> | |
#include <vector> | |
#include <unordered_map> | |
#include <cstdint> | |
using namespace std; | |
// X-macro list of the supported instructions. Each row is
//     X(EnumeratorName, opcode byte, {immediate parts following the opcode})
// and the list is expanded with different definitions of X to build the Op
// enum, the lookup tables and the printer.
//
// Immediates are read and written in the order listed. br_table carries a
// vector of branch targets followed by one default target, so it has two
// parts; with only the vector listed, the default label would be decoded as
// the next opcode.
#define INSTRTABLES_OP_XLIST \
    X(Unreachable, 0x00, {}) \
    X(Nop, 0x01, {}) \
    X(Block, 0x02, {Part::TYPE}) /* type */ \
    X(Loop, 0x03, {Part::TYPE}) /* type */ \
    X(If, 0x04, {Part::TYPE}) /* type */ \
    X(Else, 0x05, {}) \
    X(End, 0x0B, {}) \
    X(Br, 0x0C, {Part::VU32}) /* index */ \
    X(Br_if, 0x0D, {Part::VU32}) /* index */ \
    X(Br_table, 0x0E, {Part::VEC_VU32, Part::VU32}) /* vec(index) default-index */ \
    X(Return, 0x0F, {}) \
    X(Call, 0x10, {Part::VU32}) /* index */ \
    X(Call_indirect, 0x11, {Part::VU32, Part::ZEROBYTE}) /* index 0x00 */ \
    X(Drop, 0x1A, {}) \
    X(Select, 0x1B, {}) \
    X(Get_local, 0x20, {Part::VU32}) /* index */ \
    X(Set_local, 0x21, {Part::VU32}) /* index */ \
    X(Tee_local, 0x22, {Part::VU32}) /* index */ \
    X(Get_global, 0x23, {Part::VU32}) /* index */ \
    X(Set_global, 0x24, {Part::VU32}) /* index */ \
    X(I32_load, 0x28, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_load, 0x29, {Part::VU32, Part::VU32}) /* align offset */ \
    X(F32_load, 0x2A, {Part::VU32, Part::VU32}) /* align offset */ \
    X(F64_load, 0x2B, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I32_load8_s, 0x2C, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I32_load8_u, 0x2D, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I32_load16_s, 0x2E, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I32_load16_u, 0x2F, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_load8_s, 0x30, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_load8_u, 0x31, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_load16_s, 0x32, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_load16_u, 0x33, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_load32_s, 0x34, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_load32_u, 0x35, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I32_store, 0x36, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_store, 0x37, {Part::VU32, Part::VU32}) /* align offset */ \
    X(F32_store, 0x38, {Part::VU32, Part::VU32}) /* align offset */ \
    X(F64_store, 0x39, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I32_store8, 0x3A, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I32_store16, 0x3B, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_store8, 0x3C, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_store16, 0x3D, {Part::VU32, Part::VU32}) /* align offset */ \
    X(I64_store32, 0x3E, {Part::VU32, Part::VU32}) /* align offset */ \
    X(Memory_size, 0x3F, {Part::ZEROBYTE}) /* 0x00 */ \
    X(Memory_grow, 0x40, {Part::ZEROBYTE}) /* 0x00 */ \
    X(I32_const, 0x41, {Part::VI32}) /* n */ \
    X(I64_const, 0x42, {Part::VI64}) /* n[64] */ \
    X(F32_const, 0x43, {Part::F32}) /* z[f32] */ \
    X(F64_const, 0x44, {Part::F64}) /* z[f64] */ \
    X(I32_eqz, 0x45, {}) \
    X(I32_eq, 0x46, {}) \
    X(I32_ne, 0x47, {}) \
    X(I32_lt_s, 0x48, {}) \
    X(I32_lt_u, 0x49, {}) \
    X(I32_gt_s, 0x4A, {}) \
    X(I32_gt_u, 0x4B, {}) \
    X(I32_le_s, 0x4C, {}) \
    X(I32_le_u, 0x4D, {}) \
    X(I32_ge_s, 0x4E, {}) \
    X(I32_ge_u, 0x4F, {}) \
    X(I64_eqz, 0x50, {}) \
    X(I64_eq, 0x51, {}) \
    X(I64_ne, 0x52, {}) \
    X(I64_lt_s, 0x53, {}) \
    X(I64_lt_u, 0x54, {}) \
    X(I64_gt_s, 0x55, {}) \
    X(I64_gt_u, 0x56, {}) \
    X(I64_le_s, 0x57, {}) \
    X(I64_le_u, 0x58, {}) \
    X(I64_ge_s, 0x59, {}) \
    X(I64_ge_u, 0x5A, {}) \
    X(F32_eq, 0x5B, {}) \
    X(F32_ne, 0x5C, {}) \
    X(F32_lt, 0x5D, {}) \
    X(F32_gt, 0x5E, {}) \
    X(F32_le, 0x5F, {}) \
    X(F32_ge, 0x60, {}) \
    X(F64_eq, 0x61, {}) \
    X(F64_ne, 0x62, {}) \
    X(F64_lt, 0x63, {}) \
    X(F64_gt, 0x64, {}) \
    X(F64_le, 0x65, {}) \
    X(F64_ge, 0x66, {}) \
    X(I32_clz, 0x67, {}) \
    X(I32_ctz, 0x68, {}) \
    X(I32_popcnt, 0x69, {}) \
    X(I32_add, 0x6A, {}) \
    X(I32_sub, 0x6B, {}) \
    X(I32_mul, 0x6C, {}) \
    X(I32_div_s, 0x6D, {}) \
    X(I32_div_u, 0x6E, {}) \
    X(I32_rem_s, 0x6F, {}) \
    X(I32_rem_u, 0x70, {}) \
    X(I32_and, 0x71, {}) \
    X(I32_or, 0x72, {}) \
    X(I32_xor, 0x73, {}) \
    X(I32_shl, 0x74, {}) \
    X(I32_shr_s, 0x75, {}) \
    X(I32_shr_u, 0x76, {}) \
    X(I32_rotl, 0x77, {}) \
    X(I32_rotr, 0x78, {}) \
    X(I64_clz, 0x79, {}) \
    X(I64_ctz, 0x7A, {}) \
    X(I64_popcnt, 0x7B, {}) \
    X(I64_add, 0x7C, {}) \
    X(I64_sub, 0x7D, {}) \
    X(I64_mul, 0x7E, {}) \
    X(I64_div_s, 0x7F, {}) \
    X(I64_div_u, 0x80, {}) \
    X(I64_rem_s, 0x81, {}) \
    X(I64_rem_u, 0x82, {}) \
    X(I64_and, 0x83, {}) \
    X(I64_or, 0x84, {}) \
    X(I64_xor, 0x85, {}) \
    X(I64_shl, 0x86, {}) \
    X(I64_shr_s, 0x87, {}) \
    X(I64_shr_u, 0x88, {}) \
    X(I64_rotl, 0x89, {}) \
    X(I64_rotr, 0x8A, {}) \
    X(F32_abs, 0x8B, {}) \
    X(F32_neg, 0x8C, {}) \
    X(F32_ceil, 0x8D, {}) \
    X(F32_floor, 0x8E, {}) \
    X(F32_trunc, 0x8F, {}) \
    X(F32_nearest, 0x90, {}) \
    X(F32_sqrt, 0x91, {}) \
    X(F32_add, 0x92, {}) \
    X(F32_sub, 0x93, {}) \
    X(F32_mul, 0x94, {}) \
    X(F32_div, 0x95, {}) \
    X(F32_min, 0x96, {}) \
    X(F32_max, 0x97, {}) \
    X(F32_copysign, 0x98, {}) \
    X(F64_abs, 0x99, {}) \
    X(F64_neg, 0x9A, {}) \
    X(F64_ceil, 0x9B, {}) \
    X(F64_floor, 0x9C, {}) \
    X(F64_trunc, 0x9D, {}) \
    X(F64_nearest, 0x9E, {}) \
    X(F64_sqrt, 0x9F, {}) \
    X(F64_add, 0xA0, {}) \
    X(F64_sub, 0xA1, {}) \
    X(F64_mul, 0xA2, {}) \
    X(F64_div, 0xA3, {}) \
    X(F64_min, 0xA4, {}) \
    X(F64_max, 0xA5, {}) \
    X(F64_copysign, 0xA6, {}) \
    X(I32_wrap_i64, 0xA7, {}) \
    X(I32_trunc_s_f32, 0xA8, {}) \
    X(I32_trunc_u_f32, 0xA9, {}) \
    X(I32_trunc_s_f64, 0xAA, {}) \
    X(I32_trunc_u_f64, 0xAB, {}) \
    X(I64_extend_s_i32, 0xAC, {}) \
    X(I64_extend_u_i32, 0xAD, {}) \
    X(I64_trunc_s_f32, 0xAE, {}) \
    X(I64_trunc_u_f32, 0xAF, {}) \
    X(I64_trunc_s_f64, 0xB0, {}) \
    X(I64_trunc_u_f64, 0xB1, {}) \
    X(F32_convert_s_i32, 0xB2, {}) \
    X(F32_convert_u_i32, 0xB3, {}) \
    X(F32_convert_s_i64, 0xB4, {}) \
    X(F32_convert_u_i64, 0xB5, {}) \
    X(F32_demote_f64, 0xB6, {}) \
    X(F64_convert_s_i32, 0xB7, {}) \
    X(F64_convert_u_i32, 0xB8, {}) \
    X(F64_convert_s_i64, 0xB9, {}) \
    X(F64_convert_u_i64, 0xBA, {}) \
    X(F64_promote_f32, 0xBB, {}) \
    X(I32_reinterpret_f32, 0xBC, {}) \
    X(I64_reinterpret_f64, 0xBD, {}) \
    X(F32_reinterpret_i32, 0xBE, {}) \
    X(F64_reinterpret_i64, 0xBF, {})
namespace InstrTables {

// One enumerator per row of INSTRTABLES_OP_XLIST, in list order. The
// enumerator values are list positions, not opcode bytes.
enum class Op {
#define X(mnemonic, opcode, ...) mnemonic,
    INSTRTABLES_OP_XLIST
#undef X
};

// Kinds of immediate operand that can follow an opcode byte.
enum class Part {
    VU32,      // variable-length unsigned 32-bit integer
    VU64,      // variable-length unsigned 64-bit integer
    VI32,      // variable-length signed 32-bit integer
    VI64,      // variable-length signed 64-bit integer
    VEC_VU32,  // count followed by that many VU32 values
    F32,       // four raw bytes
    F64,       // eight raw bytes
    TYPE,      // single type byte
    ZEROBYTE,  // single byte that must be 0x00
};

// Static description of one instruction: its opcode byte, its enumerator and
// the immediates that follow the opcode, in encoding order.
struct InstrInfo {
    uint8_t byte;
    Op op;
    vector<Part> parts;
};

// Lookup tables keyed by opcode byte and by enumerator respectively.
extern unordered_map<uint8_t, InstrInfo> byte_map;
extern unordered_map<Op, InstrInfo> op_map;

}  // namespace InstrTables
ostream& operator<<(ostream &os, InstrTables::Op op); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <fstream> | |
#include <string> | |
#include <vector> | |
#include "wasm.h" | |
#include "hotpatcher.h" | |
using namespace std; | |
// Usage: hotpatcher <input.wasm> <output.wasm>
// Reads the input module, applies hotpatch() and writes the result.
// Returns 1 on a usage or I/O error, 0 otherwise. Exceptions thrown while
// parsing the module are not caught here.
int main(int argc, char **argv) {
    if (argc != 3) {
        cerr << "Usage: " << argv[0] << " <input.wasm> <output.wasm>" << endl;
        return 1;
    }

    // Wasm modules are binary; text mode would translate bytes on platforms
    // that distinguish the two.
    ifstream file(argv[1], ios::in | ios::binary);
    if (!file) {
        cerr << "Input file cannot be opened" << endl;
        return 1;
    }

    string contents;
    char block[4096];
    // A short final read sets failbit but still delivers gcount() bytes.
    while (file.read(block, sizeof block) || file.gcount() > 0) {
        contents.append(block, static_cast<size_t>(file.gcount()));
    }
    if (file.bad()) {
        cerr << "Input file cannot be read" << endl;
        return 1;
    }
    file.close();

    Wasm wasm(contents);
    hotpatch(wasm);

    ofstream out(argv[2], ios::out | ios::binary | ios::trunc);
    if (!out) {
        cerr << "Output file cannot be opened" << endl;
        return 1;
    }
    wasm.write(out);
    out.flush();
    if (!out) {
        cerr << "Output file cannot be written" << endl;
        return 1;
    }
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the hotpatcher binary from every .cpp file in this directory.
CXX = g++
CXXFLAGS = -Wall -Wextra -std=c++11 -O2 -fwrapv -g

TARGET = hotpatcher

.PHONY: all clean

all: $(TARGET)

# -f: succeed even when nothing has been built yet.
clean:
	rm -f $(TARGET) *.o

$(TARGET): $(patsubst %.cpp,%.o,$(wildcard *.cpp))
	$(CXX) -o $@ $^

# Every object depends on every header, so a header change rebuilds everything.
%.o: %.cpp $(wildcard *.h)
	$(CXX) $(CXXFLAGS) -c -o $@ $<
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <stdexcept> | |
#include "reader.h" | |
using namespace std; | |
// Starts reading at the beginning of `src`. Only a reference is stored, so
// `src` has to stay alive for as long as the reader is used.
Reader::Reader(const string &src)
    : source(src) {
}
string Reader::read(size_t length) { | |
if (cursor + length > source.size()) throw runtime_error("EOF"); | |
size_t start = cursor; | |
cursor += length; | |
return source.substr(start, length); | |
} | |
uint8_t Reader::get() { | |
if (cursor >= source.size()) throw runtime_error("EOF"); | |
else return source[cursor++]; | |
} | |
// Reads a fixed-width 32-bit value stored least-significant byte first.
uint32_t Reader::readu32() {
    uint32_t value = 0;
    for (int shift = 0; shift < 32; shift += 8) {
        value |= static_cast<uint32_t>(get()) << shift;
    }
    return value;
}
// Reads a fixed-width 64-bit value stored least-significant byte first.
uint64_t Reader::readu64() {
    uint64_t value = 0;
    for (int shift = 0; shift < 64; shift += 8) {
        value |= static_cast<uint64_t>(get()) << shift;
    }
    return value;
}
uint32_t Reader::readvaru32() { | |
return readvaru64(); | |
} | |
uint64_t Reader::readvaru64() { | |
uint64_t res = 0; | |
int shift = 0; | |
while (true) { | |
uint8_t b = get(); | |
res |= (b & 0x7f) << shift; | |
shift += 7; | |
if ((b & 0x80) == 0) break; | |
} | |
return res; | |
} | |
int32_t Reader::readvari32() { | |
return readvari64(); | |
} | |
int64_t Reader::readvari64() { | |
uint64_t res = 0; | |
int shift = 0; | |
uint8_t b; | |
while (true) { | |
b = get(); | |
res |= (b & 0x7f) << shift; | |
shift += 7; | |
if ((b & 0x80) == 0) break; | |
} | |
if (shift < 64 && (b & 0x40) != 0) { | |
res |= (uint64_t)-1LL << shift; | |
} | |
return res; | |
} | |
string Reader::readbytearray() { | |
uint32_t num = readvaru32(); | |
// cerr << "readbytearray: num=" << num << endl; | |
return read(num); | |
} | |
void Reader::skip(size_t length) { | |
if (cursor < source.size()) cursor += length; | |
} | |
size_t Reader::togo() const { | |
if (cursor >= source.size()) return 0; | |
return source.size() - cursor; | |
} | |
size_t Reader::offset() const { | |
if (cursor >= source.size()) return source.size(); | |
return cursor; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <string> | |
#include <cstdint> | |
using namespace std; | |
// Sequential reader over an in-memory byte string.
//
// The reader stores a reference to the string it is given, not a copy: the
// string must outlive the reader and must not be a temporary.
class Reader {
    const string &source;
    size_t cursor = 0;

public:
    // explicit: a string should never silently convert into a Reader that
    // borrows it.
    explicit Reader(const string &source);

    // Next `length` bytes; throws runtime_error at end of input.
    string read(size_t length);
    // Next single byte; throws runtime_error at end of input.
    uint8_t get();

    // Fixed-width integers, least-significant byte first.
    uint32_t readu32();
    uint64_t readu64();

    // Variable-length integers, 7 payload bits per byte.
    uint32_t readvaru32();
    uint64_t readvaru64();
    int32_t readvari32();
    int64_t readvari64();

    // Variable-length u32 count followed by that many bytes.
    string readbytearray();

    // Moves the cursor forward by `length` bytes.
    void skip(size_t length);

    // Bytes remaining after the cursor.
    size_t togo() const;
    // Cursor position, capped at the input size.
    size_t offset() const;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iomanip> | |
#include <sstream> | |
#include <unordered_map> | |
#include <stdexcept> | |
#include <cassert> | |
#include "wasm.h" | |
#include "reader.h" | |
#include "writer.h" | |
#include "instrtables.h" | |
using namespace std; | |
ostream& operator<<(ostream &os, Wasm::Sect sect) { | |
switch (sect) { | |
case Wasm::Sect::Custom: return os << "(custom)"; | |
case Wasm::Sect::Type: return os << "type"; | |
case Wasm::Sect::Import: return os << "import"; | |
case Wasm::Sect::Function: return os << "function"; | |
case Wasm::Sect::Table: return os << "table"; | |
case Wasm::Sect::Linear_memory: return os << "linear_memory"; | |
case Wasm::Sect::Global: return os << "global"; | |
case Wasm::Sect::Export: return os << "export"; | |
case Wasm::Sect::Start: return os << "start"; | |
case Wasm::Sect::Element: return os << "element"; | |
case Wasm::Sect::Code: return os << "code"; | |
case Wasm::Sect::Data: return os << "data"; | |
default: return os << "???"; | |
} | |
} | |
ostream& operator<<(ostream &os, Wasm::Kind kind) { | |
switch (kind) { | |
case Wasm::Kind::Function: return os << "function"; | |
case Wasm::Kind::Table: return os << "table"; | |
case Wasm::Kind::Memory: return os << "memory"; | |
case Wasm::Kind::Global: return os << "global"; | |
default: return os << "???"; | |
} | |
} | |
ostream& operator<<(ostream &os, Wasm::Type type) { | |
switch (type) { | |
case Wasm::Type::I32: return os << "i32"; | |
case Wasm::Type::I64: return os << "i64"; | |
case Wasm::Type::F32: return os << "f32"; | |
case Wasm::Type::F64: return os << "f64"; | |
case Wasm::Type::Anyfunc: return os << "anyfunc"; | |
case Wasm::Type::Func: return os << "func"; | |
case Wasm::Type::Void: return os << "void"; | |
default: return os << "???"; | |
} | |
} | |
// Streams a limit as "[minimum,maximum]".
ostream& operator<<(ostream &os, const Wasm::Limit &limit) {
    os << "[" << limit.minimum;
    os << "," << limit.maximum << "]";
    return os;
}
// Decodes one instruction (opcode byte plus its immediates) from `reader`.
//
// The immediates to read are taken from InstrTables::byte_map. The first
// 32-bit integer immediate goes to arg32_1 and any later one to arg32_2;
// 64-bit immediates go to arg64; float constants are kept as raw bits in
// arg32_1 (f32) or arg64 (f64); vectors go to arg32vec.
//
// Throws runtime_error for an unknown opcode, an invalid block type, a
// non-zero reserved byte, or truncated input.
Wasm::Instruction readInstruction(Reader &reader) {
    using namespace InstrTables;

    const uint8_t opcode = reader.get();
    const auto it = byte_map.find(opcode);
    if (it == byte_map.end()) {
        throw runtime_error("Unrecognised opcode " + to_string((unsigned int)opcode));
    }

    // Value-initialise: most instructions fill only some operand fields, and
    // the struct is copied around afterwards, which must not touch
    // indeterminate values.
    Wasm::Instruction instr{};
    instr.op = it->second.op;

    bool have_arg32_1 = false;
    for (const Part p : it->second.parts) {
        switch (p) {
            case Part::VU32: {
                uint32_t &slot = have_arg32_1 ? instr.arg32_2 : instr.arg32_1;
                slot = reader.readvaru32();
                have_arg32_1 = true;
                break;
            }

            case Part::VU64:
                instr.arg64 = reader.readvaru64();
                break;

            case Part::VI32: {
                uint32_t &slot = have_arg32_1 ? instr.arg32_2 : instr.arg32_1;
                slot = reader.readvari32();
                have_arg32_1 = true;
                break;
            }

            case Part::VI64:
                instr.arg64 = reader.readvari64();
                break;

            case Part::VEC_VU32: {
                const uint32_t num = reader.readvaru32();
                // Each element occupies at least one byte, so never reserve
                // more than the remaining input could hold; a corrupt count
                // then fails with EOF instead of a huge allocation.
                const size_t remaining = reader.togo();
                instr.arg32vec.reserve(num < remaining ? num : remaining);
                for (uint32_t i = 0; i < num; i++) {
                    instr.arg32vec.push_back(reader.readvaru32());
                }
                break;
            }

            case Part::F32:
                instr.arg32_1 = reader.readu32();
                break;

            case Part::F64:
                instr.arg64 = reader.readu64();
                break;

            case Part::TYPE:
                instr.type = (Wasm::Type)reader.get();
                switch (instr.type) {
                    case Wasm::Type::I32: case Wasm::Type::I64:
                    case Wasm::Type::F32: case Wasm::Type::F64:
                    case Wasm::Type::Void:
                        break;
                    default:
                        throw runtime_error("Invalid type in instruction");
                }
                break;

            case Part::ZEROBYTE:
                if (reader.get() != 0) {
                    throw runtime_error(
                            "Spec specifies zero byte in opcode " +
                            to_string((unsigned int)opcode) +
                            ", but not zero in wasm");
                }
                break;
        }
    }

    return instr;
}
// Reads a limit: a flags byte, the minimum, and the maximum when bit 0 of
// the flags is set. Without an encoded maximum, maximum is set to minimum.
Wasm::Limit readLimit(Reader &reader) {
    const bool has_maximum = (reader.get() & 0x1) != 0;

    Wasm::Limit limit;
    limit.minimum = reader.readvaru32();
    limit.maximum = has_maximum ? reader.readvaru32() : limit.minimum;
    return limit;
}
Wasm::Wasm(const string &source) { | |
Reader reader(source); | |
if (reader.readu32() != 0x6d736100) { | |
throw runtime_error("File magic cookie not found"); | |
} | |
if (reader.readu32() != 0x1) { | |
throw runtime_error("File version not 0x1"); | |
} | |
while (reader.togo() > 0) { | |
// cout << "offset: " << showbase << hex << reader.offset() << "; "; | |
Sect opcode = (Sect)reader.get(); | |
string sect = reader.readbytearray(); | |
// size_t offset = reader.offset() - sect.size(); | |
// cout << "Section: opcode=" << opcode << ", length=" << sect.size() << endl; | |
origSections.emplace_back(opcode, sect); | |
switch (opcode) { | |
case Sect::Type: { | |
Reader reader(sect); | |
uint32_t ntypes = reader.readvaru32(); | |
// cout << "num types = " << ntypes << endl; | |
typeSection.types.reserve(ntypes); | |
for (uint32_t i = 0; i < ntypes; i++) { | |
TypeSection::Info info; | |
Type sigtype = (Type)reader.get(); | |
if (sigtype != Type::Func) { | |
throw runtime_error("Only func types supported in type section"); | |
} | |
// cout << "Type: Function ("; | |
uint32_t nparams = reader.readvaru32(); | |
info.params.reserve(nparams); | |
for (uint32_t j = 0; j < nparams; j++) { | |
info.params.push_back((Type)reader.get()); | |
// cout << info.params.back() << ","; | |
} | |
// cout << ") -> "; | |
uint32_t nreturns = reader.readvaru32(); | |
info.returns.reserve(nreturns); | |
for (uint32_t j = 0; j < nreturns; j++) { | |
info.returns.push_back((Type)reader.get()); | |
// cout << info.returns.back() << " "; | |
} | |
// cout << endl; | |
typeSection.types.push_back(move(info)); | |
} | |
break; | |
} | |
case Sect::Import: { | |
Reader reader(sect); | |
uint32_t nimports = reader.readvaru32(); | |
// cout << "num imports = " << nimports << endl; | |
importSection.imports.reserve(nimports); | |
for (uint32_t i = 0; i < nimports; i++) { | |
ImportSection::Info info; | |
string mod_name = reader.readbytearray(); | |
string export_name = reader.readbytearray(); | |
Kind kind = (Kind)reader.get(); | |
// cout << "Import kind=" << kind << endl; | |
// cout << " mod_name=<" << mod_name << "> export_name=<" << export_name << ">" << endl; | |
info.kind = kind; | |
info.mod_name = mod_name; | |
info.export_name = export_name; | |
switch (kind) { | |
case Kind::Function: { | |
uint32_t sigindex = reader.readvaru32(); | |
// cout << " sigindex=" << sigindex << endl; | |
info.index = sigindex; | |
break; | |
} | |
case Kind::Table: { | |
Type elt_type = (Type)reader.get(); | |
assert(elt_type == Type::Anyfunc); | |
Limit limit = readLimit(reader); | |
// cout << " type=anyfunc limit=" << limit << endl; | |
info.type = elt_type; | |
info.limit = limit; | |
break; | |
} | |
case Kind::Memory: { | |
Limit limit = readLimit(reader); | |
// cout << " limit=" << limit << endl; | |
info.limit = limit; | |
break; | |
} | |
case Kind::Global: { | |
Type type = (Type)reader.get(); | |
bool mut = reader.get(); | |
// cout << " type=" << type << " mutable=" << mut << endl; | |
info.type = type; | |
info.mut = mut; | |
break; | |
} | |
} | |
importSection.imports.push_back(move(info)); | |
} | |
break; | |
} | |
case Sect::Export: { | |
Reader reader(sect); | |
uint32_t nexports = reader.readvaru32(); | |
// cout << "num exports = " << nexports << endl; | |
exportSection.exports.reserve(nexports); | |
for (uint32_t i = 0; i < nexports; i++) { | |
string name = reader.readbytearray(); | |
Kind kind = (Kind)reader.get(); | |
uint32_t index = reader.readvaru32(); | |
// cout << "Export <" << name << "> kind=" << kind << " index=" << index << endl; | |
ExportSection::Info info; | |
info.name = move(name); | |
info.kind = kind; | |
info.index = index; | |
exportSection.exports.push_back(move(info)); | |
} | |
break; | |
} | |
case Sect::Code: { | |
Reader reader(sect); | |
uint32_t nfuncbodies = reader.readvaru32(); | |
// cout << "num function bodies = " << nfuncbodies << endl; | |
codeSection.bodies.reserve(nfuncbodies); | |
for (uint32_t funci = 0; funci < nfuncbodies; funci++) { | |
// cout << "Function body " << funci << endl; | |
uint32_t bodysize = reader.readvaru32(); | |
CodeSection::Body body; | |
string contents = reader.read(bodysize); | |
Reader reader(contents); | |
uint32_t nlocvars = reader.readvaru32(); | |
// cout << " Local variables:"; | |
body.locVars.reserve(nlocvars); | |
for (uint32_t i = 0; i < nlocvars; i++) { | |
uint32_t count = reader.readvaru32(); | |
Type type = (Type)reader.get(); | |
// cout << " " << count << "*" << type; | |
body.locVars.emplace_back(count, type); | |
} | |
// cout << endl; | |
// cout << " Instructions:" << endl; | |
while (reader.togo() > 0) { | |
Instruction instr = readInstruction(reader); | |
// cout << "op = " << instr.op << endl; | |
body.instrs.push_back(move(instr)); | |
} | |
codeSection.bodies.push_back(move(body)); | |
} | |
break; | |
} | |
case Sect::Data: { | |
Reader reader(sect); | |
uint32_t ndatas = reader.readvaru32(); | |
// cout << "num data blocks = " << ndatas << endl; | |
dataSection.segments.reserve(ndatas); | |
for (uint32_t i = 0; i < ndatas; i++) { | |
uint32_t index = reader.readvaru32(); | |
Instruction offsetInstr = readInstruction(reader); | |
Instruction endInstr = readInstruction(reader); | |
if (endInstr.op != InstrTables::Op::End) { | |
throw runtime_error("Data offset initialiser is not a single instruction"); | |
} | |
string data = reader.readbytearray(); | |
uint32_t offset; | |
switch (offsetInstr.op) { | |
case InstrTables::Op::I32_const: | |
offset = offsetInstr.arg32_1; | |
break; | |
default: | |
throw runtime_error("Data offset initialiser is not i32.const; NOT IMPLEMENTED"); | |
} | |
// cout << "Data block: index=" << index << " offset=" << offset << " length=" << data.size() << endl; | |
DataSection::Segment segment; | |
segment.index = index; | |
segment.offset = offset; | |
segment.data = move(data); | |
dataSection.segments.push_back(move(segment)); | |
} | |
} | |
default: | |
// cout << "Skipped section with opcode " << opcode << endl; | |
break; | |
} | |
} | |
} | |
// Writes a limit: flag 1, minimum and maximum when the two differ; otherwise
// flag 0 and the minimum only.
void writeLimit(Writer &writer, const Wasm::Limit &limit) {
    const bool has_maximum = limit.maximum != limit.minimum;
    writer.writevaru32(has_maximum ? 0x1 : 0x0);
    writer.writevaru32(limit.minimum);
    if (has_maximum) {
        writer.writevaru32(limit.maximum);
    }
}
// Encodes one instruction: its opcode byte followed by the immediates that
// InstrTables::op_map lists for it. The first 32-bit integer immediate is
// taken from arg32_1 and any later one from arg32_2.
// Throws runtime_error when instr.op has no entry in op_map.
void writeInstruction(Writer &writer, const Wasm::Instruction &instr) {
    using namespace InstrTables;

    const auto entry = op_map.find(instr.op);
    if (entry == op_map.end()) {
        throw runtime_error("Unknown opcode in in-memory wasm");
    }
    const InstrInfo &info = entry->second;

    writer.put(info.byte);

    // Field that supplies the next 32-bit integer immediate.
    const uint32_t *next32 = &instr.arg32_1;

    for (const Part part : info.parts) {
        switch (part) {
            case Part::VU32:
                writer.writevaru32(*next32);
                next32 = &instr.arg32_2;
                break;

            case Part::VI32:
                writer.writevari32(*next32);
                next32 = &instr.arg32_2;
                break;

            case Part::VU64:
                writer.writevaru64(instr.arg64);
                break;

            case Part::VI64:
                writer.writevari64(instr.arg64);
                break;

            case Part::VEC_VU32:
                writer.writevaru32(instr.arg32vec.size());
                for (const uint32_t element : instr.arg32vec) {
                    writer.writevaru32(element);
                }
                break;

            case Part::F32:
                writer.writeu32(instr.arg32_1);
                break;

            case Part::F64:
                writer.writeu64(instr.arg64);
                break;

            case Part::TYPE:
                writer.put((uint8_t)instr.type);
                break;

            case Part::ZEROBYTE:
                writer.put(0x0);
                break;
        }
    }
}
// Makes sure origSections has an entry for `sect` when a later section exists:
// scanning from the front, an empty entry is inserted before the first
// section whose id is greater than `sect`. Nothing happens if `sect` is
// found first, or if no section with a greater id exists.
void Wasm::lateInsertOrigSection(Sect sect) {
    auto pos = origSections.begin();
    while (pos != origSections.end() && pos->first != sect) {
        if (pos->first > sect) {
            origSections.emplace(pos, sect, string());
            return;
        }
        ++pos;
    }
}
void Wasm::write(ostream &os) { | |
Writer writer(os); | |
writer.writeu32(0x6d736100); | |
writer.writeu32(0x1); | |
lateInsertOrigSection(Sect::Type); | |
lateInsertOrigSection(Sect::Import); | |
lateInsertOrigSection(Sect::Export); | |
lateInsertOrigSection(Sect::Code); | |
lateInsertOrigSection(Sect::Data); | |
for (const pair<Sect, string> &origsect : origSections) { | |
// cout << "Writing section " << origsect.first << endl; | |
writer.put((uint8_t)origsect.first); | |
switch (origsect.first) { | |
case Sect::Type: { | |
ostringstream ss; | |
{ | |
Writer writer(ss); | |
writer.writevaru32(typeSection.types.size()); | |
for (const TypeSection::Info &info : typeSection.types) { | |
writer.put((uint8_t)Type::Func); | |
writer.writevaru32(info.params.size()); | |
for (Type ty : info.params) writer.put((uint8_t)ty); | |
writer.writevaru32(info.returns.size()); | |
for (Type ty : info.returns) writer.put((uint8_t)ty); | |
} | |
} | |
writer.set_longform(true); | |
writer.writebytearray(ss.str()); | |
writer.set_longform(false); | |
break; | |
} | |
case Sect::Import: { | |
ostringstream ss; | |
{ | |
Writer writer(ss); | |
writer.writevaru32(importSection.imports.size()); | |
for (const ImportSection::Info &info : importSection.imports) { | |
writer.writebytearray(info.mod_name); | |
writer.writebytearray(info.export_name); | |
writer.put((uint8_t)info.kind); | |
switch (info.kind) { | |
case Kind::Function: | |
writer.writevaru32(info.index); | |
break; | |
case Kind::Table: | |
writer.put((uint8_t)info.type); | |
writeLimit(writer, info.limit); | |
break; | |
case Kind::Memory: | |
writeLimit(writer, info.limit); | |
break; | |
case Kind::Global: | |
writer.put((uint8_t)info.type); | |
writer.put(info.mut); | |
break; | |
} | |
} | |
} | |
writer.set_longform(true); | |
writer.writebytearray(ss.str()); | |
writer.set_longform(false); | |
break; | |
} | |
case Sect::Export: { | |
ostringstream ss; | |
{ | |
Writer writer(ss); | |
writer.writevaru32(exportSection.exports.size()); | |
for (const ExportSection::Info &info : exportSection.exports) { | |
writer.writebytearray(info.name); | |
writer.put((uint8_t)info.kind); | |
writer.writevaru32(info.index); | |
} | |
} | |
writer.set_longform(true); | |
writer.writebytearray(ss.str()); | |
writer.set_longform(false); | |
break; | |
} | |
case Sect::Code: { | |
ostringstream ss; | |
{ | |
Writer writer(ss); | |
writer.writevaru32(codeSection.bodies.size()); | |
for (const CodeSection::Body &body : codeSection.bodies) { | |
ostringstream ss2; | |
{ | |
Writer writer(ss2); | |
writer.writevaru32(body.locVars.size()); | |
for (const pair<uint32_t, Type> &locvar : body.locVars) { | |
writer.writevaru32(locvar.first); | |
writer.put((uint8_t)locvar.second); | |
} | |
for (const Instruction &instr : body.instrs) { | |
writeInstruction(writer, instr); | |
} | |
} | |
writer.writebytearray(ss2.str()); | |
} | |
} | |
writer.set_longform(true); | |
writer.writebytearray(ss.str()); | |
writer.set_longform(false); | |
break; | |
} | |
case Sect::Data: { | |
ostringstream ss; | |
{ | |
Writer writer(ss); | |
writer.writevaru32(dataSection.segments.size()); | |
for (const DataSection::Segment &segment : dataSection.segments) { | |
writer.writevaru32(segment.index); | |
Instruction instr; | |
instr.op = InstrTables::Op::I32_const; | |
instr.arg32_1 = segment.offset; | |
writeInstruction(writer, instr); | |
instr.op = InstrTables::Op::End; | |
writeInstruction(writer, instr); | |
writer.writebytearray(segment.data); | |
} | |
} | |
writer.set_longform(true); | |
writer.writebytearray(ss.str()); | |
writer.set_longform(false); | |
break; | |
} | |
default: | |
writer.set_longform(true); | |
writer.writebytearray(origsect.second); | |
writer.set_longform(false); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <iostream> | |
#include <string> | |
#include <vector> | |
#include <utility> | |
#include "instrtables.h" | |
using namespace std; | |
class Wasm { | |
public: | |
enum class Sect { | |
Custom = 0, | |
Type = 1, | |
Import = 2, | |
Function = 3, | |
Table = 4, | |
Linear_memory = 5, | |
Global = 6, | |
Export = 7, | |
Start = 8, | |
Element = 9, | |
Code = 10, | |
Data = 11, | |
}; | |
enum class Kind { | |
Function = 0, | |
Table = 1, | |
Memory = 2, | |
Global = 3, | |
}; | |
enum class Type { | |
I32 = 0x7F, | |
I64 = 0x7E, | |
F32 = 0x7D, | |
F64 = 0x7C, | |
Anyfunc = 0x70, | |
Func = 0x60, | |
Void = 0x40, | |
}; | |
struct Instruction { | |
InstrTables::Op op; | |
uint32_t arg32_1, arg32_2; | |
uint64_t arg64; | |
vector<uint32_t> arg32vec; | |
Type type; | |
// Floating-point values are stored in the integer fields, to prevent | |
// interpretation and ensure preservation of all bits. f32's go in | |
// arg32_1, f64's go in arg64. | |
}; | |
struct Limit { | |
uint32_t minimum, maximum; | |
}; | |
struct TypeSection { | |
struct Info { | |
vector<Type> params, returns; | |
}; | |
vector<Info> types; | |
}; | |
struct ImportSection { | |
struct Info { | |
Kind kind; | |
string mod_name, export_name; | |
uint32_t index; | |
bool mut; | |
Type type; | |
Limit limit; | |
}; | |
vector<Info> imports; | |
}; | |
struct ExportSection { | |
struct Info { | |
string name; | |
Kind kind; | |
uint32_t index; | |
}; | |
vector<Info> exports; | |
}; | |
struct CodeSection { | |
struct Body { | |
vector<pair<uint32_t, Type>> locVars; | |
vector<Instruction> instrs; | |
}; | |
vector<Body> bodies; | |
}; | |
struct DataSection { | |
struct Segment { | |
uint32_t index, offset; | |
string data; | |
}; | |
vector<Segment> segments; | |
}; | |
vector<pair<Sect, string>> origSections; | |
TypeSection typeSection; | |
ImportSection importSection; | |
CodeSection codeSection; | |
DataSection dataSection; | |
ExportSection exportSection; | |
Wasm(const string &source); | |
void write(ostream &os); | |
private: | |
void lateInsertOrigSection(Sect sect); | |
}; | |
ostream& operator<<(ostream &os, Wasm::Sect sect); | |
ostream& operator<<(ostream &os, Wasm::Kind kind); | |
ostream& operator<<(ostream &os, Wasm::Type type); | |
ostream& operator<<(ostream &os, const Wasm::Limit &limit); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "writer.h" | |
using namespace std; | |
// Binds the writer to an output stream. The stream is stored by reference, so
// it has to outlive this Writer.
Writer::Writer(ostream &os) : os(os) {
}
// Emits the bytes of `s` verbatim, without any length prefix.
//
// Uses unformatted output: operator<< on a string is a formatted inserter and
// would pad the payload if a field width happened to be set on the stream,
// which is never wanted for binary data. This also matches the put()-based
// writers in this class, which are unformatted as well.
void Writer::write(const std::string &s) {
    os.write(s.data(), static_cast<std::streamsize>(s.size()));
}
// Emits a single raw byte.
void Writer::put(uint8_t n) {
    const char raw = static_cast<char>(n);
    os.put(raw);
}
// Emits `n` as four fixed-width bytes, least-significant byte first.
void Writer::writeu32(uint32_t n) {
    for (int shift = 0; shift < 32; shift += 8) {
        os.put(static_cast<char>((n >> shift) & 0xff));
    }
}
// Emits `n` as eight fixed-width bytes, least-significant byte first.
void Writer::writeu64(uint64_t n) {
    for (int shift = 0; shift < 64; shift += 8) {
        os.put(static_cast<char>((n >> shift) & 0xff));
    }
}
void Writer::writevaru32(uint32_t n) { | |
if (longform) { | |
os.put(0x80 | (n & 0x7f)); n >>= 7; | |
os.put(0x80 | (n & 0x7f)); n >>= 7; | |
os.put(0x80 | (n & 0x7f)); n >>= 7; | |
os.put(0x80 | (n & 0x7f)); n >>= 7; | |
os.put(0x00); | |
} else { | |
writevaru64(n); | |
} | |
} | |
void Writer::writevaru64(uint64_t n) { | |
do os.put((n & 0x7f) | (n >> 7 ? 0x80 : 0x00)); | |
while (n >>= 7); | |
} | |
void Writer::writevari32(int32_t n) { | |
if (longform) { | |
os.put(0x80 | (n & 0x7f)); n >>= 7; | |
os.put(0x80 | (n & 0x7f)); n >>= 7; | |
os.put(0x80 | (n & 0x7f)); n >>= 7; | |
os.put(0x80 | (n & 0x7f)); n >>= 7; | |
os.put(0x00); | |
} else { | |
writevari64(n); | |
} | |
} | |
void Writer::writevari64(int64_t n) { | |
// Adapted from https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer | |
bool more = true; | |
while (more) { | |
uint8_t byte = n & 0x7f; | |
n >>= 7; | |
if ((n == 0 && (byte & 0x40) == 0) || (n == -1LL && (byte & 0x40) != 0)) { | |
more = false; | |
} else { | |
byte |= 0x80; | |
} | |
os.put(byte); | |
} | |
} | |
void Writer::writebytearray(const string &s) { | |
writevaru32(s.size()); | |
os << s; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <iostream> | |
using namespace std; | |
// Small helper for emitting binary data to an output stream: raw bytes,
// fixed-width little-endian integers, LEB128 variable-length integers and
// length-prefixed byte arrays.
class Writer {
public:
    // The stream is held by reference and must outlive the Writer.
    Writer(std::ostream &os);

    // Raw output.
    void write(const std::string &s);
    void put(uint8_t n);

    // Fixed-width integers, least-significant byte first.
    void writeu32(uint32_t n);
    void writeu64(uint64_t n);

    // Variable-length (LEB128) integers, unsigned and signed.
    void writevaru32(uint32_t n);
    void writevaru64(uint64_t n);
    void writevari32(int32_t n);
    void writevari64(int64_t n);

    // Size of `s` as a varuint32, followed by the bytes of `s`.
    void writebytearray(const std::string &s);

    // While long form is on, the 32-bit variable-length writers emit a fixed
    // five-byte encoding instead of the shortest one.
    void set_longform(bool enabled) { longform = enabled; }

private:
    std::ostream &os;
    bool longform = false;
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment