-
-
Save mame/53fb5cf7b448b3249270ea771ef89655 to your computer and use it in GitHub Desktop.
multiquine generator using ELVM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# multiquine generator using ELVM | |
# How to use: | |
# | |
# 1. put this file in the root of ELVM. | |
# https://github.com/shinh/elvm | |
# | |
# 2. run the following commands. | |
# | |
# $ make out/8cc out/elc out/bfopt | |
# | |
# $ ruby mq-gen.rb | |
# $ gcc -o mq mq.c | |
# | |
# $ echo c | ./mq > mq2.c | |
# $ diff -s mq.c mq2.c | |
# Files mq.c and mq2.c are identical | |
# | |
# $ echo rb | ./mq > mq.rb | |
# $ echo rb | ruby mq.rb > mq2.rb | |
# $ diff -s mq.rb mq2.rb | |
# Files mq.rb and mq2.rb are identical | |
# | |
# $ echo c | ruby mq.rb > mq2.c | |
# $ diff -s mq.c mq2.c | |
# Files mq.c and mq2.c are identical | |
# | |
# (not confirmed) | |
# $ echo bf | ./mq > mq.bf | |
# $ ./out/bfopt -c mq.bf mq.bf.c
# $ tcc -o mq.bf.exe mq.bf.c | |
# $ echo bf | ./mq.bf.exe > mq2.bf | |
# $ diff -s mq.bf mq2.bf | |
# Internal: | |
# | |
# * dumper.c: a translator from EIR text to original binary form | |
# | |
# * mq-gen.c: the source code of multiquine (based on elc.c) | |
# | |
# int src[] = { ... }; // prologue and epilogue of EIR of itself in binary form | |
# | |
# int main() { | |
# int prologue[] = src[0...i]; | |
# int epilogue[] = src[i..]; | |
# int quine[] = prologue + escape(src) + epilogue; // EIR of itself in binary form | |
# | |
# Module *m = load_module(quine); // translate binary form to actual EIR module
# elc(m); // emit it into favorite language | |
# } | |
# | |
# Each element in `src` is 0..255. So, it is represented in EIR binary form as: | |
# | |
# 0, 0, src[0], 0, 0, src[1], ..., 0, 0, src[-1] | |
# | |
# Note that EIR binary form uses big endian. | |
# `escape` function translates a normal char array to this form.
# The DATA section holds two C sources separated by a "=====" line:
# `dumper.c` first, then the head of `mq-gen.c`.
DumperSrc, MQGenSrc = DATA.read.split("=====")

# `mq-gen.c` is a single translation unit: append the ELVM backends and the
# IR loader it relies on.
%w(
  target/util.c target/c.c target/rb.c target/bf.c ir/ir.c ir/table.c
).each {|path| MQGenSrc << File.read(path) }

# build `dumper.c`
File.write("dumper.c", DumperSrc)
system("gcc -o dumper -I . dumper.c ir/ir.c ir/table.c") || raise
# Generate and build `mq-gen.c` with the binary data `a` assigned to `src`,
# and return the resulting EIR in binary form.
#
# n:: value substituted for the PROLOGUE_LENGTH placeholder
# a:: array of integers substituted for the EIR_BINARY_FORM placeholder
#     (defaults to `n` copies of 0x40)
#
# Returns the output of `./dumper mq.eir` as an array of integers.
# Raises if 8cc fails, so that a stale `mq.eir` is never dumped.
#
# Note: `sub` replaces only the first occurrence, so neither placeholder
# name may appear earlier in the C source (e.g. inside a comment).
def gen(n, a = [0x40] * n)
  # embed two placeholders
  src = MQGenSrc.sub("EIR_BINARY_FORM", a.join(","))
  src = src.sub("PROLOGUE_LENGTH") { n }

  # generate, build, dump, and read
  File.write("mq-gen.c", src)
  system("out/8cc -S -Ilibc -I. mq-gen.c -o mq.eir") || raise
  `./dumper mq.eir`.split.map {|s| s.to_i }
end
# estimate how long `src` should be
n = 300000
n = gen(n).size - n * 3
$stderr.puts "code size: #{ n * 4 }"

# find the position `i` of `src` in EIR binary form
# (this assumes that 8cc naively embeds the array in escaped form.)
a = gen(n)
i = (a.size - 1 - n * 3).downto(0).find do |i_|
  # cheap check on the last element first, then the full comparison
  a[i_ + n * 3 - 1] == 0x40 && a[i_, n * 3] == [0, 0, 0x40] * n
end
# `find` yields nil when the assumption above does not hold; stop here with
# a clear message instead of failing later inside `gen`.
raise "could not locate `src` in the EIR binary form" unless i

# regenerate the source with embedding `i` as `PROLOGUE_LENGTH`
a = gen(i, [0x40] * n)

# remove `src` from EIR binary form (i.e., extract its prologue and epilogue)
a = a[0, i] + a[i + n * 3 .. -1]
raise if a.size != n

# regenerate the source with embedding `a` as `EIR_BINARY_FORM`
m = gen(i, a).size - n * 3
raise if m != n

# run the final source code
system("out/elc -c mq.eir > mq.c") || raise
system("gcc -o mq mq.c") || raise
system("echo c | ./mq > mq2.c") || raise

# quine check
system("diff -s mq.c mq2.c")
__END__ | |
/* dumper.c */ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <ir/ir.h> | |
#include <target/util.h> | |
/* Write one element of the binary form as a decimal number on its own line. */
void dump_char(int n) {
  fprintf(stdout, "%d\n", n);
}
/*
 * Write `n` as three base-256 digits, most significant digit first
 * (the big-endian layout described in the header of the script).
 */
void dump_int(int n) {
  int high = n / 256 / 256;
  int middle = n / 256 % 256;
  int low = n % 256;

  dump_char(high);
  dump_char(middle);
  dump_char(low);
}
/*
 * Write one operand.  A register is written as its register number alone;
 * anything else is written as the marker 255 followed by the immediate as a
 * three-digit integer.
 */
void dump_value(Value* v) {
  if (v->type != REG) {
    dump_char(255);
    dump_int(v->imm);
    return;
  }
  dump_char(v->reg);
}
/*
 * Write a whole module in binary form:
 *
 *   <number of instructions>
 *   for each instruction: op, dst, src, jmp, pc
 *   <number of data cells>
 *   for each data cell: its value
 *
 * Counts, pc and data values are written with dump_int, opcodes with
 * dump_char and operands with dump_value.
 */
void dump_module(Module* m) {
  Inst* inst;
  Data* data;
  int count;

  /* text section: length first, then every instruction */
  count = 0;
  for (inst = m->text; inst; inst = inst->next)
    count++;
  dump_int(count);

  for (inst = m->text; inst; inst = inst->next) {
    dump_char(inst->op);
    dump_value(&inst->dst);
    dump_value(&inst->src);
    dump_value(&inst->jmp);
    dump_int(inst->pc);
    /* side effect kept as is: every visited instruction gets lineno 1 */
    inst->lineno = 1;
  }

  /* data section: length first, then every cell */
  count = 0;
  for (data = m->data; data; data = data->next)
    count++;
  dump_int(count);

  for (data = m->data; data; data = data->next)
    dump_int(data->v);
}
int main(int argc, char* argv[]) { | |
#if defined(NOFILE) || defined(__eir__) | |
Module* module = load_eir(stdin); | |
#else | |
Module* module = load_eir_from_file(argv[1]); | |
#endif | |
dump_module(module); | |
} | |
===== | |
/* mq-gen.c */ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <ir/ir.h> | |
#include <target/util.h> | |
/*
 * Prologue and epilogue of this program's own EIR in binary form.
 * The initializer is a placeholder filled in by the generator script.
 */
int src[] = { EIR_BINARY_FORM };
/* Read cursor used by the load_* functions; main points it at the rebuilt binary form. */
int *quine;
int load_char() { | |
return *quine++; | |
} | |
/*
 * Read three consecutive elements and combine them as base-256 digits,
 * most significant first (the inverse of the dumper's three-digit output).
 */
int load_int() {
  int n = 0;
  int k;
  for (k = 0; k < 3; k++)
    n = n * 256 + load_char();
  return n;
}
/*
 * Read one operand into `v`.
 *
 * The first element is a tag: a value in 0..SP is a register number;
 * anything else means an immediate follows as a three-digit integer.
 *
 * Always returns 0.  The function is declared int, so it must not fall off
 * the end without a value; existing callers ignore the result.
 */
int load_value(Value* v) {
  int n = load_char();
  if (0 <= n && n <= SP) {
    v->type = REG;
    v->reg = n;
  }
  else {
    v->type = IMM;
    v->imm = load_int();
  }
  return 0;
}
Module* load_module() { | |
Module* m = malloc(sizeof(Module)); | |
int i, n = load_int(); | |
Inst* inst = malloc(sizeof(Inst) * n); | |
m->text = inst; | |
for (i = 0; i < n; i++) { | |
Inst* inst0 = &inst[i]; | |
inst0->op = load_char(); | |
load_value(&inst0->dst); | |
load_value(&inst0->src); | |
load_value(&inst0->jmp); | |
inst0->pc = load_int(); | |
inst0->next = &inst[i + 1]; | |
} | |
inst[i - 1].next = 0; | |
n = load_int(); | |
Data* data = malloc(sizeof(Data) * n); | |
m->data = data; | |
for (i = 0; i < n; i++) { | |
Data* data0 = &data[i]; | |
data0->v = load_int(); | |
data0->next = &data[i + 1]; | |
} | |
data[i - 1].next = 0; | |
return m; | |
} | |
/* | |
void target_bef(Module* module); | |
*/ | |
void target_bf(Module* module); | |
void target_c(Module* module); | |
/* | |
void target_cl(Module* module); | |
void target_cpp(Module* module); | |
void target_cr(Module* module); | |
void target_el(Module* module); | |
void target_forth(Module* module); | |
void target_go(Module* module); | |
void target_i(Module* module); | |
void target_java(Module* module); | |
void target_js(Module* module); | |
void target_php(Module* module); | |
void target_piet(Module* module); | |
void target_pietasm(Module* module); | |
void target_pl(Module* module); | |
void target_py(Module* module); | |
*/ | |
void target_rb(Module* module); | |
/* | |
void target_sed(Module* module); | |
void target_sh(Module* module); | |
void target_sqlite3(Module* module); | |
void target_swift(Module* module); | |
void target_tex(Module* module); | |
void target_tf(Module* module); | |
void target_tm(Module* module); | |
void target_unl(Module* module); | |
void target_vim(Module* module); | |
void target_ws(Module* module); | |
void target_x86(Module* module); | |
*/ | |
typedef void (*target_func_t)(Module*); | |
static target_func_t get_target_func(const char* ext) { | |
/* | |
if (!strcmp(ext, "bef")) return target_bef; | |
*/ | |
if (!strcmp(ext, "bf")) { | |
split_basic_block_by_mem(); | |
return target_bf; | |
} | |
if (!strcmp(ext, "c")) return target_c; | |
/* | |
if (!strcmp(ext, "cl")) return target_cl; | |
if (!strcmp(ext, "cpp")) return target_cpp; | |
if (!strcmp(ext, "cr")) return target_cr; | |
if (!strcmp(ext, "el")) return target_el; | |
if (!strcmp(ext, "forth")) return target_forth; | |
if (!strcmp(ext, "go")) return target_go; | |
if (!strcmp(ext, "i")) return target_i; | |
if (!strcmp(ext, "java")) return target_java; | |
if (!strcmp(ext, "js")) return target_js; | |
if (!strcmp(ext, "php")) return target_php; | |
if (!strcmp(ext, "piet")) return target_piet; | |
if (!strcmp(ext, "pietasm")) return target_pietasm; | |
if (!strcmp(ext, "pl")) return target_pl; | |
if (!strcmp(ext, "py")) return target_py; | |
*/ | |
if (!strcmp(ext, "rb")) return target_rb; | |
/* | |
if (!strcmp(ext, "sed")) return target_sed; | |
if (!strcmp(ext, "sh")) return target_sh; | |
if (!strcmp(ext, "sqlite3")) return target_sqlite3; | |
if (!strcmp(ext, "swift")) return target_swift; | |
if (!strcmp(ext, "tex")) return target_tex; | |
if (!strcmp(ext, "tf")) return target_tf; | |
if (!strcmp(ext, "tm")) return target_tm; | |
if (!strcmp(ext, "unl")) return target_unl; | |
if (!strcmp(ext, "vim")) return target_vim; | |
if (!strcmp(ext, "ws")) return target_ws; | |
if (!strcmp(ext, "x86")) return target_x86; | |
*/ | |
error("unknown flag: %s", ext); | |
} | |
int main(int argc, char* argv[]) { | |
char buf[32]; | |
for (int i = 0;; i++) { | |
int c = getchar(); | |
if (c == '\n' || c == EOF) { | |
buf[i] = 0; | |
break; | |
} | |
buf[i] = c; | |
} | |
target_func_t target_func = get_target_func(buf); | |
int i, j; | |
int *p = quine = malloc(sizeof(int) * 1999999); | |
for (i = 0; i < sizeof(src); *p++ = src[i++]) { | |
if (i == PROLOGUE_LENGTH) { | |
for (j = 0; j < sizeof(src); *p++ = src[j++]) *p++ = 0, *p++ = 0; | |
} | |
} | |
target_func(load_module()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment