Skip to content

Instantly share code, notes, and snippets.

@mame

mame/mq-gen.rb Secret

Created December 22, 2016 15:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mame/53fb5cf7b448b3249270ea771ef89655 to your computer and use it in GitHub Desktop.
Save mame/53fb5cf7b448b3249270ea771ef89655 to your computer and use it in GitHub Desktop.
multiquine generator using ELVM
# multiquine generator using ELVM
# How to use:
#
# 1. put this file in the root of ELVM.
# https://github.com/shinh/elvm
#
# 2. run the following commands.
#
# $ make out/8cc out/elc out/bfopt
#
# $ ruby mq-gen.rb
# $ gcc -o mq mq.c
#
# $ echo c | ./mq > mq2.c
# $ diff -s mq.c mq2.c
# Files mq.c and mq2.c are identical
#
# $ echo rb | ./mq > mq.rb
# $ echo rb | ruby mq.rb > mq2.rb
# $ diff -s mq.rb mq2.rb
# Files mq.rb and mq2.rb are identical
#
# $ echo c | ruby mq.rb > mq2.c
# $ diff -s mq.c mq2.c
# Files mq.c and mq2.c are identical
#
# (not confirmed)
# $ echo bf | ./mq > mq.bf
# $ ./out/bfopt -c mq.bf mq.bf.c
# $ tcc -o mq.bf.exe mq.bf.c
# $ echo bf | ./mq.bf.exe > mq2.bf
# $ diff -s mq.bf mq2.bf
# Internal:
#
# * dumper.c: a translator from EIR text to original binary form
#
# * mq-gen.c: the source code of multiquine (based on elc.c)
#
# int src[] = { ... }; // prologue and epilogue of EIR of itself in binary form
#
# int main() {
# int prologue[] = src[0...i];
# int epilogue[] = src[i..];
# int quine[] = prologue + escape(src) + epilogue; // EIR of itself in binary form
#
# Module *m = load_module(quine); // translate binary form to actual EIR module
# elc(m); // emit it into favorite language
# }
#
# Each element in `src` is 0..255. So, it is represented in EIR binary form as:
#
# 0, 0, src[0], 0, 0, src[1], ..., 0, 0, src[-1]
#
# Note that EIR binary form uses big endian.
# `escape` function translates a normal char array to this form.
# The DATA section of this script holds two C sources separated by a line of
# five equal signs: dumper.c first, then the head of mq-gen.c.
DumperSrc, MQGenSrc = DATA.read.split("=====")

# Append the ELVM sources that mq-gen.c relies on, in this order, so that the
# generated mq-gen.c can be compiled as a single file.
%w(
  target/util.c target/c.c target/rb.c target/bf.c ir/ir.c ir/table.c
).each {|path| MQGenSrc << File.read(path) }

# write out and build `dumper.c`
File.write("dumper.c", DumperSrc)
raise unless system("gcc -o dumper -I . dumper.c ir/ir.c ir/table.c")
# Generate and build `mq-gen.c` with the binary data `a` assigned as `src`,
# and return the resulting EIR in binary form: an array of integers, one per
# whitespace-separated token printed by `dumper`.
#
# n: value substituted for the prologue-length placeholder
# a: values substituted for the initializer of `src`
#    (defaults to n copies of 0x40)
def gen(n, a = [0x40] * n)
  # embed two placeholders
  src = MQGenSrc.sub("EIR_BINARY_FORM", a.join(","))
  src = src.sub("PROLOGUE_LENGTH") { n }
  # generate, build, dump, and read
  File.write("mq-gen.c", src)
  # Stop on a failed compile, like every other build step in this script;
  # otherwise an mq.eir left over from an earlier run could be dumped.
  system("out/8cc -S -Ilibc -I. mq-gen.c -o mq.eir") || raise
  `./dumper mq.eir`.split.map {|s| s.to_i }
end
# estimate how long `src` should be
n = 300000
n = gen(n).size - n * 3
$stderr.puts "code size: #{ n * 4 }"

# find the position `i` of `src` in EIR binary form
# (this assumes that 8cc naively embeds a string in escaped form.)
a = gen(n)
i = (a.size - 1 - n * 3).downto(0).find do |i_|
  a[i_ + n * 3 - 1] == 0x40 && a[i_, n * 3] == [0, 0, 0x40] * n
end
# Without this guard a nil `i` would be substituted as an empty string and
# surface later as an obscure compile error.
raise "could not locate `src` in the EIR binary form" unless i

# regenerate the source with embedding `i` as `PROLOGUE_LENGTH`
a = gen(i, [0x40] * n)

# remove `src` from EIR binary form (i.e., extract its prologue and epilogue)
a = a[0, i] + a[i + n * 3 .. -1]
raise if a.size != n

# regenerate the source with embedding `a` as `EIR_BINARY_FORM`
m = gen(i, a).size - n * 3
raise if m != n

# run the final source code
system("out/elc -c mq.eir > mq.c") || raise
system("gcc -o mq mq.c") || raise
system("echo c | ./mq > mq2.c") || raise

# quine check: fail the script when the regenerated source differs
system("diff -s mq.c mq2.c") || raise("quine check failed: mq.c and mq2.c differ")
__END__
/* dumper.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ir/ir.h>
#include <target/util.h>
/* Write one value of the binary form to stdout as a decimal number on its
   own line. */
void dump_char(int n) {
  fprintf(stdout, "%d\n", n);
}
/* Write n as three values, most significant first: n / 256 / 256, then
   n / 256 % 256, then n % 256. */
void dump_int(int n) {
  int hi = n / 256 / 256;
  int mid = n / 256 % 256;
  int lo = n % 256;
  dump_char(hi);
  dump_char(mid);
  dump_char(lo);
}
/* Write one operand. A REG operand is written as its register number alone;
   any other operand is written as the marker 255 followed by the three-value
   form of its immediate. */
void dump_value(Value* v) {
  if (v->type != REG) {
    dump_char(255);
    dump_int(v->imm);
    return;
  }
  dump_char(v->reg);
}
/* Write a whole module: the instruction count, then each instruction as
   (op, dst, src, jmp, pc), then the data count, then each data value. */
void dump_module(Module* m) {
  /* Count the instructions first, because the count is written ahead of
     them. */
  int count = 0;
  for (Inst* it = m->text; it; it = it->next) {
    count++;
  }
  dump_int(count);

  for (Inst* it = m->text; it; it = it->next) {
    dump_char(it->op);
    dump_value(&it->dst);
    dump_value(&it->src);
    dump_value(&it->jmp);
    dump_int(it->pc);
    /* NOTE(review): lineno is overwritten but never dumped; the reason is
       not visible in this file. */
    it->lineno = 1;
  }

  /* Same two passes for the data list. */
  count = 0;
  for (Data* d = m->data; d; d = d->next) {
    count++;
  }
  dump_int(count);

  for (Data* d = m->data; d; d = d->next) {
    dump_int(d->v);
  }
}
/* Load an EIR module and dump it in binary form on stdout.
   When NOFILE or __eir__ is defined the module is read from stdin;
   otherwise it is read from the file named by the first argument.
   Returns 0 on success, 1 when the file argument is missing. */
int main(int argc, char* argv[]) {
#if defined(NOFILE) || defined(__eir__)
  Module* module = load_eir(stdin);
#else
  /* Do not hand a missing argv[1] to the loader. */
  if (argc < 2) {
    fprintf(stderr, "usage: %s file.eir\n", argc > 0 ? argv[0] : "dumper");
    return 1;
  }
  Module* module = load_eir_from_file(argv[1]);
#endif
  dump_module(module);
  return 0;
}
=====
/* mq-gen.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ir/ir.h>
#include <target/util.h>
/* src: the initializer below is a placeholder token; the generator script
   replaces it with a comma-separated list of integers before this file is
   compiled.
   quine: read cursor consumed by load_char(); main() points it at the
   buffer it assembles from src. */
int src[] = { EIR_BINARY_FORM }, *quine;
/* Return the value under the global cursor `quine` and advance the cursor
   by one element. */
int load_char() {
  int c = *quine;
  quine++;
  return c;
}
/* Read three consecutive values, most significant first, and combine them
   in base 256 into one integer. */
int load_int() {
  int hi = load_char();
  int mid = load_char();
  int lo = load_char();
  return (hi * 256 + mid) * 256 + lo;
}
/* Read one operand from the cursor into *v.
   A leading value in the range 0..SP is taken as a register number; any
   other leading value marks an immediate, whose value follows in the
   three-value form read by load_int().
   Always returns 0; the callers in this file ignore the result. The
   explicit return keeps this non-void function well-defined. */
int load_value(Value* v) {
  int n = load_char();
  if (0 <= n && n <= SP) {
    v->type = REG;
    v->reg = n;
  }
  else {
    v->type = IMM;
    v->imm = load_int();
  }
  return 0;
}
/* Rebuild a Module from the values under the cursor `quine`.
   Layout read: instruction count, then per instruction (op, dst, src, jmp,
   pc); data count, then one value per data element.
   Instructions and data are each allocated as one array and chained through
   their `next` pointers. Fields of Module, Inst and Data that are not
   assigned here keep whatever malloc returned.
   An empty instruction or data list yields a null list head. */
Module* load_module() {
  Module* m = malloc(sizeof(Module));
  int i, n = load_int();
  Inst* inst = malloc(sizeof(Inst) * n);
  /* With no elements there is nothing valid to point at. */
  m->text = n > 0 ? inst : 0;
  for (i = 0; i < n; i++) {
    Inst* inst0 = &inst[i];
    inst0->op = load_char();
    load_value(&inst0->dst);
    load_value(&inst0->src);
    load_value(&inst0->jmp);
    inst0->pc = load_int();
    inst0->next = &inst[i + 1];
  }
  /* Terminate the chain; guarded so an empty list never writes before the
     start of the array. */
  if (n > 0) inst[n - 1].next = 0;
  n = load_int();
  Data* data = malloc(sizeof(Data) * n);
  m->data = n > 0 ? data : 0;
  for (i = 0; i < n; i++) {
    Data* data0 = &data[i];
    data0->v = load_int();
    data0->next = &data[i + 1];
  }
  if (n > 0) data[n - 1].next = 0;
  return m;
}
/* Prototypes of the code-generating back ends.
   Only bf, c and rb are declared; they are the back ends whose sources the
   generator script appends to this file. The remaining prototypes are kept
   commented out. */
/*
void target_bef(Module* module);
*/
void target_bf(Module* module);
void target_c(Module* module);
/*
void target_cl(Module* module);
void target_cpp(Module* module);
void target_cr(Module* module);
void target_el(Module* module);
void target_forth(Module* module);
void target_go(Module* module);
void target_i(Module* module);
void target_java(Module* module);
void target_js(Module* module);
void target_php(Module* module);
void target_piet(Module* module);
void target_pietasm(Module* module);
void target_pl(Module* module);
void target_py(Module* module);
*/
void target_rb(Module* module);
/*
void target_sed(Module* module);
void target_sh(Module* module);
void target_sqlite3(Module* module);
void target_swift(Module* module);
void target_tex(Module* module);
void target_tf(Module* module);
void target_tm(Module* module);
void target_unl(Module* module);
void target_vim(Module* module);
void target_ws(Module* module);
void target_x86(Module* module);
*/
/* Signature shared by every back end: takes the module to emit. */
typedef void (*target_func_t)(Module*);
/* Map a target name ("bf", "c" or "rb") to its back-end function.
   Selecting "bf" also calls split_basic_block_by_mem() before returning.
   Any other name is reported through error().
   The other back ends (bef, cl, cpp, cr, el, forth, go, i, java, js, php,
   piet, pietasm, pl, py, sed, sh, sqlite3, swift, tex, tf, tm, unl, vim, ws,
   x86) are not handled here. */
static target_func_t get_target_func(const char* ext) {
  if (strcmp(ext, "bf") == 0) {
    split_basic_block_by_mem();
    return target_bf;
  }
  if (strcmp(ext, "c") == 0) {
    return target_c;
  }
  if (strcmp(ext, "rb") == 0) {
    return target_rb;
  }
  error("unknown flag: %s", ext);
}
/* Entry point of the multiquine.
   Reads the target name (first line of stdin), rebuilds the binary form of
   this program from `src`, loads it as a module and emits it with the
   selected back end. */
int main(int argc, char* argv[]) {
  /* Read the target name up to a newline or EOF. Characters beyond the
     buffer capacity are consumed but dropped, so an over-long line can no
     longer write past the end of buf. A dropped tail leaves a 31-character
     name, which matches no target and is rejected as before. */
  char buf[32];
  int cap = sizeof(buf) - 1;
  int len = 0;
  for (;;) {
    int c = getchar();
    if (c == '\n' || c == EOF) break;
    if (len < cap) buf[len++] = c;
  }
  buf[len] = 0;
  target_func_t target_func = get_target_func(buf);
  /* Assemble the full binary form: copy src element by element, and at the
     offset supplied by the generator insert src a second time with every
     element widened to three values (0, 0, element).
     NOTE(review): sizeof(src) is used as an element count, which presumably
     relies on int occupying one unit under the out/8cc build; confirm.
     NOTE(review): the buffer is a fixed 1999999 ints while the loops write
     four values per src element; confirm the bound holds. */
  int i, j;
  int *p = quine = malloc(sizeof(int) * 1999999);
  for (i = 0; i < sizeof(src); *p++ = src[i++]) {
    if (i == PROLOGUE_LENGTH) {
      for (j = 0; j < sizeof(src); *p++ = src[j++]) *p++ = 0, *p++ = 0;
    }
  }
  target_func(load_module());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment