Skip to content

Instantly share code, notes, and snippets.

@corsix
Last active Apr 8, 2016
Embed
What would you like to do?
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 718cb12..1c36813 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -2620,6 +2620,107 @@ static void asm_setup_target(ASMState *as)
/* -- Trace patching ------------------------------------------------------ */
+static const uint8_t map_op1[256] = { /* Indexed by the first opcode byte; asm_insn_len switches on the high nibble of an entry and uses the low nibble as a length contribution (or, for class 1, as prefix flag bits). */
+0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20,
+0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,
+0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
+0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
+#if LJ_TARGET_X64 /* Row for bytes 0x40-0x4f: class 1 (prefix) entries on x64, single-byte class 5 entries otherwise. */
+0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14,
+#else
+0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
+#endif
+0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
+0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51,
+0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
+0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51,
+#if LJ_TARGET_X64 /* Row for bytes 0xa0-0xaf: the first four entries give a fixed length of 9 on x64 and 5 otherwise. */
+0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
+#else
+0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
+#endif
+0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
+0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51,
+0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
+0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51,
+0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92
+};
+
+static const uint8_t map_op2[256] = { /* Indexed by the opcode byte that follows a class-2 escape (map_op1 entry 0x20) or a VEX prefix in asm_insn_len; high nibble = decode class, low nibble = length contribution. */
+0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93,
+0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52
+};
+
+static uint32_t asm_insn_len(const uint8_t* p) { /* Returns the byte length of the x86/x64 instruction starting at p, decoded via map_op1/map_op2. */
+ uint32_t result = 0; /* Bytes counted so far that the table entry's low nibble does not cover (prefixes, VEX bytes, displacement, SIB). */
+ uint32_t prefixes = 0; /* OR of class-1 table entries seen: mask 2 comes from the 0x66 entry (0x12), mask 4 from the 0x48-0x4f entries (0x14) on x64. */
+ uint32_t x = map_op1[*p];
+ for (;;) {
+ switch (x >> 4) { /* High nibble of the table entry selects the decode action. */
+ case 0: return result + x + (prefixes & 4); /* Fixed length; 4 more bytes when the mask-4 prefix flag is set. */
+ case 1: prefixes |= x; x = map_op1[*++p]; ++result; break; /* Prefix byte: record its flags, count it, decode the next byte. */
+ case 2: x = map_op2[*++p]; break; /* Escape byte: look the following byte up in the second table. */
+ case 3: ++p; goto mrm; /* One more opcode byte to skip before ModR/M; low nibble already includes it. */
+ case 4: result -= (prefixes & 2); /* fallthrough */ /* Length shrinks by 2 under the mask-2 prefix flag. */
+ case 5: return result + (x & 15); /* Fixed length, no ModR/M. */
+ case 6: /* Group 3 */ /* map_op1 entries 0x63/0x66 (bytes 0xf6/0xf7): immediate only present when ModR/M bits 3-5 are zero. */
+ if (p[1] & 0x38) x = 2; /* No immediate: opcode + ModR/M. */
+ else if ((prefixes & 2) && (x == 0x66)) x = 4; /* Mask-2 prefix flag: immediate is 2 bytes instead of 4. */
+ goto mrm; /* Decode ModR/M so displacement and SIB bytes of memory operands are counted (previously ignored). */
+ case 7: /* C4 / C5 (VEX) */
+#if LJ_TARGET_X86
+ if (p[1] < 0xC0) { /* On x86 this is not VEX unless the top two bits of the next byte are set: treat as opcode + ModR/M. */
+ x = 2;
+ goto mrm;
+ }
+#endif
+ if (x == 0x70) { /* Entry 0x70 (byte 0xc4): three-byte VEX prefix. */
+ x = *++p & 0x1f; /* Low 5 bits of the second VEX byte. */
+ ++result;
+ if (x >= 2) { /* Values >= 2 are then used directly as the low-nibble length at mrm. */
+ p += 2;
+ result += 2;
+ goto mrm;
+ }
+ }
+ ++p;
+ ++result;
+ x = map_op2[*++p]; /* Remaining VEX forms: classify the opcode byte with the second table. */
+ break;
+ case 8: result -= (prefixes & 2); /* fallthrough */ /* As class 4, but followed by ModR/M. */
+ case 9: mrm: /* ModR/M (and possibly SIB) */
+ result += (x & 15); /* Low nibble: bytes other than displacement/SIB. */
+ x = *++p; /* x now holds the ModR/M byte. */
+ switch (x >> 6) { /* Top two bits (mod) select the displacement size. */
+ case 0: if ((x & 7) == 5) return result + 4; break; /* mod 0, r/m 5: 4-byte displacement, no SIB. */
+ case 1: ++result; break; /* 1-byte displacement. */
+ case 2: result += 4; break; /* 4-byte displacement. */
+ case 3: return result; /* Register operand: nothing more to count. */
+ }
+ if ((x & 7) == 4) { /* r/m 4: a SIB byte follows. */
+ ++result;
+ if (x < 0x40 && (p[1] & 7) == 5) result += 4; /* mod 0 and SIB low 3 bits == 5: 4-byte displacement. */
+ }
+ return result;
+ }
+ }
+}
+
/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
@@ -2632,18 +2733,13 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
*(int32_t *)(p+len-4) = jmprel(p+len, target);
/* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
- for (; p < pe; p++)
- if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) {
- p += LJ_64 ? 11 : 10;
+ for (; p < pe; p += asm_insn_len(p))
+ if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi)
break;
- }
lua_assert(p < pe);
- for (; p < pe; p++) {
- if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) {
+ for (; p < pe; p += asm_insn_len(p))
+ if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px)
*(int32_t *)(p+2) = jmprel(p+6, target);
- p += 5;
- }
- }
lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
lj_mcode_patch(J, mcarea, 1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment