Skip to content

Instantly share code, notes, and snippets.

@jart
Created May 1, 2024 06:48
Show Gist options
  • Save jart/36fd54fa858f05cefed655fb93374cad to your computer and use it in GitHub Desktop.
Save jart/36fd54fa858f05cefed655fb93374cad to your computer and use it in GitHub Desktop.
example of a program written in C/C++ that skips over invalid amd64 / arm64 opcodes
// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <ucontext.h>
#ifdef __x86_64__
// ---------------------------------------------------------------------------
// Constants and bit-field accessors for the packed 64-bit decode word ("rde").
//
// Field layout, as established by the masks and shifts below:
//   bits  0-2   ModrmReg        bit   3     Rexr        bit   4     Rex
//   bit   5     Osz             bit   6     Rexw        bits  7-9   ModrmRm
//   bit  10     Rexb            bits 12-14  Srm         bit  17     Rexx
//   bits 18-20  Sego            bit  21     Asz         bits 22-23  ModrmMod
//   bits 28-29  RegLog2         bit  30     Ymm         bits 32-34  SibBase
//   bits 35-37  SibIndex        bits 38-39  SibScale    bits 40-47  Opcode
//   bits 48-50  Opmap           bits 51-52  Rep         bits 53-56  Oplength
//   bits 57-58  WordLog2        bits 60-63  Vreg
//
// Every accessor parenthesizes its argument so that any expression may be
// passed safely.
// ---------------------------------------------------------------------------

// has_modrm_2d value: a ModRM byte is present, but xed_modrm_scanner() skips
// the SIB / displacement table lookups for it.
#define XED_ILD_HASMODRM_IGNORE_MOD 2

#define XED_MODE_REAL 0
#define XED_MODE_LEGACY 1
#define XED_MODE_LONG 2

#define XED_ILD_MAP0 0 // 8086+ ...
#define XED_ILD_MAP1 1 // 286+ 0x0F,...
#define XED_ILD_MAP2 2 // Core2+ 0x0F,0x38,...
#define XED_ILD_MAP3 3 // Core2+ 0x0F,0x3A,...
#define XED_ILD_MAP4 4
#define XED_ILD_MAP5 5
#define XED_ILD_MAP6 6

#define XED_MAX_INSTRUCTION_BYTES 15

// The argument is deliberately ignored: this decoder is hard-wired to long mode.
#define Mode(x) XED_MODE_LONG

#define kRexbRmMask 000000003600
#define RexbRm(x) (((x) & kRexbRmMask) >> 007)
#define kRexrRegMask 000000000017
#define RexrReg(x) (((x) & kRexrRegMask) >> 000)
#define kOplengthMask 00007400000000000000000
#define Oplength(x) (((x) & kOplengthMask) >> 065)
#define kRegRexbSrmMask 00000170000
#define RexbSrm(x) (((x) & kRegRexbSrmMask) >> 014)

#define Rex(x) (((x) & 000000000020) >> 004)
#define Osz(x) (((x) & 000000000040) >> 005)
#define Asz(x) (((x) & 000010000000) >> 025)
#define Srm(x) (((x) & 000000070000) >> 014)
#define Rexr(x) (((x) & 000000000010) >> 003)
#define Rexw(x) (((x) & 000000000100) >> 006)
#define Rexx(x) (((x) & 000000400000) >> 021)
#define Rexb(x) (((x) & 000000002000) >> 012)
#define Sego(x) (((x) & 000007000000) >> 022)
#define Ymm(x) (((x) & 010000000000) >> 036)
#define RegLog2(x) (((x) & 006000000000) >> 034)

#define ModrmRm(x) (((x) & 000000001600) >> 007)
#define ModrmReg(x) (((x) & 000000000007) >> 000)
#define ModrmSrm(x) (((x) & 000000070000) >> 014)
#define ModrmMod(x) (((x) & 000060000000) >> 026)

#define RexRexr(x) (((x) & 000000000037) >> 000)
#define RexRexb(x) (((x) & 000000007600) >> 007)
#define RexRexbSrm(x) (((x) & 000000370000) >> 014)

// Reassembles the original ModRM byte from its three stored fields.
#define Modrm(x) (ModrmMod(x) << 6 | ModrmReg(x) << 3 | ModrmRm(x))

#define SibBase(x) (((x) & 00000000000340000000000) >> 040)
#define SibIndex(x) (((x) & 00000000003400000000000) >> 043)
#define SibScale(x) (((x) & 00000000014000000000000) >> 046)

#define Opcode(x) (((x) & 00000007760000000000000) >> 050)
#define Opmap(x) (((x) & 00000070000000000000000) >> 060)
// Opmap and Opcode together as one 11-bit value (map << 8 | opcode).
#define Mopcode(x) (((x) & 00000077760000000000000) >> 050)
#define Rep(x) (((x) & 00000300000000000000000) >> 063)
#define WordLog2(x) (((x) & 00030000000000000000000) >> 071)
#define Vreg(x) (((x) & 01700000000000000000000) >> 074)

#define Bite(x) (~ModrmSrm(x) & 1)
#define RexbBase(x) (Rexb(x) << 3 | SibBase(x))
// Fixed: previously expanded to Srm(rde), silently capturing whatever variable
// named `rde` happened to be in scope instead of using the macro argument.
#define IsByteOp(x) (~Srm(x) & 1)
#define SibExists(x) (ModrmRm(x) == 4)
#define IsModrmRegister(x) (ModrmMod(x) == 3)
#define SibHasIndex(x) (SibIndex(x) != 4 || Rexx(x))
#define SibHasBase(x) (SibBase(x) != 5 || ModrmMod(x))
#define SibIsAbsolute(x) (!SibHasBase(x) && !SibHasIndex(x))
// NOTE(review): Eamode() is not defined anywhere in this file, so this macro
// cannot be expanded as-is; it is currently unused.
#define IsRipRelative(x) (Eamode(x) && ModrmRm(x) == 5 && !ModrmMod(x))
// 256-entry bitmap, one bit per byte value, packed 32 values per word
// (word = byte >> 5, bit = byte & 31, as read by xed_get_prefix_table_bit).
// The set bits are 0x26, 0x2E, 0x36, 0x3E (word 1), 0x40-0x4F (word 2),
// 0x64-0x67 (word 3) and 0xF0, 0xF2, 0xF3 (word 7): the byte values that
// xed_prefix_scanner() is willing to consume as prefixes.
static const unsigned xed_prefix_table_bit[8] = {
0x00000000, 0x40404040, 0x0000ffff, 0x000000f0, 0x00000000, 0x00000000, 0x00000000, 0x000d0000,
};
// Dense lookup tables that drive the length decoder.
//
//   eamode[Asz][Mode]                  effective addressing mode; indexes the
//                                      two ModRM tables below
//   has_sib_table[eamode][mod][rm]     nonzero when a SIB byte follows ModRM
//   has_disp_regular[eamode][mod][rm]  displacement size, in bytes, implied by
//                                      the ModRM byte
//   imm_bits_2d[map][opcode]           selector switched on by
//                                      xed_set_imm_bytes(); 0 fails the decode
//   has_modrm_2d[map][opcode]          0 = no ModRM byte, nonzero = ModRM byte
//                                      is consumed; XED_ILD_HASMODRM_IGNORE_MOD
//                                      skips the SIB/displacement lookup
//   disp_bits_2d[map][opcode]          selector switched on by
//                                      xed_disp_scanner(); 0 fails the decode
//   *_WIDTH[size class]                widths in bits
//   ASZ_NONTERM_EASZ[Asz][Mode]        size class used to index MEMDISPv
//   OSZ_NONTERM_*[Rexw][Osz][Mode]     size class used to index the
//                                      BRDISPz / SIMMz / UIMMv width tables
//
// The per-opcode tables only have rows for maps 0 and 1; callers check
// Opmap() against XED_ILD_MAP2 (or the row count) before indexing them.
static const struct XedDenseMagnums {
unsigned char eamode[2][3];
unsigned char has_sib_table[3][4][8];
unsigned char has_disp_regular[3][4][8];
unsigned char imm_bits_2d[2][256];
unsigned char has_modrm_2d[XED_ILD_MAP2][256];
unsigned char disp_bits_2d[XED_ILD_MAP2][256];
unsigned char BRDISPz_BRDISP_WIDTH[4];
unsigned char MEMDISPv_DISP_WIDTH[4];
unsigned char SIMMz_IMM_WIDTH[4];
unsigned char UIMMv_IMM_WIDTH[4];
unsigned char ASZ_NONTERM_EASZ[2][3];
unsigned char OSZ_NONTERM_DF64_EOSZ[2][2][3];
unsigned char OSZ_NONTERM_EOSZ[2][2][3];
} kXed = {
.eamode = {{XED_MODE_REAL, XED_MODE_LEGACY, XED_MODE_LONG},
{XED_MODE_LEGACY, XED_MODE_REAL, XED_MODE_LEGACY}},
.has_sib_table = {{{0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0}},
{{0, 0, 0, 0, 1, 0, 0, 0},
{0, 0, 0, 0, 1, 0, 0, 0},
{0, 0, 0, 0, 1, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0}},
{{0, 0, 0, 0, 1, 0, 0, 0},
{0, 0, 0, 0, 1, 0, 0, 0},
{0, 0, 0, 0, 1, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0}}},
.has_disp_regular = {{{0, 0, 0, 0, 0, 0, 2, 0},
{1, 1, 1, 1, 1, 1, 1, 1},
{2, 2, 2, 2, 2, 2, 2, 2},
{0, 0, 0, 0, 0, 0, 0, 0}},
{{0, 0, 0, 0, 0, 4, 0, 0},
{1, 1, 1, 1, 1, 1, 1, 1},
{4, 4, 4, 4, 4, 4, 4, 4},
{0, 0, 0, 0, 0, 0, 0, 0}},
{{0, 0, 0, 0, 0, 4, 0, 0},
{1, 1, 1, 1, 1, 1, 1, 1},
{4, 4, 4, 4, 4, 4, 4, 4},
{0, 0, 0, 0, 0, 0, 0, 0}}},
.imm_bits_2d =
{{1, 1, 1, 1, 5, 7, 1, 1, 1, 1, 1, 1, 9, 7, 1, 0, 1, 1, 1, 1, 5, 7, 1, 1, 1, 1,
1, 1, 5, 7, 1, 1, 1, 1, 1, 1, 5, 7, 0, 1, 1, 1, 1, 1, 5, 7, 0, 1, 1, 1, 1, 1,
9, 7, 0, 1, 1, 1, 1, 1, 5, 7, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
6, 7, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 7,
5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 7, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9,
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 8, 1, 1, 1, 9, 2, 11, 1, 8, 1, 1, 9, 1, 1,
1, 1, 1, 1, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 1, 1,
8, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1},
{1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9,
1, 1, 1, 1, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 0, 0, 1, 1, 1, 1, 9, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 9, 1, 9, 9, 9, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}},
.has_modrm_2d = {{1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 3, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 3, 0, 1, 1, 1, 1, 0, 0, 3, 0, 1, 1, 1, 1,
0, 0, 3, 0, 1, 1, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 3, 3,
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 3, 0, 3, 3, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1},
{1, 1, 1, 1, 3, 0, 0, 0, 0, 0, 3, 0, 3, 1, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 3, 3, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}},
.disp_bits_2d = {{4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4,
4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4,
4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 4, 3, 3,
2, 1, 4, 4, 4, 4, 0, 4, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4},
{4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 0, 4, 0, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}},
.BRDISPz_BRDISP_WIDTH = {0, 16, 32, 32},
.MEMDISPv_DISP_WIDTH = {0, 16, 32, 64},
.SIMMz_IMM_WIDTH = {0x00, 0x10, 0x20, 0x20},
.UIMMv_IMM_WIDTH = {0x00, 0x10, 0x20, 0x40},
.ASZ_NONTERM_EASZ = {{1, 2, 3}, {2, 1, 2}},
.OSZ_NONTERM_DF64_EOSZ = {{{1, 2, 3}, {2, 1, 1}}, {{1, 2, 3}, {2, 1, 3}}},
.OSZ_NONTERM_EOSZ = {{{1, 2, 2}, {2, 1, 1}}, {{1, 2, 3}, {2, 1, 3}}},
};
// Converts a width in bits to whole bytes, discarding any remainder.
static unsigned long xed_bits2bytes(unsigned bits) {
    return bits / 8u;
}
// Converts a byte count to bits; the multiply is done in `unsigned`, so it
// wraps at the width of that type before widening to the return type.
static unsigned long xed_bytes2bits(unsigned bytes) {
    return bytes * 8u;
}
// Returns 1 when byte value `a` is flagged in the xed_prefix_table_bit
// bitmap, 0 otherwise.
static unsigned char xed_get_prefix_table_bit(unsigned char a) {
    unsigned word = xed_prefix_table_bit[a / 32];
    unsigned bit = a % 32;
    return (word >> bit) & 1;
}
// ORs `mopcode` (map << 8 | opcode) into the decode word starting at bit 40.
// Existing bits are not cleared first.
static void xed_set_mopcode(unsigned long long *rde, unsigned long long mopcode) {
    enum { kMopcodeShift = 40 };
    *rde |= mopcode << kMopcodeShift;
}
// Picks a signed immediate width (in bits) from kXed.SIMMz_IMM_WIDTH, using
// the size class that `eosz` yields for the current Rexw / Osz / Mode, and
// marks the immediate as signed.
static void xed_set_simmz_imm_width_eosz(const unsigned long long *rde,
                                         const unsigned char eosz[2][2][3], int *imm_width,
                                         unsigned char *imm_signed) {
    unsigned char size_class = eosz[Rexw(*rde)][Osz(*rde)][Mode(*rde)];
    *imm_width = kXed.SIMMz_IMM_WIDTH[size_class];
    *imm_signed = 1;
}
// Determines the immediate operand width (in bits) for the decoded opcode.
//
// Nothing is changed when *imm_width is already nonzero or when the opcode
// map has no row in kXed.imm_bits_2d. Otherwise the table entry selects how
// the width is derived; some entries also depend on the ModRM reg field or
// on the operand-size / REP prefixes.
//
// Returns 0 on success and -1 when the table marks the opcode as invalid.
static int xed_set_imm_bytes(unsigned long long *rde, int *imm_width, unsigned char *imm_signed) {
    unsigned reg;

    if (*imm_width || Opmap(*rde) >= XED_ILD_MAP2)
        return 0;

    reg = ModrmReg(*rde);
    switch (kXed.imm_bits_2d[Opmap(*rde)][Opcode(*rde)]) {
    case 0:
        return -1;
    case 1:
        *imm_width = 0;
        break;
    case 2:
        if (reg == 0) {
            xed_set_simmz_imm_width_eosz(rde, kXed.OSZ_NONTERM_EOSZ, imm_width, imm_signed);
        } else if (reg == 7) {
            *imm_width = 0;
        }
        break;
    case 3:
        // reg is a 3-bit field, so anything above 1 lies in 2..7
        if (reg <= 1) {
            *imm_width = 8;
            *imm_signed = 1;
        } else {
            *imm_width = 0;
        }
        break;
    case 4:
        if (reg <= 1) {
            xed_set_simmz_imm_width_eosz(rde, kXed.OSZ_NONTERM_EOSZ, imm_width, imm_signed);
        } else {
            *imm_width = 0;
        }
        break;
    case 5:
        *imm_width = 8;
        *imm_signed = 1;
        break;
    case 6:
        xed_set_simmz_imm_width_eosz(rde, kXed.OSZ_NONTERM_DF64_EOSZ, imm_width, imm_signed);
        break;
    case 7:
        xed_set_simmz_imm_width_eosz(rde, kXed.OSZ_NONTERM_EOSZ, imm_width, imm_signed);
        break;
    case 8:
        *imm_width = 16;
        break;
    case 9:
        *imm_width = 8;
        break;
    case 10:
        *imm_width =
            kXed.UIMMv_IMM_WIDTH[kXed.OSZ_NONTERM_EOSZ[Rexw(*rde)][Osz(*rde)][Mode(*rde)]];
        break;
    case 11:
        // actually 2 bytes for uimm0 & 1 byte for uimm1
        *imm_width = xed_bytes2bits(3);
        break;
    case 12:
        if (Osz(*rde) || Rep(*rde) == 2)
            *imm_width = xed_bytes2bits(1);
        break;
    default:
        __builtin_unreachable();
    }
    return 0;
}
// Reports (1/0) whether the byte at bytes[length] should be left alone rather
// than decoded as a VEX escape: only outside long mode, only when another
// byte fits within the instruction limit, and only when the top two bits of
// that next byte are not both set.
static int xed_is_bound_instruction(const unsigned char *bytes, const unsigned long long *rde,
                                    int length) {
    if (Mode(*rde) == XED_MODE_LONG)
        return 0;
    if (length + 1 >= XED_MAX_INSTRUCTION_BYTES)
        return 0;
    return (bytes[length + 1] & 0xC0) != 0xC0;
}
// Consumes legacy and REX prefixes from the start of `bytes`.
//
// Operand-size, address-size, segment-override, LOCK and REP prefixes are
// folded into the decode word. A REX byte only takes effect when it is the
// last prefix: any later legacy prefix discards it.
//
// Stores the resulting decode word in *out_rde and returns the number of
// prefix bytes consumed, or -1 when prefixes fill the whole instruction
// length limit.
static int xed_prefix_scanner(const unsigned char *bytes, unsigned long long *out_rde) {
    const unsigned kSegoClear = ~(7u << 18);  // everything except the Sego field
    const int in_long_mode = Mode(0) == XED_MODE_LONG;
    unsigned flags = 0;
    unsigned seg;
    unsigned char byte;
    unsigned char rex_byte = 0;
    unsigned char rep_kind = 0;
    int count;

    for (count = 0; count < XED_MAX_INSTRUCTION_BYTES; ++count) {
        byte = bytes[count];
        if (!xed_get_prefix_table_bit(byte))
            break;

        if ((byte & 0xf0) == 0x40) {
            // REX is only a prefix in long mode; remember the most recent one
            if (!in_long_mode)
                break;
            rex_byte = byte;
            continue;
        }

        seg = 0;
        switch (byte) {
        case 0x66:  // osz
            flags |= 1u << 5;
            break;
        case 0x67:  // asz
            flags |= 1u << 21;
            break;
        case 0x26:  // es
            seg = 1;
            break;
        case 0x2E:  // cs
            seg = 2;
            break;
        case 0x36:  // ss
            seg = 3;
            break;
        case 0x3E:  // ds
            seg = 4;
            break;
        case 0x64:  // fs
            seg = 5;
            break;
        case 0x65:  // gs
            seg = 6;
            break;
        case 0xF0:  // lock
            flags |= 1u << 31;
            break;
        case 0xF2:  // rep
        case 0xF3:
            rep_kind = byte & 3;
            break;
        default:
            goto finish;
        }
        if (seg)
            flags = (flags & kSegoClear) | seg << 18;
        rex_byte = 0;  // a legacy prefix after REX cancels the REX
    }

finish:
    if (rex_byte) {
        unsigned w = (rex_byte >> 3) & 1;
        unsigned r = (rex_byte >> 2) & 1;
        unsigned x = (rex_byte >> 1) & 1;
        unsigned b = rex_byte & 1;
        flags |= x << 17 | 1u << 16 | b << 15 | 1u << 11 | b << 10 | w << 6 | 1u << 4 | r << 3;
    }
    *out_rde = (unsigned long long)rep_kind << 51 | flags;
    return count < XED_MAX_INSTRUCTION_BYTES ? count : -1;
}
// Applies the 2-bit VEX "pp" field to the decode word:
//   0 -> nothing, 1 -> set the Osz bit, 2 -> Rep = 3, 3 -> Rep = 2.
// Any other value is declared unreachable.
static void xed_set_vex_prefix(unsigned long long *rde, unsigned prefix) {
    const unsigned long long kOszBit = 1ull << 5;
    const unsigned long long kRepMask = 3ull << 51;

    if (prefix == 0)
        return;
    if (prefix == 1) {  // osz
        *rde |= kOszBit;
        return;
    }
    if (prefix == 2 || prefix == 3) {  // rep3 / rep2
        *rde = (*rde & ~kRepMask) | (unsigned long long)(prefix ^ 1) << 51;
        return;
    }
    __builtin_unreachable();
}
// Records the opcode byte that follows a VEX prefix, together with its map,
// and reports its position through *pos_opcode.
//
// Returns the offset just past the opcode, or -1 when the encoding is
// rejected (real mode, or a REX prefix seen before VEX in long mode).
static int xed_vex_opcode_scanner(const unsigned char *bytes, unsigned long long *rde, int length,
                                  int map, int *pos_opcode) {
    int rejected;

    xed_set_mopcode(rde, map << 8 | bytes[length]);
    *pos_opcode = length;

    rejected = Mode(*rde) == XED_MODE_REAL || (Mode(*rde) == XED_MODE_LONG && Rex(*rde));
    return rejected ? -1 : length + 1;
}
// Decodes a three-byte VEX prefix (0xC4 followed by two payload bytes).
//
// On entry bytes[length] is the 0xC4 byte. The payload's register-extension
// bits, vector length, extra register operand and implied prefix are folded
// into *rde, *vexvalid is set to 1, and *imm_width is preset to one byte
// when the low two map bits select map 3.
//
// Returns the offset just past the opcode byte, `length` unchanged when the
// byte is not treated as a VEX escape, or -1 on error.
static int xed_vex_c4_scanner(const unsigned char *bytes, unsigned long long *rde, int length,
                              int *imm_width, int *vexvalid, int *pos_opcode) {
    unsigned b1, b2;
    int map, rexr, rexx, rexb, rexw, ymm, vrex, vexdest210;

    if (xed_is_bound_instruction(bytes, rde, length))
        return length;
    ++length;
    // two payload bytes plus the opcode must fit within the length limit
    if (length + 2 >= XED_MAX_INSTRUCTION_BYTES)
        return -1;

    // map: 5-bit
    // rex.b: 1-bit (expands r/m or srm register operand)
    // rex.x: 1-bit (expands sib register operands)
    // rex.r: 1-bit (expands reg register operand)
    b1 = bytes[length];
    rexr = !(b1 & 128);
    rexx = !(b1 & 64);
    // Fixed: this used to read Mode(x->rde), naming a variable `x` that does
    // not exist here; it only compiled because Mode() discards its argument.
    rexb = (Mode(*rde) == XED_MODE_LONG) & !(b1 & 32);

    // prefix: 2-bit → {none, osz, rep3, rep2}
    // vector_length: 1-bit → {xmm, ymm} aka VEX.L
    // vexdest210: 3-bit (second reg operand, inverted)
    // vrex: 1-bit a.k.a. vexdest3
    // rex.w: 1-bit (for 64-bit registers) aka VEX.W1
    b2 = bytes[length + 1];
    rexw = !!(b2 & 128);
    vrex = !(b2 & 64);
    vexdest210 = (~b2 >> 3) & 7;
    ymm = !!(b2 & 4);
    xed_set_vex_prefix(rde, b2 & 3);

    map = b1 & 31;
    if ((b1 & 3) == XED_ILD_MAP3) {
        *imm_width = xed_bytes2bits(1);
    }

    *rde |= (unsigned long long)vrex << 63 | rexx << 17 | rexb << 15 | rexb << 10 | rexw << 6 |
            rexr << 3;
    *rde |= ymm << 30;
    *rde |= (unsigned long long)vexdest210 << 60;
    *vexvalid = 1;
    return xed_vex_opcode_scanner(bytes, rde, length + 2, map, pos_opcode);
}
// Decodes a two-byte VEX prefix (0xC5 followed by one payload byte).
//
// On entry bytes[length] is the 0xC5 byte. The payload's rex.r bit, vector
// length, extra register operand and implied prefix are folded into *rde and
// *vexvalid is set to 1. The opcode map is always XED_ILD_MAP1.
//
// Returns the offset just past the opcode byte, `length` unchanged when the
// byte is not treated as a VEX escape, or -1 on error.
static int xed_vex_c5_scanner(const unsigned char *bytes, unsigned long long *rde, int length,
                              int *vexvalid, int *pos_opcode) {
    unsigned b;
    int rexr, ymm, vrex, vexdest210;

    if (xed_is_bound_instruction(bytes, rde, length))
        return length;
    ++length;
    // the payload byte plus the opcode must fit within the length limit
    if (length + 1 >= XED_MAX_INSTRUCTION_BYTES)
        return -1;

    // prefix: 2-bit → {none, osz, rep3, rep2}
    // vector_length: 1-bit → {xmm, ymm}
    // vexdest210: 3-bit
    // vrex: 1-bit
    // rex.r: 1-bit
    b = bytes[length];
    rexr = !(b & 128);
    // Fixed: vrex is the top bit of the same inverted register field as
    // vexdest210, so it must be un-inverted too (as the C4 scanner does);
    // it was previously stored with the encoded polarity via !!(b & 64).
    vrex = !(b & 64);
    vexdest210 = (~b >> 3) & 7;
    ymm = (b >> 2) & 1;
    xed_set_vex_prefix(rde, b & 3);

    *rde |= (unsigned long long)vrex << 63 | rexr << 3;
    *rde |= ymm << 30;
    *rde |= (unsigned long long)vexdest210 << 60;
    *vexvalid = 1;
    return xed_vex_opcode_scanner(bytes, rde, length + 1, XED_ILD_MAP1, pos_opcode);
}
// Dispatches to the two- or three-byte VEX decoder when bytes[length] is
// 0xC5 or 0xC4. Any other byte is left for the regular opcode scanner and
// `length` is returned unchanged. Outside long mode the function returns 0.
static int xed_vex_scanner(const unsigned char *bytes, unsigned long long *rde, int length,
                           int *imm_width, int *vexvalid, int *pos_opcode) {
    unsigned char lead;

    if (Mode(*rde) != XED_MODE_LONG)
        return 0;

    lead = bytes[length];
    if (lead == 0xC4)
        return xed_vex_c4_scanner(bytes, rde, length, imm_width, vexvalid, pos_opcode);
    if (lead == 0xC5)
        return xed_vex_c5_scanner(bytes, rde, length, vexvalid, pos_opcode);
    return length;
}
// Takes bytes[length] as the opcode byte of opcode map `map`.
// Returns the offset just past it, or -1 when `length` is already at the
// instruction length limit.
static int xed_get_next_as_opcode(const unsigned char *bytes, unsigned long long *rde, int length,
                                  int map) {
    if (length >= XED_MAX_INSTRUCTION_BYTES)
        return -1;
    xed_set_mopcode(rde, map << 8 | bytes[length]);
    return length + 1;
}
// Decodes the (non-VEX) opcode starting at bytes[length].
//
//   anything but 0x0F -> one-byte opcode in map 0
//   0x0F 0x38 xx      -> map 2
//   0x0F 0x3A xx      -> map 3, with a one-byte immediate preset
//   0x0F 0x39, 0x0F 0x3B..0x3F, 0x0F 0x0F -> rejected
//   0x0F xx           -> map 1
//
// *pos_opcode receives the offset of the first byte after any 0x0F escape.
// Returns the offset just past the opcode, or -1 on error.
static int xed_opcode_scanner(const unsigned char *bytes, unsigned long long *rde, int length,
                              int *imm_width, int *pos_opcode) {
    unsigned char op = bytes[length];

    if (op != 0x0F) {
        xed_set_mopcode(rde, XED_ILD_MAP0 << 8 | op);
        *pos_opcode = length;
        return length + 1;
    }

    ++length;
    *pos_opcode = length;
    if (length >= XED_MAX_INSTRUCTION_BYTES)
        return -1;

    op = bytes[length];
    if (op == 0x38)
        return xed_get_next_as_opcode(bytes, rde, length + 1, XED_ILD_MAP2);
    if (op == 0x3A) {
        *imm_width = xed_bytes2bits(1);
        return xed_get_next_as_opcode(bytes, rde, length + 1, XED_ILD_MAP3);
    }
    // 0x38 and 0x3A were handled above; the rest of 0x39..0x3F and a second
    // 0x0F are not accepted
    if ((op >= 0x39 && op <= 0x3F) || op == 0x0F)
        return -1;

    xed_set_mopcode(rde, XED_ILD_MAP1 << 8 | op);
    return length + 1;
}
// Reads a little-endian integer from `p`.
//
// The number of bytes read is 1 << floor(log2(n)), so n = 1, 2, 4, 8 read
// exactly n bytes and n = 3 reads two. When `s` is 1 the 1-, 2- and 4-byte
// forms are sign-extended; when it is 0 they are zero-extended. The 8-byte
// form yields the same 64-bit pattern either way.
static long long xed_read_number(const unsigned char *p, unsigned n, unsigned s) {
    unsigned selector = (s << 2) | (unsigned)(31 - __builtin_clz(n));
    unsigned u16;
    unsigned u32;
    unsigned long long u64;
    int i;

    if (selector == 0)
        return p[0];
    if (selector == 4)
        return (signed char)p[0];

    if (selector == 1 || selector == 5) {
        u16 = (unsigned)p[1] << 8 | p[0];
        if (selector == 5)
            return (short)u16;
        return u16;
    }

    if (selector == 2 || selector == 6) {
        u32 = (unsigned)p[3] << 24 | (unsigned)p[2] << 16 | (unsigned)p[1] << 8 | p[0];
        if (selector == 6)
            return (int)u32;
        return u32;
    }

    if (selector == 3 || selector == 7) {
        u64 = 0;
        for (i = 7; i >= 0; --i)
            u64 = u64 << 8 | p[i];
        return u64;
    }

    __builtin_unreachable();
}
// Looks up the ModRM class of the decoded opcode in kXed.has_modrm_2d.
// Opcode maps without a table row are reported as having a ModRM byte (1).
static int xed_has_modrm(const unsigned long long *rde) {
    enum { kTabulatedMaps = sizeof(kXed.has_modrm_2d) / sizeof(kXed.has_modrm_2d[0]) };
    unsigned long long map = Opmap(*rde);

    if (map >= kTabulatedMaps)
        return 1;
    return kXed.has_modrm_2d[map][Opcode(*rde)];
}
// Consumes the ModRM byte when the opcode has one.
//
// The mod / reg / rm fields are stored in *rde. Unless the opcode's class is
// XED_ILD_HASMODRM_IGNORE_MOD, the displacement width (in bits) and the
// presence of a SIB byte are looked up from the addressing-mode tables.
//
// Returns the offset past the ModRM byte, `length` unchanged when there is
// no ModRM byte, or -1 when the instruction length limit is reached.
static int xed_modrm_scanner(const unsigned char *bytes, unsigned long long *rde, int length,
                             int *disp_width, int *has_sib) {
    int modrm_class = xed_has_modrm(rde);
    unsigned char modrm, rm, reg, mod, eamode;

    if (!modrm_class)
        return length;
    if (length >= XED_MAX_INSTRUCTION_BYTES)
        return -1;

    modrm = bytes[length];
    mod = modrm >> 6;
    reg = (modrm >> 3) & 7;
    rm = modrm & 7;

    *rde &= ~1ull;
    *rde |= mod << 22 | rm << 7 | reg;

    if (modrm_class != XED_ILD_HASMODRM_IGNORE_MOD) {
        eamode = kXed.eamode[Asz(*rde)][Mode(*rde)];
        *disp_width = xed_bytes2bits(kXed.has_disp_regular[eamode][mod][rm]);
        *has_sib = kXed.has_sib_table[eamode][mod][rm];
    }
    return length + 1;
}
// Consumes the SIB byte and stores it in *rde starting at bit 32.
// When the SIB base field is 5 and ModRM.mod is 0, a four-byte displacement
// follows, so *disp_width is set accordingly.
//
// Returns the offset past the SIB byte, or -1 when the instruction length
// limit is reached.
static int xed_sib_scanner(const unsigned char *bytes, unsigned long long *rde, int length,
                           int *disp_width) {
    unsigned char sib;

    if (length >= XED_MAX_INSTRUCTION_BYTES)
        return -1;

    sib = bytes[length];
    *rde |= (unsigned long long)sib << 32;  // set sib byte
    if ((sib & 7) == 5 && !ModrmMod(*rde))
        *disp_width = xed_bytes2bits(4);
    return length + 1;
}
// Returns the branch displacement width, in bits, for the size class that
// kXed.OSZ_NONTERM_EOSZ yields for the current Rexw / Osz / Mode.
static unsigned char
XED_LF_BRDISPz_BRDISP_WIDTH_OSZ_NONTERM_EOSZ_l2(const unsigned long long *rde) {
    unsigned char size_class = kXed.OSZ_NONTERM_EOSZ[Rexw(*rde)][Osz(*rde)][Mode(*rde)];
    return kXed.BRDISPz_BRDISP_WIDTH[size_class];
}
// Consumes the displacement, if the instruction has one.
//
// `disp_width` arrives as the width (in bits) implied by ModRM / SIB. For
// opcode maps covered by kXed.disp_bits_2d, the table entry may override it
// (branch and moffs style displacements) or reject the opcode outright.
//
// The decoded value is stored in *disp. Returns the offset past the
// displacement, `length` unchanged when there is none, or -1 on error.
static int xed_disp_scanner(const unsigned char *bytes, unsigned long long *rde, int length,
                            int disp_width, long long *disp) {
    unsigned char nbytes;
    unsigned char is_unsigned = 0;

    if (Opmap(*rde) < XED_ILD_MAP2) {
        switch (kXed.disp_bits_2d[Opmap(*rde)][Opcode(*rde)]) {
        case 0:
            return -1;
        case 1:
            disp_width = 8;
            break;
        case 2:
            disp_width = XED_LF_BRDISPz_BRDISP_WIDTH_OSZ_NONTERM_EOSZ_l2(rde);
            is_unsigned = 1;
            break;
        case 3:
            if (Mode(*rde) <= XED_MODE_LEGACY) {
                disp_width = XED_LF_BRDISPz_BRDISP_WIDTH_OSZ_NONTERM_EOSZ_l2(rde);
            } else if (Mode(*rde) == XED_MODE_LONG) {
                disp_width = 0x20;
            }
            break;
        case 4:
            // keep the width derived from ModRM / SIB
            break;
        case 5:
            disp_width = kXed.MEMDISPv_DISP_WIDTH[kXed.ASZ_NONTERM_EASZ[Asz(*rde)][Mode(*rde)]];
            is_unsigned = 1;
            break;
        case 6:
            // only reg == 7 overrides the ModRM-derived width
            if (ModrmReg(*rde) == 7) {
                disp_width = XED_LF_BRDISPz_BRDISP_WIDTH_OSZ_NONTERM_EOSZ_l2(rde);
                is_unsigned = 1;
            }
            break;
        default:
            __builtin_unreachable();
        }
    }

    nbytes = xed_bits2bytes(disp_width);
    if (nbytes == 0)
        return length;
    if (length + nbytes > XED_MAX_INSTRUCTION_BYTES)
        return -1;
    *disp = xed_read_number(bytes + length, nbytes, !is_unsigned);
    return length + nbytes;
}
// Consumes the immediate operand, if the instruction has one.
//
// `imm_width` arrives as any width (in bits) preset by the opcode scanners;
// xed_set_imm_bytes() fills it in otherwise. The decoded value is stored in
// *uimm0. Returns the offset past the immediate (i.e. the total instruction
// length), `length` unchanged when there is none, or -1 on error.
static int xed_imm_scanner(const unsigned char *bytes, unsigned long long *rde, int length,
                           int imm_width, unsigned long long *uimm0) {
    unsigned char nbytes;
    unsigned char is_signed = 0;

    if (xed_set_imm_bytes(rde, &imm_width, &is_signed) == -1)
        return -1;

    nbytes = xed_bits2bytes(imm_width);
    if (nbytes == 0)
        return length;
    if (length + nbytes > XED_MAX_INSTRUCTION_BYTES)
        return -1;
    *uimm0 = xed_read_number(bytes + length, nbytes, is_signed);
    return length + nbytes;
}
// x86-64 instruction length decoder.
//
// Runs the scanner stages in encoding order (prefixes, VEX, opcode, ModRM,
// SIB, displacement, immediate) on the instruction at `bytes`; each stage
// returns the running offset or a negative value on failure.
//
// Returns the instruction length in bytes, or a negative value when the
// bytes could not be decoded.
__attribute__((__flatten__)) int x86ild(const void *bytes) {
    const unsigned char *code = bytes;
    unsigned long long rde;
    unsigned long long uimm0;
    long long disp;
    int pos_opcode;
    int vexvalid = 0;
    int has_sib = 0;
    int disp_width = 0;
    int imm_width = 0;
    int n;

    n = xed_prefix_scanner(code, &rde);
    if (n < 0)
        return n;

    n = xed_vex_scanner(code, &rde, n, &imm_width, &vexvalid, &pos_opcode);
    if (n < 0)
        return n;

    // a VEX prefix already supplied the opcode
    if (!vexvalid) {
        n = xed_opcode_scanner(code, &rde, n, &imm_width, &pos_opcode);
        if (n < 0)
            return n;
    }

    n = xed_modrm_scanner(code, &rde, n, &disp_width, &has_sib);
    if (n < 0)
        return n;

    if (has_sib) {
        n = xed_sib_scanner(code, &rde, n, &disp_width);
        if (n < 0)
            return n;
    }

    n = xed_disp_scanner(code, &rde, n, disp_width, &disp);
    if (n < 0)
        return n;

    return xed_imm_scanner(code, &rde, n, imm_width, &uimm0);
}
#endif /* x86_64 */
// if an illegal instruction is encountered
// then we simply step to the next instruction
//
// SIGILL handler (SA_SIGINFO style). `vctx` is the ucontext_t of the
// interrupted thread; its program counter is advanced past the faulting
// instruction so execution resumes at the next one.
//
// On x86-64 the instruction length comes from x86ild(). If the bytes cannot
// be decoded, the default SIGILL disposition is restored and the handler
// returns without touching RIP, so the instruction faults again and the
// process terminates normally. Previously the -1 error value was added to
// RIP, moving it one byte backwards into the preceding instruction.
void onsigill(int sig, siginfo_t *si, void *vctx) {
    ucontext_t *ctx = vctx;
    (void)sig;
    (void)si;
#ifdef __x86_64__
    int len = x86ild((const void *)ctx->uc_mcontext.gregs[REG_RIP]);
    if (len <= 0) {
        signal(SIGILL, SIG_DFL);  // async-signal-safe per POSIX
        return;
    }
    ctx->uc_mcontext.gregs[REG_RIP] += len;
#else
    // every instruction is treated as four bytes wide on this branch
    ctx->uc_mcontext.pc += 4;
#endif
}
// generate code that uses weird legacy amd bulldozer isa
//
// Computes A[i] = A[i] * B[i] + C[i] for every i in [0, n), writing the
// result back into A.
//
// The attributes matter more than the loop: __target__("fma4") allows the
// compiler to use FMA4 instructions in this one function, "-ffast-math" is
// applied to it via __optimize__, and __noinline__ keeps it out of line.
// NOTE(review): presumably the point is that a CPU without FMA4 raises
// SIGILL here, exercising the handler installed by continue_on_sigill();
// whether such instructions are actually emitted depends on the compiler and
// optimization level.
__attribute__((__noinline__, __target__("fma4"), __optimize__("-ffast-math"))) //
void vecfma(float *A, const float *B, const float *C, int n) {
for (int i = 0; i < n; ++i)
A[i] = A[i] * B[i] + C[i];
}
void continue_on_sigill(void) {
struct sigaction sa;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = onsigill;
sigaction(SIGILL, &sa, 0);
}
// Operand arrays for the demo: main() fills all three and then calls
// vecfma(A, B, C, 8), which overwrites A with A * B + C.
float A[8];
float B[8];
float C[8];
// Demo driver: installs the SIGILL-skipping handler, fills the operand
// arrays (A[i] = i, B[i] = 2i, C[i] = 4i), runs vecfma() over them and
// prints the eight resulting values of A on one line.
int main(int argc, char *argv[]) {
    int i;

    continue_on_sigill();

    for (i = 0; i < 8; ++i) {
        A[i] = i;
        B[i] = i * 2;
        C[i] = i * 4;
    }

    vecfma(A, B, C, 8);

    for (i = 0; i < 8; ++i)
        printf("%10g", A[i]);
    printf("\n");
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment