Created
May 1, 2024 06:48
-
-
Save jart/36fd54fa858f05cefed655fb93374cad to your computer and use it in GitHub Desktop.
An example program, written in C/C++, that skips over invalid amd64 / arm64 opcodes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <ucontext.h>
#ifdef __x86_64__ | |
// Constants and bit-field accessors for the 64-bit "rde" word in which the
// decoder below accumulates what it has learned about one instruction.
// Masks and shift counts are written in octal. Every accessor takes the
// rde VALUE (callers pass `*rde`), not a pointer.

// has_modrm_2d[] value: a ModR/M byte is present, but the decoder must not
// use it to look up SIB / displacement information.
#define XED_ILD_HASMODRM_IGNORE_MOD 2

// machine modes
#define XED_MODE_REAL 0
#define XED_MODE_LEGACY 1
#define XED_MODE_LONG 2

// opcode maps
#define XED_ILD_MAP0 0 // 8086+ ...
#define XED_ILD_MAP1 1 // 286+ 0x0F,...
#define XED_ILD_MAP2 2 // Core2+ 0x0F,0x38,...
#define XED_ILD_MAP3 3 // Core2+ 0x0F,0x3A,...
#define XED_ILD_MAP4 4
#define XED_ILD_MAP5 5
#define XED_ILD_MAP6 6

#define XED_MAX_INSTRUCTION_BYTES 15

// The mode is hard-wired to long mode; the argument is deliberately ignored.
#define Mode(x) XED_MODE_LONG

#define kRexbRmMask 000000003600
#define RexbRm(x) (((x) & kRexbRmMask) >> 007)
#define kRexrRegMask 000000000017
#define RexrReg(x) (((x) & kRexrRegMask) >> 000)
#define kOplengthMask 00007400000000000000000
#define Oplength(x) (((x) & kOplengthMask) >> 065)
#define kRegRexbSrmMask 00000170000
#define RexbSrm(x) (((x) & kRegRexbSrmMask) >> 014)
#define Rex(x) (((x) & 000000000020) >> 004)
#define Osz(x) (((x) & 000000000040) >> 005)
#define Asz(x) (((x) & 000010000000) >> 025)
#define Srm(x) (((x) & 000000070000) >> 014)
#define Rexr(x) (((x) & 000000000010) >> 003)
#define Rexw(x) (((x) & 000000000100) >> 006)
#define Rexx(x) (((x) & 000000400000) >> 021)
#define Rexb(x) (((x) & 000000002000) >> 012)
#define Sego(x) (((x) & 000007000000) >> 022)
#define Ymm(x) (((x) & 010000000000) >> 036)
#define RegLog2(x) (((x) & 006000000000) >> 034)
#define ModrmRm(x) (((x) & 000000001600) >> 007)
#define ModrmReg(x) (((x) & 000000000007) >> 000)
#define ModrmSrm(x) (((x) & 000000070000) >> 014)
#define ModrmMod(x) (((x) & 000060000000) >> 026)
#define RexRexr(x) (((x) & 000000000037) >> 000)
#define RexRexb(x) (((x) & 000000007600) >> 007)
#define RexRexbSrm(x) (((x) & 000000370000) >> 014)
// reassembles the raw ModR/M byte from its three stored fields
#define Modrm(x) (ModrmMod(x) << 6 | ModrmReg(x) << 3 | ModrmRm(x))
#define SibBase(x) (((x) & 00000000000340000000000) >> 040)
#define SibIndex(x) (((x) & 00000000003400000000000) >> 043)
#define SibScale(x) (((x) & 00000000014000000000000) >> 046)
#define Opcode(x) (((x) & 00000007760000000000000) >> 050)
#define Opmap(x) (((x) & 00000070000000000000000) >> 060)
// opcode map and opcode byte together, i.e. Opmap(x) << 8 | Opcode(x)
#define Mopcode(x) (((x) & 00000077760000000000000) >> 050)
#define Rep(x) (((x) & 00000300000000000000000) >> 063)
#define WordLog2(x) (((x) & 00030000000000000000000) >> 071)
#define Vreg(x) (((x) & 01700000000000000000000) >> 074)
#define Bite(x) (~ModrmSrm(x) & 1)
#define RexbBase(x) (Rexb(x) << 3 | SibBase(x))
#define IsByteOp(x) (~Srm(x) & 1)
#define SibExists(x) (ModrmRm(x) == 4)
#define IsModrmRegister(x) (ModrmMod(x) == 3)
#define SibHasIndex(x) (SibIndex(x) != 4 || Rexx(x))
#define SibHasBase(x) (SibBase(x) != 5 || ModrmMod(x))
#define SibIsAbsolute(x) (!SibHasBase(x) && !SibHasIndex(x))
// Effective addressing mode, looked up the same way xed_modrm_scanner()
// does. Expands to a reference to the kXed table, so it can only be used
// after that table has been defined.
#ifndef Eamode
#define Eamode(x) (kXed.eamode[Asz(x)][Mode(x)])
#endif
#define IsRipRelative(x) (Eamode(x) && ModrmRm(x) == 5 && !ModrmMod(x))
// Bitmap over all 256 byte values, 32 per word: bit (b & 31) of word
// (b >> 5) is set when byte b may appear in the prefix run that
// xed_prefix_scanner() consumes.
static const unsigned xed_prefix_table_bit[8] = {
    0x00000000, // 0x00-0x1F: none
    0x40404040, // 0x20-0x3F: 0x26, 0x2E, 0x36, 0x3E
    0x0000ffff, // 0x40-0x5F: 0x40-0x4F
    0x000000f0, // 0x60-0x7F: 0x64, 0x65, 0x66, 0x67
    0x00000000, // 0x80-0x9F: none
    0x00000000, // 0xA0-0xBF: none
    0x00000000, // 0xC0-0xDF: none
    0x000d0000, // 0xE0-0xFF: 0xF0, 0xF2, 0xF3
};
static const struct XedDenseMagnums { | |
unsigned char eamode[2][3]; | |
unsigned char has_sib_table[3][4][8]; | |
unsigned char has_disp_regular[3][4][8]; | |
unsigned char imm_bits_2d[2][256]; | |
unsigned char has_modrm_2d[XED_ILD_MAP2][256]; | |
unsigned char disp_bits_2d[XED_ILD_MAP2][256]; | |
unsigned char BRDISPz_BRDISP_WIDTH[4]; | |
unsigned char MEMDISPv_DISP_WIDTH[4]; | |
unsigned char SIMMz_IMM_WIDTH[4]; | |
unsigned char UIMMv_IMM_WIDTH[4]; | |
unsigned char ASZ_NONTERM_EASZ[2][3]; | |
unsigned char OSZ_NONTERM_DF64_EOSZ[2][2][3]; | |
unsigned char OSZ_NONTERM_EOSZ[2][2][3]; | |
} kXed = { | |
.eamode = {{XED_MODE_REAL, XED_MODE_LEGACY, XED_MODE_LONG}, | |
{XED_MODE_LEGACY, XED_MODE_REAL, XED_MODE_LEGACY}}, | |
.has_sib_table = {{{0, 0, 0, 0, 0, 0, 0, 0}, | |
{0, 0, 0, 0, 0, 0, 0, 0}, | |
{0, 0, 0, 0, 0, 0, 0, 0}, | |
{0, 0, 0, 0, 0, 0, 0, 0}}, | |
{{0, 0, 0, 0, 1, 0, 0, 0}, | |
{0, 0, 0, 0, 1, 0, 0, 0}, | |
{0, 0, 0, 0, 1, 0, 0, 0}, | |
{0, 0, 0, 0, 0, 0, 0, 0}}, | |
{{0, 0, 0, 0, 1, 0, 0, 0}, | |
{0, 0, 0, 0, 1, 0, 0, 0}, | |
{0, 0, 0, 0, 1, 0, 0, 0}, | |
{0, 0, 0, 0, 0, 0, 0, 0}}}, | |
.has_disp_regular = {{{0, 0, 0, 0, 0, 0, 2, 0}, | |
{1, 1, 1, 1, 1, 1, 1, 1}, | |
{2, 2, 2, 2, 2, 2, 2, 2}, | |
{0, 0, 0, 0, 0, 0, 0, 0}}, | |
{{0, 0, 0, 0, 0, 4, 0, 0}, | |
{1, 1, 1, 1, 1, 1, 1, 1}, | |
{4, 4, 4, 4, 4, 4, 4, 4}, | |
{0, 0, 0, 0, 0, 0, 0, 0}}, | |
{{0, 0, 0, 0, 0, 4, 0, 0}, | |
{1, 1, 1, 1, 1, 1, 1, 1}, | |
{4, 4, 4, 4, 4, 4, 4, 4}, | |
{0, 0, 0, 0, 0, 0, 0, 0}}}, | |
.imm_bits_2d = | |
{{1, 1, 1, 1, 5, 7, 1, 1, 1, 1, 1, 1, 9, 7, 1, 0, 1, 1, 1, 1, 5, 7, 1, 1, 1, 1, | |
1, 1, 5, 7, 1, 1, 1, 1, 1, 1, 5, 7, 0, 1, 1, 1, 1, 1, 5, 7, 0, 1, 1, 1, 1, 1, | |
9, 7, 0, 1, 1, 1, 1, 1, 5, 7, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, | |
6, 7, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 7, | |
5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 7, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, | |
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 8, 1, 1, 1, 9, 2, 11, 1, 8, 1, 1, 9, 1, 1, | |
1, 1, 1, 1, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 1, 1, | |
8, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1}, | |
{1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, | |
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, | |
1, 1, 1, 1, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 0, 0, 1, 1, 1, 1, 9, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 9, 1, 9, 9, 9, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, | |
.has_modrm_2d = {{1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 3, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, | |
1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 3, 0, 1, 1, 1, 1, 0, 0, 3, 0, 1, 1, 1, 1, | |
0, 0, 3, 0, 1, 1, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 3, 3, 3, | |
0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, | |
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 3, 0, 3, 3, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1}, | |
{1, 1, 1, 1, 3, 0, 0, 0, 0, 0, 3, 0, 3, 1, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, | |
0, 0, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 3, 3, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, | |
.disp_bits_2d = {{4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, | |
4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, | |
4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, | |
4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 4, 3, 3, | |
2, 1, 4, 4, 4, 4, 0, 4, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}, | |
{4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 0, 4, 0, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, | |
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}}, | |
.BRDISPz_BRDISP_WIDTH = {0, 16, 32, 32}, | |
.MEMDISPv_DISP_WIDTH = {0, 16, 32, 64}, | |
.SIMMz_IMM_WIDTH = {0x00, 0x10, 0x20, 0x20}, | |
.UIMMv_IMM_WIDTH = {0x00, 0x10, 0x20, 0x40}, | |
.ASZ_NONTERM_EASZ = {{1, 2, 3}, {2, 1, 2}}, | |
.OSZ_NONTERM_DF64_EOSZ = {{{1, 2, 3}, {2, 1, 1}}, {{1, 2, 3}, {2, 1, 3}}}, | |
.OSZ_NONTERM_EOSZ = {{{1, 2, 2}, {2, 1, 1}}, {{1, 2, 3}, {2, 1, 3}}}, | |
}; | |
// Converts a width in bits to whole bytes, discarding any remainder.
static unsigned long xed_bits2bytes(unsigned bits) {
    return bits >> 3;
}
// Converts a byte count to the equivalent width in bits.
static unsigned long xed_bytes2bits(unsigned bytes) {
    return bytes << 3;
}
static unsigned char xed_get_prefix_table_bit(unsigned char a) { | |
return (xed_prefix_table_bit[a >> 5] >> (a & 0x1F)) & 1; | |
} | |
// ORs `mopcode` (opcode map << 8 | opcode byte) into *rde starting at
// bit 40. Bits already set in that field are kept, not cleared.
static void xed_set_mopcode(unsigned long long *rde, unsigned long long mopcode) {
    *rde |= mopcode << 40;
}
static void xed_set_simmz_imm_width_eosz(const unsigned long long *rde, | |
const unsigned char eosz[2][2][3], int *imm_width, | |
unsigned char *imm_signed) { | |
*imm_width = kXed.SIMMz_IMM_WIDTH[eosz[Rexw(*rde)][Osz(*rde)][Mode(*rde)]]; | |
*imm_signed = 1; | |
} | |
static int xed_set_imm_bytes(unsigned long long *rde, int *imm_width, unsigned char *imm_signed) { | |
if (!*imm_width && Opmap(*rde) < XED_ILD_MAP2) { | |
switch (kXed.imm_bits_2d[Opmap(*rde)][Opcode(*rde)]) { | |
case 0: | |
return -1; | |
case 1: | |
*imm_width = 0; | |
return 0; | |
case 2: | |
switch (ModrmReg(*rde)) { | |
case 0: | |
xed_set_simmz_imm_width_eosz(rde, kXed.OSZ_NONTERM_EOSZ, imm_width, imm_signed); | |
return 0; | |
case 7: | |
*imm_width = 0; | |
return 0; | |
default: | |
return 0; | |
} | |
case 3: | |
if (ModrmReg(*rde) <= 1) { | |
*imm_width = 8; | |
*imm_signed = 1; | |
} else if (2 <= ModrmReg(*rde) && ModrmReg(*rde) <= 7) { | |
*imm_width = 0; | |
} | |
return 0; | |
case 4: | |
if (ModrmReg(*rde) <= 1) { | |
xed_set_simmz_imm_width_eosz(rde, kXed.OSZ_NONTERM_EOSZ, imm_width, imm_signed); | |
} else if (2 <= ModrmReg(*rde) && ModrmReg(*rde) <= 7) { | |
*imm_width = 0; | |
} | |
return 0; | |
case 5: | |
*imm_width = 8; | |
*imm_signed = 1; | |
return 0; | |
case 6: | |
xed_set_simmz_imm_width_eosz(rde, kXed.OSZ_NONTERM_DF64_EOSZ, imm_width, imm_signed); | |
return 0; | |
case 7: | |
xed_set_simmz_imm_width_eosz(rde, kXed.OSZ_NONTERM_EOSZ, imm_width, imm_signed); | |
return 0; | |
case 8: | |
*imm_width = 16; | |
return 0; | |
case 9: | |
*imm_width = 8; | |
return 0; | |
case 10: | |
*imm_width = | |
kXed.UIMMv_IMM_WIDTH[kXed.OSZ_NONTERM_EOSZ[Rexw(*rde)][Osz(*rde)][Mode(*rde)]]; | |
return 0; | |
case 11: | |
// actually 2 bytes for uimm0 & 1 byte for uimm1 | |
*imm_width = xed_bytes2bits(3); | |
return 0; | |
case 12: | |
if (Osz(*rde) || Rep(*rde) == 2) { | |
*imm_width = xed_bytes2bits(1); | |
} | |
return 0; | |
default: | |
__builtin_unreachable(); | |
} | |
} else { | |
return 0; | |
} | |
} | |
static int xed_is_bound_instruction(const unsigned char *bytes, const unsigned long long *rde, | |
int length) { | |
return Mode(*rde) != XED_MODE_LONG && length + 1 < XED_MAX_INSTRUCTION_BYTES && | |
(bytes[length + 1] & 0xC0) != 0xC0; | |
} | |
static int xed_prefix_scanner(const unsigned char *bytes, unsigned long long *out_rde) { | |
int length; | |
unsigned rde; | |
unsigned char b, rep, max_bytes, islong; | |
unsigned char asz, osz, rex, rexw, rexr, rexx, rexb; | |
rex = 0; | |
rep = 0; | |
rde = 0; | |
length = 0; | |
max_bytes = XED_MAX_INSTRUCTION_BYTES; | |
islong = Mode(rde) == XED_MODE_LONG; | |
while (length < max_bytes) { | |
b = bytes[length]; | |
if (xed_get_prefix_table_bit(b) == 0) | |
goto out; | |
switch (b) { | |
case 0x66: // osz | |
rex = 0; | |
osz = 1; | |
rde |= osz << 5; | |
break; | |
case 0x67: // asz | |
rex = 0; | |
asz = 1; | |
rde |= asz << 21; | |
break; | |
case 0x2E: // cs | |
if (1 || !islong) { | |
rde &= 037770777777; | |
rde |= 000002000000; | |
} | |
rex = 0; | |
break; | |
case 0x3E: // ds | |
if (1 || !islong) { | |
rde &= 037770777777; | |
rde |= 000004000000; | |
} | |
rex = 0; | |
break; | |
case 0x26: // es | |
if (1 || !islong) { | |
rde &= 037770777777; | |
rde |= 000001000000; | |
} | |
rex = 0; | |
break; | |
case 0x36: // ss | |
if (1 || !islong) { | |
rde &= 037770777777; | |
rde |= 000003000000; | |
} | |
rex = 0; | |
break; | |
case 0x64: // fs | |
rde &= 037770777777; | |
rde |= 000005000000; | |
rex = 0; | |
break; | |
case 0x65: // gs | |
rde &= 037770777777; | |
rde |= 000006000000; | |
rex = 0; | |
break; | |
case 0xF0: // lock | |
rde |= 020000000000; | |
rex = 0; | |
break; | |
case 0xF2: // rep | |
case 0xF3: | |
rep = b & 3; | |
rex = 0; | |
break; | |
default: | |
if (islong && (b & 0xf0) == 0x40) { | |
rex = b; | |
break; | |
} else { | |
goto out; | |
} | |
} | |
length++; | |
} | |
out: | |
if (rex) { | |
rexw = (rex >> 3) & 1; | |
rexr = (rex >> 2) & 1; | |
rexx = (rex >> 1) & 1; | |
rexb = rex & 1; | |
rex = 1; | |
rde |= rexx << 17 | rex << 16 | rexb << 15 | rex << 11 | rexb << 10 | rexw << 6 | rex << 4 | | |
rexr << 3; | |
} | |
*out_rde = (unsigned long long)rep << 51 | rde; | |
if (length < max_bytes) { | |
return length; | |
} else { | |
return -1; | |
} | |
} | |
// Applies the 2-bit "implied prefix" field of a VEX prefix to *rde.
//
//   0  no prefix: *rde is unchanged
//   1  operand-size override: sets bit 5
//   2  replaces the 2-bit field at bits 51-52 with 3
//   3  replaces the 2-bit field at bits 51-52 with 2
//
// Any other value is undefined behaviour; callers pass `byte & 3`.
static void xed_set_vex_prefix(unsigned long long *rde, unsigned prefix) {
    switch (prefix) {
    case 0:
        break;
    case 1: // osz
        *rde |= 1 << 5;
        break;
    case 2: // rep3
    case 3: // rep2
        *rde &= ~((unsigned long long)3 << 51);
        *rde |= (unsigned long long)(prefix ^ 1) << 51;
        break;
    default:
        __builtin_unreachable();
    }
}
static int xed_vex_opcode_scanner(const unsigned char *bytes, unsigned long long *rde, int length, | |
int map, int *pos_opcode) { | |
xed_set_mopcode(rde, map << 8 | bytes[length]); | |
*pos_opcode = length; | |
if (Mode(*rde) == XED_MODE_LONG && Rex(*rde)) | |
return -1; | |
if (Mode(*rde) == XED_MODE_REAL) | |
return -1; | |
return length + 1; | |
} | |
static int xed_vex_c4_scanner(const unsigned char *bytes, unsigned long long *rde, int length, | |
int *imm_width, int *vexvalid, int *pos_opcode) { | |
unsigned b1, b2; | |
int map, rexr, rexx, rexb, rexw, ymm, vrex, vexdest210; | |
if (xed_is_bound_instruction(bytes, rde, length)) | |
return length; | |
++length; | |
if (length + 2 < XED_MAX_INSTRUCTION_BYTES) { | |
// map: 5-bit | |
// rex.b: 1-bit (expands r/m or srm register operand) | |
// rex.x: 1-bit (expands sib register operands) | |
// rex.r: 1-bit (expands reg register operand) | |
b1 = bytes[length]; | |
rexr = !(b1 & 128); | |
rexx = !(b1 & 64); | |
rexb = (Mode(x->rde) == XED_MODE_LONG) & !(b1 & 32); | |
// prefix: 2-bit → {none, osz, rep3, rep2} | |
// vector_length: 1-bit → {xmm, ymm} aka VEX.L | |
// vexdest210: 3-bit (second reg operand, inverted) | |
// vrex: 1-bit a.k.a. vexdest3 | |
// rex.w: 1-bit (for 64-bit registers) aka VEX.W1 | |
b2 = bytes[length + 1]; | |
rexw = !!(b2 & 128); | |
vrex = !(b2 & 64); | |
vexdest210 = (~b2 >> 3) & 7; | |
ymm = !!(b2 & 4); | |
xed_set_vex_prefix(rde, b2 & 3); | |
map = b1 & 31; | |
if ((b1 & 3) == XED_ILD_MAP3) { | |
*imm_width = xed_bytes2bits(1); | |
} | |
*rde |= (unsigned long long)vrex << 63 | rexx << 17 | rexb << 15 | rexb << 10 | rexw << 6 | | |
rexr << 3; | |
*rde |= ymm << 30; | |
*rde |= (unsigned long long)vexdest210 << 60; | |
*vexvalid = 1; | |
return xed_vex_opcode_scanner(bytes, rde, length + 2, map, pos_opcode); | |
} else { | |
return -1; | |
} | |
} | |
static int xed_vex_c5_scanner(const unsigned char *bytes, unsigned long long *rde, int length, | |
int *vexvalid, int *pos_opcode) { | |
unsigned b; | |
int rexr, ymm, vrex, vexdest210; | |
if (xed_is_bound_instruction(bytes, rde, length)) | |
return length; | |
++length; | |
if (length + 1 < XED_MAX_INSTRUCTION_BYTES) { | |
// prefix: 2-bit → {none, osz, rep3, rep2} | |
// vector_length: 1-bit → {xmm, ymm} | |
// vexdest210: 3-bit | |
// vrex: 1-bit | |
// rex.r: 1-bit | |
b = bytes[length]; | |
rexr = !(b & 128); | |
vrex = !!(b & 64); | |
vexdest210 = (~b >> 3) & 7; | |
ymm = (b >> 2) & 1; | |
xed_set_vex_prefix(rde, b & 3); | |
*rde |= (unsigned long long)vrex << 63 | rexr << 3; | |
*rde |= ymm << 30; | |
*rde |= (unsigned long long)vexdest210 << 60; | |
*vexvalid = 1; | |
return xed_vex_opcode_scanner(bytes, rde, length + 1, XED_ILD_MAP1, pos_opcode); | |
} else { | |
return -1; | |
} | |
} | |
static int xed_vex_scanner(const unsigned char *bytes, unsigned long long *rde, int length, | |
int *imm_width, int *vexvalid, int *pos_opcode) { | |
if (Mode(*rde) != XED_MODE_LONG) | |
return 0; | |
switch (bytes[length]) { | |
case 0xC5: | |
return xed_vex_c5_scanner(bytes, rde, length, vexvalid, pos_opcode); | |
case 0xC4: | |
return xed_vex_c4_scanner(bytes, rde, length, imm_width, vexvalid, pos_opcode); | |
default: | |
return length; | |
} | |
} | |
static int xed_get_next_as_opcode(const unsigned char *bytes, unsigned long long *rde, int length, | |
int map) { | |
if (length < XED_MAX_INSTRUCTION_BYTES) { | |
xed_set_mopcode(rde, map << 8 | bytes[length]); | |
return length + 1; | |
} else { | |
return -1; | |
} | |
} | |
static int xed_opcode_scanner(const unsigned char *bytes, unsigned long long *rde, int length, | |
int *imm_width, int *pos_opcode) { | |
unsigned char b; | |
if ((b = bytes[length]) != 0x0F) { | |
xed_set_mopcode(rde, XED_ILD_MAP0 << 8 | b); | |
*pos_opcode = length; | |
return length + 1; | |
} else { | |
*pos_opcode = ++length; | |
if (length < XED_MAX_INSTRUCTION_BYTES) { | |
switch ((b = bytes[length])) { | |
case 0x38: | |
return xed_get_next_as_opcode(bytes, rde, length + 1, XED_ILD_MAP2); | |
case 0x3A: | |
*imm_width = xed_bytes2bits(1); | |
return xed_get_next_as_opcode(bytes, rde, length + 1, XED_ILD_MAP3); | |
case 0x3B: | |
case 0x39: | |
case 0x3C: | |
case 0x3D: | |
case 0x3E: | |
case 0x3F: | |
case 0x0F: | |
return -1; | |
default: | |
xed_set_mopcode(rde, XED_ILD_MAP1 << 8 | b); | |
return length + 1; | |
} | |
} else { | |
return -1; | |
} | |
} | |
} | |
// Reads a little-endian integer from `p`.
//
// `n` is the size in bytes and is rounded DOWN to a power of two, so the
// read is 1, 2, 4 or 8 bytes wide (n == 3 reads 2 bytes). `n` must be in
// the range 1..15; other values are undefined behaviour. `s` is 1 to
// sign-extend the value or 0 to zero-extend it; for 8-byte reads it makes
// no difference.
static long long xed_read_number(const unsigned char *p, unsigned n, unsigned s) {
    // __builtin_clz(n) ^ 31 is floor(log2(n)); `s` goes in bit 2
    switch (s << 2 | (__builtin_clz(n) ^ 31)) {
    case 0: // 1 byte, unsigned
        return *p;
    case 4: // 1 byte, signed
        return (signed char)*p;
    case 1: // 2 bytes, unsigned
        return p[0] | p[1] << 8;
    case 5: // 2 bytes, signed
        return (short)(p[0] | p[1] << 8);
    case 2: // 4 bytes, unsigned
        return p[0] | p[1] << 8 | p[2] << 16 | (unsigned)p[3] << 24;
    case 6: // 4 bytes, signed
        return (int)(p[0] | p[1] << 8 | p[2] << 16 | (unsigned)p[3] << 24);
    case 3: // 8 bytes
    case 7:
        return (((unsigned long long)p[0] << 000) | ((unsigned long long)p[1] << 010) |
                ((unsigned long long)p[2] << 020) | ((unsigned long long)p[3] << 030) |
                ((unsigned long long)p[4] << 040) | ((unsigned long long)p[5] << 050) |
                ((unsigned long long)p[6] << 060) | ((unsigned long long)p[7] << 070));
    default:
        __builtin_unreachable();
    }
}
static int xed_has_modrm(const unsigned long long *rde) { | |
if (Opmap(*rde) < sizeof(kXed.has_modrm_2d) / sizeof(*kXed.has_modrm_2d)) { | |
return kXed.has_modrm_2d[Opmap(*rde)][Opcode(*rde)]; | |
} else { | |
return 1; | |
} | |
} | |
static int xed_modrm_scanner(const unsigned char *bytes, unsigned long long *rde, int length, | |
int *disp_width, int *has_sib) { | |
int has_modrm = xed_has_modrm(rde); | |
unsigned char b, rm, reg, mod, eamode; | |
if (has_modrm) { | |
if (length < XED_MAX_INSTRUCTION_BYTES) { | |
b = bytes[length++]; | |
rm = b & 0007; | |
reg = (b & 0070) >> 3; | |
mod = (b & 0300) >> 6; | |
*rde &= ~1; | |
*rde |= mod << 22 | rm << 7 | reg; | |
if (has_modrm != XED_ILD_HASMODRM_IGNORE_MOD) { | |
eamode = kXed.eamode[Asz(*rde)][Mode(*rde)]; | |
*disp_width = xed_bytes2bits(kXed.has_disp_regular[eamode][mod][rm]); | |
*has_sib = kXed.has_sib_table[eamode][mod][rm]; | |
} | |
return length; | |
} else { | |
return -1; | |
} | |
} else { | |
return length; | |
} | |
} | |
static int xed_sib_scanner(const unsigned char *bytes, unsigned long long *rde, int length, | |
int *disp_width) { | |
unsigned char b; | |
if (length < XED_MAX_INSTRUCTION_BYTES) { | |
b = bytes[length++]; | |
*rde |= (unsigned long long)b << 32; // set sib byte | |
if ((b & 7) == 5) { | |
if (!ModrmMod(*rde)) { | |
*disp_width = xed_bytes2bits(4); | |
} | |
} | |
return length; | |
} else { | |
return -1; | |
} | |
} | |
static unsigned char | |
XED_LF_BRDISPz_BRDISP_WIDTH_OSZ_NONTERM_EOSZ_l2(const unsigned long long *rde) { | |
return kXed.BRDISPz_BRDISP_WIDTH[kXed.OSZ_NONTERM_EOSZ[Rexw(*rde)][Osz(*rde)][Mode(*rde)]]; | |
} | |
static int xed_disp_scanner(const unsigned char *bytes, unsigned long long *rde, int length, | |
int disp_width, long long *disp) { | |
unsigned char disp_bytes; | |
unsigned char disp_unsigned = 0; | |
if (Opmap(*rde) < XED_ILD_MAP2) { | |
switch (kXed.disp_bits_2d[Opmap(*rde)][Opcode(*rde)]) { | |
case 0: | |
return -1; | |
case 1: | |
disp_width = 8; | |
break; | |
case 2: | |
disp_width = XED_LF_BRDISPz_BRDISP_WIDTH_OSZ_NONTERM_EOSZ_l2(rde); | |
disp_unsigned = 1; | |
break; | |
case 3: | |
if (Mode(*rde) <= XED_MODE_LEGACY) { | |
disp_width = XED_LF_BRDISPz_BRDISP_WIDTH_OSZ_NONTERM_EOSZ_l2(rde); | |
} else if (Mode(*rde) == XED_MODE_LONG) { | |
disp_width = 0x20; | |
} | |
break; | |
case 4: | |
break; | |
case 5: | |
disp_width = kXed.MEMDISPv_DISP_WIDTH[kXed.ASZ_NONTERM_EASZ[Asz(*rde)][Mode(*rde)]]; | |
disp_unsigned = 1; | |
break; | |
case 6: | |
switch (ModrmReg(*rde)) { | |
case 0: | |
break; | |
case 7: | |
disp_width = XED_LF_BRDISPz_BRDISP_WIDTH_OSZ_NONTERM_EOSZ_l2(rde); | |
disp_unsigned = 1; | |
break; | |
default: | |
break; | |
} | |
break; | |
default: | |
__builtin_unreachable(); | |
} | |
} | |
disp_bytes = xed_bits2bytes(disp_width); | |
if (!disp_bytes) { | |
return length; | |
} else if (length + disp_bytes <= XED_MAX_INSTRUCTION_BYTES) { | |
*disp = xed_read_number(bytes + length, disp_bytes, !disp_unsigned); | |
return length + disp_bytes; | |
} else { | |
return -1; | |
} | |
} | |
static int xed_imm_scanner(const unsigned char *bytes, unsigned long long *rde, int length, | |
int imm_width, unsigned long long *uimm0) { | |
unsigned char imm_bytes; | |
unsigned char imm_signed = 0; | |
if (xed_set_imm_bytes(rde, &imm_width, &imm_signed) == -1) | |
return -1; | |
imm_bytes = xed_bits2bytes(imm_width); | |
if (!imm_bytes) { | |
return length; | |
} else if (length + imm_bytes <= XED_MAX_INSTRUCTION_BYTES) { | |
*uimm0 = xed_read_number(bytes + length, imm_bytes, imm_signed); | |
return length + imm_bytes; | |
} else { | |
return -1; | |
} | |
} | |
// Computes the length of the x86-64 instruction that starts at `bytes`.
//
// The scanners run in encoding order: prefixes, VEX prefix, opcode (only
// when there was no VEX prefix), ModR/M, SIB, displacement, immediate.
// Each one returns the offset of the next undecoded byte, so the last
// result is the instruction length.
//
// Up to XED_MAX_INSTRUCTION_BYTES bytes at `bytes` may be read.
// Returns the length in bytes, or -1 if any stage fails.
__attribute__((__flatten__)) int x86ild(const void *bytes) {
    int n;
    int pos_opcode;
    long long disp;
    int has_sib = 0;
    int vexvalid = 0;
    int imm_width = 0;
    int disp_width = 0;
    unsigned long long rde;
    unsigned long long uimm0;
    if ((n = xed_prefix_scanner(bytes, &rde)) < 0)
        return n;
    if ((n = xed_vex_scanner(bytes, &rde, n, &imm_width, &vexvalid, &pos_opcode)) < 0)
        return n;
    if (!vexvalid && (n = xed_opcode_scanner(bytes, &rde, n, &imm_width, &pos_opcode)) < 0)
        return n;
    if ((n = xed_modrm_scanner(bytes, &rde, n, &disp_width, &has_sib)) < 0)
        return n;
    if (has_sib && (n = xed_sib_scanner(bytes, &rde, n, &disp_width)) < 0)
        return n;
    if ((n = xed_disp_scanner(bytes, &rde, n, disp_width, &disp)) < 0)
        return n;
    return xed_imm_scanner(bytes, &rde, n, imm_width, &uimm0);
}
#endif /* x86_64 */ | |
// SIGILL handler: if an illegal instruction is encountered then we simply
// step to the next instruction.
//
// On x86-64 the instruction at the saved RIP is measured with x86ild() and
// RIP is advanced by that length. If the length cannot be determined, the
// default SIGILL action is restored and the handler returns without moving
// RIP, so the instruction faults again and the default action is taken.
// On other targets the saved program counter is advanced by 4 bytes.
//
// `vctx` is the ucontext_t pointer passed to SA_SIGINFO handlers; `sig`
// and `si` are unused.
void onsigill(int sig, siginfo_t *si, void *vctx) {
    ucontext_t *ctx = vctx;
    (void)sig;
    (void)si;
#ifdef __x86_64__
    int len = x86ild((const void *)ctx->uc_mcontext.gregs[REG_RIP]);
    if (len <= 0) {
        // never add a failure code to the program counter
        signal(SIGILL, SIG_DFL);
        return;
    }
    ctx->uc_mcontext.gregs[REG_RIP] += len;
#else
    ctx->uc_mcontext.pc += 4;
#endif
}
// Computes A[i] = A[i] * B[i] + C[i] for the first n elements.
//
// On x86-64 the function is built for the FMA4 extension so that it
// generates code that uses weird legacy amd bulldozer isa. The x86-only
// target attribute is left off on other architectures so the file still
// builds there.
#ifdef __x86_64__
__attribute__((__noinline__, __target__("fma4"), __optimize__("-ffast-math"))) //
#else
__attribute__((__noinline__, __optimize__("-ffast-math"))) //
#endif
void vecfma(float *A, const float *B, const float *C, int n) {
    for (int i = 0; i < n; ++i)
        A[i] = A[i] * B[i] + C[i];
}
void continue_on_sigill(void) { | |
struct sigaction sa; | |
sigemptyset(&sa.sa_mask); | |
sa.sa_flags = SA_SIGINFO; | |
sa.sa_sigaction = onsigill; | |
sigaction(SIGILL, &sa, 0); | |
} | |
// Operand vectors for the demo in main(): A is both an input and the
// destination of vecfma(); B and C are inputs only.
float A[8];
float B[8];
float C[8];
int main(int argc, char *argv[]) { | |
continue_on_sigill(); | |
for (int i = 0; i < 8; ++i) | |
A[i] = i; | |
for (int i = 0; i < 8; ++i) | |
B[i] = i * 2; | |
for (int i = 0; i < 8; ++i) | |
C[i] = i * 4; | |
vecfma(A, B, C, 8); | |
for (int i = 0; i < 8; ++i) | |
printf("%10g", A[i]); | |
printf("\n"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment