DOSBox dynamic recompiler (dynrec) backend for ppc64le. Place risc_ppc64le.h in src/cpu/core_dynrec and apply the diff below, then build with: ./autogen.sh ; ./configure CFLAGS="-O3 -mcpu=power9" CXXFLAGS="-O3 -mcpu=power9" ; make -j24
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Index: configure.ac | |
=================================================================== | |
--- configure.ac (revision 4296) | |
+++ configure.ac (working copy) | |
@@ -292,9 +292,16 @@ | |
c_targetcpu="x86" | |
c_unalignedmemory=yes | |
;; | |
+ powerpc64le*) | |
+ AC_DEFINE(C_TARGETCPU,PPC64LE) | |
+ AC_DEFINE(PAGESIZE,65536,Non-4K page size; currently ppc64le only) | |
+ AC_MSG_RESULT(OpenPOWER) | |
+ c_targetcpu="ppc64le" | |
+ c_unalignedmemory=yes | |
+ ;; | |
powerpc*) | |
AC_DEFINE(C_TARGETCPU,POWERPC) | |
- AC_MSG_RESULT(Power PC) | |
+ AC_MSG_RESULT(PowerPC) | |
c_targetcpu="powerpc" | |
c_unalignedmemory=yes | |
;; | |
@@ -390,7 +397,7 @@ | |
AC_MSG_RESULT([no, using dynamic-x86]) | |
fi | |
else | |
- if test x$c_targetcpu = xarm ; then | |
+ if test x$c_targetcpu = xarm -o x$c_targetcpu = xppc64le ; then | |
AC_DEFINE(C_DYNREC,1) | |
AC_MSG_RESULT(yes) | |
else | |
Index: src/cpu/core_dynrec/Makefile.am | |
=================================================================== | |
--- src/cpu/core_dynrec/Makefile.am (revision 4296) | |
+++ src/cpu/core_dynrec/Makefile.am (working copy) | |
@@ -2,4 +2,5 @@ | |
dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \ | |
risc_armv4le.h risc_armv4le-common.h \ | |
risc_armv4le-o3.h risc_armv4le-thumb.h \ | |
- risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h | |
+ risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h \ | |
+ risc_ppc64le.h | |
Index: src/cpu/core_dynrec/cache.h | |
=================================================================== | |
--- src/cpu/core_dynrec/cache.h (revision 4296) | |
+++ src/cpu/core_dynrec/cache.h (working copy) | |
@@ -553,8 +553,8 @@ | |
static void dyn_return(BlockReturn retcode,bool ret_exception); | |
static void dyn_run_code(void); | |
+static void cache_block_closing(Bit8u* block_start,Bitu block_size); | |
- | |
/* Define temporary pagesize so the MPROTECT case and the regular case share as much code as possible */ | |
#if (C_HAVE_MPROTECT) | |
#define PAGESIZE_TEMP PAGESIZE | |
@@ -614,19 +614,25 @@ | |
} | |
// setup the default blocks for block linkage returns | |
cache.pos=&cache_code_link_blocks[0]; | |
+ core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos; | |
+ // can use up to PAGESIZE_TEMP-64 bytes | |
+ dyn_run_code(); | |
+ cache_block_closing(cache_code_link_blocks, cache.pos-cache_code_link_blocks); | |
+ | |
+ cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-64]; | |
link_blocks[0].cache.start=cache.pos; | |
// link code that returns with a special return code | |
+ // must be less than 32 bytes | |
dyn_return(BR_Link1,false); | |
- cache.pos=&cache_code_link_blocks[32]; | |
+ cache_block_closing(link_blocks[0].cache.start, cache.pos-link_blocks[0].cache.start); | |
+ | |
+ cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-32]; | |
link_blocks[1].cache.start=cache.pos; | |
// link code that returns with a special return code | |
+ // must be less than 32 bytes | |
dyn_return(BR_Link2,false); | |
+ cache_block_closing(link_blocks[1].cache.start, cache.pos-link_blocks[1].cache.start); | |
- cache.pos=&cache_code_link_blocks[64]; | |
- core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos; | |
-// link_blocks[1].cache.start=cache.pos; | |
- dyn_run_code(); | |
- | |
cache.free_pages=0; | |
cache.last_page=0; | |
cache.used_pages=0; | |
Index: src/cpu/core_dynrec.cpp | |
=================================================================== | |
--- src/cpu/core_dynrec.cpp (revision 4296) | |
+++ src/cpu/core_dynrec.cpp (working copy) | |
@@ -139,6 +139,7 @@ | |
#define ARMV4LE 0x04 | |
#define ARMV7LE 0x05 | |
#define ARMV8LE 0x07 | |
+#define PPC64LE 0x08 | |
#if C_TARGETCPU == X86_64 | |
#include "core_dynrec/risc_x64.h" | |
@@ -150,6 +151,8 @@ | |
#include "core_dynrec/risc_armv4le.h" | |
#elif C_TARGETCPU == ARMV8LE | |
#include "core_dynrec/risc_armv8le.h" | |
+#elif C_TARGETCPU == PPC64LE | |
+#include "core_dynrec/risc_ppc64le.h" | |
#endif | |
#include "core_dynrec/decoder.h" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2002-2019 The DOSBox Team | |
* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation; either version 2 of the License, or | |
* (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License along | |
* with this program; if not, write to the Free Software Foundation, Inc., | |
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
*/ | |
#if 0
// Stand-in declarations, disabled by default. They are only needed when
// this header is linted on its own with cpp, outside the DOSBox tree.
typedef char               Bit8s;
typedef unsigned char      Bit8u;
typedef short              Bit16s;
typedef unsigned short     Bit16u;
typedef long               Bit32s;
typedef unsigned long      Bit32u;
typedef long long          Bit64s;
typedef unsigned long long Bit64u;
typedef Bit32s             Bits;
typedef Bit32u             Bitu;

#define cache_addd(x) x
#define INLINE inline

Bit32u Segs[16];
Bit32u cpu_regs[16];

struct cachetype {
	Bit8u *pos;
} cache;

Bit8u *get_CF;
#endif
// Debugging aids.
// DEBUG_ME != 0 makes every generator function print its name to stderr.
#define DEBUG_ME 0
// Prints "ASSERT:" followed by the formatted message and executes a trap
// instruction when x is false.
#define __ASSERT(x,...) \
	{ if(!(x)) { fprintf(stderr, "ASSERT:" __VA_ARGS__); asm("trap\n"); } }

// Configuration defines describing the capabilities of this architecture
// and selecting aspects of the recompiler.

// protect FC_ADDR over function calls if necessary
//#define DRC_PROTECT_ADDR_REG

// try to use non-flags generating functions if possible
#define DRC_FLAGS_INVALIDATION
// try to replace _simple functions by code
#define DRC_FLAGS_INVALIDATION_DCODE

// integer type with the same size as a pointer
#define DRC_PTR_SIZE_IM Bit64u

// calling convention modifiers (none needed here)
#define DRC_FC /* nothing */
#define DRC_CALL_CONV /* nothing */

// FC_REGS_ADDR / FC_SEGS_ADDR hold the addresses of cpu_regs and Segs
#define DRC_USE_REGS_ADDR
#define DRC_USE_SEGS_ADDR
// register mapping | |
// Host register numbering: HOST_Rn has the value n, HOST_NONE follows r31.
enum HostReg {
	// r0..r12: r12 ends the volatile registers and is used for CTR calls
	HOST_R0 = 0, HOST_R1, HOST_R2,  HOST_R3,
	HOST_R4,     HOST_R5, HOST_R6,  HOST_R7,
	HOST_R8,     HOST_R9, HOST_R10, HOST_R11,
	HOST_R12,

	// r13..r25
	HOST_R13, HOST_R14, HOST_R15, HOST_R16,
	HOST_R17, HOST_R18, HOST_R19, HOST_R20,
	HOST_R21, HOST_R22, HOST_R23, HOST_R24,
	HOST_R25,

	// registers with a fixed role in generated code
	HOST_R26,	// generic non-volatile (used for inline adc/sbb)
	HOST_R27,	// points to current CacheBlockDynRec (decode.block)
	HOST_R28,	// points to fpu
	HOST_R29,	// FC_ADDR
	HOST_R30,	// points to Segs
	HOST_R31,	// points to cpu_regs

	HOST_NONE
};
// Registers receiving function parameters, in parameter order (r3..r10).
static const HostReg RegParams[] = {
	HOST_R3,
	HOST_R4,
	HOST_R5,
	HOST_R6,
	HOST_R7,
	HOST_R8,
	HOST_R9,
	HOST_R10
};
#if C_FPU | |
#include "fpu.h" | |
extern FPU_rec fpu; | |
#endif | |
// Register roles used by the recompiler core.

// function return values
#define FC_RETOP HOST_R3

// address calculations; if the ABI does not state that this register is
// preserved across function calls then define DRC_PROTECT_ADDR_REG above
#define FC_ADDR HOST_R29

// pointer to Segs[]
#define FC_SEGS_ADDR HOST_R30

// pointer to cpu_regs[]
#define FC_REGS_ADDR HOST_R31

// first parameter
#define FC_OP1 RegParams[0]

// second parameter
#define FC_OP2 RegParams[1]

// third parameter for _R3 calls (byte accessible)
#define FC_OP3 RegParams[2]

// byte-accessible temporaries
#define FC_TMP_BA1 FC_OP2
#define FC_TMP_BA2 FC_OP1

// temporary register for LEA
#define TEMP_REG_DRC HOST_R10
// Instruction word builders. The op values come right out of the
// PowerISA 3.0 documentation.
//
//   IMM: D-form   (primary opcode, rS/rT, rA, 16-bit immediate)
//   DSF: DS-form  (as IMM, but the displacement must be a multiple of 4;
//                  bb is the 2-bit extended opcode in the low bits)
//   EXT: primary opcode 31 with an extended opcode and Rc bit
//   RLW: 32-bit rotate-and-mask (sh, mb, me)
//   RLD: 64-bit rotate-and-mask; sh and mx are 6-bit values whose top bit
//        is stored separately from the low five bits
//
// The opcode is converted to Bit32u before shifting: opcodes >= 32 would
// otherwise overflow a signed int when shifted left by 26. All macro
// arguments are parenthesized so that expressions can be passed safely.
#define IMM(op, regsd, rega, imm)            (Bit32u)(((Bit32u)(op)<<26)|((regsd)<<21)|((rega)<<16)|(((Bit64u)(imm))&0xFFFF))
#define DSF(op, regs, rega, ds, bb)          (Bit32u)(((Bit32u)(op)<<26)|((regs)<<21)|((rega)<<16)|(((Bit64u)(ds))&0xFFFC)|(bb))
#define EXT(regsd, rega, regb, op, rc)       (Bit32u)(((Bit32u)31<<26)|((regsd)<<21)|((rega)<<16)|((regb)<<11)|((op)<<1)|(rc))
#define RLW(op, regs, rega, sh, mb, me, rc)  (Bit32u)(((Bit32u)(op)<<26)|((regs)<<21)|((rega)<<16)|((sh)<<11)|((mb)<<6)|((me)<<1)|(rc))
#define RLD(op, regs, rega, sh, mx, opb, rc) (Bit32u)(((Bit32u)(op)<<26)|((regs)<<21)|((rega)<<16)|(((sh)&31)<<11)|(((mx)&31)<<6)|((mx)&32)|((opb)<<2)|(((sh)&32)>>4)|(rc))

// Same builders, but the resulting word is appended to the code cache.
#define IMM_OP(op, regsd, rega, imm)            cache_addd(IMM(op, regsd, rega, imm))
#define DSF_OP(op, regs, rega, ds, bb)          cache_addd(DSF(op, regs, rega, ds, bb))
#define EXT_OP(regsd, rega, regb, op, rc)       cache_addd(EXT(regsd, rega, regb, op, rc))
#define RLW_OP(op, regs, rega, sh, mb, me, rc)  cache_addd(RLW(op, regs, rega, sh, mb, me, rc))
#define RLD_OP(op, regs, rega, sh, mx, opb, rc) cache_addd(RLD(op, regs, rega, sh, mx, opb, rc))

#define NOP      IMM(24, 0, 0, 0)                // ori 0,0,0
#define NOP_OP() cache_addd(NOP)
#define TRAP()   cache_addd(EXT(31, 0, 0, 4, 0)) // tw 31,0,0
// Copy reg_src into reg_dst, keeping only the low 32 bits
// (matches x86_64, which uses a 32-bit mov).
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	// Emitted even when reg_src == reg_dst, because the truncation may
	// still be needed. Note that rld* encodings take rS (the source) first.
	cache_addd(RLD(30, reg_src, reg_dst, 0, 32, 0, 0)); // clrldi reg_dst, reg_src, 32
}
// Load a 16-bit constant into dest_reg.
// The upper 16 bits of the destination register may be destroyed.
static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	cache_addd(IMM(14, dest_reg, 0, imm)); // li dest_reg, imm
}
DRC_PTR_SIZE_IM block_ptr; | |
// Helper for loading addresses | |
// Emits relevant code to load the upper 48 bits if needed | |
static HostReg INLINE gen_addr(Bit64s &addr, HostReg dest) | |
{ | |
Bit64s off; | |
if ((Bit16s)addr == addr) | |
return HOST_R0; // lower to immediate | |
off = addr - (Bit64s)&Segs; | |
if ((Bit16s)off == off) | |
{ | |
addr = off; | |
return FC_SEGS_ADDR; | |
} | |
off = addr - (Bit64s)&cpu_regs; | |
if ((Bit16s)off == off) | |
{ | |
addr = off; | |
return FC_REGS_ADDR; | |
} | |
off = addr - (Bit64s)block_ptr; | |
if ((Bit16s)off == off) | |
{ | |
addr = off; | |
return HOST_R27; | |
} | |
#if C_FPU | |
off = addr - (Bit64s)&fpu; | |
if ((Bit16s)off == off) | |
{ | |
addr = off; | |
return HOST_R28; | |
} | |
#endif | |
if (addr & 0xffffffff00000000) { | |
IMM_OP(15, dest, 0, (addr & 0xffff000000000000)>>48); // lis dest, upper | |
if (addr & 0x0000ffff00000000) | |
IMM_OP(24, dest, dest, (addr & 0x0000ffff00000000)>>32); // ori dest, dest, ... | |
RLD_OP(30, dest, dest, 32, 31, 1, 0); // rldicr dest, dest, 32, 31 | |
if (addr & 0x00000000ffff0000) | |
IMM_OP(25, dest, dest, (addr & 0x00000000ffff0000)>>16); // oris dest, dest, ... | |
} else | |
IMM_OP(15, dest, 0, (addr & 0x00000000ffff0000)>>16); // lis dest, lower | |
// watch unexpected sign extension with following instructions | |
if (addr & 0x8000) { | |
// make the displacement in the following instruction 0 for safety | |
IMM_OP(24, dest, dest, (addr & 0x000000000000ffff) ); | |
addr = 0; | |
} else | |
addr = (Bit16s)addr; | |
return dest; | |
} | |
// Load a 64-bit constant into dest_reg.
// The number of emitted instructions depends on which 16-bit pieces of
// imm are non-zero.
static void gen_mov_qword_to_reg_imm(HostReg dest_reg,Bit64u imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	if (imm & 0xffffffff00000000) {
		IMM_OP(15, dest_reg, 0, (imm & 0xffff000000000000)>>48); // lis dest, upper
		if (imm & 0x0000ffff00000000)
			IMM_OP(24, dest_reg, dest_reg, (imm & 0x0000ffff00000000)>>32); // ori dest, dest, ...
		RLD_OP(30, dest_reg, dest_reg, 32, 31, 1, 0); // rldicr dest, dest, 32, 31
		if (imm & 0x00000000ffff0000)
			IMM_OP(25, dest_reg, dest_reg, (imm & 0x00000000ffff0000)>>16); // oris dest, dest, ...
	} else if (imm & 0x0000000080000000) {
		// lis sign-extends to 64 bits, so with bit 31 set it would fill the
		// upper word with ones; build the value from zero instead
		IMM_OP(14, dest_reg, 0, 0); // li dest, 0
		IMM_OP(25, dest_reg, dest_reg, (imm & 0x00000000ffff0000)>>16); // oris dest, dest, lower
	} else
		IMM_OP(15, dest_reg, 0, (imm & 0x00000000ffff0000)>>16); // lis dest, lower

	if (imm & 0xffff)
		IMM_OP(24, dest_reg, dest_reg, (imm & 0x000000000000ffff)); // ori dest, dest, ...
}
// Load a 32-bit constant into dest_reg, using a single li when the
// 16-bit form of the value compares equal to it and lis (+ ori) otherwise.
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	if ((Bit16s)imm == imm) {
		cache_addd(IMM(14, dest_reg, 0, imm)); // li
		return;
	}

	const Bit32u high_half = (imm & 0xffff0000)>>16;
	const Bit32u low_half = imm & 0x0000ffff;

	cache_addd(IMM(15, dest_reg, 0, high_half)); // lis
	if (low_half)
		cache_addd(IMM(24, dest_reg, dest_reg, low_half)); // ori
}
// Load a 32-bit (dword==true) or 16-bit (dword==false) value from memory
// into dest_reg.
// 16-bit moves may destroy the upper 16 bits of the destination register.
static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit64s disp = (Bit64s)data;
	// dest_reg doubles as scratch for the address when no base register is in reach
	const HostReg base = gen_addr(disp, dest_reg);
	const int load_op = dword ? 32 : 40; // lwz : lhz
	cache_addd(IMM(load_op, dest_reg, base, disp)); // lwz/lhz dest_reg, disp(base)
}
// Load an 8-bit constant into dest_reg.
// The upper 24 bits of the destination register can be destroyed.
// This function does not use FC_OP1/FC_OP2 as dest_reg, as these
// registers might not be directly byte-accessible on some architectures.
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	// the 16-bit immediate load covers the byte case
	gen_mov_word_to_reg_imm(dest_reg, imm);
}
// Load an 8-bit constant into dest_reg.
// The upper 24 bits of the destination register can be destroyed.
// This function can use FC_OP1/FC_OP2 as dest_reg, which are not
// directly byte-accessible on some architectures.
static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	// the 16-bit immediate load covers the byte case
	gen_mov_word_to_reg_imm(dest_reg, imm);
}
// Store the low 32 bits (dword==true) or low 16 bits (dword==false) of
// src_reg to memory at dest. r8 is used as scratch for the address.
static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit64s disp = (Bit64s)dest;
	const HostReg base = gen_addr(disp, HOST_R8);
	const int store_op = dword ? 36 : 44; // stw : sth
	cache_addd(IMM(store_op, src_reg, base, disp)); // stw/sth src_reg, disp(base)
}
// Load an 8-bit value from memory into dest_reg.
// The upper 24 bits of the destination register can be destroyed.
// This function does not use FC_OP1/FC_OP2 as dest_reg, as these
// registers might not be directly byte-accessible on some architectures.
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit64s disp = (Bit64s)data;
	// dest_reg doubles as scratch for the address
	const HostReg base = gen_addr(disp, dest_reg);
	cache_addd(IMM(34, dest_reg, base, disp)); // lbz dest_reg, disp(base)
}
// Load an 8-bit value from memory into dest_reg.
// The upper 24 bits of the destination register can be destroyed.
// This function can use FC_OP1/FC_OP2 as dest_reg, which are not
// directly byte-accessible on some architectures.
static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	// identical to the plain byte load on this backend
	gen_mov_byte_to_reg_low(dest_reg, data);
}
// Store the lowest 8 bits of src_reg to memory at dest.
// r8 is used as scratch for the address.
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit64s disp = (Bit64s)dest;
	const HostReg base = gen_addr(disp, HOST_R8);
	cache_addd(IMM(38, src_reg, base, disp)); // stb src_reg, disp(base)
}
// convert an 8bit word to a 32bit dword | |
// the register is zero-extended (sign==false) or sign-extended (sign==true) | |
static void gen_extend_byte(bool sign,HostReg reg) | |
{ | |
#if DEBUG_ME | |
fprintf(stderr, "ppc64le: %s\n", __FUNCTION__); | |
#endif | |
if (sign) | |
EXT_OP(reg, reg, 0, 954, 0); // extsb reg, src | |
else | |
RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, src, 0, 24, 31 | |
} | |
// convert a 16bit word to a 32bit dword | |
// the register is zero-extended (sign==false) or sign-extended (sign==true) | |
static void gen_extend_word(bool sign,HostReg reg) | |
{ | |
#if DEBUG_ME | |
fprintf(stderr, "ppc64le: %s\n", __FUNCTION__); | |
#endif | |
if (sign) | |
EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg | |
else | |
RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31 | |
} | |
// Add the 32-bit value stored at op to reg. Clobbers r8.
static void gen_add(HostReg reg,void* op)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	// fetch the memory operand into the scratch register first
	gen_mov_word_to_reg(HOST_R8, op, true);     // r8 = *(Bit32u*)op
	cache_addd(EXT(reg, reg, HOST_R8, 266, 0)); // add reg, reg, r8
}
// Add a 32-bit constant to reg. Emits nothing for imm == 0, otherwise an
// addis for the (adjusted) high half and/or an addi for the low half.
static void gen_add_imm(HostReg reg,Bit32u imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	if (imm == 0)
		return;

	const bool fits_simm16 = ((Bit16s)imm == (Bit32s)imm);
	if (!fits_simm16) {
		// +0x8000 compensates for addi sign-extending the low half
		cache_addd(IMM(15, reg, reg, (imm+0x8000)>>16)); // addis reg, reg, imm@ha
	}
	if ((Bit16s)imm != 0) {
		cache_addd(IMM(14, reg, reg, imm)); // addi reg, reg, imm@l
	}
}
// AND reg with a 32-bit constant.
//
// The mask is analysed bit by bit so that the shortest encoding is used:
//   - all ones: nothing is emitted; zero: the register is loaded with 0
//   - one contiguous run of ones: a single rlwinm
//   - ones only in the low or only in the high half: andi. / andis.
//   - ones at both ends with one run of zeros in between: a wrapping rlwinm
//   - anything else: andi. into r0, andis. into reg, then OR the halves
// andi./andis. also update CR0, and the last case clobbers r0.
static void gen_and_imm(HostReg reg,Bit32u imm) {
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bits sbit,ebit,tbit,bbit,abit,i;

	// sbit = number of leading 0 bits
	// ebit = number of trailing 0 bits
	// tbit = number of total 0 bits
	// bbit = number of leading 1 bits
	// abit = number of trailing 1 bits

	if (imm == 0xFFFFFFFF)
		return;

	if (!imm) {
		gen_mov_word_to_reg_imm(reg, 0);
		return;
	}

	sbit = ebit = tbit = bbit = abit = 0;
	for (i=0; i < 32; i++)
	{
		// walk from the most significant bit down; the unsigned 1 avoids
		// shifting into the sign bit of an int
		if (!(imm & ((Bit32u)1<<(31-i))))
		{
			abit = 0;
			tbit++;
			if (sbit == i)
				sbit++;
			ebit++;
		}
		else
		{
			ebit = 0;
			if (bbit == i)
				bbit++;
			abit++;
		}
	}

	if (sbit + ebit == tbit)
	{
		RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit
		return;
	}

	if (sbit >= 16)
	{
		IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm
		return;
	}
	if (ebit >= 16)
	{
		IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16)
		return;
	}

	if (bbit + abit == (32 - tbit))
	{
		RLW_OP(21,reg,reg,0,32-abit,bbit-1,0); // rlwinm reg,reg,0,32-abit,bbit-1
		return;
	}

	IMM_OP(28, reg, HOST_R0, imm);     // andi. r0, reg, imm@l
	IMM_OP(29, reg, reg, imm>>16);     // andis. reg, reg, imm@h (a shift; the original compared with >)
	EXT_OP(reg, reg, HOST_R0, 444, 0); // or reg, reg, r0
}
// Store a 32-bit constant to memory at dest. Clobbers r9, which holds the
// constant, and whatever gen_mov_word_from_reg uses for the address.
static void gen_mov_direct_dword(void* dest,Bit32u imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	gen_mov_dword_to_reg_imm(HOST_R9, imm);
	gen_mov_word_from_reg(HOST_R9, dest, true);
}
// Store an address into memory at dest (assumes address != NULL).
// The address is loaded into r27 and stays there, and block_ptr is
// updated to match so that later gen_addr() calls can use r27 as a base.
static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	block_ptr = 0;
	gen_mov_qword_to_reg_imm(HOST_R27, imm);
	// this will be used to look-up the linked blocks
	block_ptr = imm;

	// equivalent of a 64-bit "gen_mov_qword_from_reg(HOST_R27, dest)"
	Bit64s disp = (Bit64s)dest;
	const HostReg base = gen_addr(disp, HOST_R8);
	cache_addd(DSF(62, HOST_R27, base, disp, 0)); // std r27, disp(base)
}
// add a 32bit (dword==true) or 16bit (dword==false) constant value to a 32bit memory value | |
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) | |
{ | |
#if DEBUG_ME | |
fprintf(stderr, "ppc64le: %s\n", __FUNCTION__); | |
#endif | |
HostReg ld; | |
Bit64s addr = (Bit64s)dest; | |
if (!dword) | |
{ | |
imm &= 0xFFFF; | |
//addr += 2; // ENDIAN!!! | |
} | |
if (!imm) | |
return; | |
ld = gen_addr(addr, HOST_R8); | |
IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld) | |
if (dword && (Bit16s)imm != (Bit32s)imm) | |
IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha | |
if (!dword || (Bit16s)imm) | |
IMM_OP(14, HOST_R9, HOST_R9, imm); // addi r9,r9,imm@l | |
IMM_OP(dword ? 36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld) | |
} | |
// Subtract a 32-bit (dword==true) or 16-bit (dword==false) constant from
// the value in memory at dest, implemented as adding the negated constant.
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	gen_add_direct_word(dest, -(Bit32s)imm, dword);
}
// Effective address calculation with destination dest_reg:
//   dest_reg = dest_reg + imm + (scale_reg << scale)
// scale_reg itself is left untouched; r8 receives the scaled value
// when scale is non-zero.
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	HostReg index_reg = scale_reg;
	if (scale)
	{
		// rlwinm takes the source (rS) first and the destination (rA) second
		cache_addd(RLW(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0)); // slwi r8, scale_reg, scale
		index_reg = HOST_R8;
	}

	gen_add_imm(dest_reg, imm);
	cache_addd(EXT(dest_reg, dest_reg, index_reg, 266, 0)); // add dest_reg, dest_reg, index
}
// Effective address calculation with destination dest_reg:
//   dest_reg = (dest_reg << scale) + imm
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	if (scale != 0)
		cache_addd(RLW(21, dest_reg, dest_reg, scale, 0, 31-scale, 0)); // slwi dest_reg, dest_reg, scale

	gen_add_imm(dest_reg, imm);
}
// Helper that writes a call to func at npos, choosing between a direct
// (relative) and an indirect call.
//
// Returns the number of bytes written: 4 for an unpadded relative call,
// 28 for a padded relative call or an indirect call. The length of the
// stanza must match the assumptions made in gen_fill_function_ptr.
static int INLINE do_gen_call(void *func, Bit64u *npos, bool pad)
{
	const Bit64s target = (Bit64s)func;
	const Bit64s rel = target - (Bit64s)npos;
	Bit32u *insn = (Bit32u *)npos;

	// relative branches are limited to +/- ~32MB
	const bool reachable = (rel >= -0x02000000 && rel < 0x02000000);

	if (!reachable) {
		// for ppc64le ELF ABI, use r12 to branch
		insn[0] = IMM(15, HOST_R12, 0,        (target & 0xffff000000000000)>>48); // lis
		insn[1] = IMM(24, HOST_R12, HOST_R12, (target & 0x0000ffff00000000)>>32); // ori
		insn[2] = RLD(30, HOST_R12, HOST_R12, 32, 31, 1, 0);                      // rldicr
		insn[3] = IMM(25, HOST_R12, HOST_R12, (target & 0x00000000ffff0000)>>16); // oris
		insn[4] = IMM(24, HOST_R12, HOST_R12, (target & 0x000000000000ffff));     // ori
		insn[5] = EXT(HOST_R12, 9, 0, 467, 0);                                    // mtctr r12
		insn[6] = IMM(19, 0x14, 0, (528<<1)|1);                                   // bctrl
		return 28;
	}

	insn[0] = 0x48000001 | (rel & 0x03FFFFFC); // bl func
	if (!pad)
		return 4;

	// keep this patchable: fill the six slots the indirect form would
	// occupy (ori, rldicr, oris, ori, mtctr, bctrl) with nops
	for (int slot = 1; slot <= 6; slot++)
		insn[slot] = NOP;
	return 28;
}
// Emit a call to a parameterless function at the current cache position
// and advance the position past it. fastcall is forwarded as the "pad"
// argument of do_gen_call.
static void INLINE gen_call_function_raw(void * func,bool fastcall=true)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	const int emitted = do_gen_call(func, (Bit64u*)cache.pos, fastcall);
	cache.pos += emitted;
}
// Emit a call to a function taking paramcount parameters and return the
// address at which the call stanza starts.
// Note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below.
static Bit64u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	// remember where the stanza begins before emitting it
	const Bit64u call_site = (Bit64u)cache.pos;
	gen_call_function_raw(func, fastcall);
	return call_site;
}
// Load an immediate value as the param'th function parameter.
// These are 32-bit (see risc_x64.h).
static void INLINE gen_load_param_imm(Bitu imm,Bitu param)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	const HostReg target = RegParams[param];
	gen_mov_dword_to_reg_imm(target, imm);
}
// Load an address as the param'th function parameter (32-bit).
static void INLINE gen_load_param_addr(Bitu addr,Bitu param)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	// handled exactly like an immediate parameter
	gen_load_param_imm(addr, param);
}
// Load a host register as the param'th function parameter (32-bit).
static void INLINE gen_load_param_reg(Bitu reg,Bitu param)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	const HostReg target = RegParams[param];
	gen_mov_regs(target, (HostReg)reg);
}
// Load a 32-bit value from memory as the param'th function parameter.
static void INLINE gen_load_param_mem(Bitu mem,Bitu param)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	const HostReg target = RegParams[param];
	gen_mov_word_to_reg(target, (void*)mem, true);
}
// jump to an address pointed at by ptr, offset is in imm | |
// use r12 for ppc64le ABI compatibility | |
static void gen_jmp_ptr(void * ptr,Bits imm=0) { | |
#if DEBUG_ME | |
fprintf(stderr, "ppc64le: %s\n", __FUNCTION__); | |
#endif | |
// "gen_mov_qword_to_reg" | |
gen_mov_qword_to_reg_imm(HOST_R12,(Bit64u)ptr); // r12 = *(Bit64u*)ptr | |
DSF_OP(58, HOST_R12, HOST_R12, 0, 0); | |
if ((Bit16s)imm != (Bit32s)imm) { | |
// XXX: this is not tested. I've left it as a quasi-assertion. | |
fprintf(stderr, "large gen_jmp_ptr offset\n"); | |
__asm__("trap\n"); | |
IMM_OP(15, HOST_R12, HOST_R12, (imm + 0x8000)>>16); // addis r12, r12, imm@ha | |
} | |
DSF_OP(58, HOST_R12, HOST_R12, (Bit16s)imm, 0); // ld r12, imm@l(r12) | |
EXT_OP(HOST_R12, 9, 0, 467, 0); // mtctr r12 | |
IMM_OP(19, 0x14, 0, 528<<1); // bctr | |
} | |
// Short conditional jump (+-127 bytes) taken if the register is zero.
// dword==true tests 32 bits, dword==false the low 16 bits (clobbering r0).
// Returns the address of the branch instruction; the destination is set
// by gen_fill_branch() later.
static Bit64u gen_create_branch_on_zero(HostReg reg,bool dword)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	const Bit32u test_insn = dword
		? IMM(11, 0, reg, 0)              // cmpwi cr0, reg, 0
		: IMM(28, reg, HOST_R0, 0xFFFF);  // andi. r0, reg, 0xFFFF
	cache_addd(test_insn);

	cache_addd(IMM(16, 0x0C, 2, 0)); // bc 12,CR0[Z] (beq)
	return ((Bit64u)cache.pos-4);
}
// Short conditional jump (+-127 bytes) taken if the register is nonzero.
// dword==true tests 32 bits, dword==false the low 16 bits (clobbering r0).
// Returns the address of the branch instruction; the destination is set
// by gen_fill_branch() later.
static Bit64u gen_create_branch_on_nonzero(HostReg reg,bool dword)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	const Bit32u test_insn = dword
		? IMM(11, 0, reg, 0)              // cmpwi cr0, reg, 0
		: IMM(28, reg, HOST_R0, 0xFFFF);  // andi. r0, reg, 0xFFFF
	cache_addd(test_insn);

	cache_addd(IMM(16, 0x04, 2, 0)); // bc 4,CR0[Z] (bne)
	return ((Bit64u)cache.pos-4);
}
// Calculate the relative offset from the branch instruction at data to
// the current cache position and write it into that instruction.
static void gen_fill_branch(DRC_PTR_SIZE_IM data)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
#if C_DEBUG
	Bits len=(Bit64u)cache.pos-data;
	if (len<0) len=-len;
	if (len >= 0x8000) LOG_MSG("Big jump %d",len);
#endif

	// XXX: assert???
	// Only the first 16-bit halfword of the instruction is rewritten; the
	// masked distance also leaves its two lowest bits zero. ENDIAN!!!
	const Bit64u distance = (Bit64u)cache.pos - data;
	Bit16u *low_half = (Bit16u*)data;
	*low_half = distance & 0xFFFC;
}
// Conditional jump taken if the register is nonzero.
// For dword==true all 32 bits of the register are tested,
// for dword==false only the lowest 8 bits (clobbering r0).
// Returns the address of the branch instruction to be patched later.
static Bit64u gen_create_branch_long_nonzero(HostReg reg,bool dword)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	const Bit32u test_insn = dword
		? IMM(11, 0, reg, 0)            // cmpwi cr0, reg, 0
		: IMM(28, reg, HOST_R0, 0xFF);  // andi. r0, reg, 0xFF
	cache_addd(test_insn);

	cache_addd(IMM(16, 0x04, 2, 0)); // bne
	return ((Bit64u)cache.pos-4);
}
// Compare a 32-bit register against zero and jump if the value is less
// than or equal to zero.
// Returns the address of the branch instruction to be patched later.
static Bit64u gen_create_branch_long_leqzero(HostReg reg)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	cache_addd(IMM(11, 0, reg, 0));  // cmpwi cr0, reg, 0
	cache_addd(IMM(16, 0x04, 1, 0)); // ble

	const Bit64u branch_addr = (Bit64u)cache.pos - 4;
	return branch_addr;
}
// Calculate the long relative offset and fill it into the location
// pointed to by data. Uses the same patching as gen_fill_branch().
static void gen_fill_branch_long(Bit64u data)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	gen_fill_branch((DRC_PTR_SIZE_IM)data);
}
// Make freshly written code in [block_start, block_start+block_size)
// visible to instruction fetch: dcbf over every 128-byte line, sync,
// then icbi over the same lines, isync.
static void cache_block_closing(Bit8u* block_start,Bitu block_size)
{
	// cache line size for POWER8 and POWER9 is 128 bytes; start at the
	// line containing block_start
	Bit8u* const first_line = (Bit8u*)((Bit64u)block_start & -128);
	Bit8u* const block_end = block_start + block_size;

	// in the Linux kernel i-cache and d-cache are flushed separately;
	// there's probably a good reason for this ...
	for (Bit8u* line = first_line; line < block_end; line += 128)
		asm volatile("dcbf %y0" :: "Z"(*line));
	asm volatile("sync");

	for (Bit8u* line = first_line; line < block_end; line += 128)
		asm volatile("icbi %y0" :: "Z"(*line));
	asm volatile("isync");
}
// Hook invoked before a block is closed; nothing to do on this backend.
static void cache_block_before_close(void)
{
}
static void gen_function(void* func) | |
{ | |
#if DEBUG_ME | |
fprintf(stderr, "ppc64le: %s\n", __FUNCTION__); | |
#endif | |
Bit64s off = (Bit64s)func - (Bit64s)cache.pos; | |
// relative branches are limited to +/- 32MB | |
if (off < 0x02000000 && off >= -0x02000000) { | |
cache_addd(0x48000000 | (off & 0x03FFFFFC)); // b func | |
return; | |
} | |
gen_mov_qword_to_reg_imm(HOST_R12, (Bit64u)func); // r12 = func | |
EXT_OP(HOST_R12, 9, 0, 467, 0); // mtctr r12 | |
IMM_OP(19, 0x14, 0, 528<<1); // bctr | |
} | |
// gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it | |
static void* epilog_addr; | |
static Bit8u *getCF_glue; | |
static void gen_run_code(void) | |
{ | |
#if DEBUG_ME | |
fprintf(stderr, "ppc64le: %s\n", __FUNCTION__); | |
#endif | |
// prolog | |
DSF_OP(62, HOST_R1, HOST_R1, -256, 1); // stdu sp,-256(sp) | |
EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1 | |
EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0 | |
// we don't clobber any CR fields we need to restore, so no need to save | |
// put at the very end of the stack frame, since we have no floats | |
// to save | |
DSF_OP(62, HOST_R26, HOST_R1, 208+0 , 0); // std r26, 208(sp) | |
DSF_OP(62, HOST_R27, HOST_R1, 208+8 , 0); // std r27, 216(sp) | |
DSF_OP(62, HOST_R28, HOST_R1, 208+16, 0); // : | |
DSF_OP(62, HOST_R29, HOST_R1, 208+24, 0); // : | |
DSF_OP(62, HOST_R30, HOST_R1, 208+32, 0); // : | |
DSF_OP(62, HOST_R31, HOST_R1, 208+40, 0); // std r31, 248(sp) | |
#if C_FPU | |
gen_mov_qword_to_reg_imm(HOST_R28, ((Bit64u)&fpu)); | |
#endif | |
gen_mov_qword_to_reg_imm(FC_SEGS_ADDR, ((Bit64u)&Segs)); | |
gen_mov_qword_to_reg_imm(FC_REGS_ADDR, ((Bit64u)&cpu_regs)); | |
DSF_OP(62, HOST_R0, HOST_R1, 256+16, 0); // std r0,256+16(sp) | |
//TRAP(); | |
IMM_OP(19, 0x14, 0, 528<<1); // bctr | |
// epilog | |
epilog_addr = cache.pos; | |
//TRAP(); | |
DSF_OP(58, HOST_R0, HOST_R1, 256+16, 0); // ld r0,256+16(sp) | |
EXT_OP(HOST_R0, 8, 0, 467, 0); // mtlr r0 | |
DSF_OP(58, HOST_R31, HOST_R1, 208+40, 0); // ld r31, 248(sp) | |
DSF_OP(58, HOST_R30, HOST_R1, 208+32, 0); // etc. | |
DSF_OP(58, HOST_R29, HOST_R1, 208+24, 0); | |
DSF_OP(58, HOST_R28, HOST_R1, 208+16, 0); | |
DSF_OP(58, HOST_R27, HOST_R1, 208+8 , 0); | |
DSF_OP(58, HOST_R26, HOST_R1, 208+0 , 0); | |
IMM_OP(14, HOST_R1, HOST_R1, 256); // addi sp, sp, 256 | |
IMM_OP(19, 0x14, 0, 16<<1); // blr | |
// trampoline to call get_CF() | |
getCF_glue = cache.pos; | |
gen_function((void*)get_CF); | |
} | |
// return from a function | |
static void gen_return_function(void) | |
{ | |
#if DEBUG_ME | |
fprintf(stderr, "ppc64le: %s\n", __FUNCTION__); | |
#endif | |
gen_function(epilog_addr); | |
} | |
// Called when a call to a function can be replaced by a call to a simpler
// function, or (with DRC_FLAGS_INVALIDATION_DCODE) by a short inline
// instruction sequence. The replacement is written over the previously
// generated call stanza, so it must not exceed the length of a branch
// stanza (see do_gen_call).
//   pos        - start of the old call stanza inside the code cache
//   fct_ptr    - function to call when no inline replacement exists
//   flags_type - operation type selecting the inline replacement
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit32u *op = (Bit32u*)pos;

	// blank the old stanza; slot 0 is rewritten by every case below
	op[1] = NOP;
	op[2] = NOP;
	op[3] = NOP;
	op[4] = NOP;
	op[5] = NOP;
	op[6] = NOP;

	switch (flags_type) {
#if defined(DRC_FLAGS_INVALIDATION_DCODE)
		// try to avoid function calls but rather directly fill in code
		case t_ADDb:
		case t_ADDw:
		case t_ADDd:
			*op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_ORb:
		case t_ORw:
		case t_ORd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_ADCb:
		case t_ADCw:
		case t_ADCd:
			op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2
			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
			op[2] = IMM(12, HOST_R0, FC_RETOP, -1); // addic r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = !!CF)
			op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0); // addze; FC_RETOP = r26 + !!CF
			return;
		case t_SBBb:
		case t_SBBw:
		case t_SBBd:
			op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0); // r26 = FC_OP1 - FC_OP2
			op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
			op[2] = IMM(8, HOST_R0, FC_RETOP, 0); // subfic r0, FC_RETOP, 0 (XER[CA] = !CF)
			op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0); // addme; FC_RETOP = r26 - 1 + !CF
			return;
		case t_ANDb:
		case t_ANDw:
		case t_ANDd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_SUBb:
		case t_SUBw:
		case t_SUBd:
			*op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1
			break;
		case t_XORb:
		case t_XORw:
		case t_XORd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_CMPb:
		case t_CMPw:
		case t_CMPd:
		case t_TESTb:
		case t_TESTw:
		case t_TESTd:
			// no replacement code is emitted for these; blank slot 0 as
			// well so that no stale instruction of the old call stanza
			// is left behind and executed
			*op++ = NOP;
			break;
		case t_INCb:
		case t_INCw:
		case t_INCd:
			*op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1
			break;
		case t_DECb:
		case t_DECw:
		case t_DECd:
			*op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1
			break;
		case t_NEGb:
		case t_NEGw:
		case t_NEGd:
			*op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1
			break;
		case t_SHLb:
		case t_SHLw:
		case t_SHLd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_SHRb:
		case t_SHRw:
		case t_SHRd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_SARb:
			*op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1
			// fallthrough (the word-only step below is skipped for bytes)
		case t_SARw:
			if (flags_type == t_SARw)
				*op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1
			// fallthrough
		case t_SARd:
			*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_ROLb:
			*op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7
			// fallthrough (the word-only step below is skipped for bytes)
		case t_ROLw:
			if (flags_type == t_ROLw)
				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
			// fallthrough
		case t_ROLd:
			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_RORb:
			*op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23
			// fallthrough (the word-only step below is skipped for bytes)
		case t_RORw:
			if (flags_type == t_RORw)
				*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
			// fallthrough
		case t_RORd:
			*op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2)
			*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
			break;
		case t_DSHLw: // technically not correct for FC_OP3 > 16
			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
			*op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3
			break;
		case t_DSHLd:
			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3
			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3
			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
			return;
		case t_DSHRw: // technically not correct for FC_OP3 > 16
			*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
			*op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3
			break;
		case t_DSHRd:
			op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3
			op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
			op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3
			op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
			return;
#endif
		default:
			// no inline form: regenerate the call stanza for fct_ptr
			do_gen_call(fct_ptr, (Bit64u*)op, true);
			return;
	}
}
// Load the 16bit value found at byte offset index inside Segs into dest_reg
// (index must be a multiple of 2).
// 16bit moves may destroy the upper 16bit of the destination register.
static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const seg_field = (Bit8u*)&Segs + index;
	gen_mov_word_to_reg(dest_reg, seg_field, false);	// false: 16bit access
}
// Load the 32bit value found at byte offset index inside Segs into dest_reg
// (index must be a multiple of 4).
static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const seg_field = (Bit8u*)&Segs + index;
	gen_mov_word_to_reg(dest_reg, seg_field, true);	// true: 32bit access
}
// Add the 32bit value found at byte offset index inside Segs to a full
// register (index must be a multiple of 4).
static void gen_add_seg32_to_reg(HostReg reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const seg_field = (Bit8u*)&Segs + index;
	gen_add(reg, seg_field);
}
// Load the 16bit value found at byte offset index inside cpu_regs into
// dest_reg (index must be a multiple of 2).
// 16bit moves may destroy the upper 16bit of the destination register.
static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const reg_field = (Bit8u*)&cpu_regs + index;
	gen_mov_word_to_reg(dest_reg, reg_field, false);	// false: 16bit access
}
// Load the 32bit value found at byte offset index inside cpu_regs into
// dest_reg (index must be a multiple of 4).
static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const reg_field = (Bit8u*)&cpu_regs + index;
	gen_mov_word_to_reg(dest_reg, reg_field, true);	// true: 32bit access
}
// Load the 8bit value found at byte offset index inside cpu_regs into
// dest_reg; the upper 24bit of the destination register can be destroyed.
// Callers do not pass FC_OP1/FC_OP2 as dest_reg here, as these registers
// might not be directly byte-accessible on some architectures.
static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const reg_field = (Bit8u*)&cpu_regs + index;
	gen_mov_byte_to_reg_low(dest_reg, reg_field);
}
// Load the 8bit value found at byte offset index inside cpu_regs into
// dest_reg; the upper 24bit of the destination register can be destroyed.
// Unlike gen_mov_regbyte_to_reg_low, dest_reg may be FC_OP1/FC_OP2, which
// are not directly byte-accessible on some architectures.
static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const reg_field = (Bit8u*)&cpu_regs + index;
	gen_mov_byte_to_reg_low_canuseword(dest_reg, reg_field);
}
// Store 16bit of src_reg at byte offset index inside cpu_regs
// (index must be a multiple of 2).
static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const reg_field = (Bit8u*)&cpu_regs + index;
	gen_mov_word_from_reg(src_reg, reg_field, false);	// false: 16bit access
}
// Store 32bit of src_reg at byte offset index inside cpu_regs
// (index must be a multiple of 4).
static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const reg_field = (Bit8u*)&cpu_regs + index;
	gen_mov_word_from_reg(src_reg, reg_field, true);	// true: 32bit access
}
// Store the lowest 8bit of src_reg at byte offset index inside cpu_regs.
static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const reg_field = (Bit8u*)&cpu_regs + index;
	gen_mov_byte_from_reg_low(src_reg, reg_field);
}
// Add the 32bit value found at byte offset index inside cpu_regs to a full
// register (index must be a multiple of 4).
static void gen_add_regval32_to_reg(HostReg reg,Bitu index)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	Bit8u* const reg_field = (Bit8u*)&cpu_regs + index;
	gen_add(reg, reg_field);
}
// Store src_reg into cpu_regs at byte offset index, choosing the width
// from dword: 32bit when true (index must be a multiple of 4), 16bit when
// false (index must be a multiple of 2).
static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	if (!dword) {
		gen_mov_regval16_from_reg(src_reg, index);
		return;
	}
	gen_mov_regval32_from_reg(src_reg, index);
}
// Load a value from cpu_regs at byte offset index into dest_reg, choosing
// the width from dword: 32bit when true (index must be a multiple of 4),
// 16bit when false (index must be a multiple of 2).
// 16bit moves may destroy the upper 16bit of the destination register.
static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword)
{
#if DEBUG_ME
	fprintf(stderr, "ppc64le: %s\n", __FUNCTION__);
#endif
	if (!dword) {
		gen_mov_regval16_to_reg(dest_reg, index);
		return;
	}
	gen_mov_regval32_to_reg(dest_reg, index);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment