Created
March 2, 2015 12:14
-
-
Save nooga/19a3029953ec4729721b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
/* Declare integer datatypes for each number of bits.
 * uN is unsigned and sN is signed. They are the "least" types of <stdint.h>,
 * so each one holds at least N bits.
 * NOTE(review): casts such as (s8) and (s16) are used elsewhere in this file
 * for sign extension, which presumably relies on these types being exactly
 * N bits wide on the host -- confirm when porting.
 */
using u8  = uint_least8_t;
using s8  = int_least8_t;
using u16 = uint_least16_t;
using s16 = int_least16_t;
using u32 = uint_least32_t;
using s32 = int_least32_t;
using u64 = uint_least64_t;
using s64 = int_least64_t;
/* The following data structures provide templated access | |
* of different data sizes within a 32-bit integer. | |
*/ | |
template<unsigned size, typename T=void> | |
struct MemDataRef | |
{ | |
union { u32 value; T alt[4 / sizeof(T)]; } d; | |
// Get a reference to the "size" item beginning | |
// at byte position "ind" within the 32-bit word. | |
// All requests are assumed to be properly aligned. | |
T& reffun(unsigned ind) { return d.alt[ind >> (sizeof(T)==2)]; } | |
}; | |
template<> struct MemDataRef<1,void>: public MemDataRef<0, u8> { }; | |
template<> struct MemDataRef<2,void>: public MemDataRef<0, u16> { }; | |
template<> struct MemDataRef<4,void>: public MemDataRef<0, u32> { }; | |
/* Declare facilities for detecting and dealing with different byteorder.
 * HOST is the machine running the emulator, SLAVE is the emulated machine.
 * CROSS_ENDIAN is nonzero when the two differ, i.e. when multi-byte values
 * must be byte-swapped on their way between the two.
 */
#define LITTLE_ENDIAN_HOST 1 /* 1=little endian */
#define LITTLE_ENDIAN_SLAVE 0 /* 0=big endian */
#define CROSS_ENDIAN (LITTLE_ENDIAN_HOST != LITTLE_ENDIAN_SLAVE)
/* Reverse the byte order of a "size"-byte quantity held in "value".
 *   size 1:  the value is returned unchanged.
 *   size 2+: the two bytes inside each 16-bit half are exchanged.
 *   size 4+: the two 16-bit halves are exchanged as well, which together
 *            with the previous step reverses all four bytes.
 * For size 2 the upper half is swapped too; callers that want a 16-bit
 * result are expected to truncate it themselves.
 */
static unsigned SwapBytes(unsigned value, unsigned size)
{
    if(size >= 2)
        value = ((value & 0xFF00FF00u) >> 8) | ((value & 0x00FF00FFu) << 8);
    if(size >= 4)
        value = (value >> 16) | (value << 16);
    return value;
}
/* Console I/O (note: there should be terminal emulation here, but
 * such implementation is skipped here, for brevity).
 * Keyboard input comes from the kbhit()/getch() functions of <conio.h>,
 * output goes to stdout.
 */
# include <conio.h>
static struct
{
    // Is there a keypress waiting to be read?
    bool Hit() { return kbhit(); }
    // Read one keypress. A zero code from getch() is reported as a space.
    unsigned Getc()
    {
        int key = getch();
        return key ? key : ' ';
    }
    // Print one character, and flush so that it shows up immediately.
    void Putc(unsigned c)
    {
        putchar(c);
        fflush(stdout);
    }
} Console;
/* Declare all the devices that we emulate. */ | |
// RAM device is very simple. | |
template<unsigned size_bytes> | |
class SDRAM | |
{ | |
u32 buffer[size_bytes / 4]; | |
public: | |
u32 Read(u32 index) const { return buffer[index]; } | |
void Write(u32 index, u32 value) { buffer[index] = value; } | |
}; | |
// Xilinx UART lite (serial port), lite version | |
class XilinxUARTlite | |
{ | |
bool ints_enabled, tx_emptied_int_pending, overrun; | |
struct { u8 len, pos, fifo[8]; } in; // Input queue | |
// List of supported status and control bits | |
enum { RXVALID=1, RXFULL=2, TXEMPTY=4, IE=16, OVERRUN=32, RST_RX=2 }; | |
public: | |
XilinxUARTlite(): ints_enabled(0), overrun(0), in{0,0,{0}} { } | |
u32 Read(u32 index) | |
{ | |
u32 result = TXEMPTY; | |
if(ints_enabled) result |= IE; | |
if(in.len > 0) result |= RXVALID; | |
if(in.len == 8) result |= RXFULL; | |
if(overrun) {result |= OVERRUN; overrun=false;} | |
if(index<2 && in.len) result = in.fifo[ (in.pos + 8 - in.len--) % 8 ]; | |
return result; | |
} | |
void Write(u32 index, u32 value) | |
{ | |
if(index<3) { Console.Putc(value); tx_emptied_int_pending = true; } | |
else { ints_enabled = value&IE; if(value&RST_RX) in.pos=in.len=0; } | |
} | |
bool CheckIRQ() | |
{ | |
// TX interrupt is an edge; RX interrupt is a level. | |
if(ints_enabled && (in.len > 0 || tx_emptied_int_pending)) | |
{ tx_emptied_int_pending=false; return true; } | |
return false; | |
} | |
void Input() | |
{ | |
static unsigned counter = 0; | |
if(!counter) counter = 2000; else { --counter; return; } | |
if(Console.Hit()) | |
{ | |
if(in.len >= 8) { overrun = true; return; } | |
in.fifo[ in.pos++ % 8 ] = Console.Getc(); | |
++in.len; | |
} | |
} | |
}; | |
// Xilinx interrupt timer (multi-timer chip) | |
template<unsigned N> | |
class XilinxTimer | |
{ | |
// List of registers | |
enum { TCSR=0, TLR=1, TCR=2 }; | |
// List of control bits | |
enum { UDT=2, ARHT=16, LOAD=32, ENIT=64, ENT=128, TINT=256, ENALL=1024 }; | |
struct impl | |
{ | |
u32 regs[4]; | |
void Write(u32 index, u32 value) | |
{ | |
if(index == TCSR) | |
{ // Setting the TINT bit actually clears it, | |
// otherwise it is preserved. | |
if(value & TINT) value &= ~TINT; | |
else value |= (regs[index] & TINT); } | |
regs[index] = value; | |
} | |
bool CheckIRQ() const { return (regs[TCSR] & (TINT|ENIT)) == (TINT|ENIT); } | |
void Tick() | |
{ | |
if(!(regs[TCSR] & ENT)) return; // Timer does nothing unless enabled | |
if(regs[TCSR] & LOAD) { regs[TCR] = regs[TLR]; return; } | |
u32 previous = regs[TCSR & UDT] ? regs[TCR]-- : regs[TCR]++; | |
if( (previous ^ regs[TCR]) == ~u32(0)) // overflow in either direction | |
{ | |
regs[TCSR] |= TINT; // Raise the interrupt flag | |
if(regs[TCSR] & ARHT) regs[TCR] = regs[TLR]; // Restart timer | |
else regs[TCSR] &= ~ENT; // Stop timer | |
} | |
} | |
} timers[N]; | |
public: | |
u32 Read(u32 which, u32 index) | |
{ | |
return timers[which].regs[index]; | |
} | |
void Write(u32 which, u32 index, u32 value) | |
{ | |
timers[which].Write(index, value); | |
// Setting the ENALL bit on any of the timers sets ENT on all of them. | |
if(value & ENALL) for(auto& t: timers) t.regs[TCSR] |= ENT; | |
} | |
void Tick() | |
{ | |
for(auto& t: timers) t.Tick(); | |
} | |
bool CheckIRQ() const | |
{ | |
for(const auto& t: timers) if(t.CheckIRQ()) return true; | |
return false; | |
} | |
}; | |
// Xilinx OPB Interrupt Controller | |
class XilinxINTC | |
{ | |
// List of registers | |
enum { ISR/* List of active IRQs */, | |
IPR, | |
IER/* List of IRQs that can result in an interrupt */, | |
IAR, SIE, CIE, IVR, MER }; | |
// List of bits | |
enum { ME=1, HIE=2 }; | |
u32 regs[8]; | |
public: | |
XilinxINTC(): regs() { } | |
u32 Read(u32 index) const | |
{ | |
if(index == IPR) return regs[ISR] & regs[IER]; | |
if(index == IVR) // Ordinal number of highest triggered interrupt | |
{ | |
for(unsigned i=0; i<32; ++i) if(Read(IPR) & (1<<i)) return i; | |
return ~0u; | |
} | |
return regs[index]; | |
} | |
void Write(u32 index, u32 value) | |
{ | |
switch(index) | |
{ | |
case SIE: regs[IER] |= value; break; // Atomic set ie. | |
case CIE: regs[IER] &= ~value; break; // Atomic clear ie. | |
case IAR: regs[ISR] &= ~value; break; // Acknowledge IRQ (so it's no longer raised) | |
case ISR: if(!(regs[MER] & HIE)) // If software interrupts are enabled, | |
default: regs[index] = value; // Update the register as indicated. | |
} | |
} | |
bool CheckIRQ() | |
{ | |
return (regs[MER] & ME) && Read(IPR); | |
} | |
void TriggerIRQ(unsigned irq) | |
{ | |
if(regs[MER] & HIE) // If hardware interrupts are enabled, | |
regs[ISR] |= 1 << irq; // set the IRQ level. | |
} | |
}; | |
// The purpose of a memory access: reading data, writing data, or fetching an opcode.
enum intent { intent_load, intent_store, intent_execute };
class MicroBlazeCPU // Micro-Blaze CPU | |
{ | |
enum { | |
v_reset = 0x00, | |
v_usr_ex = 0x08, | |
v_intr = 0x10, | |
v_brk = 0x18, | |
v_hw_ex = 0x20 | |
}; | |
union reg // MicroBlaze register file | |
{ | |
struct { // As 32-bit integers | |
u32 MSR; // Machine Status Register, i.e. flags | |
u32 ESR; // Exception Status Register, i.e. exception flags | |
// 32 general purpose registers. [0] is always zero and cannot be modified. | |
u32 r[32]; | |
// Special purpose registers. | |
// See SPR() below for the list of meanings. | |
u32 PC, EAR, BTR, SLR, SHR, ZPR, TLBX, PVRs[16]; | |
union | |
{ | |
struct { u32 LO, HI; u8 TID; } w; | |
struct { u32 G:1, M:1, I:1, W:1, ZSEL:4, WR:1,EX:1, RPN:22; | |
// Bits M, I, W are ignored. RPN=Real Page Number | |
u32 filler:4, U0:1, E:1, V:1, SIZE:3, TAG:22; | |
// E=endianess. Page size is 4^SIZE kibibytes. | |
} b; | |
} UTLB[64]; | |
u8 PID; | |
}; | |
struct { // Or broken down to individual bits: | |
// MSR bits: | |
u32 lsb:1; | |
u32 IE :1; // bit 30 (Interrupt Enable) | |
u32 C :1; // bit 29 (carry) | |
u32 BIP:1; // bit 28 (Break In Progress) | |
u32 FSL:1, ICE:1; | |
u32 DZO:1; // bit 25 (Division By Zero) | |
u32 DCE:1; | |
u32 EE :1; // bit 23 (Exceptions Enabled) | |
u32 EIP:1; // bit 22 (Exception In Progress) | |
u32 PVR:1; // bit 21 (PVR register set exists) | |
u32 UM :1; // bit 20 (1=Privileged instructions are prohibited, 0=Allowed) | |
u32 UMS:1; // bit 19 (User Mode Save) | |
u32 VM :1; // bit 18 (1=MMU address translation and access protection enabled) | |
u32 VMS:1; // bit 17 (Virtual Protected Mode Save) | |
u32 res:16; | |
u32 CC :1; // bit 0 (carry copy) | |
// ESR bits: | |
u32 EC: 5; // Exception cause | |
u32 Rx: 5; // Exception specific status: register | |
u32 S: 1; // Exception specific status: store | |
u32 DIZ:1; // Exception specific status: zone protection | |
u32 DS: 1; // Delay Slot Exception | |
u32 filler: 19; | |
}; | |
void PushUVM() { VMS=VM; UMS=UM; VM=0; UM=0; } | |
void PopUVM() { VM=VMS; UM=UMS; } | |
// SPR: Special Purpose Register access function (read/write) | |
void SPR(unsigned index, bool write, u32& param) | |
{ | |
#define r(reg, writeok) /* write or read macro */ \ | |
if(!writeok && write) break; \ | |
if(write) reg = param; else param = reg; \ | |
return | |
auto& tlb = UTLB[TLBX&63]; | |
switch(index) // The registers are numbered as follows: | |
{ | |
case 0x0000: r(PC, 0); // PC, program counter | |
case 0x0001: r(MSR, 1); // MSR, machine status register (i.e. flags) | |
case 0x0003: r(EAR, 0); // EAR, exception address register | |
case 0x0005: r(ESR, 0); // ESR, exception status register | |
case 0x000B: r(BTR, 0); // BTR, branch target register | |
case 0x0800: r(SLR, 1); // SLR, stack low register | |
case 0x0802: r(SHR, 1); // SHR, stack high register | |
case 0x1000: r(PID, 1); // PID, process identifier register | |
case 0x1001: r(ZPR, 1); // ZPR, zone protection register | |
case 0x1002: r(TLBX, 1); // TLBX, translation look-aside buffer index register | |
case 0x1003: // TLBLO, translation look-aside buffer low register | |
r(tlb.w.LO, 1); | |
case 0x1004: // TLBHI, translation look-aside buffer high register | |
if(write) | |
{ tlb.w.TID = PID; | |
tlb.w.HI = param; } | |
else | |
{ if(!UM) PID = tlb.w.TID; | |
param = tlb.w.HI; } | |
return; | |
case 0x1005: // TLBSX, translation look-aside buffer search index register | |
if(write) // Do a TLB search and report the matching TLB index in TLBX. | |
{ TLBX = 0x80000000u; // If nothing found, this value remains indicating the miss. | |
TLBfind<intent_load>(param&0xFFFFFC00, | |
[&](unsigned tlbno) -> unsigned { TLBX = tlbno; return 0; } ); | |
return; } | |
} | |
if(index < 0x2000 || write) index = 15; // Fallback to a safe "undefined" slot | |
// Access the processor version registers | |
if(write) PVRs[index&15] = param; else param = PVRs[index&15]; | |
#undef r | |
} | |
/* Virtual memory management unit (MMU) function */ | |
template<intent rw, typename T> | |
unsigned TLBfind(unsigned virt_addr, const T& func) const | |
{ | |
// Cache the last used index for this type of data access | |
static unsigned last_matched_index = 0; | |
bool UserMode = UM; | |
unsigned exception_no = 16+2+4; // Exception for "no tlb" | |
for(unsigned vv=0; vv<64; ++vv) | |
{ | |
unsigned v = (last_matched_index + vv) % 64; // Index into TLB array | |
auto& tlb = UTLB[v]; | |
// Skip invalid TLBs | |
if(!tlb.b.V) continue; | |
// Skip TLB where TID mismatches the PID | |
if(tlb.w.TID != 0 && PID != tlb.w.TID) continue; | |
unsigned page_bits = 10 + tlb.b.SIZE * 2; | |
// Skip TLB where TAG mismatches the given address | |
if( u32(virt_addr ^ (tlb.b.TAG<<10)) >> page_bits ) continue; | |
unsigned zone_type = (ZPR >> (30 - tlb.b.ZSEL*2)) & 3; | |
// Remove the bit indicating "no tlb found" exception | |
exception_no &= ~2; | |
if(zone_type == 0 && UserMode) continue; // zone-prohibited access | |
// Remove the bit indicating "zone protection" exception | |
exception_no &= ~4; | |
if(zone_type < (UserMode?3:2)) // Check WR and EX, unless exempted by zone | |
{ | |
if(rw==intent_store && !tlb.b.WR) continue; // write access prohibited | |
if(rw==intent_execute && !tlb.b.EX) continue; // execute access prohibited | |
} | |
if(rw==intent_execute && tlb.b.G) continue; // I/O space is guarded from execution | |
last_matched_index = v; | |
return func(v); | |
} | |
return exception_no; | |
} | |
} regs; | |
public: | |
/* List of read/write functions for each 64k page of the memory space */ | |
struct { u32(*read)(u32); void(*write)(u32,u32); } IOmap[0x10000]; | |
/* All memory access is routed through this template function. */ | |
/* It handles reads and writes of different sizes, | |
* with or without virtual memory mapping, | |
* with or without byte order swaps (reverse). | |
*/ | |
template<intent rw=intent_load, unsigned size=4> | |
unsigned MemAccess(u32 virt_addr, u32& value, bool reverse=false) | |
{ | |
u32 phys_addr = virt_addr; | |
bool e_bit = false; | |
if(regs.VM) // Virtual memory mode? Do TLB lookup. | |
{ | |
unsigned exception = regs.TLBfind<rw>(virt_addr, | |
[&](unsigned tlbno) -> unsigned | |
{ | |
// Map the address using the matching TLB. | |
unsigned page_mask = (1024 << (2 * regs.UTLB[tlbno].b.SIZE)) - 1; | |
phys_addr = (regs.UTLB[tlbno].b.RPN << 10) | |
+ (virt_addr & page_mask); | |
e_bit = regs.UTLB[tlbno].b.E; | |
return 0; | |
}); | |
// Produce an exception if the mapping failed (LSB indicates code/data). | |
if(exception) | |
return exception | (rw==intent_execute); | |
} | |
// Verify alignment, and produce an exception if the address was not properly aligned | |
if(virt_addr % size != 0) return 1 | (size==4 ? 4 : 0); | |
// An "E" bit in a TLB reverses the endianess for that page (except in opcode lookups). | |
if(rw != intent_execute) reverse ^= e_bit; | |
// This macro byteswaps if needed due to host/guest endian differences or the reverse bit. | |
auto S = [=](u32 v) -> u32 | |
{ return size>1 && (reverse != CROSS_ENDIAN) ? SwapBytes(v,size) : v; }; | |
// Read full 32 bits from the memory/device, | |
// unless we're going to replace it entirely. | |
MemDataRef<size> data; | |
if(rw != intent_store || size != 4) | |
data.d.value = IOmap[phys_addr/0x10000].read(phys_addr & ~3); | |
// Create a reference to the relevant data within the 32 bit word. | |
auto& r = data.reffun( (phys_addr & (4-size)) ^ (reverse ? (4-size) : 0) ); | |
if(rw == intent_store) | |
{ | |
// Write to the relevant data, and commit | |
// the entire 32-bit word the memory/device. Byteswap if needed. | |
r = S(value); | |
IOmap[phys_addr/0x10000].write(phys_addr & ~3, data.d.value); | |
} | |
else | |
value = S(r); // Read the relevant data. Byteswap if needed. | |
return 0; // Return "no exception" code | |
} | |
// This struct records the desired action to happen after instruction execution. | |
struct execmode | |
{ | |
unsigned mode:2, type:30, target:32; | |
// Modes: | |
// 0 = Normally proceed to the next instruction (other fields are ignored) | |
// 1 = Jump, right now. (type=type of jump, target=where to jump) | |
// 2 = Jump, but first execute one more instruction (delay slot) | |
// 3 = Generate an exception, right now | |
// (type = type of exception, | |
// target = memory address causing the exception) | |
}; | |
// Decode and execute a single instruction. | |
execmode ExecuteOne(u32 i, u32 arg2) | |
{ | |
// Decode the operation type and operands from the opcode | |
unsigned i_op = (i>>26)&0x3F; // Operation | |
unsigned i_rd = (i>>21)&0x1F; // Target register index | |
u32 RA = regs.r[(i>>16)&0x1F]; // Source register value | |
bool imm = i_op & 0x08; // Category of instruction | |
if(!imm) arg2 = regs.r[(i>>11)&0x1F]; // Second operand | |
// Exit macros. | |
#define Return return execmode{0,0,0} | |
// Exit and store v to RD. | |
// Note that regs.r[0] is always preserved as zero, even if written to. | |
#define ReturnRD(v) do { if(i_rd) regs.r[i_rd] = (v); Return; } while(0) | |
// Exit and store v to RD, and set carry to c (evaluated before v, which may depend on carry). | |
#define ReturnRDc(v,c) do { bool nc = (c); if(i_rd) regs.r[i_rd] = (v); regs.C = regs.CC = nc; Return; } while(0) | |
// Exit and prepare to branch/jump to another memory location | |
#define ReturnJmp(target,delayed,type) return execmode{(delayed)?2:1, type, target} | |
// Exit and raise an exception, concerning the given address. | |
#define ReturnEx(extype, addr) return execmode{3,extype,addr} | |
// Exit and raise a MMU exception, setting the exception info appropriately. | |
#define ReturnMMUex(extype, ear, write) \ | |
do { regs.Rx = i_rd; regs.S = write; regs.DIZ = error & 4; \ | |
ReturnEx(extype &~ 4, ear); } while(0) | |
// Determine which kind of operation it was: | |
if(i == 0x00000000) ReturnEx(2,0); // Specially trap this illegal opcode | |
#define tr(op) (((op) & 0x07) | (((op) & 0x30) >> 1)) | |
switch( tr(i_op) ) | |
{ | |
// Add and subtract operations: (ADD|RSUB)(I)(K)(C); comparisons: CMP(U) | |
case tr(0x06): ReturnRD( arg2+RA+regs.C ); // ADD(I)KC | |
case tr(0x07): ReturnRD( arg2-RA-regs.C ); // RSUB(I)KC | |
case tr(0x02): ReturnRDc( arg2+RA+regs.C, arg2+RA+regs.C < arg2 ); // ADD(I)C | |
case tr(0x03): ReturnRDc( arg2-RA-regs.C, arg2-RA-regs.C > arg2 ); // RSUB(I)C | |
case tr(0x00): ReturnRDc( arg2+RA, arg2+RA < arg2 ); // ADD(I) | |
case tr(0x01): ReturnRDc( arg2-RA, arg2-RA > arg2 ); // RSUB(I) | |
case tr(0x04): ReturnRD( arg2+RA ); // ADD(I)K | |
case tr(0x05): switch(imm?0:(i&0x7FF)) | |
{ case 0: ReturnRD( arg2-RA ); // RSUB(I)K | |
case 1: ReturnRD((0x7FFFFFFF&(arg2-RA)) | (((s32)arg2 < (s32)RA)<<31) ); // CMP | |
case 3: ReturnRD((0x7FFFFFFF&(arg2-RA)) | (((u32)arg2 < (u32)RA)<<31) ); // CMPU | |
} Return;/*Silently ignore bad instruction*/ | |
// Bit manipulation operations: (OR|AND|XOR|ANDN)(I) | |
case tr(0x20): ReturnRD( RA | arg2 ); // OR, ORI | |
case tr(0x21): ReturnRD( RA & arg2 ); // AND, ANDI | |
case tr(0x22): ReturnRD( RA ^ arg2 ); // XOR, XORI | |
case tr(0x23): ReturnRD( RA &~ arg2 ); // ANDN,ANDNI | |
// Barrel shift operations: BS(RL|RA|LL)(I) | |
case tr(0x11): | |
if(i&0x400) ReturnRD( RA << (arg2 & 31) ); // BSLL(I) | |
if(i&0x200) ReturnRD( s32(RA) >> (arg2 & 31) ); // BSRA(I) | |
else ReturnRD( u32(RA) >> (arg2 & 31) ); // BSRL(I) | |
// Multiplications: MUL(I|H|HU|HSU) | |
case tr(0x10): switch(imm ? 0 : (i&0x7FF)) | |
{ case 0: ReturnRD( (s32)RA * (s32)arg2 ); // MUL,MULI | |
case 1: ReturnRD( ((s64)(s32)RA * (s64)(s32)arg2) >> 32 ); // MULH | |
case 2: ReturnRD( ((s64)(s32)RA * (s64)(u32)arg2) >> 32 ); // MULHSU | |
case 3: ReturnRD( ((u64)(u32)RA * (u64)(u32)arg2) >> 32 ); // MULHU | |
} Return; | |
// One-bit copy / shift-right operators: SEXT(8|16), SR(A|C|L); cache control: W(I|D)C | |
case tr(0x24): switch(i & 0x7FF) | |
{ case 0x60: ReturnRD( (s32)( s8)RA ); // SEXT8 (sign-extend 8 bits to 32 bits) | |
case 0x61: ReturnRD( (s32)(s16)RA ); // SEXT16 (sign-extend 16 bits to 32 bits) | |
case 0x01: ReturnRDc( (s32)RA >> 1, RA&1); // SRA (sign-extend) | |
case 0x21: ReturnRDc((regs.CC<<31) | (RA >> 1), RA&1); // SRC (carry-extend) | |
case 0x41: ReturnRDc( RA >> 1, RA&1); // SRL (zero-extend) | |
case 0x64: if(regs.UM) ReturnEx(7,0); Return; // WDC | |
case 0x66: if(regs.UM) ReturnEx(7,0); Return; // WDC.clear | |
case 0x74: if(regs.UM) ReturnEx(7,0); Return; // WDC.flush | |
case 0x76: if(regs.UM) ReturnEx(7,0); Return; // WDC.flush.clear | |
case 0x68: if(regs.UM) ReturnEx(7,0); Return; // WIC (privileged cache-control instruction) | |
} Return; | |
// Conditional branches with delay slots or not: (BEQ|BNE|BLT|BLE|BGT|BGE)(I)(D) | |
case tr(0x27): switch(i_rd & 15) | |
{ case 0: if((s32)RA == 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //EQ | |
case 1: if((s32)RA != 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //NE | |
case 2: if((s32)RA < 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //LT | |
case 3: if((s32)RA <= 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //LE | |
case 4: if((s32)RA > 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //GT | |
case 5: if((s32)RA >= 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //GE | |
} Return; | |
// Unconditional branches / system calls, with delay slots or not: BR(A)(L)(I)(D), BRK(I), MBAR | |
case tr(0x26): switch( (i >> 18) & 7) // This is hairy, because there are so many variants in this. | |
{ | |
#define R if(i_rd) regs.r[i_rd] = regs.PC - 4; /* Macro for saving return address */ | |
case 0: ReturnJmp(arg2+regs.PC-4, false, 0); // D=0, A=0, L=0 (BR(I) or MBAR). | |
case 4: ReturnJmp(arg2+regs.PC-4, true, 0); // D=1, A=0, L=0 (BR(I)D) | |
case 5: R ReturnJmp(arg2+regs.PC-4, true, 0); // D=1, A=0, L=1 (BRL(I)D) | |
case 2: ReturnJmp(arg2, false, 0); // D=0, A=1, L=0 (BRA(I)) | |
case 6: ReturnJmp(arg2, true, 0); // D=1, A=1, L=0 (BRA(I)D) | |
case 7: {bool e = imm && arg2 == v_usr_ex; // D=1, A=1, L=1 (BRAL(I)D) | |
R ReturnJmp(arg2, true, e ? 3 : 0 );} // escalate if target 0x08) | |
case 3: {bool e = imm && (arg2 == v_usr_ex || arg2 == v_brk); | |
if(regs.UM && !e) ReturnEx(7,0); // D=0, A=1, L=1 (BRK(I) = special, not delayed) | |
regs.BIP = true; // deny usermode access, except | |
R ReturnJmp(arg2, false, e ? 3 : 0 );} // escalate if target 0x08 or 0x18 | |
case 1: break; // D=0, A=0, L=1 (BRL(I) = invalid) | |
#undef R | |
} Return; | |
// Subroutine/exception returns: RT(S|I|B|E)D; Special register manipulation: M(F|T)S, MSR(CLR|SET) | |
case tr(0x25): | |
if(imm) // Note: MicroBlaze's return opcodes always include a delay slot. | |
{ | |
if((i_rd&7) && regs.UM) ReturnEx(7,0); // privileged return | |
ReturnJmp(RA+arg2, true, i_rd&7); // RTSD=0/RTID=1/RTBD=2/RTED=4 | |
} | |
// MFS and MTS | |
{u32& op2 = i_rd ? regs.r[i_rd] : arg2; | |
if((i&0x4000) && regs.UM) ReturnEx(7,0); // Write is a privileged operation | |
regs.SPR( i&0x3FFF, (i&0x4000), (i&0x4000) ? RA : op2 ); // Write or read | |
// Update the carry-copy bit | |
regs.CC = regs.C; | |
regs.PVR = true; // Ensure this flag is never turned off | |
ReturnRD(op2);} | |
// Memory access operations: | |
#define HandleLoadStoreOpcode(opcode, rw, size) \ | |
case tr(opcode): \ | |
{ \ | |
if(auto error = MemAccess<rw,size>(RA+arg2, regs.r[i_rd], !imm && (i&0x200))) \ | |
ReturnMMUex(error, RA+arg2, rw==intent_store); \ | |
if(rw == intent_load && !i_rd) regs.r[i_rd] = 0; /* Ensure RD stays zero */ \ | |
if(!imm && (i&0x400)) regs.C = regs.CC = false; /* exclusive access, not implemented */ \ | |
ReturnRD(regs.r[i_rd]); \ | |
} | |
HandleLoadStoreOpcode(0x30, intent_load,1) // LBU(I)(R) | |
HandleLoadStoreOpcode(0x31, intent_load,2) // LHU(I)(R) | |
HandleLoadStoreOpcode(0x32, intent_load,4) // LW(I)(R)(X) | |
HandleLoadStoreOpcode(0x34, intent_store,1) // SB(I)(R) | |
HandleLoadStoreOpcode(0x35, intent_store,2) // SH(I)(R) | |
HandleLoadStoreOpcode(0x36, intent_store,4) // SW(I)(R)(X) | |
} | |
#undef tr | |
// Anything else: Illegal instruction exception | |
ReturnEx(2,0); | |
} | |
// Reset the CPU | |
void DoReset(u32 vector = v_reset, u32 cmdline_pointer = 0) | |
{ | |
// Clear all registers. (Note: Real MicroBlaze does not clear R1..R31.) | |
regs = {}; | |
// Set those registers that should not be zero: | |
regs.SHR = 0xFFFFFFFFu; | |
regs.PC = vector; | |
regs.r[5] = cmdline_pointer; | |
// The following CPU features are not supported by this emulator: | |
// | |
// C_USE_PCMP_INSTR not implemented (PCMPNE,PCMPEQ,PCMPBF,CLZ) because CLZ and PCMPBF require proportionally much code | |
// C_USE_DIV not implemented (IDIV,IDIVU) because handling the corner cases requires much code | |
// C_USE_FPU=1 not implemented (FADD,FRSUB,FMUL,FDIV,FCMP) because lots of code (comparisons, corner cases) | |
// C_USE_FPU=2 not implemented (FLT,FINT,FSQRT) because C_USE_FPU=1 not implemented | |
// C_USE_STACK_PROTECTION not implemented because not utilized by Linux | |
// C_USE_MSR_INSTR not implemented because implementation takes unproportional amount of code and not really needed | |
// C_FSL_LINKS not implemented because is very complex framework but yet completely ignored by Linux | |
// | |
// The following CPU features ARE supported by this emulator: | |
// | |
// C_USE_BARREL=1, because emulating these was cheap in my opinion, and they help the performance a great deal | |
// C_USE_HW_MUL=2, because emulating these was cheap in my opinion, and they help the performance a great deal | |
// C_ILL_OPCODE_EXCEPTIONS=1, trivial to implement | |
// C_UNALIGNED_EXCEPTIONS=1, trivial to implement | |
// C_OPCODE_0x0_ILLEGAL=1, trivial to implement | |
// C_USE_MMU=3, because without a fully functional MMU, Linux would be quite crippled in my opinion | |
// C_MMU_ZONES=16, because once you support one, supporting 16 is trivial. Linux requires at least 2. | |
// C_MMU_PRIVILEGED_INSTR=0 fully protect privileged instructions | |
// C_MMU_TLB_ACCESS=3 full TLB register access | |
// | |
// Now, set up bit fields indicating the list of processor features. | |
// For brevity (to save time in the video), I use hexadecimal constants here. | |
regs.PVR = true; | |
regs.PVRs[0] = 0xD4801500; | |
regs.PVRs[2] = 0x00005470; | |
regs.PVRs[11] = 0xC0200000|regs.MSR; | |
} | |
bool InterruptsEnabled() const | |
{ | |
return regs.IE && !regs.EIP && !regs.BIP; | |
} | |
// Process an interrupt request (note: Call this only when InterruptsEnabled()=true). | |
void DoInterrupt(u32 vector = v_intr) | |
{ | |
regs.PushUVM(); // Go to kernel mode, save current mode | |
regs.IE = false; | |
regs.r[14] = regs.PC; | |
regs.PC = vector; | |
} | |
// Fetch and execute one instruction, and process branch/jump/exception | |
void Singlestep() | |
{ | |
execmode next; | |
auto NextOpcode = [&] (u32& value) -> bool | |
{ | |
unsigned error = MemAccess<intent_execute,4>(regs.PC, value, !CROSS_ENDIAN); | |
if(error) { next = {3,error&~4,regs.PC}; return false; } // Exception | |
regs.PC += 4; | |
return true; | |
}; | |
u32 op_begin = regs.PC; | |
u32 opcode; | |
if(!NextOpcode(opcode)) | |
{ | |
// Loading the instruction failed; produce an exception. | |
goto GotException; | |
} | |
// HACK: When Linux function "emit_log_char" is invoked, output the | |
// character directly to the UART. This makes the boot-process more | |
// verbose when the kernel UART driver has not been loaded yet. | |
if(op_begin == 0xc0010e04) IOmap[0x8400].write(0,regs.r[5]<<24); | |
{s32 immval = (s16)opcode; | |
// If the opcode was IMM, read the next one immediately. | |
if( (opcode >> 16) == 0xB000 ) | |
{ | |
if(!NextOpcode(opcode)) goto GotException; | |
immval = (immval << 16) | (opcode & 0xFFFF); // Extend the literal parameter | |
} | |
next = ExecuteOne(opcode, immval);} | |
switch(next.mode) | |
{ | |
case 0: default: break; | |
case 2: // Delayed jump | |
{ | |
u32 btr = next.target; // Save the branch target | |
if(!NextOpcode(opcode)) { next.mode = 2; regs.BTR = btr; goto GotException; } | |
// Execute the in-delay-slot instruction. | |
// Note: MicroBlaze documentation explictly prohibits the delay | |
// slot being taken by an IMM, BRK or branch instruction, | |
// so we won't deal with those cases here. | |
execmode next2 = ExecuteOne(opcode, (s16)opcode); | |
if(next2.mode == 3) // Did an exception happen in a delay slot? | |
{ regs.BTR = btr; | |
next.type = next2.type; // Copy exception type | |
next.target = next2.target; // Copy exception address | |
goto GotException; } | |
} // passthru | |
case 1: // Jump | |
switch(next.type) // Check for a special type of a jump | |
{ | |
// type 1 = Return From Interrupt (set IE; copy UMS,VMS to UM,VM) | |
case 1: regs.IE = true; regs.PopUVM(); break; | |
// type 2 = Return From Break (clear BIP; copy UMS,VMS to UM,VM) | |
case 2: regs.BIP = false; regs.PopUVM(); break; | |
// type 3 = Break or User Exception (copy UM,VM to UMS,VMS) | |
case 3: regs.PushUVM(); break; | |
// type 4 = Return From Exception (set EE, clear EIP, zero ESR; copy UMS,VMS to UM,VM) | |
case 4: regs.EIP = false; regs.EE = true; regs.ESR = 0; regs.PopUVM(); break; | |
} | |
regs.PC = next.target; | |
break; | |
case 3: GotException:; // Exception | |
if(regs.EE || next.type >= 16) | |
{ | |
// If EE is unset, ignore exception unless caused by MMU | |
// For MMU exceptions, the return address is the beginning | |
// of the opcode that caused the fault. For others, it is | |
// the next opcode. | |
regs.EIP = true; // Exception in progress = true | |
regs.EAR = next.target; // Address causing the exception | |
regs.EE = false; // Exceptions enabled = false | |
regs.EC = next.type; // Exception code | |
regs.DS = next.mode == 2; // Was it a delay slot exception. | |
regs.PushUVM(); // Go kernel mode, save previous mode | |
regs.r[17] = (next.type < 16 ? regs.PC : op_begin); | |
regs.PC = v_hw_ex; | |
} | |
} | |
} | |
}; | |
// Emulate the following hardware (Linux kernel is configured for them using a DTS).
// The "IO" addresses are where main() maps each device into the CPU's address space.
MicroBlazeCPU    cpu;    // Configured at 100 MHz
SDRAM<0x1800000> RAM;    // IO: 10000000 , 24 MiB of RAM (MicroBlaze Linux requires >=16.)
SDRAM<0x10000>   LowRAM; // IO: 00000000 , 64 KiB of RAM (needed for exception tables)
XilinxINTC       intc;   // IO: 81800000
XilinxTimer<2>   timer;  // IO: 83c00000  IRQ 3
XilinxUARTlite   uart;   // IO: 84000000  IRQ 8
// Helper function for swapping byte order on memory access.
// Converts a 32-bit device register value between the host and guest
// byte orders; it does nothing when the two are the same.
static u32 S(u32 v)
{
    return CROSS_ENDIAN ? SwapBytes(v, 4) : v;
}
int main() | |
{ | |
// Set up the memory mapping. | |
// First, make all unassigned addresses produce a run-time error. | |
for(unsigned a=0; a<0x10000; ++a) | |
cpu.IOmap[a] = { [](u32)->u32{ abort(); }, | |
[](u32,u32) { abort(); } }; | |
cpu.IOmap[0x0000] = { [](u32 i) { return LowRAM.Read(i/4); } , | |
[](u32 i,u32 v) { LowRAM.Write(i/4,v); } }; | |
for(unsigned a=0x1000; a<0x1180; ++a) | |
cpu.IOmap[a] = { [](u32 i) { return RAM.Read((i-0x10000000)/4); } , | |
[](u32 i,u32 v) { RAM.Write((i-0x10000000)/4,v); } }; | |
cpu.IOmap[0x8180] = { [](u32 i) { return S(intc.Read((i/4)%8)); } , | |
[](u32 i,u32 v) { intc.Write((i/4)%8,S(v)); } }; | |
cpu.IOmap[0x8400] = { [](u32 i) { return S(uart.Read((i/4)%4)); } , | |
[](u32 i,u32 v) { uart.Write((i/4)%4,S(v)); } }; | |
cpu.IOmap[0x83C0] = { [](u32 i) { return S(timer.Read((i/16)&1, (i/4)%4)); } , | |
[](u32 i,u32 v) { timer.Write((i/16)&1, (i/4)%4, S(v)); } }; | |
// Load Linux kernel from U-Boot image | |
struct { u32 a[3], image_size, load_offs, entry_offs, b[10]; } Header; | |
const char* kernel = "simple~1.ub"; // simpleImage.emu.ub | |
FILE* fp = fopen(kernel, "rb"); | |
fprintf(stderr, "Loading kernel, %s...\n", kernel); | |
fread(&Header, 0x40, 1, fp); | |
if(LITTLE_ENDIAN_HOST) Header.load_offs = SwapBytes(Header.load_offs, 4); | |
if(LITTLE_ENDIAN_HOST) Header.entry_offs = SwapBytes(Header.entry_offs, 4); | |
if(LITTLE_ENDIAN_HOST) Header.image_size = SwapBytes(Header.image_size, 4); | |
// Reset the CPU (and bring memory mapper to user mode) | |
cpu.DoReset(Header.entry_offs, Header.load_offs + Header.image_size); | |
// Read the kernel image into the RAM | |
for(auto offs = Header.load_offs; offs < Header.load_offs + Header.image_size; offs += 4) | |
{ | |
u32 value; | |
if(fread(&value, 1, 4, fp) <= 0) break; | |
cpu.MemAccess<intent_store,4> (offs, value, CROSS_ENDIAN); | |
} | |
fclose(fp); | |
fprintf(stderr, "Done loading kernel.\n"); | |
// Run the machine | |
for(;;) // Infinite loop | |
{ | |
cpu.Singlestep(); | |
uart.Input(); | |
timer.Tick(); | |
if(timer.CheckIRQ()) intc.TriggerIRQ(3); | |
if(cpu.InterruptsEnabled()) | |
{ | |
if(uart.CheckIRQ()) intc.TriggerIRQ(8); | |
if(intc.CheckIRQ()) cpu.DoInterrupt(); | |
} | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment