Skip to content

Instantly share code, notes, and snippets.

@nooga
Created March 2, 2015 12:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nooga/19a3029953ec4729721b to your computer and use it in GitHub Desktop.
Save nooga/19a3029953ec4729721b to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
/* Integer aliases used throughout the emulator: uN is an unsigned and sN a
 * signed type that is at least N bits wide. */
using u8  = uint_least8_t;
using s8  = int_least8_t;
using u16 = uint_least16_t;
using s16 = int_least16_t;
using u32 = uint_least32_t;
using s32 = int_least32_t;
using u64 = uint_least64_t;
using s64 = int_least64_t;
/* MemDataRef<size> overlays one 32-bit word with an array of smaller items,
 * so that a single item inside the word can be read or written in place.
 * Only the explicit specialisations for size 1, 2 and 4 are meant to be used;
 * they forward to the generic template with the matching item type.
 */
template<unsigned size, typename T=void>
struct MemDataRef
{
    union
    {
        u32 value;              // The complete 32-bit word
        T   alt[4 / sizeof(T)]; // The same storage seen as items of type T
    } d;

    // Return a reference to the item that starts at byte position "ind"
    // of the word. "ind" is trusted to be aligned for the item type.
    T& reffun(unsigned ind)
    {
        // When sizeof(T) is 2 the byte position is halved to get the array
        // index; for every other item type it is used unchanged.
        const unsigned shift = (sizeof(T) == 2);
        return d.alt[ind >> shift];
    }
};
template<> struct MemDataRef<1,void>: public MemDataRef<0, u8> { };  // byte items
template<> struct MemDataRef<2,void>: public MemDataRef<0, u16> { }; // halfword items
template<> struct MemDataRef<4,void>: public MemDataRef<0, u32> { }; // the word itself
/* Declare facilities for detecting and dealing with different byteorder.
 * Both byte orders are fixed here at compile time; nothing is detected at
 * run time. HOST is the machine running this program (main() uses it to
 * decide whether fields read from the image header need swapping); SLAVE is
 * presumably the emulated machine. */
#define LITTLE_ENDIAN_HOST 1 /* 1=little endian */
#define LITTLE_ENDIAN_SLAVE 0 /* 0=big endian */
/* Nonzero when the two byte orders differ, i.e. when values crossing
 * between host and slave have to go through SwapBytes(). */
#define CROSS_ENDIAN (LITTLE_ENDIAN_HOST != LITTLE_ENDIAN_SLAVE)
// Reverse the byte order of "value".
//   size < 2 : the value is returned unchanged
//   size 2-3 : the two bytes of each 16-bit half are exchanged
//   size >= 4: additionally the two 16-bit halves are exchanged,
//              which reverses all four bytes
static unsigned SwapBytes(unsigned value, unsigned size)
{
    if(size < 2) return value;

    // Exchange neighbouring bytes inside both halfwords.
    const unsigned moved_down = (value >> 8) & 0x00FF00FFu;
    const unsigned moved_up   = (value << 8) & 0xFF00FF00u;
    const unsigned swapped    = moved_down | moved_up;
    if(size < 4) return swapped;

    // Exchange the halfwords to complete the 32-bit reversal.
    return (swapped << 16) | (swapped >> 16);
}
/* Console I/O (note: there should be terminal emulation here, but
* such implementation is skipped here, for brevity) */
# include <conio.h>
static struct
{
    // True when a key press is waiting to be read.
    bool Hit()
    {
        return kbhit();
    }
    // Read one key. A zero code from getch() is reported as a space.
    unsigned Getc()
    {
        const int key = getch();
        if(key == 0) return ' ';
        return key;
    }
    // Write one character to stdout and flush it immediately.
    void Putc(unsigned c)
    {
        putchar(c);
        fflush(stdout);
    }
} Console;
/* Declare all the devices that we emulate. */
// RAM device is very simple.
template<unsigned size_bytes>
class SDRAM
{
u32 buffer[size_bytes / 4];
public:
u32 Read(u32 index) const { return buffer[index]; }
void Write(u32 index, u32 value) { buffer[index] = value; }
};
// Xilinx UART lite (serial port), lite version.
// Characters written by the guest go straight to Console; characters typed
// on Console are queued in an 8-byte receive FIFO.
class XilinxUARTlite
{
    bool ints_enabled, tx_emptied_int_pending, overrun;
    struct { u8 len, pos, fifo[8]; } in; // Input queue: len = bytes stored, pos = write counter
    // List of supported status and control bits
    enum { RXVALID=1, RXFULL=2, TXEMPTY=4, IE=16, OVERRUN=32, RST_RX=2 };
public:
    // Every flag is initialised explicitly, including tx_emptied_int_pending,
    // so that CheckIRQ() never reads an indeterminate value even when the
    // object does not have static storage.
    XilinxUARTlite()
        : ints_enabled(false), tx_emptied_int_pending(false), overrun(false),
          in{0,0,{0}} { }

    // Read register "index".
    // The status word is always computed (and a pending overrun flag is
    // cleared by doing so). For index 0 or 1, when the queue is not empty,
    // the oldest queued byte is removed and returned instead of the status.
    u32 Read(u32 index)
    {
        u32 result = TXEMPTY; // Transmission is instantaneous, so TX is always empty
        if(ints_enabled) result |= IE;
        if(in.len > 0)   result |= RXVALID;
        if(in.len == 8)  result |= RXFULL;
        if(overrun) { result |= OVERRUN; overrun = false; }
        // The oldest byte lives "len" slots behind the write counter.
        if(index < 2 && in.len)
            result = in.fifo[ (in.pos + 8 - in.len--) % 8 ];
        return result;
    }

    // Write register "index".
    // index < 3: print "value" as a character and latch the TX-emptied event.
    // otherwise: control write; updates the interrupt enable flag and
    //            empties the receive queue when RST_RX is set.
    void Write(u32 index, u32 value)
    {
        if(index < 3)
        {
            Console.Putc(value);
            tx_emptied_int_pending = true;
        }
        else
        {
            ints_enabled = value & IE;
            if(value & RST_RX) in.pos = in.len = 0;
        }
    }

    // Report whether the UART requests an interrupt.
    // The pending TX-emptied event is consumed whenever true is returned.
    bool CheckIRQ()
    {
        // TX interrupt is an edge; RX interrupt is a level.
        if(ints_enabled && (in.len > 0 || tx_emptied_int_pending))
        {
            tx_emptied_int_pending = false;
            return true;
        }
        return false;
    }

    // Poll the console for input. The console is checked only on the call
    // where the counter is zero (which then reloads it to 2000); the other
    // calls just count down. The counter is a function-local static and is
    // therefore shared by all instances.
    void Input()
    {
        static unsigned counter = 0;
        if(!counter) counter = 2000; else { --counter; return; }
        if(Console.Hit())
        {
            // A full queue records an overrun and leaves the key unread.
            if(in.len >= 8) { overrun = true; return; }
            in.fifo[ in.pos++ % 8 ] = Console.Getc();
            ++in.len;
        }
    }
};
// Xilinx interrupt timer (multi-timer chip)
template<unsigned N>
class XilinxTimer
{
// List of registers
enum { TCSR=0, TLR=1, TCR=2 };
// List of control bits
enum { UDT=2, ARHT=16, LOAD=32, ENIT=64, ENT=128, TINT=256, ENALL=1024 };
struct impl
{
u32 regs[4];
void Write(u32 index, u32 value)
{
if(index == TCSR)
{ // Setting the TINT bit actually clears it,
// otherwise it is preserved.
if(value & TINT) value &= ~TINT;
else value |= (regs[index] & TINT); }
regs[index] = value;
}
bool CheckIRQ() const { return (regs[TCSR] & (TINT|ENIT)) == (TINT|ENIT); }
void Tick()
{
if(!(regs[TCSR] & ENT)) return; // Timer does nothing unless enabled
if(regs[TCSR] & LOAD) { regs[TCR] = regs[TLR]; return; }
u32 previous = regs[TCSR & UDT] ? regs[TCR]-- : regs[TCR]++;
if( (previous ^ regs[TCR]) == ~u32(0)) // overflow in either direction
{
regs[TCSR] |= TINT; // Raise the interrupt flag
if(regs[TCSR] & ARHT) regs[TCR] = regs[TLR]; // Restart timer
else regs[TCSR] &= ~ENT; // Stop timer
}
}
} timers[N];
public:
u32 Read(u32 which, u32 index)
{
return timers[which].regs[index];
}
void Write(u32 which, u32 index, u32 value)
{
timers[which].Write(index, value);
// Setting the ENALL bit on any of the timers sets ENT on all of them.
if(value & ENALL) for(auto& t: timers) t.regs[TCSR] |= ENT;
}
void Tick()
{
for(auto& t: timers) t.Tick();
}
bool CheckIRQ() const
{
for(const auto& t: timers) if(t.CheckIRQ()) return true;
return false;
}
};
// Xilinx OPB Interrupt Controller
class XilinxINTC
{
// List of registers
enum { ISR/* List of active IRQs */,
IPR,
IER/* List of IRQs that can result in an interrupt */,
IAR, SIE, CIE, IVR, MER };
// List of bits
enum { ME=1, HIE=2 };
u32 regs[8];
public:
XilinxINTC(): regs() { }
u32 Read(u32 index) const
{
if(index == IPR) return regs[ISR] & regs[IER];
if(index == IVR) // Ordinal number of highest triggered interrupt
{
for(unsigned i=0; i<32; ++i) if(Read(IPR) & (1<<i)) return i;
return ~0u;
}
return regs[index];
}
void Write(u32 index, u32 value)
{
switch(index)
{
case SIE: regs[IER] |= value; break; // Atomic set ie.
case CIE: regs[IER] &= ~value; break; // Atomic clear ie.
case IAR: regs[ISR] &= ~value; break; // Acknowledge IRQ (so it's no longer raised)
case ISR: if(!(regs[MER] & HIE)) // If software interrupts are enabled,
default: regs[index] = value; // Update the register as indicated.
}
}
bool CheckIRQ()
{
return (regs[MER] & ME) && Read(IPR);
}
void TriggerIRQ(unsigned irq)
{
if(regs[MER] & HIE) // If hardware interrupts are enabled,
regs[ISR] |= 1 << irq; // set the IRQ level.
}
};
// Purpose of a memory access. It is a template argument of TLBfind() and
// MemAccess(): a store is checked against the WR bit of a TLB entry, an
// instruction fetch against the EX and G bits, a load against neither.
enum intent { intent_load, intent_store, intent_execute };
class MicroBlazeCPU // Micro-Blaze CPU
{
// Fixed addresses that the CPU jumps to on special events.
// v_reset is the default argument of DoReset(), v_intr of DoInterrupt();
// v_hw_ex is loaded into PC when Singlestep() takes an exception; v_usr_ex
// and v_brk are recognised as special branch targets in ExecuteOne().
enum {
v_reset = 0x00,
v_usr_ex = 0x08,
v_intr = 0x10,
v_brk = 0x18,
v_hw_ex = 0x20
};
union reg // MicroBlaze register file
{
// First view of the register file: whole registers.
// MSR and ESR come first so that the bit-field view (the other member of
// the enclosing union) can overlay them.
struct { // As 32-bit integers
u32 MSR; // Machine Status Register, i.e. flags
u32 ESR; // Exception Status Register, i.e. exception flags
// 32 general purpose registers. [0] is always zero and cannot be modified.
u32 r[32];
// Special purpose registers.
// See SPR() below for the list of meanings.
u32 PC, EAR, BTR, SLR, SHR, ZPR, TLBX, PVRs[16];
// The 64 TLB entries of the MMU. Each entry can be accessed as whole
// words (w: LO, HI and the TID byte) or as individual fields (b), which
// overlay LO and HI.
// NOTE(review): the field view assumes the compiler allocates bit-fields
// starting at the least significant bit of each word - confirm per target.
union
{
struct { u32 LO, HI; u8 TID; } w;
struct { u32 G:1, M:1, I:1, W:1, ZSEL:4, WR:1,EX:1, RPN:22;
// Bits M, I, W are ignored. RPN=Real Page Number
u32 filler:4, U0:1, E:1, V:1, SIZE:3, TAG:22;
// E=endianess. Page size is 4^SIZE kibibytes.
} b;
} UTLB[64];
u8 PID;
};
// Second view of the register file: the individual bits of MSR (first 32
// bit-field bits) followed by those of ESR (next 32).
// The "bit NN" numbers in the comments count from the most significant bit
// (so the last field listed, CC, is "bit 0").
// NOTE(review): overlaying these fields on MSR/ESR assumes bit-fields are
// allocated starting at the least significant bit - confirm per target.
struct { // Or broken down to individual bits:
// MSR bits:
u32 lsb:1;
u32 IE :1; // bit 30 (Interrupt Enable)
u32 C :1; // bit 29 (carry)
u32 BIP:1; // bit 28 (Break In Progress)
u32 FSL:1, ICE:1;
u32 DZO:1; // bit 25 (Division By Zero)
u32 DCE:1;
u32 EE :1; // bit 23 (Exceptions Enabled)
u32 EIP:1; // bit 22 (Exception In Progress)
u32 PVR:1; // bit 21 (PVR register set exists)
u32 UM :1; // bit 20 (1=Privileged instructions are prohibited, 0=Allowed)
u32 UMS:1; // bit 19 (User Mode Save)
u32 VM :1; // bit 18 (1=MMU address translation and access protection enabled)
u32 VMS:1; // bit 17 (Virtual Protected Mode Save)
u32 res:16;
u32 CC :1; // bit 0 (carry copy)
// ESR bits:
u32 EC: 5; // Exception cause
u32 Rx: 5; // Exception specific status: register
u32 S: 1; // Exception specific status: store
u32 DIZ:1; // Exception specific status: zone protection
u32 DS: 1; // Delay Slot Exception
u32 filler: 19;
};
// Save the user-mode and virtual-mode flags into their "save" copies,
// then clear both flags.
void PushUVM()
{
    UMS = UM;  UM = 0;
    VMS = VM;  VM = 0;
}
// Restore the user-mode and virtual-mode flags from their "save" copies.
void PopUVM()
{
    UM = UMS;
    VM = VMS;
}
// SPR: Special Purpose Register access function (read/write)
//   index = register number as encoded in the instruction
//   write = true to store "param" into the register, false to load the
//           register into "param"
//   param = value to write, or destination of the value read
// Registers without special handling end up in the PVRs array: reads of an
// index >= 0x2000 return PVRs[index & 15]; all other unhandled accesses
// (unknown registers, writes to read-only ones, reads of TLBSX) use slot 15.
void SPR(unsigned index, bool write, u32& param)
{
// r(reg, writeok) expands to a complete handler for a simple register:
// a write to a register with writeok=0 leaves the switch (and thus lands in
// the fallback below); otherwise the transfer is done and SPR() returns.
#define r(reg, writeok) /* write or read macro */ \
if(!writeok && write) break; \
if(write) reg = param; else param = reg; \
return
// The TLB entry selected by the low six bits of TLBX, used by TLBLO/TLBHI.
auto& tlb = UTLB[TLBX&63];
switch(index) // The registers are numbered as follows:
{
case 0x0000: r(PC, 0); // PC, program counter
case 0x0001: r(MSR, 1); // MSR, machine status register (i.e. flags)
case 0x0003: r(EAR, 0); // EAR, exception address register
case 0x0005: r(ESR, 0); // ESR, exception status register
case 0x000B: r(BTR, 0); // BTR, branch target register
case 0x0800: r(SLR, 1); // SLR, stack low register
case 0x0802: r(SHR, 1); // SHR, stack high register
case 0x1000: r(PID, 1); // PID, process identifier register
case 0x1001: r(ZPR, 1); // ZPR, zone protection register
case 0x1002: r(TLBX, 1); // TLBX, translation look-aside buffer index register
case 0x1003: // TLBLO, translation look-aside buffer low register
r(tlb.w.LO, 1);
case 0x1004: // TLBHI, translation look-aside buffer high register
// Writing TLBHI also tags the entry with the current PID; reading it
// (outside user mode) copies the entry's tag back into PID.
if(write)
{ tlb.w.TID = PID;
tlb.w.HI = param; }
else
{ if(!UM) PID = tlb.w.TID;
param = tlb.w.HI; }
return;
case 0x1005: // TLBSX, translation look-aside buffer search index register
if(write) // Do a TLB search and report the matching TLB index in TLBX.
{ TLBX = 0x80000000u; // If nothing found, this value remains indicating the miss.
TLBfind<intent_load>(param&0xFFFFFC00,
[&](unsigned tlbno) -> unsigned { TLBX = tlbno; return 0; } );
return; }
}
if(index < 0x2000 || write) index = 15; // Fallback to a safe "undefined" slot
// Access the processor version registers
if(write) PVRs[index&15] = param; else param = PVRs[index&15];
#undef r
}
/* Virtual memory management unit (MMU) function */
template<intent rw, typename T>
unsigned TLBfind(unsigned virt_addr, const T& func) const
{
// Cache the last used index for this type of data access
static unsigned last_matched_index = 0;
bool UserMode = UM;
unsigned exception_no = 16+2+4; // Exception for "no tlb"
for(unsigned vv=0; vv<64; ++vv)
{
unsigned v = (last_matched_index + vv) % 64; // Index into TLB array
auto& tlb = UTLB[v];
// Skip invalid TLBs
if(!tlb.b.V) continue;
// Skip TLB where TID mismatches the PID
if(tlb.w.TID != 0 && PID != tlb.w.TID) continue;
unsigned page_bits = 10 + tlb.b.SIZE * 2;
// Skip TLB where TAG mismatches the given address
if( u32(virt_addr ^ (tlb.b.TAG<<10)) >> page_bits ) continue;
unsigned zone_type = (ZPR >> (30 - tlb.b.ZSEL*2)) & 3;
// Remove the bit indicating "no tlb found" exception
exception_no &= ~2;
if(zone_type == 0 && UserMode) continue; // zone-prohibited access
// Remove the bit indicating "zone protection" exception
exception_no &= ~4;
if(zone_type < (UserMode?3:2)) // Check WR and EX, unless exempted by zone
{
if(rw==intent_store && !tlb.b.WR) continue; // write access prohibited
if(rw==intent_execute && !tlb.b.EX) continue; // execute access prohibited
}
if(rw==intent_execute && tlb.b.G) continue; // I/O space is guarded from execution
last_matched_index = v;
return func(v);
}
return exception_no;
}
} regs;
public:
/* List of read/write functions for each 64k page of the memory space.
 * MemAccess() picks the entry with (physical address / 0x10000) and hands
 * the handler the complete physical address rounded down to a multiple of 4
 * (not an offset within the page); writes also receive the 32-bit word. */
struct { u32(*read)(u32); void(*write)(u32,u32); } IOmap[0x10000];
/* All memory access is routed through this template function. */
/* It handles reads and writes of different sizes,
 * with or without virtual memory mapping,
 * with or without byte order swaps (reverse).
 *
 *   rw        = kind of access (load, store or instruction fetch)
 *   size      = access size in bytes (1, 2 or 4)
 *   virt_addr = address as seen by the program
 *   value     = source of a store, or destination of a load/fetch
 *   reverse   = request the opposite byte order
 * Returns 0 on success. Otherwise returns an exception code and leaves both
 * memory and "value" untouched: a TLBfind() code (with the lowest bit set
 * for instruction fetches), or 1 for a misaligned address (with bit 2 set
 * for word accesses).
 */
template<intent rw=intent_load, unsigned size=4>
unsigned MemAccess(u32 virt_addr, u32& value, bool reverse=false)
{
    u32 phys_addr = virt_addr;
    bool e_bit = false;
    if(regs.VM) // Virtual memory mode? Do TLB lookup.
    {
        unsigned exception = regs.TLBfind<rw>(virt_addr,
            [&](unsigned tlbno) -> unsigned
            {
                // Map the address using the matching TLB.
                const auto& entry = regs.UTLB[tlbno].b;
                unsigned page_mask = (1024 << (2 * entry.SIZE)) - 1;
                // The 22-bit page number is widened to u32 before shifting:
                // the bit-field promotes to int, and a page number of
                // 0x200000 or more shifted left by 10 would overflow it.
                phys_addr = (u32(entry.RPN) << 10)
                          + (virt_addr & page_mask);
                e_bit = entry.E;
                return 0;
            });
        // Produce an exception if the mapping failed (LSB indicates code/data).
        if(exception)
            return exception | (rw==intent_execute);
    }
    // Verify alignment, and produce an exception if the address was not properly aligned
    if(virt_addr % size != 0) return 1 | (size==4 ? 4 : 0);
    // An "E" bit in a TLB reverses the endianess for that page (except in opcode lookups).
    if(rw != intent_execute) reverse ^= e_bit;
    // This macro byteswaps if needed due to host/guest endian differences or the reverse bit.
    auto S = [=](u32 v) -> u32
        { return size>1 && (reverse != CROSS_ENDIAN) ? SwapBytes(v,size) : v; };
    // Read full 32 bits from the memory/device,
    // unless we're going to replace it entirely.
    MemDataRef<size> data;
    if(rw != intent_store || size != 4)
        data.d.value = IOmap[phys_addr/0x10000].read(phys_addr & ~3);
    // Create a reference to the relevant data within the 32 bit word.
    // A reversed access addresses the mirrored position inside the word.
    auto& r = data.reffun( (phys_addr & (4-size)) ^ (reverse ? (4-size) : 0) );
    if(rw == intent_store)
    {
        // Write to the relevant data, and commit
        // the entire 32-bit word the memory/device. Byteswap if needed.
        r = S(value);
        IOmap[phys_addr/0x10000].write(phys_addr & ~3, data.d.value);
    }
    else
        value = S(r); // Read the relevant data. Byteswap if needed.
    return 0; // Return "no exception" code
}
// This struct records the desired action to happen after instruction execution.
// It is produced by ExecuteOne() and acted upon by Singlestep().
struct execmode
{
unsigned mode:2, type:30, target:32;
// Modes:
// 0 = Normally proceed to the next instruction (other fields are ignored)
// 1 = Jump, right now. (type=type of jump, target=where to jump)
// 2 = Jump, but first execute one more instruction (delay slot)
// 3 = Generate an exception, right now
// (type = type of exception,
// target = memory address causing the exception)
//
// Jump types, as handled by Singlestep(): 0 = plain jump, 1 = return from
// interrupt, 2 = return from break, 3 = break or user exception,
// 4 = return from exception.
};
// Decode and execute a single instruction.
//   i    = the 32-bit instruction word
//   arg2 = the immediate operand, already extended to 32 bits by the caller;
//          for register-form opcodes it is replaced by the value of rB.
// Returns an execmode that tells the caller whether to continue, to jump
// (immediately or after a delay slot) or to raise an exception.
// regs.PC must already point past the instruction: the code uses regs.PC-4
// as the address of the instruction being executed.
//
// Carry handling follows the MicroBlaze definition: additions store the
// carry out of the 33-bit sum, and reverse subtraction is computed as
// rB + ~rA + carry-in (carry-in is 1 for the forms without "C"), again
// storing the carry out of that sum. After a subtraction the carry is thus
// set when NO borrow occurred.
execmode ExecuteOne(u32 i, u32 arg2)
{
    // Decode the operation type and operands from the opcode
    unsigned i_op = (i>>26)&0x3F; // Operation
    unsigned i_rd = (i>>21)&0x1F; // Target register index
    u32 RA = regs.r[(i>>16)&0x1F]; // Source register value
    bool imm = i_op & 0x08; // Category of instruction
    if(!imm) arg2 = regs.r[(i>>11)&0x1F]; // Second operand
    // Exit macros.
    #define Return return execmode{0,0,0}
    // Exit and store v to RD.
    // Note that regs.r[0] is always preserved as zero, even if written to.
    #define ReturnRD(v) do { if(i_rd) regs.r[i_rd] = (v); Return; } while(0)
    // Exit and store v to RD, and set carry to c (evaluated before v, which may depend on carry).
    #define ReturnRDc(v,c) do { bool nc = (c); if(i_rd) regs.r[i_rd] = (v); regs.C = regs.CC = nc; Return; } while(0)
    // Exit and prepare to branch/jump to another memory location
    #define ReturnJmp(target,delayed,type) return execmode{(delayed)?2:1, type, target}
    // Exit and raise an exception, concerning the given address.
    #define ReturnEx(extype, addr) return execmode{3,extype,addr}
    // Exit and raise a MMU exception, setting the exception info appropriately.
    #define ReturnMMUex(extype, ear, write) \
        do { regs.Rx = i_rd; regs.S = write; regs.DIZ = error & 4; \
             ReturnEx(extype &~ 4, ear); } while(0)
    // Determine which kind of operation it was:
    if(i == 0x00000000) ReturnEx(2,0); // Specially trap this illegal opcode
    // tr() removes the "immediate" bit (0x08) from an opcode, so that the
    // register and immediate forms of an instruction share one case label.
    #define tr(op) (((op) & 0x07) | (((op) & 0x30) >> 1))
    switch( tr(i_op) )
    {
        // Add and subtract operations: (ADD|RSUB)(I)(K)(C); comparisons: CMP(U)
        case tr(0x06): ReturnRD( arg2+RA+regs.C );  // ADD(I)KC
        case tr(0x07): ReturnRD( arg2+~RA+regs.C ); // RSUB(I)KC
        // The carry-setting forms compute the sum in 64 bits, so that the
        // carry is simply bit 32 of the result. (Comparing the 32-bit sum
        // against an operand misses the carry when rA is all ones and the
        // carry-in is 1.)
        case tr(0x02): { const u64 sum = u64(arg2) + RA + regs.C;            // ADD(I)C
                         ReturnRDc( u32(sum), (sum >> 32) & 1 ); }
        case tr(0x03): { const u64 sum = u64(arg2) + u32(~RA) + regs.C;      // RSUB(I)C
                         ReturnRDc( u32(sum), (sum >> 32) & 1 ); }
        case tr(0x00): { const u64 sum = u64(arg2) + RA;                     // ADD(I)
                         ReturnRDc( u32(sum), (sum >> 32) & 1 ); }
        case tr(0x01): { const u64 sum = u64(arg2) + u32(~RA) + 1;           // RSUB(I)
                         ReturnRDc( u32(sum), (sum >> 32) & 1 ); }
        case tr(0x04): ReturnRD( arg2+RA ); // ADD(I)K
        case tr(0x05): switch(imm?0:(i&0x7FF))
            { case 0: ReturnRD( arg2-RA ); // RSUB(I)K
              // CMP/CMPU: the difference, with the top bit replaced by "rB < rA".
              // The flag is widened to u32 before the shift to keep it unsigned.
              case 1: ReturnRD((0x7FFFFFFF&(arg2-RA)) | (u32((s32)arg2 < (s32)RA)<<31) ); // CMP
              case 3: ReturnRD((0x7FFFFFFF&(arg2-RA)) | (u32((u32)arg2 < (u32)RA)<<31) ); // CMPU
            } Return;/*Silently ignore bad instruction*/
        // Bit manipulation operations: (OR|AND|XOR|ANDN)(I)
        case tr(0x20): ReturnRD( RA | arg2 );  // OR, ORI
        case tr(0x21): ReturnRD( RA & arg2 );  // AND, ANDI
        case tr(0x22): ReturnRD( RA ^ arg2 );  // XOR, XORI
        case tr(0x23): ReturnRD( RA &~ arg2 ); // ANDN,ANDNI
        // Barrel shift operations: BS(RL|RA|LL)(I)
        case tr(0x11):
            if(i&0x400) ReturnRD( RA << (arg2 & 31) );      // BSLL(I)
            if(i&0x200) ReturnRD( s32(RA) >> (arg2 & 31) ); // BSRA(I)
            else        ReturnRD( u32(RA) >> (arg2 & 31) ); // BSRL(I)
        // Multiplications: MUL(I|H|HU|HSU)
        case tr(0x10): switch(imm ? 0 : (i&0x7FF))
            { // The low 32 bits of a product do not depend on signedness, so
              // MUL multiplies unsigned values and cannot overflow a signed type.
              case 0: ReturnRD( RA * arg2 ); // MUL,MULI
              case 1: ReturnRD( ((s64)(s32)RA * (s64)(s32)arg2) >> 32 ); // MULH
              case 2: ReturnRD( ((s64)(s32)RA * (s64)(u32)arg2) >> 32 ); // MULHSU
              case 3: ReturnRD( ((u64)(u32)RA * (u64)(u32)arg2) >> 32 ); // MULHU
            } Return;
        // One-bit copy / shift-right operators: SEXT(8|16), SR(A|C|L); cache control: W(I|D)C
        case tr(0x24): switch(i & 0x7FF)
            { case 0x60: ReturnRD( (s32)( s8)RA ); // SEXT8 (sign-extend 8 bits to 32 bits)
              case 0x61: ReturnRD( (s32)(s16)RA ); // SEXT16 (sign-extend 16 bits to 32 bits)
              case 0x01: ReturnRDc( (s32)RA >> 1, RA&1);                  // SRA (sign-extend)
              case 0x21: ReturnRDc((u32(regs.CC)<<31) | (RA >> 1), RA&1); // SRC (carry-extend)
              case 0x41: ReturnRDc( RA >> 1, RA&1);                       // SRL (zero-extend)
              case 0x64: if(regs.UM) ReturnEx(7,0); Return; // WDC
              case 0x66: if(regs.UM) ReturnEx(7,0); Return; // WDC.clear
              case 0x74: if(regs.UM) ReturnEx(7,0); Return; // WDC.flush
              case 0x76: if(regs.UM) ReturnEx(7,0); Return; // WDC.flush.clear
              case 0x68: if(regs.UM) ReturnEx(7,0); Return; // WIC (privileged cache-control instruction)
            } Return;
        // Conditional branches with delay slots or not: (BEQ|BNE|BLT|BLE|BGT|BGE)(I)(D)
        case tr(0x27): switch(i_rd & 15)
            { case 0: if((s32)RA == 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //EQ
              case 1: if((s32)RA != 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //NE
              case 2: if((s32)RA < 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //LT
              case 3: if((s32)RA <= 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //LE
              case 4: if((s32)RA > 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //GT
              case 5: if((s32)RA >= 0) ReturnJmp( regs.PC-4 + arg2, i_rd & 16, 0 ); Return; //GE
            } Return;
        // Unconditional branches / system calls, with delay slots or not: BR(A)(L)(I)(D), BRK(I), MBAR
        case tr(0x26): switch( (i >> 18) & 7) // This is hairy, because there are so many variants in this.
            {
                #define R if(i_rd) regs.r[i_rd] = regs.PC - 4; /* Macro for saving return address */
                case 0: ReturnJmp(arg2+regs.PC-4, false, 0); // D=0, A=0, L=0 (BR(I) or MBAR).
                case 4: ReturnJmp(arg2+regs.PC-4, true, 0); // D=1, A=0, L=0 (BR(I)D)
                case 5: R ReturnJmp(arg2+regs.PC-4, true, 0); // D=1, A=0, L=1 (BRL(I)D)
                case 2: ReturnJmp(arg2, false, 0); // D=0, A=1, L=0 (BRA(I))
                case 6: ReturnJmp(arg2, true, 0); // D=1, A=1, L=0 (BRA(I)D)
                case 7: {bool e = imm && arg2 == v_usr_ex; // D=1, A=1, L=1 (BRAL(I)D)
                         R ReturnJmp(arg2, true, e ? 3 : 0 );} // escalate if target 0x08)
                case 3: {bool e = imm && (arg2 == v_usr_ex || arg2 == v_brk);
                         if(regs.UM && !e) ReturnEx(7,0); // D=0, A=1, L=1 (BRK(I) = special, not delayed)
                         regs.BIP = true; // deny usermode access, except
                         R ReturnJmp(arg2, false, e ? 3 : 0 );} // escalate if target 0x08 or 0x18
                case 1: break; // D=0, A=0, L=1 (BRL(I) = invalid)
                #undef R
            } Return;
        // Subroutine/exception returns: RT(S|I|B|E)D; Special register manipulation: M(F|T)S, MSR(CLR|SET)
        case tr(0x25):
            if(imm) // Note: MicroBlaze's return opcodes always include a delay slot.
            {
                if((i_rd&7) && regs.UM) ReturnEx(7,0); // privileged return
                ReturnJmp(RA+arg2, true, i_rd&7); // RTSD=0/RTID=1/RTBD=2/RTED=4
            }
            // MFS and MTS
            {u32& op2 = i_rd ? regs.r[i_rd] : arg2;
             if((i&0x4000) && regs.UM) ReturnEx(7,0); // Write is a privileged operation
             regs.SPR( i&0x3FFF, (i&0x4000), (i&0x4000) ? RA : op2 ); // Write or read
             // Update the carry-copy bit
             regs.CC = regs.C;
             regs.PVR = true; // Ensure this flag is never turned off
             ReturnRD(op2);}
        // Memory access operations:
        // A load writes straight into the target register; if that register
        // is r0, it is reset to zero afterwards.
        #define HandleLoadStoreOpcode(opcode, rw, size) \
        case tr(opcode): \
        { \
            if(auto error = MemAccess<rw,size>(RA+arg2, regs.r[i_rd], !imm && (i&0x200))) \
                ReturnMMUex(error, RA+arg2, rw==intent_store); \
            if(rw == intent_load && !i_rd) regs.r[i_rd] = 0; /* Ensure RD stays zero */ \
            if(!imm && (i&0x400)) regs.C = regs.CC = false; /* exclusive access, not implemented */ \
            ReturnRD(regs.r[i_rd]); \
        }
        HandleLoadStoreOpcode(0x30, intent_load,1) // LBU(I)(R)
        HandleLoadStoreOpcode(0x31, intent_load,2) // LHU(I)(R)
        HandleLoadStoreOpcode(0x32, intent_load,4) // LW(I)(R)(X)
        HandleLoadStoreOpcode(0x34, intent_store,1) // SB(I)(R)
        HandleLoadStoreOpcode(0x35, intent_store,2) // SH(I)(R)
        HandleLoadStoreOpcode(0x36, intent_store,4) // SW(I)(R)(X)
    }
    #undef tr
    // Anything else: Illegal instruction exception
    ReturnEx(2,0);
}
// Reset the CPU.
//   vector          = address where execution starts
//   cmdline_pointer = value placed in register r5
void DoReset(u32 vector = v_reset, u32 cmdline_pointer = 0)
{
    // Start from an all-zero register file.
    // (Note: Real MicroBlaze does not clear R1..R31.)
    regs = {};

    // The only MSR flag that is set after reset: the PVR registers exist.
    regs.PVR = true;
    const u32 reset_msr = regs.MSR;

    // Registers that must not start out as zero:
    regs.PC = vector;
    regs.r[5] = cmdline_pointer;
    regs.SHR = 0xFFFFFFFFu;

    // CPU features that this emulator does NOT support:
    //
    // C_USE_PCMP_INSTR (PCMPNE,PCMPEQ,PCMPBF,CLZ): CLZ and PCMPBF need a proportionally large amount of code
    // C_USE_DIV (IDIV,IDIVU): handling the corner cases needs a lot of code
    // C_USE_FPU=1 (FADD,FRSUB,FMUL,FDIV,FCMP): lots of code (comparisons, corner cases)
    // C_USE_FPU=2 (FLT,FINT,FSQRT): depends on C_USE_FPU=1, which is not implemented
    // C_USE_STACK_PROTECTION: not utilized by Linux
    // C_USE_MSR_INSTR: a disproportionate amount of code, and not really needed
    // C_FSL_LINKS: a very complex framework, yet completely ignored by Linux
    //
    // CPU features that this emulator DOES support:
    //
    // C_USE_BARREL=1: cheap to emulate, and a great help for performance
    // C_USE_HW_MUL=2: cheap to emulate, and a great help for performance
    // C_ILL_OPCODE_EXCEPTIONS=1: trivial to implement
    // C_UNALIGNED_EXCEPTIONS=1: trivial to implement
    // C_OPCODE_0x0_ILLEGAL=1: trivial to implement
    // C_USE_MMU=3: without a fully functional MMU, Linux would be quite crippled
    // C_MMU_ZONES=16: once one zone is supported, supporting 16 is trivial. Linux requires at least 2.
    // C_MMU_PRIVILEGED_INSTR=0: fully protect privileged instructions
    // C_MMU_TLB_ACCESS=3: full TLB register access
    //
    // The processor version registers advertise that feature list.
    // For brevity, the bit fields are given as hexadecimal constants.
    regs.PVRs[0] = 0xD4801500;
    regs.PVRs[2] = 0x00005470;
    regs.PVRs[11] = 0xC0200000|reset_msr;
}
// True when an interrupt may be taken right now: interrupts are enabled and
// neither an exception nor a break is in progress.
bool InterruptsEnabled() const
{
    if(!regs.IE) return false;
    return !(regs.EIP || regs.BIP);
}
// Enter the interrupt handler at "vector".
// Note: Call this only when InterruptsEnabled()=true.
void DoInterrupt(u32 vector = v_intr)
{
    // Remember where to resume, then redirect execution.
    regs.r[14] = regs.PC;
    regs.PC = vector;
    // No further interrupts until they are enabled again.
    regs.IE = false;
    // Go to kernel mode, save current mode
    regs.PushUVM();
}
// Fetch and execute one instruction, and process branch/jump/exception.
// An IMM prefix and the instruction it modifies are executed together, and
// so are a delayed branch and the instruction in its delay slot; one call
// may therefore consume up to two instruction words.
void Singlestep()
{
// Outcome of the step; filled in either by a failed fetch or by ExecuteOne().
execmode next;
// Fetch the word at PC into "value" and advance PC by 4.
// On failure, "next" is set up as an exception and false is returned.
auto NextOpcode = [&] (u32& value) -> bool
{
unsigned error = MemAccess<intent_execute,4>(regs.PC, value, !CROSS_ENDIAN);
if(error) { next = {3,error&~4,regs.PC}; return false; } // Exception
regs.PC += 4;
return true;
};
// Address of the first word of this step; used as the return address
// for MMU exceptions.
u32 op_begin = regs.PC;
u32 opcode;
if(!NextOpcode(opcode))
{
// Loading the instruction failed; produce an exception.
goto GotException;
}
// HACK: When Linux function "emit_log_char" is invoked, output the
// character directly to the UART. This makes the boot-process more
// verbose when the kernel UART driver has not been loaded yet.
// (The address is hard-coded and therefore specific to one kernel build.)
if(op_begin == 0xc0010e04) IOmap[0x8400].write(0,regs.r[5]<<24);
// By default the immediate operand is the sign-extended low 16 bits.
{s32 immval = (s16)opcode;
// If the opcode was IMM, read the next one immediately.
if( (opcode >> 16) == 0xB000 )
{
if(!NextOpcode(opcode)) goto GotException;
immval = (immval << 16) | (opcode & 0xFFFF); // Extend the literal parameter
}
next = ExecuteOne(opcode, immval);}
switch(next.mode)
{
case 0: default: break;
case 2: // Delayed jump
{
u32 btr = next.target; // Save the branch target
// A failed fetch overwrote "next"; mode is set back to 2 so that the
// exception is flagged as a delay slot exception below.
if(!NextOpcode(opcode)) { next.mode = 2; regs.BTR = btr; goto GotException; }
// Execute the in-delay-slot instruction.
// Note: MicroBlaze documentation explicitly prohibits the delay
// slot being taken by an IMM, BRK or branch instruction,
// so we won't deal with those cases here.
execmode next2 = ExecuteOne(opcode, (s16)opcode);
if(next2.mode == 3) // Did an exception happen in a delay slot?
{ regs.BTR = btr;
next.type = next2.type; // Copy exception type
next.target = next2.target; // Copy exception address
goto GotException; }
} // passthru
case 1: // Jump
switch(next.type) // Check for a special type of a jump
{
// type 1 = Return From Interrupt (set IE; copy UMS,VMS to UM,VM)
case 1: regs.IE = true; regs.PopUVM(); break;
// type 2 = Return From Break (clear BIP; copy UMS,VMS to UM,VM)
case 2: regs.BIP = false; regs.PopUVM(); break;
// type 3 = Break or User Exception (copy UM,VM to UMS,VMS)
case 3: regs.PushUVM(); break;
// type 4 = Return From Exception (set EE, clear EIP, zero ESR; copy UMS,VMS to UM,VM)
case 4: regs.EIP = false; regs.EE = true; regs.ESR = 0; regs.PopUVM(); break;
}
regs.PC = next.target;
break;
case 3: GotException:; // Exception
// At this point next.mode is 3, or 2 when the exception belongs to a
// delay slot. Exception types of 16 and above (the MMU ones) are taken
// even while EE is clear.
if(regs.EE || next.type >= 16)
{
// If EE is unset, ignore exception unless caused by MMU
// For MMU exceptions, the return address is the beginning
// of the opcode that caused the fault. For others, it is
// the next opcode.
regs.EIP = true; // Exception in progress = true
regs.EAR = next.target; // Address causing the exception
regs.EE = false; // Exceptions enabled = false
regs.EC = next.type; // Exception code
regs.DS = next.mode == 2; // Was it a delay slot exception.
regs.PushUVM(); // Go kernel mode, save previous mode
regs.r[17] = (next.type < 16 ? regs.PC : op_begin);
regs.PC = v_hw_ex;
}
}
}
};
// Emulate the following hardware (Linux kernel is configured for them using a DTS).
// The "IO:" addresses are where main() installs each device in cpu.IOmap;
// the IRQ numbers are the interrupt lines that main() raises on intc.
MicroBlazeCPU cpu; // Configured at 100 MHz
SDRAM<0x1800000> RAM; // IO: 10000000 , 24 MiB of RAM (MicroBlaze Linux requires >=16.)
SDRAM<0x10000> LowRAM; // IO: 00000000 , 64 KiB of RAM (needed for exception tables)
XilinxINTC intc; // IO: 81800000
XilinxTimer<2> timer; // IO: 83c00000 IRQ 3
XilinxUARTlite uart; // IO: 84000000 IRQ 8
// Convert a 32-bit device register value between host and slave byte order.
// The value passes through untouched when both byte orders agree.
static u32 S(u32 v)
{
    if(!CROSS_ENDIAN) return v;
    return SwapBytes(v, 4);
}
// Program entry point: wires the devices into the CPU's memory map, loads a
// Linux kernel from a U-Boot image file in the current directory, and runs
// the machine forever.
// Returns EXIT_FAILURE if the image cannot be opened or its header is
// incomplete; otherwise it does not return.
int main()
{
    // Set up the memory mapping.
    // First, make all unassigned addresses produce a run-time error.
    for(unsigned a=0; a<0x10000; ++a)
        cpu.IOmap[a] = { [](u32)->u32{ abort(); },
                         [](u32,u32) { abort(); } };
    cpu.IOmap[0x0000] = { [](u32 i) { return LowRAM.Read(i/4); } ,
                          [](u32 i,u32 v) { LowRAM.Write(i/4,v); } };
    for(unsigned a=0x1000; a<0x1180; ++a)
        cpu.IOmap[a] = { [](u32 i) { return RAM.Read((i-0x10000000)/4); } ,
                         [](u32 i,u32 v) { RAM.Write((i-0x10000000)/4,v); } };
    cpu.IOmap[0x8180] = { [](u32 i) { return S(intc.Read((i/4)%8)); } ,
                          [](u32 i,u32 v) { intc.Write((i/4)%8,S(v)); } };
    cpu.IOmap[0x8400] = { [](u32 i) { return S(uart.Read((i/4)%4)); } ,
                          [](u32 i,u32 v) { uart.Write((i/4)%4,S(v)); } };
    cpu.IOmap[0x83C0] = { [](u32 i) { return S(timer.Read((i/16)&1, (i/4)%4)); } ,
                          [](u32 i,u32 v) { timer.Write((i/16)&1, (i/4)%4, S(v)); } };

    // Load Linux kernel from U-Boot image
    struct { u32 a[3], image_size, load_offs, entry_offs, b[10]; } Header;
    const char* kernel = "simple~1.ub"; // simpleImage.emu.ub
    fprintf(stderr, "Loading kernel, %s...\n", kernel);
    FILE* fp = fopen(kernel, "rb");
    if(!fp)
    {
        // Without this check, the reads below would dereference a null FILE*.
        perror(kernel);
        return EXIT_FAILURE;
    }
    if(fread(&Header, 0x40, 1, fp) != 1)
    {
        // A short header would leave the load/entry addresses uninitialized.
        fprintf(stderr, "%s: cannot read the 64-byte image header\n", kernel);
        fclose(fp);
        return EXIT_FAILURE;
    }
    // The header fields are stored in big-endian order.
    if(LITTLE_ENDIAN_HOST) Header.load_offs = SwapBytes(Header.load_offs, 4);
    if(LITTLE_ENDIAN_HOST) Header.entry_offs = SwapBytes(Header.entry_offs, 4);
    if(LITTLE_ENDIAN_HOST) Header.image_size = SwapBytes(Header.image_size, 4);

    // Reset the CPU (and bring memory mapper to user mode)
    cpu.DoReset(Header.entry_offs, Header.load_offs + Header.image_size);

    // Read the kernel image into the RAM
    for(auto offs = Header.load_offs; offs < Header.load_offs + Header.image_size; offs += 4)
    {
        // Zero-filled first, so that a trailing partial word is padded with
        // zeros rather than with indeterminate bytes.
        u32 value = 0;
        if(fread(&value, 1, 4, fp) <= 0) break;
        cpu.MemAccess<intent_store,4> (offs, value, CROSS_ENDIAN);
    }
    fclose(fp);
    fprintf(stderr, "Done loading kernel.\n");

    // Run the machine
    for(;;) // Infinite loop
    {
        cpu.Singlestep();
        uart.Input();
        timer.Tick();
        if(timer.CheckIRQ()) intc.TriggerIRQ(3);
        if(cpu.InterruptsEnabled())
        {
            // The UART is polled only here, because its CheckIRQ() consumes
            // the pending TX event.
            if(uart.CheckIRQ()) intc.TriggerIRQ(8);
            if(intc.CheckIRQ()) cpu.DoInterrupt();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment