Skip to content

Instantly share code, notes, and snippets.

@hcs64
Created May 13, 2020 05:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hcs64/82d3b22247bca3dadc61d415a6662434 to your computer and use it in GitHub Desktop.
Save hcs64/82d3b22247bca3dadc61d415a6662434 to your computer and use it in GitHub Desktop.
Faster PPU swizzle ucode
// N64 'Bare Metal' 16BPP 272x240 SNES PPU 2BPP Tile 8x8 Demo by krom (Peter Lemon):
// Modified for faster vector ops and benchmark output (count register) by hcs:
//
// Overview Of The CPU Program Below:
//  1. Run RSP Microcode That Converts The SNES Palette (SNESPAL) Into An N64 TLUT (N64TLUT)
//  2. Run RSP Microcode That Converts The SNES Tiles (SNESTILE) Into N64TILE, Timed With COP0 Register 9
//  3. Patch The RDP Command List With The Clear Color & One Texture Address Per Map Entry
//  4. Run The RDP Command List, Then Print The Measured Count In Decimal Into The Framebuffer
arch n64.cpu
endian msb
output "PPU2BPPTile8x8.N64", create
fill 1052672 // Set ROM Size (1048576 + 4096 Bytes)
origin $00000000
base $80000000 // Entry Point Of Code
include "LIB/N64.INC" // Include N64 Definitions
include "LIB/N64_HEADER.ASM" // Include 64 Byte Header & Vector Table
insert "LIB/N64_BOOTCODE.BIN" // Include 4032 Byte Boot Code
Start:
include "LIB/N64_GFX.INC" // Include Graphics Macros
include "LIB/N64_RSP.INC" // Include RSP Macros
N64_INIT() // Run N64 Initialisation Routine
ScreenNTSC(272, 240, BPP16, $A0100000) // Screen NTSC: 272x240, 16BPP, DRAM Origin $A0100000
WaitScanline($200) // Wait For Scanline To Reach Vertical Blank
// Convert SNES Palette To N64 TLUT
// Uploads The Palette Conversion Microcode To IMEM, Starts The RSP & Busy-Waits Until The RSP Halts
// Load RSP Code To IMEM
DMASPRD(RSPPALCode, RSPPALCodeEnd, SP_IMEM) // DMA Data Read DRAM->RSP MEM: Start Address, End Address, Destination RSP MEM Address
DMASPWait() // Wait For RSP DMA To Finish
SetSPPC(RSPPALStart) // Set RSP Program Counter: Start Address
StartSP() // Start RSP Execution: RSP Status = Clear Halt, Broke, Interrupt, Single Step, Interrupt On Break
DelayPAL: // Wait For RSP To Compute
// NOTE(review): A0 Is Not Loaded In This Block; Presumably StartSP() Leaves The SP Register Base In A0 - Confirm Against LIB/N64_RSP.INC
lwu t0,SP_STATUS(a0) // T0 = RSP Status
andi t0,RSP_HLT // RSP Status &= RSP Halt Flag
beqz t0,DelayPAL // IF (RSP Halt Flag == 0) Delay PAL
nop // Delay Slot
// Copy SNES Clear Color (TLUT Color 0) Into The Data Word Of The RDP Set_Fill_Color Command
// Both Pointers Use The Uncached Alias ($A0000000|Physical Address), The Same Form Used For
// The Tile Map Patching Of The RDP List: The TLUT Was Written To RDRAM By RSP DMA & The Command
// List Is Fetched From RDRAM By The RDP, So Neither Access May Be Served From / Left In The CPU Data Cache
  la a0,$A0000000|(N64TLUT&$3FFFFFF) // A0 = N64 TLUT Address (Uncached)
  la a1,$A0000000|((RDPSNESCLEARCOL+4)&$3FFFFFF) // A1 = N64 RDP SNES Clear Color Address (Uncached)
  lhu t0,0(a0) // T0 = TLUT Color 0
  sh t0,0(a1) // Store Color 0 To RDP Fill Color Hi
  sh t0,2(a1) // Store Color 0 To RDP Fill Color Lo
// Convert SNES Tiles To N64 Linear Texture
// Same Upload / Start / Poll Sequence As The Palette Pass, With COP0 Register 9 (Count) Zeroed
// Just Before The RSP Starts & Sampled Once The Halt Flag Is Seen: This Is The Benchmark Figure
// Load RSP Code To IMEM
DMASPRD(RSPTILECode, RSPTILECodeEnd, SP_IMEM) // DMA Data Read DRAM->RSP MEM: Start Address, End Address, Destination RSP MEM Address
DMASPWait() // Wait For RSP DMA To Finish
SetSPPC(RSPTILEStart) // Set RSP Program Counter: Start Address
mtc0 r0, 9 // Clear count
StartSP() // Start RSP Execution: RSP Status = Clear Halt, Broke, Interrupt, Single Step, Interrupt On Break
DelayTILES: // Wait For RSP To Compute
// NOTE(review): A0 Is Not Loaded In This Block; Presumably StartSP() Leaves The SP Register Base In A0 - Confirm Against LIB/N64_RSP.INC
lwu t0,SP_STATUS(a0) // T0 = RSP Status
andi t0,RSP_HLT // RSP Status &= RSP Halt Flag
beqz t0,DelayTILES // IF (RSP Halt Flag == 0) Delay TILES
nop // Delay Slot
mfc0 t0, 9 // T0 = Count Elapsed Since It Was Cleared Above (A CPU Count Register Value, Not An RSP Cycle Count, Despite The Label Name)
la t1, RSP_Cycles // T1 = Address Of The Saved Measurement
sw t0, 0(t1) // Save The Measurement For The Decimal Printer Further Down
// Convert SNES Tile Map To RDP List
// Reads 896 (32 Columns * 28 Rows) 16-Bit Little-Endian Map Entries & Writes The DRAM Address Of
// Each Referenced N64 Tile (32 Bytes Per Tile) Into The RDP Command List, One Patch Every 40 Bytes
// (The Stride Of One Per-Tile Command Group In RDPSNESTILE; +12 Selects The Address Word Patched Here)
// Only The 10-Bit Tile Number Of Each Entry Is Used: The Attribute Bits (Bits 10..15 Of The Entry)
// Are Masked Off So They Can Never Push The Texture Address Outside The N64TILE Buffer
  la a0,SNESMAP // A0 = SNES Tile Map Address
  la a1,$A0000000|((RDPSNESTILE+12)&$3FFFFFF) // A1 = N64 RDP SNES Tile Map Address
  la a2,N64TILE // A2 = N64 Tile Address
  ori t0,r0,895 // T0 = Number Of Tiles To Convert - 1
MAPLoop:
  lbu t1,0(a0) // T1 = SNES Tile Map # Lo Byte
  lbu t2,1(a0) // T2 = SNES Tile Map # Hi Byte
  addiu a0,2 // A0 += 2
  andi t2,$03 // T2 &= 3 (Keep Tile Number Bits 8..9, Drop Attribute Bits)
  sll t2,8 // T2 <<= 8
  or t1,t2 // T1 |= T2
  sll t1,5 // T1 *= 32
  addu t1,a2 // T1 += N64 Tile Address
  sw t1,0(a1) // Store SNES Tile Map # To N64 RDP SNES Tile Map
  addiu a1,40 // A1 += 40
  bnez t0,MAPLoop // IF (Number Of Tiles To Convert != 0) Map Loop
  subiu t0,1 // Decrement Number Of Tiles To Convert (Delay Slot)
// Run The Patched RDP Command List During Vertical Blank, Then Poll Until The DP Interrupt Flag Is Raised
WaitScanline($200) // Wait For Scanline To Reach Vertical Blank
DPC(RDPBuffer, RDPBufferEnd) // Run DPC Command Buffer: Start Address, End Address
// Wait for DP to finish with the screen
lui t0, MI_BASE // T0 = MI Register Base
-;lw t1, MI_INTR(t0) // T1 = MI Interrupt Flags ("-" Is The Anonymous Label Of This Poll Loop)
andi t1, 1<<5 // DP
beqz t1,- // Keep Polling While The DP Flag Is Clear
nop // Delay Slot
// Print The Saved Count As Decimal Digits Into The 16BPP Framebuffer
// Digits Are Produced Least Significant First & Drawn Right To Left, Starting At Pixel (64,64)
// Of The 272 Pixel Wide Screen; Each Digit Is An 8x8 Glyph Taken From The "digits" Table
// Registers: A0 = Remaining Value (Also Reused As Glyph Pointer), A1 = Framebuffer Cursor,
//            T0 = Glyph Bits (Next Pixel In Bit 63), T2 = Glyph Table, T3 = 10, T4/T5 = Row/Pixel Counters
  la a1, $A0100000+((64*272)+64)*2 // A1 = Framebuffer Position Of The Rightmost Digit
  la t2, digits // T2 = Glyph Table Base
  la a0, RSP_Cycles
  lw a0, 0(a0) // A0 = Saved Count
  li t3, 10 // T3 = Decimal Base
digit_loop:
  divu a0, t3 // LO = Value / 10, HI = Value % 10
  mfhi a0 // A0 = Digit (Remainder)
  sll a0, 3 // A0 = Digit * 8 (Bytes Per Glyph)
  addu a0, t2 // A0 = Address Of The Digit's Glyph
  ld t0, 0(a0) // T0 = All 8 Glyph Rows, Top-Left Pixel In The Sign Bit
  li t4, 7 // T4 = Rows To Do - 1
digit_row_loop:
  li t5, 7 // T5 = Pixels To Do - 1
digit_pixel_loop:
  bltz t0,digit_pixel_store // Top Bit Set: Keep The Lit Color Loaded In The Delay Slot
  lli t6, 0xfffe // T6 = Lit Pixel Color (Delay Slot, Always Executed)
  lli t6, 0x0000 // T6 = Unlit Pixel Color (Only Reached When The Top Bit Is Clear)
digit_pixel_store:
  sh t6, 0(a1) // Plot The Pixel
  addi a1, 2 // Next Pixel To The Right
  dsll t0, 1 // Move The Next Glyph Bit Into The Sign Bit
  bnez t5,digit_pixel_loop // IF (Pixels Left != 0) Next Pixel
  addi t5, -1 // Decrement Pixel Counter (Delay Slot)
  addi a1, (272*2)-(8*2) // Start Of The Same Glyph Column, One Scanline Down
  bnez t4,digit_row_loop // IF (Rows Left != 0) Next Row
  addi t4, -1 // Decrement Row Counter (Delay Slot)
  mflo a0 // A0 = Quotient: The Digits Still To Print
  nop
  nop
  addi a1, -((8*2)+(8*272*2)) // Back Up 8 Scanlines & Left By One Glyph Width
  bnez a0, digit_loop // IF (Quotient != 0) Print The Next Digit
  nop // Delay Slot
// Program Finished: Park The CPU In An Endless Loop
Loop:
j Loop
nop // Delay Slot
align(4)
// Storage For The Count Sampled After The Tile Conversion (Written With SW, Read Back With LW By The Code Above)
RSP_Cycles:;dd 0
align(8)
// 8x8 1BPP Glyphs For The Decimal Digits 0..9, 8 Bytes Per Glyph, Indexed As digits + Digit * 8
// One Byte Per Row (Top Row First), Most Significant Bit = Leftmost Pixel: The Printer Loads A
// Whole Glyph With One 64-Bit Load & Shifts It Out From The Top Bit
// The Last Two Rows Of Every Glyph Are Blank; The "$3x" Tags Are The ASCII Codes Of The Digits
digits:
/////////////////////
// $30: 0
db %00111010
db %01100100
db %01001010
db %01010010
db %00100110
db %01011100
db %00000000
db %00000000
// $31: 1
db %00011000
db %00111000
db %00011000
db %00011000
db %00011000
db %00111100
db %00000000
db %00000000
// $32: 2
db %00111000
db %01001100
db %00001100
db %00011000
db %00110000
db %01111110
db %00000000
db %00000000
// $33: 3
db %00111100
db %01000110
db %00011100
db %00000110
db %01000110
db %00111100
db %00000000
db %00000000
// $34: 4
db %00001100
db %00011100
db %00101100
db %01001100
db %01111110
db %00001100
db %00000000
db %00000000
// $35: 5
db %01111110
db %01100000
db %01111100
db %00000110
db %01000110
db %00111100
db %00000000
db %00000000
// $36: 6
db %00111100
db %01100000
db %01111100
db %01100110
db %01100110
db %00111100
db %00000000
db %00000000
// $37: 7
db %01111110
db %01100110
db %00001100
db %00111110
db %00011000
db %00011000
db %00000000
db %00000000
// $38: 8
db %00111100
db %01100110
db %00111100
db %01100110
db %01100110
db %00111100
db %00000000
db %00000000
// $39: 9
db %00111100
db %01100110
db %01100110
db %00111110
db %00000110
db %00111100
db %00000000
db %00000000
// Work Buffers & Source Assets (All 64-Bit Aligned, As They Are DMA Sources / Destinations)
align(8) // Align 64-Bit
N64TLUT: // Destination Of The Palette Conversion (512 Bytes DMA'd Out By RSPPALCode) & TLUT Source For The RDP List
fill 512 // Generates 512 Bytes Containing $00
align(8) // Align 64-Bit
N64TILE: // Destination Of The Tile Conversion (RSPTILECode Writes 8 Blocks Of 4096 Bytes From The Start Of This Buffer)
fill 65536 // Generates 65536 Bytes Containing $00
align(8) // Align 64-Bit
insert SNESPAL, "BG.pal" // SNES Palette, Input Of RSPPALCode
align(8) // Align 64-Bit
insert SNESTILE, "BG.pic" // SNES Tile Data, Input Of RSPTILECode
align(8) // Align 64-Bit
insert SNESMAP, "BG.map" // SNES 32x32 Background Tile Map (2048 Bytes)
RSPPALData:
base $0000 // Set Base Of RSP Data Object To Zero
// Uses Whole Vector For 1st 8 Colors To Preserve SNES Palette Color 0 Alpha
// Uses Element 9 To OR Vector By Scalar $0001 For Other Colors
AlphaOR:
dh $0000, $0001, $0001, $0001, $0001, $0001, $0001, $0001
// 1 * $0000, 7 * $0001 (OR Alpha 1 Bit) (1st 8 Colors)
// $0001 (OR Alpha 1 Bit) (Other Colors) (e9)
// Uses Elements 8..12 To AND Vector By Scalar
ANDByte:
dh $00FF, $FF00, $001F, $03E0, $7C00, $0000, $0000, $0000
// $00FF (AND Lo Byte) (e8)
// $FF00 (AND Hi Byte) (e9)
// $001F (AND Red 5 Bits) (e10)
// $03E0 (AND Green 5 Bits) (e11)
// $7C00 (AND Blue 5 Bits) (e12)
// Uses Elements 8..11 To Multiply Vector By Scalar For Pseudo Vector Shifts
PALShift:
dh $0100, $0800, $0002, $0080
// $0100 (Left Shift Using Multiply: << 8), (Right Shift Using Multiply: >> 8) (Big-Endian Convert) (e8)
// $0800 (Left Shift Using Multiply: << 11) (Red) (e9)
// $0002 (Left Shift Using Multiply: << 1) (Green) (e10)
// $0080 (Right Shift Using Multiply: >> 9) (Blue) (e11)
align(8) // Align 64-Bit
base RSPPALData+pc() // Set End Of RSP Data Object
RSPPALDataEnd:
// Constant Vectors For The Tile Conversion Microcode (RSPTILECode)
// Assembled With Base $0000 So The Labels Below Are Offsets Into The Object, Which RSPTILECode DMAs To SP_DMEM
// RSPTILECode In This File Only Loads ANDBit, ShiftMux0 & ShiftMux1; The Other Tables Are Not Referenced By It
align(8) // Align 64-Bit
RSPSHIFTData:
base $0000 // Set Base Of RSP Data Object To Zero
// Uses Elements 8..15 To Multiply Vector By Scalar For Pseudo Vector Shifts
ShiftLeftRightA:
dh $0001, $0002, $0004, $0008, $0010, $0020, $0040, $0080
// $0001 (Left Shift Using Multiply: << 0), (Right Shift Using Multiply: >> 16) (e8)
// $0002 (Left Shift Using Multiply: << 1), (Right Shift Using Multiply: >> 15) (e9)
// $0004 (Left Shift Using Multiply: << 2), (Right Shift Using Multiply: >> 14) (e10)
// $0008 (Left Shift Using Multiply: << 3), (Right Shift Using Multiply: >> 13) (e11)
// $0010 (Left Shift Using Multiply: << 4), (Right Shift Using Multiply: >> 12) (e12)
// $0020 (Left Shift Using Multiply: << 5), (Right Shift Using Multiply: >> 11) (e13)
// $0040 (Left Shift Using Multiply: << 6), (Right Shift Using Multiply: >> 10) (e14)
// $0080 (Left Shift Using Multiply: << 7), (Right Shift Using Multiply: >> 9) (e15)
ShiftLeftRightB:
dh $0100, $0200, $0400, $0800, $1000, $2000, $4000, $8000
// $0100 (Left Shift Using Multiply: << 8), (Right Shift Using Multiply: >> 8) (e8)
// $0200 (Left Shift Using Multiply: << 9), (Right Shift Using Multiply: >> 7) (e9)
// $0400 (Left Shift Using Multiply: << 10), (Right Shift Using Multiply: >> 6) (e10)
// $0800 (Left Shift Using Multiply: << 11), (Right Shift Using Multiply: >> 5) (e11)
// $1000 (Left Shift Using Multiply: << 12), (Right Shift Using Multiply: >> 4) (e12)
// $2000 (Left Shift Using Multiply: << 13), (Right Shift Using Multiply: >> 3) (e13)
// $4000 (Left Shift Using Multiply: << 14), (Right Shift Using Multiply: >> 2) (e14)
// $8000 (Left Shift Using Multiply: << 15), (Right Shift Using Multiply: >> 1) (e15)
// One Mask Per Bit Position: The Same Bit In Both Bytes Of A Halfword (Element e8+N Selects Bit N)
ANDBit:
dh $0101, $0202, $0404, $0808
dh $1010, $2020, $4040, $8080
// Pseudo shifts (and adds) to put bits in 12,11 and 8,7 (for sfv)
// Entries Tagged "-16" Are Written As 1<<(16+N) With Negative N: They Are The Multipliers Used With
// The vmudl/vmadl Forms In RSPTILECode (Right Shift Using Multiply, As In The Tables Above)
ShiftMux0:
dh 1<<(16+( 7- 8)) // -16
dh 1<< (11- 9) // unused, combined with mux1
dh 1<<(16+( 7-10)) // -16
dh 1<< (11-11) // unused, combined with mux1
dh 1<<(16+( 7-12)) // -16
dh 1<<(16+(11-13)) // -16
dh 1<<(16+( 7-14)) // -16
dh 1<<(16+(11-15)) // -16
// Multipliers Used With The vmudn/vmadn Forms In RSPTILECode; Elements 1 & 3 Also Carry The
// Terms That ShiftMux0 Marks As "combined with mux1"
ShiftMux1:
dh 1<<( 8- 0)
dh (1<<(12- 1))|(1<<(11-9))
dh 1<<( 8- 2)
dh (1<<(12- 3))|(1<<(11-11))
dh 1<<( 8- 4)
dh 1<<(12- 5)
dh 1<<( 8- 6)
dh 1<<(12- 7)
// These may work for pre-swapped tiles
ShiftMux2:
dh (1<< ( 7- 0)) + (1<<(8-8))
dh (1 << (11- 1)) + (1 << (12 - 9))
dh 1<< ( 7- 2)
dh (1<< (11- 3)) + (1<<(12-11))
dh 1<< ( 7- 4)
dh 1<< (11- 5)
dh 1<< ( 7- 6)
dh 1<< (11- 7)
ShiftMux3:
dh 1<< ( 8- 8) // unused, combined with mux0
dh 1<< (12- 9) // unused, combined with mux0
dh 1<<(16+( 8-10)) // -16
dh 1<< (12-11) // unused, combined with mux0
dh 1<<(16+( 8-12)) // -16
dh 1<<(16+(12-13)) // -16
dh 1<<(16+( 8-14)) // -16
dh 1<<(16+(12-15)) // -16
align(8) // Align 64-Bit
base RSPSHIFTData+pc() // Set End Of RSP Data Object
RSPSHIFTDataEnd:
// RSP Microcode: Convert The 256 Color SNES Palette (SNESPAL) Into An N64 TLUT (N64TLUT)
// Per 16-Bit Color: Swap The Two Bytes, Move Red/Green/Blue To Their N64 Bit Positions & OR In The Alpha Bit
// The First 8 Colors Use The Whole AlphaOR Vector So That Color 0 Keeps Alpha 0; All Later Colors Get Alpha 1
// The 512 Byte Palette Is DMA'd To DMEM Offset 0 (Over The Constant Data, Which Is Already Held In V0..V2),
// Converted In Place 8 Colors At A Time & DMA'd Back Out To N64TLUT
align(8) // Align 64-Bit
RSPPALCode:
arch n64.rsp
base $0000 // Set Base Of RSP Code Object To Zero
RSPPALStart:
// Load Static Palette Data
RSPDMASPRD(RSPPALData, RSPPALDataEnd, SP_DMEM) // RSP DMA Data Read DRAM->RSP MEM: Start Address, End Address, Destination RSP MEM Address
RSPDMASPWait() // Wait For RSP DMA To Finish
lqv v0[e0],AlphaOR(r0) // V0 = 1 * $0000, 7 * $0001 (OR Alpha 1 Bit) (128-Bit Quad)
lqv v1[e0],ANDByte(r0) // V1 = AND Lo/Hi/Red/Green/Blue Bytes (128-Bit Quad)
ldv v2[e0],PALShift(r0) // V2 = Shift Using Multiply: Red/Green/Blue (64-Bit Double)
// Decode Colors
ori a0,r0,0 // A0 = Palette Start Offset
la a1,N64TLUT // A1 = Aligned DRAM Physical RAM Offset ($00000000..$007FFFFF 8MB)
la a2,SNESPAL // A2 = Aligned DRAM Physical RAM Offset ($00000000..$007FFFFF 8MB)
ori t0,r0,511 // T0 = Length Of DMA Transfer In Bytes - 1
ori t1,r0,30 // T1 = Color Counter (Loop Iterations - 1: 31 More Groups Of 8 Colors After The First)
mtc0 a0,c0 // Store Memory Offset To SP Memory Address Register ($A4040000)
mtc0 a2,c1 // Store RAM Offset To SP DRAM Address Register ($A4040004)
mtc0 t0,c2 // Store DMA Length To SP Read Length Register ($A4040008)
RSPDMASPWait() // Wait For RSP DMA To Finish
// Vector Grab 1st 8 Colors:
lqv v3[e0],0(a0) // V3 = Palette Colors 0..7
vand v4,v3,v1[e8] // V4 = Lo Byte Color 0..7 (& $00FF)
vand v5,v3,v1[e9] // V5 = Hi Byte Color 0..7 (& $FF00)
vmudn v4,v2[e8] // V4 = Lo Byte Color 0..7 << 8
vmudl v5,v2[e8] // V5 = Hi Byte Color 0..7 >> 8
vor v4,v5[e0] // V4 = Color 0..7 Big-Endian
vand v5,v4,v1[e10] // V5 = RED 5 Bits, Color 0..7 (& $001F)
vmudn v5,v2[e9] // V5 = RED 5 Bits, Color 0..7 << 11
vand v6,v4,v1[e11] // V6 = GREEN 5 Bits, Color 0..7 (& $03E0)
vmudn v6,v2[e10] // V6 = GREEN 5 Bits, Color 0..7 << 1
vor v5,v6[e0] // V5 = RED,GREEN 10 Bits, Color 0..7
vand v6,v4,v1[e12] // V6 = BLUE 5 Bits, Color 0..7 (& $7C00)
vmudl v6,v2[e11] // V6 = BLUE 5 Bits, Color 0..7 >> 9
vor v5,v6[e0] // V5 = RED,GREEN,BLUE 15 Bits, Color 0..7
vor v5,v0[e0] // V5 = RED,GREEN,BLUE,ALPHA 16 Bits, Color 0..7 (Whole Vector: Color 0 Keeps Alpha 0)
// Store Colors 0..7:
sqv v5[e0],0(a0) // Palette Colors 0..7 = V5 Quad
LoopColors:
// Vector Grab Next 8 Colors:
addi a0,16 // A0 = Offset Of The Next 8 Colors (16 Bytes)
lqv v3[e0],0(a0) // V3 = Next 8 Palette Colors
vand v4,v3,v1[e8] // V4 = Lo Byte Of Each Color (& $00FF)
vand v5,v3,v1[e9] // V5 = Hi Byte Of Each Color (& $FF00)
vmudn v4,v2[e8] // V4 = Lo Byte << 8
vmudl v5,v2[e8] // V5 = Hi Byte >> 8
vor v4,v5[e0] // V4 = Colors Big-Endian
vand v5,v4,v1[e10] // V5 = RED 5 Bits (& $001F)
vmudn v5,v2[e9] // V5 = RED 5 Bits << 11
vand v6,v4,v1[e11] // V6 = GREEN 5 Bits (& $03E0)
vmudn v6,v2[e10] // V6 = GREEN 5 Bits << 1
vor v5,v6[e0] // V5 = RED,GREEN 10 Bits
vand v6,v4,v1[e12] // V6 = BLUE 5 Bits (& $7C00)
vmudl v6,v2[e11] // V6 = BLUE 5 Bits >> 9
vor v5,v6[e0] // V5 = RED,GREEN,BLUE 15 Bits
vor v5,v0[e9] // V5 = RED,GREEN,BLUE,ALPHA 16 Bits (Scalar $0001: Every Color Gets Alpha 1)
// Store These 8 Colors:
sqv v5[e0],0(a0) // These 8 Palette Colors = V5 Quad
bnez t1,LoopColors // IF (Color Counter != 0) Loop Colors
subi t1,1 // Decrement Color Counter (Delay Slot)
// DMA The Converted Palette Out To N64TLUT
ori a0,r0,0 // A0 = SP Memory Address Offset DMEM ($A4000000..$A4001FFF 8KB)
ori t0,r0,511 // T0 = Length Of DMA Transfer In Bytes - 1
mtc0 a0,c0 // Store Memory Offset To SP Memory Address Register ($A4040000)
mtc0 a1,c1 // Store RAM Offset To SP DRAM Address Register ($A4040004)
mtc0 t0,c3 // Store DMA Length To SP Write Length Register ($A404000C)
RSPDMASPWait() // Wait For RSP DMA To Finish
break // Set SP Status Halt, Broke & Check For Interrupt
align(8) // Align 64-Bit
base RSPPALCode+pc() // Set End Of RSP Code Object
RSPPALCodeEnd:
// RSP Microcode: Convert SNES 2BPP Tiles (SNESTILE) Into The N64 Tile Buffer (N64TILE)
// Works In 8 Blocks: Each Block DMAs 2048 Bytes (128 Tiles * 16 Bytes) To DMEM Offset 2048, Converts
// Them Into 4096 Bytes (128 Tiles * 32 Bytes) Built From DMEM Offset 0 & DMAs Those Out To DRAM
// The Constant Data Is Loaded To DMEM First & Copied Into V8..V10 Before The Output Overwrites It
align(8) // Align 64-Bit
RSPTILECode:
arch n64.rsp
base $0000 // Set Base Of RSP Code Object To Zero
RSPTILEStart:
// Load Static Shift Data
RSPDMASPRD(RSPSHIFTData, RSPSHIFTDataEnd, SP_DMEM) // RSP DMA Data Read DRAM->RSP MEM: Start Address, End Address, Destination RSP MEM Address
RSPDMASPWait() // Wait For RSP DMA To Finish
lqv v8[e0],ShiftMux0(r0) // V8 = Multipliers Used By The vmudl/vmadl Steps
lqv v9[e0],ShiftMux1(r0) // V9 = Multipliers Used By The vmudn/vmadn Steps
lqv v10[e0],ANDBit(r0) // V10 = Per-Bit Masks ($0101 << N In Element e8+N)
// Decode Tiles
ori t3,r0,7 // T3 = Tile Block Repeat Counter
ori a0,r0,2048 // A0 = SNES Tile Start Offset
la a1,N64TILE // A1 = Aligned DRAM Physical RAM Offset ($00000000..$007FFFFF 8MB)
la a2,SNESTILE // A2 = Aligned DRAM Physical RAM Offset ($00000000..$007FFFFF 8MB)
ori a3,r0,0 // A3 = N64 Tile Start Offset
LoopTileBlocks:
// Uses DMA To Copy 2048 Bytes To DMEM, For 2BPPSNES->4BPPN64
ori t0,r0,2047 // T0 = Length Of DMA Transfer In Bytes - 1
ori t1,r0,127 // T1 = Tile Counter
mtc0 a0,c0 // Store Memory Offset To SP Memory Address Register ($A4040000)
mtc0 a2,c1 // Store RAM Offset To SP DRAM Address Register ($A4040004)
mtc0 t0,c2 // Store DMA Length To SP Read Length Register ($A4040008)
RSPDMASPWait() // Wait For RSP DMA To Finish
//j skip3
//nop
lqv v11[e0], 0(a0) // V11 = Tile BitPlane 0,1 Row 0..7 (First Tile Of The Block; Later Tiles Are Prefetched Inside The Loop)
// Convert One Tile Per Iteration (V11 = The Tile's 8 Rows, Loaded Before The Loop Or By The Previous Iteration)
//  1. Eight vand Ops Isolate One Bit Position Each (Same Bit In Both Bytes Of Every Row) Into V0..V7
//  2. The Next Tile Is Prefetched Into V11 So The Load Overlaps The Multiplies
//  3. Multiply-Accumulates With The ShiftMux0 (V8) / ShiftMux1 (V9) Constants Merge Two Bit Positions
//     Into Each Of V12..V15, Placed Where The sfv Stores Pick Them Up (See The ShiftMux Table Comments)
//  4. Two sfv Stores Per Column Pair Write The Result; A3 Advances 1 Byte Per Pair & 32 Bytes Per Tile In Total
LoopTiles:
// AND out individual bits
vand v7,v11,v10[e15]
vand v6,v11,v10[e14]
vand v5,v11,v10[e13]
vand v4,v11,v10[e12]
vand v3,v11,v10[e11]
vand v2,v11,v10[e10]
vand v1,v11,v10[e9]
vand v0,v11,v10[e8]
// Prefetch the next tile
lqv v11[e0], 16(a0) // V11 = Tile BitPlane 0,1 Row 0..7 Of The Next Tile
addiu a0, 16 // A0 = Offset Of The Next SNES Tile (16 Bytes Per Tile)
// Column 7,6
vmudl v12,v7,v8[e15]
vmadn v12,v7,v9[e15]
vmadl v12,v6,v8[e14]
vmadn v12,v6,v9[e14]
// Column 5,4
vmudl v13,v5,v8[e13]
vmadn v13,v5,v9[e13]
vmadl v13,v4,v8[e12]
vmadn v13,v4,v9[e12]
// Column 3,2 (No vmudl Step For V3: Its ShiftMux0 Term Is Combined Into ShiftMux1)
vmudn v14,v3,v9[e11]
vmadl v14,v2,v8[e10]
vmadn v14,v2,v9[e10]
// Column 1,0 (No vmudl Step For V1: Its ShiftMux0 Term Is Combined Into ShiftMux1)
vmudn v15,v1,v9[e9]
vmadl v15,v0,v8[e8]
vmadn v15,v0,v9[e8]
// Assemble-Time Switch: The First Branch (sfv Stores) Is The One Assembled; Change The Condition To Select The Fallback
if 1 == 1 {
// TODO: would it be better to interleave these stores with the multiplies above?
// Store Columns 7,6
sfv v12[e0],0(a3)
sfv v12[e8],16(a3)
addi a3, 1
// Store Columns 5,4
sfv v13[e0],0(a3)
sfv v13[e8],16(a3)
addi a3, 1
// Store Columns 3,2
sfv v14[e0],0(a3)
sfv v14[e8],16(a3)
addi a3, 1
// Store Columns 1,0
sfv v15[e0],0(a3)
sfv v15[e8],16(a3)
addi a3, 1+(32-4) // Skip To The Start Of The Next 32 Byte Output Tile
} else {
// This is a workaround for cen64 not having sfv support.
// Scalar Fallback: For Each Even Element e0..e14, Move V12..V15 To T0, Shift Right By 7 & Store One Byte (32 Bytes Per Tile)
mfc2 t0,v12[e0]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v13[e0]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v14[e0]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v15[e0]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v12[e2]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v13[e2]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v14[e2]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v15[e2]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v12[e4]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v13[e4]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v14[e4]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v15[e4]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v12[e6]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v13[e6]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v14[e6]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v15[e6]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v12[e8]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v13[e8]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v14[e8]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v15[e8]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v12[e10]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v13[e10]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v14[e10]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v15[e10]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v12[e12]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v13[e12]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v14[e12]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v15[e12]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v12[e14]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v13[e14]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v14[e14]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
mfc2 t0,v15[e14]
srl t2, t0, 7
sb t2, 0(a3)
addiu a3, 1
}
bnez t1,LoopTiles // IF (Tile Counter != 0) Loop Tiles
subi t1,1 // Decrement Tile Counter (Delay Slot)
// End Of One Tile Block: Write The Converted Tiles Out To DRAM, Then Set Up The Pointers For The Next Block
// (skip3 Is Only The Target Of The Commented-Out Jump Before The Tile Loop)
skip3:
// DMA out tiles
ori a0,r0,0 // A0 = SP Memory Address Offset DMEM ($A4000000..$A4001FFF 8KB)
// Uses DMA To Copy 128 Tiles (4096 Bytes) To RDRAM
li t0,4095 // T0 = Length Of DMA Transfer In Bytes - 1
mtc0 a0,c0 // Store Memory Offset To SP Memory Address Register ($A4040000)
mtc0 a1,c1 // Store RAM Offset To SP DRAM Address Register ($A4040004)
mtc0 t0,c3 // Store DMA Length To SP Write Length Register ($A404000C)
RSPDMASPWait() // Wait For RSP DMA To Finish
ori a0,r0,2048 // A0 = SNES Tile Start Offset
addiu a1,4096 // A1 = Next N64 Tile Offset
addiu a2,2048 // A2 = Next SNES Tile Offset
ori a3,r0,0 // A3 = Tile Start Offset
bnez t3,LoopTileBlocks // IF (Tile Block Repeat Counter != 0) Loop Tile Blocks
subi t3,1 // Decrement Tile Block Repeat Counter (Delay Slot)
break // Set SP Status Halt, Broke & Check For Interrupt
align(8) // Align 64-Bit
base RSPTILECode+pc() // Set End Of RSP Code Object
RSPTILECodeEnd:
// RDP Command List: Clear The Screen To The SNES Clear Color, Load The TLUT, Then Draw 32x28 Tiles Of 8x8 Pixels
// Two Places Are Patched By The CPU Before The List Is Run:
//  - The Data Word Of Set_Fill_Color At RDPSNESCLEARCOL (Receives TLUT Color 0 In Both Halfwords)
//  - The Address Word Of Every Set_Texture_Image Below RDPSNESTILE (Receives The Address Of The Tile Named By The SNES Map)
align(8) // Align 64-Bit
RDPBuffer:
arch n64.rdp
Set_Scissor 8<<2,8<<2, 0,0, 264<<2,232<<2 // Set Scissor: XH 8.0,YH 8.0, Scissor Field Enable Off,Field Off, XL 264.0,YL 232.0
Set_Other_Modes CYCLE_TYPE_FILL // Set Other Modes
Set_Color_Image IMAGE_DATA_FORMAT_RGBA,SIZE_OF_PIXEL_16B,272-1, $00100000 // Set Color Image: FORMAT RGBA,SIZE 16B,WIDTH 272, DRAM ADDRESS $00100000
RDPSNESCLEARCOL:
Set_Fill_Color $00010001 // Set Fill Color: PACKED COLOR 16B R5G5B5A1 Pixels
Fill_Rectangle 271<<2,239<<2, 0<<2,0<<2 // Fill Rectangle: XL 271.0,YL 239.0, XH 0.0,YH 0.0
Set_Other_Modes EN_TLUT|SAMPLE_TYPE|BI_LERP_0|ALPHA_DITHER_SEL_NO_DITHER|B_M2A_0_1|FORCE_BLEND|IMAGE_READ_EN // Set Other Modes
Set_Combine_Mode $0,$00, 0,0, $1,$01, $0,$F, 1,0, 0,0,0, 7,7,7 // Set Combine Mode: SubA RGB0,MulRGB0, SubA Alpha0,MulAlpha0, SubA RGB1,MulRGB1, SubB RGB0,SubB RGB1, SubA Alpha1,MulAlpha1, AddRGB0,SubB Alpha0,AddAlpha0, AddRGB1,SubB Alpha1,AddAlpha1
Set_Texture_Image IMAGE_DATA_FORMAT_RGBA,SIZE_OF_PIXEL_16B,1-1, N64TLUT // Set Texture Image: FORMAT RGBA,SIZE 16B,WIDTH 1, N64TLUT DRAM ADDRESS
Set_Tile 0,0,0, $100, 0,0, 0,0,0,0, 0,0,0,0 // Set Tile: TMEM Address $100, Tile 0
Load_Tlut 0<<2,0<<2, 0, 255<<2,0<<2 // Load Tlut: SL 0.0,TL 0.0, Tile 0, SH 255.0,TH 0.0
Sync_Tile // Sync Tile
// BG Column 0..31 / Row 0..27
Set_Tile IMAGE_DATA_FORMAT_COLOR_INDX,SIZE_OF_PIXEL_4B,1, $000, 0,0, 0,0,0,0, 0,0,0,0 // Set Tile: FORMAT COLOR INDEX,SIZE 4B,Tile Line Size 1 (64bit Words), TMEM Address $000, Tile 0
RDPSNESTILE:
// Assemble-Time Loops Emit One Group Of Four Commands Per Screen Tile (Row Major: Y Outer, X Inner)
// The Texture Address Written Here Is Only A Default; The CPU Overwrites It From The SNES Tile Map
define y(0)
while {y} < 28 {
define x(0)
while {x} < 32 {
Sync_Tile // Sync Tile
Set_Texture_Image IMAGE_DATA_FORMAT_COLOR_INDX,SIZE_OF_PIXEL_8B,4-1, N64TILE+(32*(({y}*32)+{x})) // Set Texture Image: FORMAT COLOR INDEX,SIZE 8B,WIDTH 4, Tile DRAM ADDRESS
Load_Tile 0<<2,0<<2, 0, 7<<2,7<<2 // Load Tile: SL,TL, Tile, SH,TH
Texture_Rectangle (16+({x}*8))<<2,(16+({y}*8))<<2, 0, (8+({x}*8))<<2,(8+({y}*8))<<2, 0<<5,0<<5, 1<<10,1<<10 // Texture Rectangle: XL,YL, Tile, XH,YH, S,T, DSDX,DTDY
evaluate x({x} + 1)
}
evaluate y({y} + 1)
}
Sync_Full // Ensure Entire Scene Is Fully Drawn
RDPBufferEnd:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment