-
-
Save StrikerX3/739b1dd54aebf77d5021dec8939dced3 to your computer and use it in GitHub Desktop.
NDS ARM9 CPU cache research
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Adapted from https://github.com/blocksds/sdk/blob/master/tests/data_cache_ops/source/main.c | |
// Credits to AntonioND | |
// | |
// SPDX-License-Identifier: CC0-1.0 | |
// | |
// SPDX-FileContributor: Antonio Niño Díaz, 2023 | |
#include <stdio.h> | |
#include <string.h> | |
#include <fatfs.h> | |
#include <nds.h> | |
// Test for flush: | |
// | |
// - Set the test buffer to 0.
// - Read test buffer to load it into cache. | |
// - Copy from source buffer to test buffer. Writes will go to the cache because | |
// it has been loaded in the previous step. If not, it would have been written | |
// without loading it. | |
// - Execute DC flush. Regions that are flushed will be updated in RAM, regions | |
// that aren't flushed won't be updated in RAM. | |
// - Copy test buffer to destination buffer with DMA. DMA can't see the cache, | |
// so it copies the actual contents in RAM. | |
// | |
// Test for invalidate: | |
// | |
// - Set the test buffer to 0.
// - Read test buffer to load it into cache. | |
// - Copy from source buffer to test buffer. Writes will go to the cache because | |
// it has been loaded in the previous step. If not, it would have been written | |
// without loading it. | |
// - Execute DC invalidate. Regions that are invalidated will never be updated | |
// in RAM (unless they have been updated in RAM before the invalidate). | |
// - Copy test buffer to destination buffer without DMA. We want to see the | |
// parts of the buffer that are still in the cache and haven't been | |
// invalidated. | |
// | |
// The results screen should look like this: | |
// | |
// No operation: | |
// Ones: 256 0 0 | Note that the results here may vary a bit | |
// Twoes: 256 0 0 | because the cache is still active, we are | |
// | just not affecting it directly. | |
// Flush range: | |
// Ones: 128 128 0 | |
// Twoes: 128 0 128 | |
// | |
// Invalidate range: | |
// Ones: 128 128 0 | |
// Twoes: 128 0 128 | |
// | |
// Flush all: | |
// Ones: 0 256 0 | |
// Twoes: 0 0 256 | |
// | |
// Invalidate all: | |
// Ones: 256 0 0 | |
// Twoes: 256 0 0 | |
// Cache geometry constants used by the tests below.
#define CACHE_LINE_SIZE 32
#define CACHE_ASSOCIATIVITY 4
#define DATA_CACHE_SIZE 4096
#define DATA_CACHE_LINE_COUNT (DATA_CACHE_SIZE/CACHE_LINE_SIZE)
#define DATA_CACHE_SET_COUNT (DATA_CACHE_LINE_COUNT/CACHE_ASSOCIATIVITY)
#define DATA_CACHE_SET_STRIDE (CACHE_LINE_SIZE*CACHE_ASSOCIATIVITY)

// Buffer sizes: the small buffers span 8 cache lines, the big ones 8 pages.
#define PAGE_SIZE 4096
#define BUFFER_SIZE (CACHE_LINE_SIZE * 8)
#define BIG_BUFFER_SIZE (PAGE_SIZE * 8)

// Cache-line-aligned buffers used by the flush/invalidate tests. They are
// volatile so that every access in the tests really reaches the memory system.
ALIGN(CACHE_LINE_SIZE) volatile uint8_t buffer_source_1[BUFFER_SIZE];
ALIGN(CACHE_LINE_SIZE) volatile uint8_t buffer_source_2[BUFFER_SIZE];
ALIGN(CACHE_LINE_SIZE) volatile uint8_t buffer_test[BUFFER_SIZE];
ALIGN(CACHE_LINE_SIZE) volatile uint8_t buffer_destination[BUFFER_SIZE];

// shift_pu_regions() maps buffer_big_test as a protection unit region of
// exactly sizeof(buffer_big_test) bytes. A region's base address has to be a
// multiple of the region size, so the buffer is aligned to its own size
// instead of to a single page.
ALIGN(BIG_BUFFER_SIZE) volatile uint8_t buffer_big_test[BIG_BUFFER_SIZE];
ALIGN(PAGE_SIZE) volatile uint8_t buffer_big_destination[BIG_BUFFER_SIZE];

// Sink for reads whose only purpose is to pull data into the cache.
volatile uint8_t helper;
void zero(void) | |
{ | |
memset((void *)buffer_test, 0, sizeof(buffer_test)); | |
memset((void *)buffer_destination, 0, sizeof(buffer_destination)); | |
DC_FlushAll(); | |
} | |
void copy_1(void) | |
{ | |
for (int i = 0; i < sizeof(buffer_test); i += CACHE_LINE_SIZE) | |
helper = buffer_test[i]; | |
memcpy((void *)buffer_test, (void *)buffer_source_1, sizeof(buffer_test)); | |
} | |
void copy_2(void) | |
{ | |
for (int i = 0; i < sizeof(buffer_test); i += CACHE_LINE_SIZE) | |
helper = buffer_test[i]; | |
memcpy((void *)buffer_test, (void *)buffer_source_2, sizeof(buffer_test)); | |
} | |
void copy_dma(void) | |
{ | |
dmaCopy((void *)buffer_test, (void *)buffer_destination, sizeof(buffer_test)); | |
} | |
void copy_no_dma(void) | |
{ | |
memcpy((void *)buffer_destination, (void *)buffer_test, sizeof(buffer_test)); | |
} | |
void count(const char *title) | |
{ | |
int zeroes = 0; | |
int ones = 0; | |
int twoes = 0; | |
for (int i = 0; i < sizeof(buffer_destination); i++) | |
{ | |
uint8_t v = buffer_destination[i]; | |
if (v == 0) | |
zeroes++; | |
else if (v == 0x11) | |
ones++; | |
else if (v == 0x22) | |
twoes++; | |
} | |
printf("%s %4d %4d %4d\n", title, zeroes, ones, twoes); | |
} | |
void flush_half(void) | |
{ | |
DC_FlushRange((void *)buffer_test, sizeof(buffer_test) / 2); | |
} | |
void invalidate_half(void) | |
{ | |
DC_InvalidateRange((void *)buffer_test, sizeof(buffer_test) / 2); | |
} | |
void test_flush_invalidate(void) | |
{ | |
consoleDemoInit(); | |
memset((void *)buffer_source_1, 0x11, sizeof(buffer_source_1)); | |
memset((void *)buffer_source_2, 0x22, sizeof(buffer_source_2)); | |
uint32_t ticks; | |
printf("No operation:\n"); | |
DC_FlushAll(); | |
cpuStartTiming(0); | |
{ | |
zero(); | |
copy_1(); | |
copy_dma(); | |
count(" Ones: "); | |
zero(); | |
copy_2(); | |
copy_dma(); | |
count(" Twoes: "); | |
} | |
ticks = cpuEndTiming(); | |
printf(" t=%lu", ticks); | |
printf("Flush range:\n"); | |
DC_FlushAll(); | |
cpuStartTiming(0); | |
{ | |
zero(); | |
copy_1(); | |
flush_half(); | |
copy_dma(); | |
count(" Ones: "); | |
zero(); | |
copy_2(); | |
flush_half(); | |
copy_dma(); | |
count(" Twoes: "); | |
} | |
ticks = cpuEndTiming(); | |
printf(" t=%lu", ticks); | |
printf("Invalidate range:\n"); | |
DC_FlushAll(); | |
cpuStartTiming(0); | |
{ | |
zero(); | |
copy_1(); | |
invalidate_half(); | |
copy_no_dma(); | |
count(" Ones: "); | |
zero(); | |
copy_2(); | |
invalidate_half(); | |
copy_no_dma(); | |
count(" Twoes: "); | |
} | |
ticks = cpuEndTiming(); | |
printf(" t=%lu", ticks); | |
printf("Flush all:\n"); | |
DC_FlushAll(); | |
cpuStartTiming(0); | |
{ | |
zero(); | |
copy_1(); | |
DC_FlushAll(); | |
copy_dma(); | |
count(" Ones: "); | |
zero(); | |
copy_2(); | |
DC_FlushAll(); | |
copy_dma(); | |
count(" Twoes: "); | |
} | |
ticks = cpuEndTiming(); | |
printf(" t=%lu", ticks); | |
printf("Invalidate all:\n"); | |
DC_FlushAll(); | |
cpuStartTiming(0); | |
{ | |
zero(); | |
copy_1(); | |
DC_InvalidateAll(); | |
copy_no_dma(); | |
count(" Ones: "); | |
zero(); | |
copy_2(); | |
DC_InvalidateAll(); | |
copy_no_dma(); | |
count(" Twoes: "); | |
} | |
ticks = cpuEndTiming(); | |
printf(" t=%lu", ticks); | |
printf("\n"); | |
printf("Press START to exit\n"); | |
fflush(stdout); | |
while (1) | |
{ | |
swiWaitForVBlank(); | |
scanKeys(); | |
if (keysHeld() & KEY_START) | |
break; | |
} | |
} | |
// ------------------------------------------------------------------ | |
// Writes `value` to CP15 register c15, c0, 0 (opcode1 = 3). The tag/cache RAM
// accessors in this file use it to select which entry the following access
// refers to.
void set_debug_reg(uint32_t value)
{
    asm volatile(
        "mcr p15, 3, %0, c15, c0, 0"
        :
        : "r"(value)
    );
}
uint32_t read_code_tag_ram(uint32_t index, uint32_t segment) | |
{ | |
uint32_t reg = ((index & 63) << 5) | ((segment & 3) << 30); | |
set_debug_reg(reg); | |
uint32_t value; | |
asm volatile("mrc p15, 3, %0, c15, c1, 0" : "=r"(value)); | |
return value; | |
} | |
uint32_t read_data_tag_ram(uint32_t index, uint32_t segment) | |
{ | |
uint32_t reg = ((index & 31) << 5) | ((segment & 3) << 30); | |
set_debug_reg(reg); | |
uint32_t value; | |
asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r"(value)); | |
return value; | |
} | |
uint32_t read_code_cache_ram(uint32_t index, uint32_t segment, uint32_t word) | |
{ | |
uint32_t reg = ((index & 63) << 5) | ((segment & 3) << 30) | ((word & 7) << 2); | |
set_debug_reg(reg); | |
uint32_t value; | |
asm volatile("mrc p15, 3, %0, c15, c3, 0" : "=r"(value)); | |
return value; | |
} | |
uint32_t read_data_cache_ram(uint32_t index, uint32_t segment, uint32_t word) | |
{ | |
uint32_t reg = ((index & 31) << 5) | ((segment & 3) << 30) | ((word & 7) << 2); | |
set_debug_reg(reg); | |
uint32_t value; | |
asm volatile("mrc p15, 3, %0, c15, c4, 0" : "=r"(value)); | |
return value; | |
} | |
void write_code_tag_ram(uint32_t index, uint32_t segment, uint32_t value) | |
{ | |
uint32_t reg = ((index & 63) << 5) | ((segment & 3) << 30); | |
set_debug_reg(reg); | |
asm volatile("mcr p15, 3, %0, c15, c1, 0" : : "r"(value)); | |
} | |
void write_data_tag_ram(uint32_t index, uint32_t segment, uint32_t value) | |
{ | |
uint32_t reg = ((index & 31) << 5) | ((segment & 3) << 30); | |
set_debug_reg(reg); | |
asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r"(value)); | |
} | |
void write_code_cache_ram(uint32_t index, uint32_t segment, uint32_t word, uint32_t value) | |
{ | |
uint32_t reg = ((index & 63) << 5) | ((segment & 3) << 30) | ((word & 7) << 2); | |
set_debug_reg(reg); | |
asm volatile("mcr p15, 3, %0, c15, c3, 0" : : "r"(value)); | |
} | |
void write_data_cache_ram(uint32_t index, uint32_t segment, uint32_t word, uint32_t value) | |
{ | |
uint32_t reg = ((index & 31) << 5) | ((segment & 3) << 30) | ((word & 7) << 2); | |
set_debug_reg(reg); | |
asm volatile("mcr p15, 3, %0, c15, c4, 0" : : "r"(value)); | |
} | |
// ------------------------------------------------------------------ | |
void print_raw_code_tag_ram(uint32_t offset) | |
{ | |
printf("\x1b[0;0H"); | |
for (uint32_t index = 0; index < 16; index++) | |
{ | |
for (uint32_t segment = 0; segment < 4; segment++) | |
{ | |
uint32_t tag = read_code_tag_ram(index + offset, segment); | |
printf("\x1b[37;%lum%08lX", segment & 1, tag); | |
} | |
} | |
} | |
void print_raw_data_tag_ram(uint32_t offset) | |
{ | |
printf("\x1b[0;0H"); | |
for (uint32_t index = 0; index < 16; index++) | |
{ | |
for (uint32_t segment = 0; segment < 4; segment++) | |
{ | |
uint32_t tag = read_data_tag_ram(index + offset, segment); | |
printf("\x1b[37;%lum%08lX", segment & 1, tag); | |
} | |
} | |
} | |
void print_raw_code_cache_ram(uint32_t offset) | |
{ | |
printf("\x1b[0;0H"); | |
for (uint32_t index = 0; index < 16; index++) | |
{ | |
for (uint32_t segment = 0; segment < 4; segment++) | |
{ | |
for (uint32_t word = 0; word < 7; word++) | |
{ | |
uint32_t value = read_code_cache_ram(index + offset, segment, word); | |
printf("\x1b[37;%lum%08lX", segment & 1, value); | |
} | |
} | |
} | |
} | |
void print_raw_data_cache_ram(uint32_t offset) | |
{ | |
printf("\x1b[0;0H"); | |
for (uint32_t index = 0; index < 16; index++) | |
{ | |
for (uint32_t segment = 0; segment < 4; segment++) | |
{ | |
for (uint32_t word = 0; word < 7; word++) | |
{ | |
uint32_t value = read_data_cache_ram(index + offset, segment, word); | |
printf("\x1b[37;%lum%08lX", segment & 1, value); | |
} | |
} | |
} | |
} | |
void test_raw_debug_regs() | |
{ | |
consoleDemoInit(); | |
bool code = false; | |
bool tag = false; | |
uint32_t offset = 0; | |
while (1) | |
{ | |
swiWaitForVBlank(); | |
if (code) | |
{ | |
if (offset > 32) | |
offset = 32; | |
if (tag) | |
print_raw_code_tag_ram(offset); | |
else | |
print_raw_code_cache_ram(offset); | |
} | |
else | |
{ | |
if (offset > 16) | |
offset = 16; | |
if (tag) | |
print_raw_data_tag_ram(offset); | |
else | |
print_raw_data_cache_ram(offset); | |
} | |
printf("\n"); | |
printf("[Up/Down] Range: %lu..%lu \n", offset, offset + 15); | |
printf("[X] View %s cache\n", code ? "<Code>/Data" : "Code/<Data>"); | |
printf("[Y] View %s RAM\n", tag ? "<TAG>/Cache" : "TAG/<Cache>"); | |
printf("[START] Exit"); | |
scanKeys(); | |
if (keysHeld() & KEY_UP) | |
if (offset > 0) | |
--offset; | |
if (keysHeld() & KEY_DOWN) | |
++offset; | |
if (keysDown() & KEY_X) | |
code = !code; | |
if (keysDown() & KEY_Y) | |
tag = !tag; | |
if (keysHeld() & KEY_START) | |
break; | |
} | |
} | |
// ------------------------------------------------------------------ | |
void print_code_cache_debug(uint32_t offset) | |
{ | |
printf("\x1b[0;0H\x1b[37;3m"); | |
printf("St I W V DD TAG Data\n"); | |
for (uint32_t row = 0; row < 16; row++) | |
{ | |
uint32_t index = (row + offset) >> 4; | |
uint32_t segment = ((row + offset) >> 2) & 3; | |
uint32_t word = (row + offset) & 3; | |
uint32_t tag = read_code_tag_ram(index, segment); | |
uint32_t data = read_code_cache_ram(index, segment, word); | |
bool valid = (tag >> 4) & 1; | |
bool dirty1 = (tag >> 2) & 1; | |
bool dirty2 = (tag >> 3) & 1; | |
uint32_t tagAddr = tag & 0xFFFFF800; | |
printf("\x1b[37;%lum", segment & 1); | |
printf("%02lu %lu %lu %c %c%c %08lX = %08lX\n", | |
index, | |
segment, | |
word, | |
valid ? 'V' : '.', | |
dirty1 ? 'D' : '.', | |
dirty2 ? 'D' : '.', | |
tagAddr, | |
data | |
); | |
} | |
printf("\x1b[37;3m"); | |
} | |
void print_data_cache_debug(uint32_t offset) | |
{ | |
printf("\x1b[0;0H\x1b[37;3m"); | |
printf("St I W V DD TAG Data\n"); | |
for (uint32_t row = 0; row < 16; row++) | |
{ | |
uint32_t index = (row + offset) >> 4; | |
uint32_t segment = ((row + offset) >> 2) & 3; | |
uint32_t word = (row + offset) & 3; | |
uint32_t tag = read_data_tag_ram(index, segment); | |
uint32_t data = read_data_cache_ram(index, segment, word); | |
bool valid = (tag >> 4) & 1; | |
bool dirty1 = (tag >> 2) & 1; | |
bool dirty2 = (tag >> 3) & 1; | |
uint32_t tagAddr = tag & 0xFFFFFC00; | |
printf("\x1b[37;%lum", segment & 1); | |
printf("%02lu %lu %lu %c %c%c %08lX = %08lX\n", | |
index, | |
segment, | |
word, | |
valid ? 'V' : '.', | |
dirty1 ? 'D' : '.', | |
dirty2 ? 'D' : '.', | |
tagAddr, | |
data | |
); | |
} | |
printf("\x1b[37;3m"); | |
} | |
void test_debug_regs() | |
{ | |
consoleDemoInit(); | |
bool code = false; | |
uint32_t offset = 0; | |
keysSetRepeat(0, 0); | |
while (1) | |
{ | |
swiWaitForVBlank(); | |
if (code) | |
{ | |
if (offset > 64*4*4-16) | |
offset = 64*4*4-16; | |
print_code_cache_debug(offset); | |
} | |
else | |
{ | |
if (offset > 32*4*4-16) | |
offset = 32*4*4-16; | |
print_data_cache_debug(offset); | |
} | |
printf("\n"); | |
printf("[Up/Down] Up/down by 1\n"); | |
printf("[Left/Right] Up/down by 4\n"); | |
printf("[L/R] Up/down by 16\n"); | |
printf("[X] View %s cache\n", code ? "<Code>/Data" : "Code/<Data>"); | |
//printf("[A/B] Test code/data TAG write\n"); | |
printf("[START] Exit"); | |
scanKeys(); | |
if (keysHeld() & KEY_UP) | |
if (offset > 0) | |
--offset; | |
if (keysHeld() & KEY_DOWN) | |
++offset; | |
if (keysHeld() & KEY_LEFT) | |
{ | |
if (offset >= 4) | |
offset -= 4; | |
else | |
offset = 0; | |
} | |
if (keysHeld() & KEY_RIGHT) | |
offset += 4; | |
if (keysHeld() & KEY_L) | |
{ | |
if (offset >= 16) | |
offset -= 16; | |
else | |
offset = 0; | |
} | |
if (keysHeld() & KEY_R) | |
offset += 16; | |
if (keysDown() & KEY_X) | |
code = !code; | |
/*if (keysDown() & KEY_A) | |
{ | |
uint32_t tag = read_code_tag_ram(2, 0); | |
tag |= 0b11 << 2; // set dirty bits | |
tag &= ~(1 << 4); // clear valid bit | |
tag &= ~(3 | (63 << 5)); // clear set and index | |
tag |= 3 | (1 << 5); // modify set and index for the test | |
write_code_tag_ram(2, 0, tag); // will it use set/index 2,0 or 1,3? | |
} | |
if (keysDown() & KEY_B) | |
{ | |
uint32_t tag = read_data_tag_ram(2, 0); | |
tag |= 0b11 << 2; // set dirty bits | |
tag &= ~(1 << 4); // clear valid bit | |
tag &= ~(3 | (31 << 5)); // clear set and index | |
tag |= 3 | (1 << 5); // modify set and index for the test | |
write_data_tag_ram(2, 0, tag); // will it use set/index 2,0 or 1,3? | |
}*/ | |
if (keysHeld() & KEY_START) | |
break; | |
} | |
} | |
// ------------------------------------------------------------------ | |
void test_pu_cache_bits() | |
{ | |
consoleDemoInit(); | |
uint32_t ctl; | |
asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(ctl)); | |
printf("CP15 ctl = %08lX\n", ctl); // 0005307D | |
// disable PU, both caches and write buffer | |
//ctl &= ~((1 << 0) | (1 << 2) | (1 << 3) | (1 << 12)); | |
ctl = 0x52070; | |
asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctl)); | |
printf("\x1b[1;0HDisabled PU, caches, WB\n"); | |
asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(ctl)); | |
printf("\x1b[2;0HCP15 ctl = %08lX\n", ctl); | |
// try enabling both caches without PU | |
//ctl |= (1 << 2) | (1 << 12); | |
ctl = 0x53074; | |
asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctl)); | |
printf("\x1b[3;0HEnabled caches\n"); | |
asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(ctl)); | |
printf("\x1b[4;0HCP15 ctl = %08lX\n", ctl); | |
// try enabling write buffer without PU | |
//ctl |= (1 << 3); | |
ctl = 0x5307C; | |
asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctl)); | |
printf("\x1b[5;0HEnabled write buffer\n"); | |
asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(ctl)); | |
printf("\x1b[6;0HCP15 ctl = %08lX\n", ctl); | |
// enable PU | |
//ctl |= (1 << 0); | |
ctl = 0x5307D; | |
asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctl)); | |
printf("\x1b[7;0HEnabled PU\n"); | |
asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(ctl)); | |
printf("\x1b[8;0HCP15 ctl = %08lX\n", ctl); | |
printf("\x1b[10;0HPress START to exit\n"); | |
fflush(stdout); | |
while (1) | |
{ | |
swiWaitForVBlank(); | |
scanKeys(); | |
if (keysHeld() & KEY_START) | |
break; | |
} | |
} | |
// ------------------------------------------------------------------ | |
void test_dirty_cache_read() | |
{ | |
consoleDemoInit(); | |
uint32_t cachability; | |
asm volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cachability)); | |
printf("\x1b[0;0HCachability = %08lX\n", cachability); | |
cachability = 0; | |
printf("\x1b[1;0HBefore init: %02X %02X\n", helper, buffer_test[0]); | |
helper = 0xAA; | |
buffer_test[0] = 0xBB; | |
DC_FlushAll(); | |
printf("\x1b[2;0HAfter flush: %02X %02X\n", helper, buffer_test[0]); | |
buffer_test[0] = helper; | |
helper = 0xCC; | |
printf("\x1b[3;0HAfter writes: %02X %02X\n", helper, buffer_test[0]); | |
asm volatile("mcr p15, 0, %0, c2, c0, 0" : : "r"(cachability)); | |
printf("\x1b[4;0HCachable off: %02X %02X\n", helper, buffer_test[0]); | |
DC_FlushAll(); | |
printf("\x1b[5;0HAfter flush: %02X %02X\n", helper, buffer_test[0]); | |
printf("\x1b[7;0HPress START to exit\n"); | |
fflush(stdout); | |
while (1) | |
{ | |
swiWaitForVBlank(); | |
scanKeys(); | |
if (keysHeld() & KEY_START) | |
break; | |
} | |
} | |
// ------------------------------------------------------------------ | |
// Prints the eight protection unit regions: address range of every enabled
// region plus " cc" / " dc" / " wb" when its code-cachable, data-cachable
// or bufferable bit is set.
void print_pu_regions()
{
    uint32_t pu[8];
    uint32_t dataCachability;
    uint32_t codeCachability;
    uint32_t bufferability;

    // The CP15 register number is part of the instruction encoding, so the
    // eight region registers cannot be read in a loop.
    asm volatile("mrc p15, 0, %0, c6, c0, 0" : "=r"(pu[0]));
    asm volatile("mrc p15, 0, %0, c6, c1, 0" : "=r"(pu[1]));
    asm volatile("mrc p15, 0, %0, c6, c2, 0" : "=r"(pu[2]));
    asm volatile("mrc p15, 0, %0, c6, c3, 0" : "=r"(pu[3]));
    asm volatile("mrc p15, 0, %0, c6, c4, 0" : "=r"(pu[4]));
    asm volatile("mrc p15, 0, %0, c6, c5, 0" : "=r"(pu[5]));
    asm volatile("mrc p15, 0, %0, c6, c6, 0" : "=r"(pu[6]));
    asm volatile("mrc p15, 0, %0, c6, c7, 0" : "=r"(pu[7]));
    asm volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(dataCachability));
    asm volatile("mrc p15, 0, %0, c2, c0, 1" : "=r"(codeCachability));
    asm volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(bufferability));

    // Base address: bits 31:12 of the region register.
    auto puBaseAddr = [](uint32_t reg) -> uint32_t
    {
        return reg & 0xFFFFF000;
    };

    // Region size minus one. The size is 2 << N with N in bits 5:1. The
    // shift is done on an unsigned value: for N >= 30 a signed `2 << N`
    // overflows int. With unsigned arithmetic the 4 GB case wraps to 0 and
    // "size - 1" becomes 0xFFFFFFFF, which is the intended result.
    auto puSizeMinusOne = [](uint32_t reg) -> uint32_t
    {
        return (UINT32_C(2) << ((reg >> 1) & 0x1F)) - 1;
    };

    for (uint32_t i = 0; i < 8; i++)
    {
        const bool enable = pu[i] & 1;
        if (!enable)
        {
            printf("%lu: (disabled)\n", i);
            continue;
        }

        const uint32_t baseAddr = puBaseAddr(pu[i]);
        const uint32_t lastAddr = baseAddr + puSizeMinusOne(pu[i]);
        const bool codeCachable = (codeCachability >> i) & 1;
        const bool dataCachable = (dataCachability >> i) & 1;
        const bool bufferable = (bufferability >> i) & 1;

        printf("%lu: %08lX..%08lX%s%s%s\n", i, baseAddr, lastAddr,
               codeCachable ? " cc" : "",
               dataCachable ? " dc" : "",
               bufferable ? " wb" : "");
    }
}
void shift_pu_regions() | |
{ | |
uint32_t pu[8]; | |
uint32_t dataCachability; | |
uint32_t codeCachability; | |
uint32_t bufferability; | |
asm volatile("mrc p15, 0, %0, c6, c3, 0" : "=r"(pu[3])); | |
asm volatile("mrc p15, 0, %0, c6, c4, 0" : "=r"(pu[4])); | |
asm volatile("mrc p15, 0, %0, c6, c5, 0" : "=r"(pu[5])); | |
asm volatile("mrc p15, 0, %0, c6, c6, 0" : "=r"(pu[6])); | |
asm volatile("mrc p15, 0, %0, c6, c7, 0" : "=r"(pu[7])); | |
asm volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(dataCachability)); | |
asm volatile("mrc p15, 0, %0, c2, c0, 1" : "=r"(codeCachability)); | |
asm volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(bufferability)); | |
// Move PU regions 4..7 down to 3..6 to make room for a high-priority PU region we can manipulate at will. | |
// On BlocksDS, region 3 is reserved for the GBA cart on regular DS or the switchable IWRAM on DSi, | |
// neither of which we're using here. | |
// Also make everything non-data-cachable and nonbufferable and the new region 7 code-cachable. | |
auto adjust = [](uint32_t bits) { | |
uint32_t fixed = bits & 0b111; | |
uint32_t shifted = (bits & 0b11110000) >> 1; | |
return fixed | shifted; | |
}; | |
dataCachability = 0; | |
codeCachability = adjust(codeCachability); | |
bufferability = 0; | |
codeCachability |= (1 << 7); | |
DC_FlushAll(); | |
asm volatile("mcr p15, 0, %0, c2, c0, 0" : : "r"(dataCachability)); | |
asm volatile("mcr p15, 0, %0, c2, c0, 1" : : "r"(codeCachability)); | |
asm volatile("mcr p15, 0, %0, c3, c0, 0" : : "r"(bufferability)); | |
asm volatile("mcr p15, 0, %0, c6, c3, 0" : : "r"(pu[4])); | |
asm volatile("mcr p15, 0, %0, c6, c4, 0" : : "r"(pu[5])); | |
asm volatile("mcr p15, 0, %0, c6, c5, 0" : : "r"(pu[6])); | |
asm volatile("mcr p15, 0, %0, c6, c6, 0" : : "r"(pu[7])); | |
// Make PU region 7 contain the page buffer | |
assert(((uintptr_t)buffer_big_test & 0xFFF) == 0); | |
assert((sizeof(buffer_big_test) & 0xFFF) == 0); | |
assert(__builtin_popcount(sizeof(buffer_big_test)) == 1); | |
uintptr_t base = (uintptr_t)buffer_big_test; | |
uintptr_t size = sizeof(buffer_big_test); | |
uint32_t sizeVal = __builtin_ctz(size) - 1; | |
pu[7] = base | (sizeVal << 1) | 1; | |
DC_FlushAll(); | |
asm volatile("mcr p15, 0, %0, c6, c7, 0" : : "r"(pu[7])); | |
asm volatile("mcr p15, 0, %0, c2, c0, 0" : : "r"(dataCachability)); | |
asm volatile("mcr p15, 0, %0, c2, c0, 1" : : "r"(codeCachability)); | |
asm volatile("mcr p15, 0, %0, c3, c0, 0" : : "r"(bufferability)); | |
} | |
// Sets or clears the data-cachable bit of protection region 7
// (bit 7 of CP15 c2, c0, 0), leaving the other regions' bits untouched.
void set_pu7_data_cachable(bool cachable)
{
    const uint32_t region7 = UINT32_C(1) << 7;

    uint32_t bits;
    asm volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(bits));

    bits = cachable ? (bits | region7) : (bits & ~region7);

    asm volatile("mcr p15, 0, %0, c2, c0, 0" : : "r"(bits));
}
// Sets or clears the bufferable bit of protection region 7
// (bit 7 of CP15 c3, c0, 0), leaving the other regions' bits untouched.
void set_pu7_bufferable(bool bufferable)
{
    const uint32_t region7 = UINT32_C(1) << 7;

    uint32_t bits;
    asm volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(bits));

    bits = bufferable ? (bits | region7) : (bits & ~region7);

    asm volatile("mcr p15, 0, %0, c3, c0, 0" : : "r"(bits));
}
// Selects the cache replacement strategy through bit 14 of the CP15 control
// register: set for round-robin, cleared otherwise.
void set_replacement_strategy(bool roundRobin)
{
    const uint32_t roundRobinBit = UINT32_C(1) << 14;

    uint32_t control;
    asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(control));

    control = roundRobin ? (control | roundRobinBit) : (control & ~roundRobinBit);

    asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(control));
}
// Records which cache way receives a freshly loaded line, over and over, and
// stores the sequence in VRAM (banks A-D, 512 KB starting at 0x6800000) as
// 2 bits per iteration.
//
// Every iteration reads buffer_big_test[0], looks at the data tag RAM of the
// set that address maps to, ORs the number of each way whose valid bit is set
// into the output bits, and invalidates the whole data cache again. The
// replacement strategy bit (CP15 control bit 14) is set to round-robin for
// the iterations where (i & 0x7FF) == 0 and cleared for all others.
//
// Runs inside a critical section. The statement order in the loop is kept
// exactly as written because the experiment observes hardware state.
ITCM_CODE
void run_replacement_loop()
{
    const uintptr_t addr = (uintptr_t)buffer_big_test;
    const uint32_t set = (addr >> 5) & 31;

    int oldIME = enterCriticalSection();

    set_replacement_strategy(false);
    DC_FlushAll();

    // Disabled experiment: disable and reenable the data cache
    //uint32_t ctl;
    //asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(ctl));
    //ctl &= ~(1 << 2);
    //asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctl));
    //ctl |= (1 << 2);
    //asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctl));

    // Get CP15 control register to manipulate during the loop
    uint32_t ctl;
    asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r"(ctl));

    // Set up 512 KB of VRAM for ARM9 access and clear VRAM
    uint32_t *vramData = (uint32_t*)0x6800000;
    videoSetMode(MODE_FB0);
    vramSetBankA(VRAM_A_LCD);
    vramSetBankB(VRAM_B_LCD);
    vramSetBankC(VRAM_C_LCD);
    vramSetBankD(VRAM_D_LCD);
    for (uint32_t i = 0; i < 512*1024 / sizeof(uint32_t); i++)
    {
        vramData[i] = 0;
    }

    // Disabled experiment: abuse data cache lockdown to reset the counter
    // (doesn't work with random replacement strategy)
    //asm volatile("mcr p15, 0, %0, c9, c0, 0" : : "r"(0 | (1 << 31)));
    //helper = buffer_big_test[1 * DATA_CACHE_SIZE];
    //asm volatile("mcr p15, 0, %0, c9, c0, 0" : : "r"(1));
    //asm volatile("mcr p15, 0, %0, c9, c0, 0" : : "r"(0));
    //DC_InvalidateAll();
    //asm volatile("mcr p15, 0, %0, c9, c0, 0" : : "r"(0));

    // 2 bits per iteration fill the 512 KB of VRAM exactly.
    constexpr uint32_t total = 512*1024 * 8/2;
    //constexpr uint32_t total = 1*1024 * 8/2;

    for (uint32_t i = 0; i < total; i++)
    {
        // Disabled experiment: lockdown cache lines
        //constexpr uint32_t linesToLock = 1;
        //for (uint32_t line = 0; line < linesToLock; line++)
        //{
        //    asm volatile("mcr p15, 0, %0, c9, c0, 0" : : "r"(line | (1 << 31)));
        //    helper = buffer_big_test[1 * DATA_CACHE_SIZE];
        //}
        //asm volatile("mcr p15, 0, %0, c9, c0, 0" : : "r"(linesToLock));

        // Disabled experiment: mess with the data cache lockdown register
        //asm volatile("mcr p15, 0, %0, c9, c0, 0" : : "r"((i >> 7) & 0b11));

        // Disabled experiment: flip between round-robin and random every so often
        //ctl &= ~(1 << 14);
        //ctl |= ((i >> 5) & 1) << 14;
        //asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctl));

        // Switch to round-robin for one iteration after 0x7FF random entries
        ctl &= ~(1 << 14);
        ctl |= ((i & 0x7FF) == 0 ? 1 : 0) << 14;
        asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r"(ctl));

        // Load the first line of the buffer into the cache.
        helper = buffer_big_test[0];

        // The way counter is unsigned: `way << 30` on a signed int would
        // overflow for ways 2 and 3.
        for (uint32_t way = 0; way < 4; way++)
        {
            // Select (set, way) and read its data tag entry.
            uint32_t reg = ((set & 31) << 5) | ((way & 3) << 30);
            asm volatile("mcr p15, 3, %0, c15, c0, 0" : : "r"(reg));
            uint32_t tag;
            asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r"(tag));

            // Contribute the way number only if the entry is valid.
            uint32_t valid = (tag >> 4) & 1;
            uint32_t value = valid * way;

            // Iteration i owns bits [2*i, 2*i + 1] of the output.
            uint32_t pos = i * 2;
            uint32_t wordIndex = pos / 32;
            uint32_t shift = pos & 31;
            vramData[wordIndex] |= value << shift;
        }

        DC_InvalidateAll();
    }

    leaveCriticalSection(oldIME);
}
void test_replacement_strategy() | |
{ | |
consoleDemoInit(); | |
//uintptr_t addr = (uintptr_t)buffer_big_test; | |
//uint32_t set = (addr >> 5) & 31; | |
/*auto printSet = [](uint32_t set) { | |
//printf("St W V DD TAG Data\n"); | |
for (uint32_t segment = 0; segment < CACHE_ASSOCIATIVITY; segment++) | |
{ | |
uint32_t tag = read_data_tag_ram(set, segment); | |
uint32_t data = read_data_cache_ram(set, segment); | |
bool valid = (tag >> 4) & 1; | |
bool dirty1 = (tag >> 2) & 1; | |
bool dirty2 = (tag >> 3) & 1; | |
uint32_t tagAddr = tag & 0xFFFFFC00; | |
printf("%02lu %lu %c %c%c %08lX = %08lX\n", | |
set, | |
segment, | |
valid ? 'V' : '.', | |
dirty1 ? 'D' : '.', | |
dirty2 ? 'D' : '.', | |
tagAddr, | |
data | |
); | |
} | |
};*/ | |
shift_pu_regions(); | |
//print_pu_regions(); | |
// Fill the big buffer with a recognizable pattern | |
for (uint32_t i = 0; i < sizeof(buffer_big_test); i += 2) | |
{ | |
*reinterpret_cast<volatile uint16_t *>(&buffer_big_test[i]) = i; | |
} | |
// Configure region | |
set_pu7_data_cachable(true); | |
set_pu7_bufferable(false); | |
//printf("\n"); | |
//print_pu_regions(); | |
/*printf("Round-robin\n"); | |
set_replacement_strategy(true); | |
DC_FlushAll(); | |
// Read data from the buffer to load it in the cache | |
for (uint32_t i = 0; i < 8; i++) | |
{ | |
helper = buffer_big_test[i * DATA_CACHE_SIZE]; | |
helper = buffer_big_test[i * DATA_CACHE_SIZE + CACHE_LINE_SIZE]; | |
printf("\n"); | |
printSet(set); | |
printSet(set + 1); | |
// Wait for the user to press the A button | |
printf("Press A to continue\n"); | |
scanKeys(); | |
while ((keysDown() & KEY_A) == 0) { scanKeys(); } | |
} | |
printf("\n");*/ | |
fatInitDefault(); | |
int k = 0; | |
//for (int k = 0; k < 3; k++) { | |
// Run the test | |
run_replacement_loop(); | |
// Dump contents from VRAM into a file | |
videoSetMode(MODE_FB3); | |
char filename[13]; | |
sprintf(filename, "randseq%d.bin", k); | |
FILE *fp = fopen(filename, "wb"); | |
if (fp != nullptr) { | |
uint32_t value; | |
for (uint32_t i = 0; i < 512*1024; i+=4) { | |
//for (uint32_t i = 0; i < 1*1024; i+=4) { | |
value = *(uint32_t*)(0x6800000 + i); | |
fwrite(&value, sizeof(uint32_t), 1, fp); | |
} | |
} | |
//uintptr_t ptr = (uintptr_t)buffer_big_test; | |
//fwrite(&ptr, sizeof(ptr), 1, fp); | |
fflush(fp); | |
fclose(fp); | |
//} | |
consoleDemoInit(); | |
printf("Done\n"); | |
printf("Press START to exit\n"); | |
while (1) | |
{ | |
swiWaitForVBlank(); | |
scanKeys(); | |
if (keysDown() & KEY_START) | |
break; | |
} | |
} | |
int main(int argc, char **argv) | |
{ | |
/*memset((void *)buffer_source_1, 0x11, sizeof(buffer_source_1)); | |
memset((void *)buffer_source_2, 0x22, sizeof(buffer_source_2)); | |
zero(); | |
copy_1();*/ | |
//test_flush_invalidate(); | |
//test_raw_debug_regs(); | |
//test_debug_regs(); | |
//test_pu_cache_bits(); | |
//test_dirty_cache_read(); | |
test_replacement_strategy(); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: CC0-1.0 | |
# | |
# SPDX-FileContributor: Antonio Niño Díaz, 2023 | |
# User config

# Base name of the build output
NAME		:= data_cache_ops
# Title of the resulting program
GAME_TITLE	:= Data cache ops test

# Source code paths

SOURCEDIRS	:= source

# Shared build rules
include Makefile.include
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#---------------------------------------------------------------------------------
.SUFFIXES:
#---------------------------------------------------------------------------------
ifeq ($(strip $(DEVKITARM)),)
$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
endif

include $(DEVKITARM)/ds_rules

#---------------------------------------------------------------------------------
# TARGET is the name of the output
# BUILD is the directory where object files & intermediate files will be placed
# SOURCES is a list of directories containing source code
# DATA is a list of directories containing data files
# INCLUDES is a list of directories containing extra header files
#---------------------------------------------------------------------------------
TARGET		:=	cache-test
BUILD		:=	build
SOURCES		:=	source
DATA		:=	data
INCLUDES	:=	include

#---------------------------------------------------------------------------------
# options for code generation
#---------------------------------------------------------------------------------
ARCH	:=	-mthumb -mthumb-interwork -march=armv5te -mtune=arm946e-s

CFLAGS	:=	-g -Wall -O2\
		-fomit-frame-pointer\
		-ffast-math \
		$(ARCH)

CFLAGS	+=	$(INCLUDE) -DARM9
CXXFLAGS	:= $(CFLAGS) -fno-rtti -fno-exceptions

ASFLAGS	:=	-g $(ARCH)
LDFLAGS	=	-specs=ds_arm9.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)

#---------------------------------------------------------------------------------
# any extra libraries we wish to link with the project
#---------------------------------------------------------------------------------
LIBS	:= -lfilesystem -lfat -lnds9

#---------------------------------------------------------------------------------
# list of directories containing libraries, this must be the top level containing
# include and lib
#---------------------------------------------------------------------------------
LIBDIRS	:=	$(LIBNDS)

#---------------------------------------------------------------------------------
# no real need to edit anything past this point unless you need to add additional
# rules for different file extensions
#---------------------------------------------------------------------------------
ifneq ($(BUILD),$(notdir $(CURDIR))) | |
#--------------------------------------------------------------------------------- | |
export OUTPUT := $(CURDIR)/$(TARGET) | |
export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \ | |
$(foreach dir,$(DATA),$(CURDIR)/$(dir)) | |
export DEPSDIR := $(CURDIR)/$(BUILD) | |
CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) | |
CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) | |
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) | |
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) | |
#--------------------------------------------------------------------------------- | |
# use CXX for linking C++ projects, CC for standard C | |
#--------------------------------------------------------------------------------- | |
ifeq ($(strip $(CPPFILES)),) | |
#--------------------------------------------------------------------------------- | |
export LD := $(CC) | |
#--------------------------------------------------------------------------------- | |
else | |
#--------------------------------------------------------------------------------- | |
export LD := $(CXX) | |
#--------------------------------------------------------------------------------- | |
endif | |
#--------------------------------------------------------------------------------- | |
export OFILES := $(addsuffix .o,$(BINFILES)) \ | |
$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) | |
export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ | |
$(foreach dir,$(LIBDIRS),-I$(dir)/include) \ | |
$(foreach dir,$(LIBDIRS),-I$(dir)/include) \ | |
-I$(CURDIR)/$(BUILD) | |
export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) | |
.PHONY: $(BUILD) clean | |
#--------------------------------------------------------------------------------- | |
$(BUILD): | |
@[ -d $@ ] || mkdir -p $@ | |
@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile | |
#--------------------------------------------------------------------------------- | |
clean: | |
@echo clean ... | |
@rm -fr $(BUILD) $(TARGET).elf $(TARGET).nds $(TARGET).ds.gba | |
#--------------------------------------------------------------------------------- | |
else | |
DEPENDS := $(OFILES:.o=.d) | |
#--------------------------------------------------------------------------------- | |
# main targets | |
#--------------------------------------------------------------------------------- | |
$(OUTPUT).nds : $(OUTPUT).elf | |
$(OUTPUT).elf : $(OFILES) | |
#--------------------------------------------------------------------------------- | |
%.bin.o : %.bin | |
#--------------------------------------------------------------------------------- | |
@echo $(notdir $<) | |
@$(bin2o) | |
-include $(DEPENDS) | |
#--------------------------------------------------------------------------------------- | |
endif | |
#--------------------------------------------------------------------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: CC0-1.0
#
# SPDX-FileContributor: Antonio Niño Díaz, 2023

# Locations of the BlocksDS installation; '?=' keeps any value that was already
# set in the environment or on the command line.
BLOCKSDS    ?= /opt/blocksds/core
BLOCKSDSEXT ?= /opt/blocksds/external
# User config
# ===========

GAME_SUBTITLE := Built with BlocksDS
GAME_AUTHOR   := github.com/blocksds/sdk
GAME_ICON     := $(BLOCKSDS)/sys/icon.bmp

# DLDI and internal SD slot of DSi
# --------------------------------

# Root folder of the SD image
SDROOT  := sdroot
# Name of the generated image (use "DSi-1.sd" for no$gba in DSi mode)
SDIMAGE := image.bin
# Libraries
# ---------

# '?=' and '+=' leave room for values provided before this point.
LIBS    ?= -lnds9 -lc
LIBDIRS += $(BLOCKSDS)/libs/libnds
# Build artifacts
# ---------------

# Every intermediate file lives under $(BUILDDIR); only the final ROM is written
# next to the Makefile.
BUILDDIR     := build
ELF          := $(BUILDDIR)/$(NAME).elf
DUMP         := $(BUILDDIR)/$(NAME).dump
NITROFAT_IMG := $(BUILDDIR)/nitrofat.bin
MAP          := $(BUILDDIR)/$(NAME).map
SOUNDBANKDIR := $(BUILDDIR)/maxmod
ROM          := $(NAME).nds
# Tools
# -----

PREFIX  := arm-none-eabi-
CC      := $(PREFIX)gcc
CXX     := $(PREFIX)g++
OBJDUMP := $(PREFIX)objdump
MKDIR   := mkdir
RM      := rm -rf
# Verbose flag
# ------------

# $(V) prefixes recipe lines: it is '@' (command not echoed) by default and
# empty when make is run with VERBOSE=1.
ifeq ($(VERBOSE),1)
V :=
else
V := @
endif
# Source files
# ------------

# Asset directories are optional: each list is only scanned when its variable is
# non-empty, and the matching location under $(BUILDDIR) is added to the include
# path.
ifneq ($(BINDIRS),)
    SOURCES_BIN := $(shell find -L $(BINDIRS) -name "*.bin")
    INCLUDEDIRS += $(addprefix $(BUILDDIR)/,$(BINDIRS))
endif
ifneq ($(GFXDIRS),)
    SOURCES_PNG := $(shell find -L $(GFXDIRS) -name "*.png")
    INCLUDEDIRS += $(addprefix $(BUILDDIR)/,$(GFXDIRS))
endif
ifneq ($(AUDIODIRS),)
    SOURCES_AUDIO := $(shell find -L $(AUDIODIRS) -regex '.*\.\(it\|mod\|s3m\|wav\|xm\)')
    ifneq ($(SOURCES_AUDIO),)
        INCLUDEDIRS += $(SOUNDBANKDIR)
    endif
endif

SOURCES_S   := $(shell find -L $(SOURCEDIRS) -name "*.s")
SOURCES_C   := $(shell find -L $(SOURCEDIRS) -name "*.c")
SOURCES_CPP := $(shell find -L $(SOURCEDIRS) -name "*.cpp")
# Compiler and linker flags
# -------------------------

DEFINES   += -D__NDS__ -DARM9

ARCH      := -march=armv5te -mtune=arm946e-s

WARNFLAGS := -Wall

# Link with the C++ driver as soon as the project contains any C++ source.
ifeq ($(SOURCES_CPP),)
    LD := $(CC)
else
    LD := $(CXX)
endif

INCLUDEFLAGS := $(foreach path,$(INCLUDEDIRS),-I$(path)) \
                $(foreach path,$(LIBDIRS),-I$(path)/include)

LIBDIRSFLAGS := $(foreach path,$(LIBDIRS),-L$(path)/lib)

ASFLAGS += -x assembler-with-cpp $(DEFINES) $(ARCH) \
           -marm -mthumb-interwork $(INCLUDEFLAGS) \
           -ffunction-sections -fdata-sections

CFLAGS += -std=gnu11 $(WARNFLAGS) $(DEFINES) $(ARCH) \
          -marm -mthumb-interwork $(INCLUDEFLAGS) -O2 \
          -ffunction-sections -fdata-sections \
          -fomit-frame-pointer

CXXFLAGS += -std=gnu++14 $(WARNFLAGS) $(DEFINES) $(ARCH) \
            -marm -mthumb-interwork $(INCLUDEFLAGS) -O2 \
            -ffunction-sections -fdata-sections \
            -fno-exceptions -fno-rtti \
            -fomit-frame-pointer

# -nostdlib: the link line names every library itself ($(LIBS) plus -lgcc), and
# the start/end group lets the linker resolve references between them in any
# order.
LDFLAGS := -marm -mthumb-interwork $(LIBDIRSFLAGS) \
           -Wl,-Map,$(MAP) -Wl,--gc-sections -nostdlib \
           -T$(BLOCKSDS)/sys/crts/ds_arm9.mem \
           -T$(BLOCKSDS)/sys/crts/ds_arm9.ld \
           -Wl,--no-warn-rwx-segments \
           -Wl,--start-group $(LIBS) -lgcc -Wl,--end-group
# Intermediate build files
# ------------------------

# Object names keep the full source name and append ".o" (foo.c -> foo.c.o),
# mirrored under $(BUILDDIR).
OBJS_ASSETS := $(addsuffix .o,$(addprefix $(BUILDDIR)/,$(SOURCES_BIN))) \
               $(addsuffix .o,$(addprefix $(BUILDDIR)/,$(SOURCES_PNG)))

HEADERS_ASSETS := $(patsubst %.bin,%_bin.h,$(addprefix $(BUILDDIR)/,$(SOURCES_BIN))) \
                  $(patsubst %.png,%.h,$(addprefix $(BUILDDIR)/,$(SOURCES_PNG)))

ifneq ($(SOURCES_AUDIO),)
    OBJS_ASSETS    += $(SOUNDBANKDIR)/soundbank.c.o
    HEADERS_ASSETS += $(SOUNDBANKDIR)/soundbank.h
endif

OBJS_SOURCES := $(addsuffix .o,$(addprefix $(BUILDDIR)/,$(SOURCES_S))) \
                $(addsuffix .o,$(addprefix $(BUILDDIR)/,$(SOURCES_C))) \
                $(addsuffix .o,$(addprefix $(BUILDDIR)/,$(SOURCES_CPP)))

OBJS := $(OBJS_ASSETS) $(OBJS_SOURCES)

DEPS := $(OBJS:.o=.d)
# Targets
# -------

.PHONY: all clean dump dldipatch sdimage

# Default goal: build the ROM.
all: $(ROM)
ifneq ($(strip $(NITROFATDIR)),)
# Additional arguments for ndstool
NDSTOOL_FAT := -F $(NITROFAT_IMG)

# The image is written inside the build directory and is listed as a ROM
# prerequisite ahead of the ELF, so that directory may not exist yet when this
# recipe runs: create it first.
$(NITROFAT_IMG): $(NITROFATDIR)
	@echo " MKFATIMG $@ $(NITROFATDIR)"
	@$(MKDIR) -p $(@D)
	$(V)$(BLOCKSDS)/tools/mkfatimg/mkfatimg -t $(NITROFATDIR) $@ 0

# Make the NDS ROM depend on the filesystem image only if it is needed
$(ROM): $(NITROFAT_IMG)
endif
# Combine the title strings
# The fields are joined with ';'; the subtitle field is left out entirely when
# GAME_SUBTITLE is empty.
ifeq ($(strip $(GAME_SUBTITLE)),)
    GAME_FULL_TITLE := $(GAME_TITLE);$(GAME_AUTHOR)
else
    GAME_FULL_TITLE := $(GAME_TITLE);$(GAME_SUBTITLE);$(GAME_AUTHOR)
endif
# Pack the ARM9 ELF, the default ARM7 ELF, the icon and the title into the ROM.
# $(NDSTOOL_FAT) is only set when NITROFATDIR is non-empty.
$(ROM): $(ELF)
	@echo " NDSTOOL $@"
	$(V)$(BLOCKSDS)/tools/ndstool/ndstool -c $@ \
		-7 $(BLOCKSDS)/sys/default_arm7/arm7.elf -9 $(ELF) \
		-b $(GAME_ICON) "$(GAME_FULL_TITLE)" \
		$(NDSTOOL_FAT)

# Link all objects with the ARM9 crt0 object; $(LDFLAGS) comes last because it
# carries the library group.
$(ELF): $(OBJS)
	@echo " LD $@"
	$(V)$(LD) -o $@ $(OBJS) $(BLOCKSDS)/sys/crts/ds_arm9_crt0.o $(LDFLAGS)
# Write an objdump listing of the ELF (section headers, demangled names,
# source intermixed with disassembly) to $(DUMP).
$(DUMP): $(ELF)
	@echo " OBJDUMP $@"
	$(V)$(OBJDUMP) -h -C -S $< > $@

dump: $(DUMP)
# Remove the ROM, the dump, the whole build directory and the SD image.
clean:
	@echo " CLEAN"
	$(V)$(RM) $(ROM) $(DUMP) $(BUILDDIR) $(SDIMAGE)

# Build a FAT image of $(SDROOT) with mkfatimg.
sdimage:
	@echo " MKFATIMG $(SDIMAGE) $(SDROOT)"
	$(V)$(BLOCKSDS)/tools/mkfatimg/mkfatimg -t $(SDROOT) $(SDIMAGE) 0

# Patch the built ROM in place with the r4tfv2 DLDI driver.
dldipatch: $(ROM)
	@echo " DLDITOOL $(ROM)"
	$(V)$(BLOCKSDS)/tools/dlditool/dlditool \
		$(BLOCKSDS)/tools/dldi/r4tfv2.dldi $(ROM)
# Rules
# -----

# Each compile rule creates the object's directory first, because objects mirror
# the source tree under $(BUILDDIR). -MMD -MP writes a .d dependency file next to
# every object.

$(BUILDDIR)/%.s.o : %.s
	@echo " AS $<"
	@$(MKDIR) -p $(@D)
	$(V)$(CC) $(ASFLAGS) -MMD -MP -c -o $@ $<

$(BUILDDIR)/%.c.o : %.c
	@echo " CC $<"
	@$(MKDIR) -p $(@D)
	$(V)$(CC) $(CFLAGS) -MMD -MP -c -o $@ $<

$(BUILDDIR)/%.cpp.o : %.cpp
	@echo " CXX $<"
	@$(MKDIR) -p $(@D)
	$(V)$(CXX) $(CXXFLAGS) -MMD -MP -c -o $@ $<
# Binary assets: bin2c writes its output into the object's directory, then the
# generated <name>_bin.c is compiled. One recipe run produces both the object
# and the header.
$(BUILDDIR)/%.bin.o $(BUILDDIR)/%_bin.h : %.bin
	@echo " BIN2C $<"
	@$(MKDIR) -p $(@D)
	$(V)$(BLOCKSDS)/tools/bin2c/bin2c $< $(@D)
	$(V)$(CC) $(CFLAGS) -MMD -MP -c -o $(BUILDDIR)/$*.bin.o $(BUILDDIR)/$*_bin.c

# Graphics: every PNG needs a sibling .grit file as a prerequisite; grit's output
# is placed at $(BUILDDIR)/<name> and the resulting <name>.c is compiled.
$(BUILDDIR)/%.png.o $(BUILDDIR)/%.h : %.png %.grit
	@echo " GRIT $<"
	@$(MKDIR) -p $(@D)
	$(V)$(BLOCKSDS)/tools/grit/grit $< -ftc -W1 -o$(BUILDDIR)/$*
	$(V)$(CC) $(CFLAGS) -MMD -MP -c -o $(BUILDDIR)/$*.png.o $(BUILDDIR)/$*.c
# Build the soundbank from all audio sources. mmutil writes both soundbank.bin
# and soundbank.h; the command uses $(V) like every other tool invocation so
# that VERBOSE=1 shows it.
$(SOUNDBANKDIR)/soundbank.h: $(SOURCES_AUDIO)
	@echo " MMUTIL $^"
	@$(MKDIR) -p $(@D)
	$(V)$(BLOCKSDS)/tools/mmutil/mmutil $^ -d \
		-o$(SOUNDBANKDIR)/soundbank.bin -h$(SOUNDBANKDIR)/soundbank.h

# Depends on the header so that it runs after mmutil has produced soundbank.bin,
# which is then converted to C and compiled.
$(SOUNDBANKDIR)/soundbank.c.o: $(SOUNDBANKDIR)/soundbank.h
	@echo " BIN2C soundbank.bin"
	$(V)$(BLOCKSDS)/tools/bin2c/bin2c $(SOUNDBANKDIR)/soundbank.bin \
		$(SOUNDBANKDIR)
	@echo " CC.9 soundbank_bin.c"
	$(V)$(CC) $(CFLAGS) -MMD -MP -c -o $(SOUNDBANKDIR)/soundbank.c.o \
		$(SOUNDBANKDIR)/soundbank_bin.c
# All assets must be built before the source code
# -----------------------------------------------

# Making every source file depend on the generated asset headers forces those
# headers to exist before any source is compiled.
$(SOURCES_S) $(SOURCES_C) $(SOURCES_CPP): $(HEADERS_ASSETS)

# Include dependency files if they exist
# --------------------------------------

-include $(DEPS)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment