Skip to content

Instantly share code, notes, and snippets.

@profi200
Created April 26, 2020 14:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save profi200/7c5639a9bbe07b7da2d90d0647fe706a to your computer and use it in GitHub Desktop.
Save profi200/7c5639a9bbe07b7da2d90d0647fe706a to your computer and use it in GitHub Desktop.
Texture copy measurements originally done for Corgi3DS
// Texture copy timing experiment: times measureTexcopy() for every
// source/destination pair of four memory regions, 5 runs per pair.
//
// Counts passes through this code. The CSV dump further down only happens
// once this exceeds 2, and it is incremented at the end of every pass.
static u32 counter = 1;
// Stop profiling and shut graphics down before driving the hardware by hand.
// NOTE(review): presumably so nothing else uses the GPU while measuring — confirm.
stopProfiling();
GFX_deinit(false);
TIMER_sleepMs(1000);
// NOTE(review): presumably lifts GPU memory protection so all four regions
// are reachable by the transfer engine — confirm against the register docs.
*((vu32*)0x10140140) = 0; // REG_CFG11_GPUPROT
// GPU_CNT is set to 0x1007F for the measurement and to 0x10001 afterwards.
*((vu32*)0x10141200) = 0x1007F; // REG_CFG11_GPU_CNT
// Register IRQ_PPF with no handler function (NULL). measureTexcopy()
// acknowledges the interrupt itself after its wfi.
IRQ_registerHandler(IRQ_PPF, 14, 0, true, NULL);
// Mask IRQs on this CPU for the whole measurement. measureTexcopy() waits
// with wfi and then acks the interrupt manually instead of taking it.
__cpsid(i);
// Destination base addresses. Index order matches the field order of the
// struct below: vram, qtm (0x1F000000), dsp, fcram.
static void *const dsts[4] = {(void*)VRAM_BASE, (void*)0x1F000000, (void*)DSP_MEM_BASE, (void*)FCRAM_BASE};
// data[i] holds all results for destination dsts[i]. Each array is named
// after the SOURCE region and stores the 5 individual runs. avg[k] is the
// average for source k in the order vram, qtm, dsp, fcram.
struct
{
u32 vram[5];
u32 qtm[5];
u32 dsp[5];
u32 fcram[5];
u32 avg[4];
} data[4] = {0};
// Outer loop: destination region. Inner loop: run number.
for(u32 i = 0; i < 4; i++)
{
for(u32 n = 0; n < 5; n++)
{
// Each run measures all four sources back to back with 131072 (128 Ki) as
// the size argument. avg[] accumulates the sum of the runs at this point.
u32 tmp = measureTexcopy((void*)VRAM_BASE, dsts[i], 131072);
data[i].vram[n] = tmp;
data[i].avg[0] += tmp;
tmp = measureTexcopy((void*)0x1F000000, dsts[i], 131072);
data[i].qtm[n] = tmp;
data[i].avg[1] += tmp;
tmp = measureTexcopy((void*)DSP_MEM_BASE, dsts[i], 131072);
data[i].dsp[n] = tmp;
data[i].avg[2] += tmp;
tmp = measureTexcopy((void*)FCRAM_BASE, dsts[i], 131072);
data[i].fcram[n] = tmp;
data[i].avg[3] += tmp;
}
// Turn the sums into averages over the 5 runs (integer division, truncates).
for(u32 n = 0; n < 4; n++) data[i].avg[n] /= 5;
}
// Measurement done: unmask IRQs, disable IRQ_PPF again and bring the GPU
// and graphics back up.
__cpsie(i);
IRQ_disable(IRQ_PPF);
*((vu32*)0x10141200) = 0x10001; // REG_CFG11_GPU_CNT
TIMER_sleepMs(1000);
GFX_init(true);
// Write the collected timings to "texcopy.csv", one row per
// source->destination pair: the 5 individual runs followed by the average.
// Nothing is written until counter exceeds 2. counter is bumped on every
// pass regardless.
// NOTE(review): the handle returned by fOpen() is used without a validity
// check — confirm whether opening can fail here.
if(counter > 2)
{
	static const char *const names[4] = {"vram", "qtm", "dsp", "fcram"};

	const s32 h = fOpen("texcopy.csv", FS_CREATE_ALWAYS | FS_OPEN_WRITE);
	fWrite(h, "name, run1, run2, run3, run4, run5, average\n", 44);

	char buf[256];
	for(u32 dst = 0; dst < 4; dst++)
	{
		// Per-source run arrays for this destination, in the same order as
		// names[] and data[dst].avg[].
		const u32 *const runs[4] = {data[dst].vram, data[dst].qtm, data[dst].dsp, data[dst].fcram};

		for(u32 src = 0; src < 4; src++)
		{
			const u32 *const r = runs[src];

			memset(buf, 0, sizeof(buf));
			ee_sprintf(buf, "%s->%s, %lu, %lu, %lu, %lu, %lu, %lu\n", names[src], names[dst], r[0], r[1], r[2], r[3], r[4], data[dst].avg[src]);
			fWrite(h, buf, strlen(buf));
		}
	}

	fClose(h);
}
counter++;
// Programs the transfer engine registers at 0x10400C00 for one copy, starts
// a timer together with the transfer, sleeps until an interrupt arrives and
// returns how far the timer counter moved from its load value.
//
// Per the prototype below, under the standard ARM calling convention:
//   r0 = src, r1 = dest, r2 = size. The result is returned in r0.
//
// The caller is expected to have IRQs masked; the interrupt that ends the
// wfi is acknowledged by hand here rather than by an IRQ handler.
.align 2
.global measureTexcopy
.type measureTexcopy %function
@ u32 measureTexcopy(const void *src, void *dest, u32 size);
measureTexcopy:
// r3 = base of the transfer engine register block.
ldr r3, =0x10400C00
// The source and destination addresses are written shifted right by 3.
lsr r0, r0, #3
lsr r1, r1, #3
str r0, [r3] // REGs_TRANS_ENGINE[0]
mov r0, #(1u<<3)
str r1, [r3, #4] // REGs_TRANS_ENGINE[1]
// Bit 3 in register [4].
// NOTE(review): presumably selects texture copy mode — confirm.
str r0, [r3, #16] // REGs_TRANS_ENGINE[4]
mov r1, #0
// The size argument goes to register [8] unmodified; [9] and [10] are zeroed.
str r2, [r3, #32] // REGs_TRANS_ENGINE[8]
str r1, [r3, #36] // REGs_TRANS_ENGINE[9]
str r1, [r3, #40] // REGs_TRANS_ENGINE[10]
// r0 = base used for the interrupt interface (+12/+16) and timer (+0x500).
ldr r0, =0x17E00100
// r2 = 0xFFFFFFFF, the timer load value. r12 = 1, used as the
// "enable"/"start"/"clear" value below.
mov r2, #-1
mov r12, #1
str r2, [r0, #0x500] // REG_TIMER_LOAD
// Clean the data cache and wait for completion before the timed section.
mcr p15, 0, r1, c7, c10, 0 @ "Clean Entire Data Cache"
mcr p15, 0, r1, c7, c10, 4 @ Data Synchronization Barrier
// Timed section: start the timer, then immediately write 1 to register [6].
// NOTE(review): the write to [6] presumably kicks off the transfer — confirm.
str r12, [r0, #0x500 + 8] // REG_TIMER_CNT
str r12, [r3, #24] // REGs_TRANS_ENGINE[6]
// Sleep until an interrupt becomes pending.
wfi
// Stop the timer first so the bookkeeping below is not counted.
str r1, [r0, #0x500 + 8] // REG_TIMER_CNT
// Acknowledge the interrupt, clear the timer interrupt status and signal
// end-of-interrupt with the value that was read from the ack register.
ldr r3, [r0, #12] // REG_CPU_II_AKN
str r12, [r0, #0x500 + 12] // REG_TIMER_INT_STAT
str r3, [r0, #16] // REG_CPU_II_EOI
// Return load value minus current counter. This is the elapsed tick count,
// assuming the timer counts down from the loaded value — TODO confirm.
ldr r3, [r0, #0x500 + 4] // REG_TIMER_COUNTER
sub r0, r2, r3
bx lr
.pool
name run1 run2 run3 run4 run5 average
vram->vram 10482 9493 9493 9494 9493 9691
qtm->vram 16644 16644 16644 16644 16644 16644
dsp->vram 16644 16644 16644 16644 16644 16644
fcram->vram 16930 16930 16919 16908 16930 16923
vram->qtm 5931 5930 5931 5931 5930 5930
qtm->qtm 16643 16643 16643 16643 16643 16643
dsp->qtm 16643 16643 16643 16643 16643 16643
fcram->qtm 16929 16918 16918 16907 16940 16922
vram->dsp 5931 5931 5930 5930 5931 5930
qtm->dsp 16643 16643 16643 16643 16643 16643
dsp->dsp 16643 16643 16643 16643 16643 16643
fcram->dsp 16940 16929 16929 16918 16918 16926
vram->fcram 17919 17918 17918 17918 17918 17918
qtm->fcram 44053 44038 44038 44038 44179 44069
dsp->fcram 44162 44147 44147 44147 44153 44151
fcram->fcram 42894 42894 42894 42894 42954 42906
name run1 run2 run3 run4 run5 average
vram->vram 10492 9494 9494 9493 9493 9693
qtm->vram 16642 16642 16642 16642 16642 16642
dsp->vram 16642 16642 16642 16642 16642 16642
fcram->vram 16906 16924 16906 16924 16906 16913
vram->qtm 5930 5930 5931 5930 5930 5930
qtm->qtm 16640 16640 16644 16640 16640 16640
dsp->qtm 16644 16640 16640 16640 16644 16641
fcram->qtm 16937 16922 16908 16904 16904 16915
vram->dsp 5931 5931 5930 5930 5930 5930
qtm->dsp 16640 16640 16644 16640 16640 16640
dsp->dsp 16640 16640 16640 16640 16644 16640
fcram->dsp 16937 16922 16908 16904 16937 16921
vram->fcram 17916 17917 17916 17912 17916 17915
qtm->fcram 47881 47785 47903 47785 47816 47834
dsp->fcram 47820 47884 47820 47841 47882 47849
fcram->fcram 46205 46205 46205 46205 46330 46230
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment