Created
April 26, 2020 14:46
-
-
Save profi200/7c5639a9bbe07b7da2d90d0647fe706a to your computer and use it in GitHub Desktop.
Texture copy measurements originally done for Corgi3DS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
static u32 counter = 1; | |
stopProfiling(); | |
GFX_deinit(false); | |
TIMER_sleepMs(1000); | |
*((vu32*)0x10140140) = 0; // REG_CFG11_GPUPROT | |
*((vu32*)0x10141200) = 0x1007F; // REG_CFG11_GPU_CNT | |
IRQ_registerHandler(IRQ_PPF, 14, 0, true, NULL); | |
__cpsid(i); | |
static void *const dsts[4] = {(void*)VRAM_BASE, (void*)0x1F000000, (void*)DSP_MEM_BASE, (void*)FCRAM_BASE}; | |
struct | |
{ | |
u32 vram[5]; | |
u32 qtm[5]; | |
u32 dsp[5]; | |
u32 fcram[5]; | |
u32 avg[4]; | |
} data[4] = {0}; | |
for(u32 i = 0; i < 4; i++) | |
{ | |
for(u32 n = 0; n < 5; n++) | |
{ | |
u32 tmp = measureTexcopy((void*)VRAM_BASE, dsts[i], 131072); | |
data[i].vram[n] = tmp; | |
data[i].avg[0] += tmp; | |
tmp = measureTexcopy((void*)0x1F000000, dsts[i], 131072); | |
data[i].qtm[n] = tmp; | |
data[i].avg[1] += tmp; | |
tmp = measureTexcopy((void*)DSP_MEM_BASE, dsts[i], 131072); | |
data[i].dsp[n] = tmp; | |
data[i].avg[2] += tmp; | |
tmp = measureTexcopy((void*)FCRAM_BASE, dsts[i], 131072); | |
data[i].fcram[n] = tmp; | |
data[i].avg[3] += tmp; | |
} | |
for(u32 n = 0; n < 4; n++) data[i].avg[n] /= 5; | |
} | |
__cpsie(i); | |
IRQ_disable(IRQ_PPF); | |
*((vu32*)0x10141200) = 0x10001; // REG_CFG11_GPU_CNT | |
TIMER_sleepMs(1000); | |
GFX_init(true); | |
if(counter > 2) | |
{ | |
const s32 h = fOpen("texcopy.csv", FS_CREATE_ALWAYS | FS_OPEN_WRITE); | |
fWrite(h, "name, run1, run2, run3, run4, run5, average\n", 44); | |
char buf[256]; | |
static const char *const names[4] = {"vram", "qtm", "dsp", "fcram"}; | |
for(u32 i = 0; i < 4; i++) | |
{ | |
memset(buf, 0, 256); | |
ee_sprintf(buf, "%s->%s, %lu, %lu, %lu, %lu, %lu, %lu\n", names[0], names[i], data[i].vram[0], data[i].vram[1], data[i].vram[2], data[i].vram[3], data[i].vram[4], data[i].avg[0]); | |
fWrite(h, buf, strlen(buf)); | |
memset(buf, 0, 256); | |
ee_sprintf(buf, "%s->%s, %lu, %lu, %lu, %lu, %lu, %lu\n", names[1], names[i], data[i].qtm[0], data[i].qtm[1], data[i].qtm[2], data[i].qtm[3], data[i].qtm[4], data[i].avg[1]); | |
fWrite(h, buf, strlen(buf)); | |
memset(buf, 0, 256); | |
ee_sprintf(buf, "%s->%s, %lu, %lu, %lu, %lu, %lu, %lu\n", names[2], names[i], data[i].dsp[0], data[i].dsp[1], data[i].dsp[2], data[i].dsp[3], data[i].dsp[4], data[i].avg[2]); | |
fWrite(h, buf, strlen(buf)); | |
memset(buf, 0, 256); | |
ee_sprintf(buf, "%s->%s, %lu, %lu, %lu, %lu, %lu, %lu\n", names[3], names[i], data[i].fcram[0], data[i].fcram[1], data[i].fcram[2], data[i].fcram[3], data[i].fcram[4], data[i].avg[3]); | |
fWrite(h, buf, strlen(buf)); | |
} | |
fClose(h); | |
} | |
counter++; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.align 2
.global measureTexcopy
.type measureTexcopy, %function
@ u32 measureTexcopy(const void *src, void *dest, u32 size);
@
@ Programs the transfer engine registers at 0x10400C00 for a copy from src to
@ dest, starts a timer and the transfer back to back, sleeps in wfi until an
@ interrupt wakes the core, then stops the timer and returns how far it
@ counted.
@
@ In:   r0 = src, r1 = dest, r2 = size (per the prototype above).
@ Out:  r0 = timer load value (0xFFFFFFFF) minus the counter value read back
@       after the wait.
@       NOTE(review): presumably elapsed timer ticks of a down-counting
@       timer -- confirm the tick rate against the timer setup.
@ Uses: r1-r3 and r12 as scratch; nothing is saved on the stack.
@
@ NOTE(review): the interrupt is acknowledged and ended by hand below, so this
@ appears to expect IRQs to be masked on the CPU by the caller -- confirm.
measureTexcopy:
	ldr r3, =0x10400C00
	lsr r0, r0, #3                     @ Addresses are written shifted right by 3.
	lsr r1, r1, #3
	str r0, [r3]                       @ REGs_TRANS_ENGINE[0]
	mov r0, #(1u<<3)
	str r1, [r3, #4]                   @ REGs_TRANS_ENGINE[1]
	str r0, [r3, #16]                  @ REGs_TRANS_ENGINE[4]
	mov r1, #0
	str r2, [r3, #32]                  @ REGs_TRANS_ENGINE[8] (size argument)
	str r1, [r3, #36]                  @ REGs_TRANS_ENGINE[9]
	str r1, [r3, #40]                  @ REGs_TRANS_ENGINE[10]
	ldr r0, =0x17E00100
	mov r2, #-1                        @ r2 = 0xFFFFFFFF, kept for the final subtraction.
	mov r12, #1
	str r2, [r0, #0x500]               @ REG_TIMER_LOAD
	mcr p15, 0, r1, c7, c10, 0         @ "Clean Entire Data Cache"
	mcr p15, 0, r1, c7, c10, 4         @ Data Synchronization Barrier
	str r12, [r0, #0x500 + 8]          @ REG_TIMER_CNT = 1
	str r12, [r3, #24]                 @ REGs_TRANS_ENGINE[6] = 1, directly after the timer write
	wfi                                @ Sleep until an interrupt is pending.
	str r1, [r0, #0x500 + 8]           @ REG_TIMER_CNT = 0, first thing after wake-up
	ldr r3, [r0, #12]                  @ REG_CPU_II_AKN
	str r12, [r0, #0x500 + 12]         @ REG_TIMER_INT_STAT
	str r3, [r0, #16]                  @ REG_CPU_II_EOI (value read from AKN)
	ldr r3, [r0, #0x500 + 4]           @ REG_TIMER_COUNTER
	sub r0, r2, r3                     @ Return load value - current counter.
	bx lr
.pool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name | run1 | run2 | run3 | run4 | run5 | average | |
---|---|---|---|---|---|---|---|
vram->vram | 10482 | 9493 | 9493 | 9494 | 9493 | 9691 | |
qtm->vram | 16644 | 16644 | 16644 | 16644 | 16644 | 16644 | |
dsp->vram | 16644 | 16644 | 16644 | 16644 | 16644 | 16644 | |
fcram->vram | 16930 | 16930 | 16919 | 16908 | 16930 | 16923 | |
vram->qtm | 5931 | 5930 | 5931 | 5931 | 5930 | 5930 | |
qtm->qtm | 16643 | 16643 | 16643 | 16643 | 16643 | 16643 | |
dsp->qtm | 16643 | 16643 | 16643 | 16643 | 16643 | 16643 | |
fcram->qtm | 16929 | 16918 | 16918 | 16907 | 16940 | 16922 | |
vram->dsp | 5931 | 5931 | 5930 | 5930 | 5931 | 5930 | |
qtm->dsp | 16643 | 16643 | 16643 | 16643 | 16643 | 16643 | |
dsp->dsp | 16643 | 16643 | 16643 | 16643 | 16643 | 16643 | |
fcram->dsp | 16940 | 16929 | 16929 | 16918 | 16918 | 16926 | |
vram->fcram | 17919 | 17918 | 17918 | 17918 | 17918 | 17918 | |
qtm->fcram | 44053 | 44038 | 44038 | 44038 | 44179 | 44069 | |
dsp->fcram | 44162 | 44147 | 44147 | 44147 | 44153 | 44151 | |
fcram->fcram | 42894 | 42894 | 42894 | 42894 | 42954 | 42906 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name | run1 | run2 | run3 | run4 | run5 | average | |
---|---|---|---|---|---|---|---|
vram->vram | 10492 | 9494 | 9494 | 9493 | 9493 | 9693 | |
qtm->vram | 16642 | 16642 | 16642 | 16642 | 16642 | 16642 | |
dsp->vram | 16642 | 16642 | 16642 | 16642 | 16642 | 16642 | |
fcram->vram | 16906 | 16924 | 16906 | 16924 | 16906 | 16913 | |
vram->qtm | 5930 | 5930 | 5931 | 5930 | 5930 | 5930 | |
qtm->qtm | 16640 | 16640 | 16644 | 16640 | 16640 | 16640 | |
dsp->qtm | 16644 | 16640 | 16640 | 16640 | 16644 | 16641 | |
fcram->qtm | 16937 | 16922 | 16908 | 16904 | 16904 | 16915 | |
vram->dsp | 5931 | 5931 | 5930 | 5930 | 5930 | 5930 | |
qtm->dsp | 16640 | 16640 | 16644 | 16640 | 16640 | 16640 | |
dsp->dsp | 16640 | 16640 | 16640 | 16640 | 16644 | 16640 | |
fcram->dsp | 16937 | 16922 | 16908 | 16904 | 16937 | 16921 | |
vram->fcram | 17916 | 17917 | 17916 | 17912 | 17916 | 17915 | |
qtm->fcram | 47881 | 47785 | 47903 | 47785 | 47816 | 47834 | |
dsp->fcram | 47820 | 47884 | 47820 | 47841 | 47882 | 47849 | |
fcram->fcram | 46205 | 46205 | 46205 | 46205 | 46330 | 46230 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment