Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@profi200
Created April 13, 2020 15:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save profi200/fb95910e1b72b1c6e9344ae0e9c7f257 to your computer and use it in GitHub Desktop.
Save profi200/fb95910e1b72b1c6e9344ae0e9c7f257 to your computer and use it in GitHub Desktop.
Some crypto measurements for Corgi3DS.
/**
 * Arms the hardware for a single benchmark run without starting it.
 *
 * sha == true:  only NDMA channel 7 is programmed. It feeds a constant fill
 *               word into the SHA input FIFO.
 * sha == false: NDMA channel 3 feeds a constant fill word into the AES write
 *               FIFO, NDMA channel 4 drains the AES read FIFO into memory and
 *               the AES engine itself is configured with the given mode.
 *
 * @param sha      Selects the SHA (true) or AES (false) setup.
 * @param aesMode  Mode bits ORed into REG_AESCNT. Unused when sha is true.
 */
void setupStuff(bool sha, u32 aesMode)
{
	if(sha)
	{
		// Input side only: fill data --> SHA input FIFO.
		REG_NDMA7_SRC_ADDR = 0;
		REG_NDMA7_DST_ADDR = (u32)REGs_SHA_INFIFO;
		REG_NDMA7_TOTAL_CNT = 0x8000;
		REG_NDMA7_LOG_BLK_CNT = 16;
		REG_NDMA7_INT_CNT = NDMA_INT_SYS_FREQ;
		REG_NDMA7_FILL_DATA = 0xAABBCDEF;

		// This channel raises the IRQ on completion (NDMA_IRQ_ENABLE).
		const u32 shaInCnt = NDMA_ENABLE | NDMA_IRQ_ENABLE | NDMA_TOTAL_CNT_MODE | NDMA_STARTUP_SHA_IN |
		                     NDMA_BURST_WORDS(16) | NDMA_SRC_UPDATE_FILL | NDMA_DST_UPDATE_FIXED;
		REG_NDMA7_CNT = shaInCnt;
		return;
	}

	// Select the input format first so the counter words below are
	// written with big endian / normal order input settings.
	REG_AESCNT = (AES_INPUT_BIG | AES_INPUT_NORMAL)<<23;

	// Arbitrary counter value. The benchmark does not care about the data.
	static const u32 ctrInit[4] = {0x12345678, 0x87654321, 0x12345678, 0x87654321};
	for(u32 w = 0; w < sizeof(ctrInit) / sizeof(ctrInit[0]); w++)
	{
		REG_AESCTR[w] = ctrInit[w];
	}

	// Input side: fill data --> AES write FIFO.
	REG_NDMA3_SRC_ADDR = 0;
	REG_NDMA3_DST_ADDR = REG_AESWRFIFO;
	REG_NDMA3_TOTAL_CNT = 0x8000;
	REG_NDMA3_LOG_BLK_CNT = 8;
	REG_NDMA3_INT_CNT = NDMA_INT_SYS_FREQ;
	REG_NDMA3_FILL_DATA = 0xAABBCDEF;
	const u32 aesInCnt = NDMA_ENABLE | NDMA_TOTAL_CNT_MODE | NDMA_STARTUP_AES_IN |
	                     NDMA_BURST_WORDS(8) | NDMA_SRC_UPDATE_FILL | NDMA_DST_UPDATE_FIXED;
	REG_NDMA3_CNT = aesInCnt;

	// Output side: AES read FIFO --> memory starting 128 KiB below 0x08100000.
	REG_NDMA4_SRC_ADDR = REG_AESRDFIFO;
	REG_NDMA4_DST_ADDR = 0x08100000u - (1024u * 128u);
	REG_NDMA4_TOTAL_CNT = 0x8000;
	REG_NDMA4_LOG_BLK_CNT = 8;
	REG_NDMA4_INT_CNT = NDMA_INT_SYS_FREQ;
	const u32 aesOutCnt = NDMA_ENABLE | NDMA_TOTAL_CNT_MODE | NDMA_STARTUP_AES_OUT |
	                      NDMA_BURST_WORDS(8) | NDMA_SRC_UPDATE_FIXED | NDMA_DST_UPDATE_INC;
	REG_NDMA4_CNT = aesOutCnt;

	// 0x2000 blocks. Presumably 16 bytes each, i.e. the same 128 KiB the
	// NDMA channels move -- TODO confirm the block size.
	REG_AES_BLKCNT_LOW = 0;
	REG_AES_BLKCNT_HIGH = 0x2000;

	// Everything but the start bit. The measure routine sets bit 31 later.
	const u32 aesCnt = AES_IRQ_ENABLE | aesMode | 1u<<14 | (3u - 1u)<<12 | AES_FLUSH_READ_FIFO |
	                   AES_FLUSH_WRITE_FIFO | (AES_INPUT_BIG | AES_INPUT_NORMAL)<<23 |
	                   (AES_OUTPUT_BIG | AES_OUTPUT_NORMAL)<<22;
	REG_AESCNT = aesCnt;
}
// Timing routines implemented in assembly. Each one starts the already
// configured engine, waits for it to finish and returns the elapsed count
// as (timer 0 value) | (timer 1 value << 16).
u32 measureAes(void);    // AES engine fed/drained by NDMA.
u32 measureShaDma(void); // SHA engine fed by NDMA.
u32 measureShaCpu(void); // SHA engine fed by CPU stores to the input FIFO.
#include "hardware/cache.h"
/**
 * Runs the complete crypto benchmark and stores the raw timer readings.
 *
 * For each of NUM_RUNS iterations the AES engine is timed in CCM, CTR, CBC
 * and ECB mode, followed by the SHA engine fed by NDMA and fed by the CPU.
 * Every reading is stored per run and also added to a per-test sum.
 *
 * @param data  Pointer to the result buffer. Layout (all u32):
 *              ccm[5], ctr[5], cbc[5], ecb[5], shaD[5], shaC[5], avg[6].
 *              avg[] holds the SUM of the five runs of each test, in the
 *              order listed above. The division is left to the ARM11 side.
 *
 * The sums are zeroed here before accumulating, so the buffer does not
 * need to be cleared by the caller.
 */
void measureCrypto(void *data)
{
	enum
	{
		NUM_RUNS  = 5, // Repetitions of every test.
		NUM_TESTS = 6  // ccm, ctr, cbc, ecb, sha DMA, sha CPU.
	};

	struct
	{
		u32 ccm[NUM_RUNS];
		u32 ctr[NUM_RUNS];
		u32 cbc[NUM_RUNS];
		u32 ecb[NUM_RUNS];
		u32 shaD[NUM_RUNS];
		u32 shaC[NUM_RUNS];
		u32 avg[NUM_TESTS];
	} *res = data;

	// The accumulators are only ever "+=" below. Start them from a known
	// value instead of whatever the buffer happened to contain.
	for(u32 t = 0; t < NUM_TESTS; t++) res->avg[t] = 0;

	// Arbitrary key material. Only the throughput is of interest.
	static const u32 dummyKey[4] = {0x12345678, 0x87654321, 0x12345678, 0x87654321};
	AES_setKey(0x3B, AES_KEY_Y, AES_INPUT_BIG | AES_INPUT_NORMAL, false, dummyKey);
	AES_selectKeyslot(0x3B);

	flushDCache();

	for(u32 n = 0; n < NUM_RUNS; n++)
	{
		u32 ticks;

		setupStuff(false, AES_MODE_CCM_ENCRYPT);
		ticks = measureAes();
		res->ccm[n] = ticks;
		res->avg[0] += ticks;

		setupStuff(false, AES_MODE_CTR);
		ticks = measureAes();
		res->ctr[n] = ticks;
		res->avg[1] += ticks;

		setupStuff(false, AES_MODE_CBC_ENCRYPT);
		ticks = measureAes();
		res->cbc[n] = ticks;
		res->avg[2] += ticks;

		setupStuff(false, AES_MODE_ECB_ENCRYPT);
		ticks = measureAes();
		res->ecb[n] = ticks;
		res->avg[3] += ticks;

		setupStuff(true, 0);
		ticks = measureShaDma();
		res->shaD[n] = ticks;
		res->avg[4] += ticks;

		// No setupStuff() call here: the CPU variant writes the input
		// FIFO itself and does not use NDMA.
		ticks = measureShaCpu();
		res->shaC[n] = ticks;
		res->avg[5] += ticks;
	}

	// Dividing average is done on ARM11.
}
name run 1 run 2 run 3 run 4 run 5 average
ccm 131135 131127 131127 131127 131127 131129
ctr 98338 98338 98338 98338 98338 98338
cbc 98323 98323 98323 98323 98323 98323
ecb 98323 98323 98323 98323 98323 98323
sha DMA 139358 139350 139350 139350 139350 139351
sha CPU 170079 170072 170067 170067 170067 170070
.align 2
.global measureAes
.type measureAes %function
@ ---------------------------------------------------------------------------
@ u32 measureAes(void);
@
@ Times one run of the AES engine using timers 0 and 1 and returns the
@ elapsed count. The AES engine and the DMA channels serving its FIFOs must
@ already be configured (see setupStuff()). This routine only sets bit 31
@ of AESCNT to kick the engine off.
@
@ In:       nothing
@ Out:      r0 = timer 0 value | (timer 1 value << 16)
@ Clobbers: r1, r2, r3, r12
@
@ IRQs are masked (CPSR bit 7 set) for the duration of the measurement and
@ are unconditionally unmasked again before returning, regardless of the
@ state on entry.
@
@ NOTE: the instructions between "Start timer 0" and "Stop timer 0" are part
@ of the measured interval. Do not reorder or add instructions there.
@ ---------------------------------------------------------------------------
measureAes:
mrs r3, cpsr
mov r0, #0 @ r0 = 0: stop value, counter reset value and MCR operand
orr r3, r3, #1u<<7
ldr r1, =0x10003000 @ r1 = timer register base
msr cpsr_c, r3 @ Mask IRQs
strh r0, [r1, #2] @ Stop timer 0
strh r0, [r1, #6] @ Stop timer 1
strh r0, [r1, #0] @ Set timer 0 val to 0
mov r3, #0x84 @ Timer start with count up
strh r0, [r1, #4] @ Set timer 1 val to 0
ldr r2, =0x10009000 @ r2 = AES register base
strh r3, [r1, #6] @ Start count up timer 1 (presumably ticks on timer 0 overflow -- confirm)
ldr r3, [r2] @ Read AESCNT
mov r12, #0x80 @ Timer start value for timer 0
orr r3, #1u<<31 @ Set the start bit in the AESCNT copy
strh r12, [r1, #2] @ Start timer 0 -- measured interval begins
str r3, [r2] @ Start AES
mcr p15, 0, r0, c7, c0, 4 @ wfi: sleep until an interrupt is pending (presumably the AES IRQ enabled in setupStuff -- confirm)
strh r0, [r1, #2] @ Stop timer 0 -- measured interval ends
strh r0, [r1, #6] @ Stop timer 1
mrs r2, cpsr
bic r2, r2, #1u<<7
msr cpsr_c, r2 @ Unmask IRQs so the pending interrupt can be serviced
ldrh r0, [r1, #0] @ Get timer 0 val (low halfword of the result)
ldrh r1, [r1, #4] @ Get timer 1 val (high halfword of the result)
orr r0, r0, r1, lsl #16 @ Combine both halves into one 32-bit count
bx lr
.pool
.align 2
.global measureShaDma
.type measureShaDma %function
@ ---------------------------------------------------------------------------
@ u32 measureShaDma(void);
@
@ Times one SHA run where the input FIFO is fed by DMA and returns the
@ elapsed count. The DMA channel must already be configured (see
@ setupStuff()). This routine writes 5 to SHA_CNT to start hashing, sleeps
@ until an interrupt is pending, waits for SHA_CNT bit 0 to clear, writes
@ 0xA to SHA_CNT for the final round and waits for bit 0 to clear again.
@
@ In:       nothing
@ Out:      r0 = timer 0 value | (timer 1 value << 16)
@ Clobbers: r1, r2, r3, r12, condition flags
@
@ IRQs are masked (CPSR bit 7 set) for the duration of the measurement and
@ are unconditionally unmasked again before returning, regardless of the
@ state on entry.
@
@ NOTE: the instructions between "Start timer 0" and "Stop timer 0" are part
@ of the measured interval. Do not reorder or add instructions there.
@ ---------------------------------------------------------------------------
measureShaDma:
mrs r3, cpsr
mov r0, #0 @ r0 = 0: stop value, counter reset value and MCR operand
orr r3, r3, #1u<<7
ldr r1, =0x10003000 @ r1 = timer register base
msr cpsr_c, r3 @ Mask IRQs
strh r0, [r1, #2] @ Stop timer 0
strh r0, [r1, #6] @ Stop timer 1
strh r0, [r1, #0] @ Set timer 0 val to 0
mov r3, #0x84 @ Timer start with count up
strh r0, [r1, #4] @ Set timer 1 val to 0
ldr r2, =0x1000A000 @ r2 = SHA register base (SHA_CNT at offset 0)
strh r3, [r1, #6] @ Start count up timer 1 (presumably ticks on timer 0 overflow -- confirm)
mov r12, #0x80 @ Timer start value for timer 0
mov r3, #5 @ SHA_CNT value used to start hashing
strh r12, [r1, #2] @ Start timer 0 -- measured interval begins
str r3, [r2] @ Start SHA
mov r12, #0xA @ Preload the SHA_CNT value for the final round
mcr p15, 0, r0, c7, c0, 4 @ wfi: sleep until an interrupt is pending (presumably the NDMA7 IRQ enabled in setupStuff -- confirm)
measureShaDma_last_round_lp:
ldr r3, [r2] @ Get SHA_CNT
tst r3, #1 @ Bit 0 still set?
bne measureShaDma_last_round_lp @ Yes: keep polling until the last data round is done
str r12, [r2] @ SHA final round
measureShaDma_final_round_lp:
ldr r12, [r2] @ Get SHA_CNT
tst r12, #1 @ Bit 0 still set?
bne measureShaDma_final_round_lp @ Yes: keep polling until the final round is done
strh r0, [r1, #2] @ Stop timer 0 -- measured interval ends
strh r0, [r1, #6] @ Stop timer 1
mrs r2, cpsr
bic r2, r2, #1u<<7
msr cpsr_c, r2 @ Unmask IRQs so the pending interrupt can be serviced
ldrh r0, [r1, #0] @ Get timer 0 val (low halfword of the result)
ldrh r1, [r1, #4] @ Get timer 1 val (high halfword of the result)
orr r0, r0, r1, lsl #16 @ Combine both halves into one 32-bit count
bx lr
.pool
.align 2
.global measureShaCpu
.type measureShaCpu %function
@ ---------------------------------------------------------------------------
@ u32 measureShaCpu(void);
@
@ Times one SHA run where the CPU itself pushes the data into the SHA input
@ FIFO and returns the elapsed count. No DMA setup is required.
@
@ The data is the constant word 0xAABBCDEF. Each of the 0x800 loop
@ iterations issues four 4-word STMs (16 words = 64 bytes) to SHA base +
@ 0x80 and then polls SHA_CNT bit 0 until it clears, for a total of
@ 0x800 * 64 = 0x20000 bytes (128 KiB). Afterwards 0xA is written to SHA_CNT
@ for the final round and bit 0 is polled once more.
@
@ In:       nothing
@ Out:      r0 = timer 0 value | (timer 1 value << 16)
@ Clobbers: r1, r2, r3, r12, condition flags (r4-r8 are saved and restored)
@
@ IRQs are masked (CPSR bit 7 set) for the duration of the measurement and
@ are unconditionally unmasked again before returning, regardless of the
@ state on entry.
@
@ NOTE: the instructions between "Start timer 0" and "Stop timer 0" are part
@ of the measured interval. Do not reorder or add instructions there.
@ NOTE(review): five registers (20 bytes) are pushed, so sp is not 8-byte
@ aligned while this routine runs. IRQs are still enabled right after the
@ push and right before the pop -- confirm the IRQ handler tolerates this.
@ ---------------------------------------------------------------------------
measureShaCpu:
stmfd sp!, {r4-r8} @ Save the callee-saved registers used below
ldr r4, =0xAABBCDEF @ Fill pattern (same value the DMA variants use as fill data)
mov r5, r4
mov r6, r4
mov r7, r4 @ r4-r7 = four copies of the pattern, one STM worth of data
mrs r3, cpsr
mov r0, #0 @ r0 = 0: stop value and counter reset value
orr r3, r3, #1u<<7
ldr r1, =0x10003000 @ r1 = timer register base
msr cpsr_c, r3 @ Mask IRQs
strh r0, [r1, #2] @ Stop timer 0
strh r0, [r1, #6] @ Stop timer 1
strh r0, [r1, #0] @ Set timer 0 val to 0
mov r3, #0x84 @ Timer start with count up
strh r0, [r1, #4] @ Set timer 1 val to 0
ldr r2, =0x1000A000 @ r2 = SHA register base (SHA_CNT at offset 0)
strh r3, [r1, #6] @ Start count up timer 1 (presumably ticks on timer 0 overflow -- confirm)
mov r12, #0x80 @ Timer start value for timer 0
mov r3, #5 @ SHA_CNT value used to start hashing
strh r12, [r1, #2] @ Start timer 0 -- measured interval begins
str r3, [r2] @ Start SHA
add r12, r2, #0x80 @ r12 = SHA input FIFO
mov r3, #0x800 @ r3 = number of 64-byte blocks left to write
measureShaCpu_block_lp:
stm r12, {r4-r7} @ 4 words to the FIFO (no writeback, r12 stays put)
stm r12, {r4-r7} @ 8 words
stm r12, {r4-r7} @ 12 words
stm r12, {r4-r7} @ 16 words = one 64-byte block
measureShaCpu_round_lp:
ldr r8, [r2] @ Get SHA_CNT
tst r8, #1 @ Bit 0 still set?
bne measureShaCpu_round_lp @ Yes: keep polling until this block is consumed
subs r3, r3, #1 @ One block done
bne measureShaCpu_block_lp @ More blocks left
mov r12, #0xA @ SHA_CNT value for the final round
str r12, [r2] @ SHA final round
measureShaCpu_final_round_lp:
ldr r12, [r2] @ Get SHA_CNT
tst r12, #1 @ Bit 0 still set?
bne measureShaCpu_final_round_lp @ Yes: keep polling until the final round is done
strh r0, [r1, #2] @ Stop timer 0 -- measured interval ends
strh r0, [r1, #6] @ Stop timer 1
mrs r2, cpsr
bic r2, r2, #1u<<7
msr cpsr_c, r2 @ Unmask IRQs
ldrh r0, [r1, #0] @ Get timer 0 val (low halfword of the result)
ldrh r1, [r1, #4] @ Get timer 1 val (high halfword of the result)
orr r0, r0, r1, lsl #16 @ Combine both halves into one 32-bit count
ldmfd sp!, {r4-r8} @ Restore the callee-saved registers
bx lr
.pool
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment