Skip to content

Instantly share code, notes, and snippets.

@ericfont
Last active December 13, 2022 06:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericfont/4195abf303e1a39846176ae548c77a78 to your computer and use it in GitHub Desktop.
Save ericfont/4195abf303e1a39846176ae548c77a78 to your computer and use it in GitHub Desktop.
Teensy 4.1 QSPI PSRAM test at 132 MHz, comparing linear access speed with cache-thrashing (pathological) access; based upon https://github.com/PaulStoffregen/teensy41_psram_memtest/blob/master/teensy41_psram_memtest.ino
// Size of the external PSRAM in MBytes. Defined outside this sketch
// (presumably by the Teensy core -- confirm); setup() treats 0 as "no PSRAM".
extern "C" uint8_t external_psram_size;

// Set to true at the end of setup(); loop() blinks the LED rapidly while it
// is still false.
bool memory_ok = false;

// Bounds of the PSRAM region under test, as the half-open range
// [memory_begin, memory_end). Assigned in setup().
uint32_t *memory_begin = nullptr;
uint32_t *memory_end = nullptr;

// Prototypes for the functions this sketch defines further down, so that
// setup() can call them before their definitions.
bool fail_message(volatile uint32_t *location, uint32_t actual, uint32_t expected);
bool check_fixed_pattern_linear(uint32_t pattern);
bool check_fixed_pattern_in_cache(uint32_t pattern);
bool check_fixed_pattern_pathological(uint32_t pattern);
// Arduino entry point.
//
// Waits for the serial port, reports the PSRAM size, reclocks FlexSPI2 (the
// PSRAM bus) to 132 MHz, prints the resulting clock configuration and then
// runs the three access-pattern tests over the whole external PSRAM.
//
// memory_ok is set to true only when every test returns true. On the first
// failing test (or when no PSRAM is present) setup() returns early and
// memory_ok stays false, which makes loop() blink the LED.
void setup()
{
  while (!Serial) ; // wait
  pinMode(13, OUTPUT);

  const uint8_t size = external_psram_size;
  Serial.printf("EXTMEM Memory Test, %d Mbyte\n", size);
  if (size == 0) return;

  // Source clock in MHz for each possible FLEXSPI2_CLK_SEL value.
  const float clocks[4] = {396.0f, 720.0f, 664.62f, 528.0f};

  // Set clock speed for PSRAM to 132 MHz:
  //   FLEXSPI2_CLK_SEL = 3 selects PLL2 at 528 MHz,
  //   FLEXSPI2_PODF    = 3 divides by 4 (divider is PODF + 1).
  // Done as a single read-modify-write so the register never holds a
  // half-updated clock selection between a "clear" and a "set" step.
  CCM_CBCMR = (CCM_CBCMR & ~(CCM_CBCMR_FLEXSPI2_PODF_MASK | CCM_CBCMR_FLEXSPI2_CLK_SEL_MASK))
            | CCM_CBCMR_FLEXSPI2_PODF(3) | CCM_CBCMR_FLEXSPI2_CLK_SEL(3); // 132 MHz

  // Read the register back once and decode the two FlexSPI2 fields from it.
  const uint32_t cbcmr = CCM_CBCMR;
  const uint32_t clk_sel = (cbcmr >> 8) & 3;   // FLEXSPI2_CLK_SEL, bits 9..8
  const uint32_t podf = (cbcmr >> 29) & 7;     // FLEXSPI2_PODF, bits 31..29

  Serial.print(" FLEXSPI2_CLK_SEL = ");
  Serial.println(clk_sel, BIN);
  Serial.print(" FLEXSPI2_PODF = ");
  Serial.println(podf, BIN);

  const float frequency = clocks[clk_sel] / (float)(podf + 1);
  Serial.printf(" CCM_CBCMR=%08lX (%.1f MHz)\n", (unsigned long)cbcmr, frequency);

  // The tests below operate on [memory_begin, memory_end).
  memory_begin = (uint32_t *)(0x70000000);
  memory_end = (uint32_t *)(0x70000000 + size * 1048576);

  // Stop at the first failing test; fail_message() has already reported it.
  const uint32_t pattern = 0x5A698421;
  if (!check_fixed_pattern_linear(pattern)) return;
  if (!check_fixed_pattern_in_cache(pattern)) return;
  if (!check_fixed_pattern_pathological(pattern)) return;

  Serial.println("All memory tests passed :-)");
  memory_ok = true;
}
bool fail_message(volatile uint32_t *location, uint32_t actual, uint32_t expected)
{
Serial.printf(" Error at %08X, read %08X but expected %08X\n",
(uint32_t)location, actual, expected);
return false;
}
// Linear-access benchmark and check.
//
// Writes `pattern` to every 32-bit word of [memory_begin, memory_end) in
// ascending address order, calls arm_dcache_flush_delete() on the whole
// region (presumably so the read pass is served from PSRAM rather than the
// data cache -- confirm against the core), then reads every word back in the
// same order and compares it with `pattern`.
//
// Prints the time taken by the write, flush and read phases in milliseconds.
// Returns true when every word matched; on the first mismatch it reports via
// fail_message() and returns false without printing timings.
bool check_fixed_pattern_linear(uint32_t pattern)
{
  Serial.printf("linear addressing test with fixed pattern %08lX: ", (unsigned long)pattern);

  // Size of the tested region in bytes, from pointer arithmetic.
  const uint32_t region_bytes =
      static_cast<uint32_t>((memory_end - memory_begin) * sizeof(uint32_t));

  const unsigned long startmillis = millis();
  for (volatile uint32_t *p = memory_begin; p < memory_end; p++) {
    *p = pattern;
  }
  const unsigned long finishwritemillis = millis();

  arm_dcache_flush_delete((void *)memory_begin, region_bytes);
  const unsigned long finishflushmillis = millis();

  for (volatile uint32_t *p = memory_begin; p < memory_end; p++) {
    const uint32_t actual = *p;
    if (actual != pattern) return fail_message(p, actual, pattern);
  }
  const unsigned long finishreadmillis = millis();

  // The time differences are unsigned long, so they are printed with %lu.
  Serial.printf("Write %lu milliseconds, flush %lu milliseconds, read %lu milliseconds\n",
                finishwritemillis - startmillis,
                finishflushmillis - finishwritemillis,
                finishreadmillis - finishflushmillis);
  return true;
}
// Small-window benchmark and check.
//
// Performs the same total number of word accesses as a pass over the whole
// PSRAM, but confines them to the last nAccessPerRepeat words (2048 words =
// 8 KB) just below memory_end, repeating that window nRepeat times. The
// original author notes the 32 KB cache could hold 8192 uint32s; only a
// quarter of that is used here. No cache flush is done between the write and
// read phases.
//
// Prints the time taken by the write and read phases in milliseconds.
// Returns true when every word read back equals `pattern`; on the first
// mismatch it reports via fail_message() and returns false.
bool check_fixed_pattern_in_cache(uint32_t pattern)
{
  const uint32_t nAccessPerRepeat = 2048; // words in the window (8192 / 4)

  // Total words in PSRAM divided by the window size, so the number of
  // accesses matches a pass over the entire memory.
  const uint32_t nRepeat = external_psram_size * 1048576 / 4 / nAccessPerRepeat;

  volatile uint32_t *const window_begin = memory_end - nAccessPerRepeat;

  Serial.printf("in cache addressing test with fixed pattern %08lX: ", (unsigned long)pattern);

  const unsigned long startmillis = millis();
  for (uint32_t i = 0; i < nRepeat; i++) {
    for (volatile uint32_t *p = window_begin; p < memory_end; p++) {
      *p = pattern;
    }
  }
  const unsigned long finishwritemillis = millis();

  for (uint32_t i = 0; i < nRepeat; i++) {
    for (volatile uint32_t *p = window_begin; p < memory_end; p++) {
      const uint32_t actual = *p;
      if (actual != pattern) return fail_message(p, actual, pattern);
    }
  }
  const unsigned long finishreadmillis = millis();

  // The time differences are unsigned long, so they are printed with %lu.
  Serial.printf("Write %lu milliseconds, read %lu milliseconds\n",
                finishwritemillis - startmillis,
                finishreadmillis - finishwritemillis);
  return true;
}
// Strided ("pathological") benchmark and check.
//
// Covers the same words as check_fixed_pattern_linear, but in 8 passes with a
// stride of 8 words (32 bytes): pass k touches words k, k+8, k+16, ... so
// that consecutive accesses land 32 bytes apart, which the original author
// describes as touching a different 32-byte cache line each time.
//
// Writes `pattern` in that order, calls arm_dcache_flush_delete() on the
// whole region, then reads back in the same strided order and compares.
//
// Prints the time taken by the write, flush and read phases in milliseconds.
// Returns true when every word matched; on the first mismatch it reports via
// fail_message() and returns false without printing timings.
bool check_fixed_pattern_pathological(uint32_t pattern)
{
  const uint32_t kWordsPerStride = 8; // 8 * 4 bytes = 32 bytes between accesses

  Serial.printf("pathological addr test with fixed pattern %08lX: ", (unsigned long)pattern);

  // Size of the tested region in bytes, from pointer arithmetic.
  const uint32_t region_bytes =
      static_cast<uint32_t>((memory_end - memory_begin) * sizeof(uint32_t));

  const unsigned long startmillis = millis();
  for (uint32_t offset = 0; offset < kWordsPerStride; offset++) {
    for (volatile uint32_t *p = memory_begin + offset; p < memory_end; p += kWordsPerStride) {
      *p = pattern;
    }
  }
  const unsigned long finishwritemillis = millis();

  arm_dcache_flush_delete((void *)memory_begin, region_bytes);
  const unsigned long finishflushmillis = millis();

  for (uint32_t offset = 0; offset < kWordsPerStride; offset++) {
    for (volatile uint32_t *p = memory_begin + offset; p < memory_end; p += kWordsPerStride) {
      const uint32_t actual = *p;
      if (actual != pattern) return fail_message(p, actual, pattern);
    }
  }
  const unsigned long finishreadmillis = millis();

  // The time differences are unsigned long, so they are printed with %lu.
  Serial.printf("Write %lu milliseconds, flush %lu milliseconds, read %lu milliseconds\n",
                finishwritemillis - startmillis,
                finishflushmillis - finishwritemillis,
                finishreadmillis - finishflushmillis);
  return true;
}
// Arduino main loop: status indication on the LED at pin 13.
//
// Each iteration drives the LED high, waits 100 ms, then -- only while
// memory_ok is false -- drives it low, and waits another 100 ms. The LED
// therefore stays lit when memory_ok is true and blinks rapidly otherwise.
void loop()
{
  constexpr uint8_t kLedPin = 13;
  constexpr uint32_t kHalfPeriodMs = 100;

  digitalWrite(kLedPin, HIGH);
  delay(kHalfPeriodMs);

  if (memory_ok == false) {
    digitalWrite(kLedPin, LOW); // rapid blink if any test fails
  }
  delay(kHalfPeriodMs);
}
@ericfont
Copy link
Author

EXTMEM Memory Test, 16 Mbyte
FLEXSPI2_CLK_SEL = 11
LPSPI_PODF = 11
CCM_CBCMR=75AE8304 (132.0 MHz)
linear addressing test with fixed pattern 5A698421: Write 456 milliseconds, flush 7 milliseconds, read 407 milliseconds
in cache addressing test with fixed pattern 5A698421: Write 14 milliseconds, read 21 milliseconds
pathological addr test with fixed pattern 5A698421: Write 6175 milliseconds, flush 7 milliseconds, read 3203 milliseconds

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment