Skip to content

Instantly share code, notes, and snippets.

@mzero
Created December 26, 2021 05:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mzero/89955e14d41d7e37a439ba806746f632 to your computer and use it in GitHub Desktop.
Save mzero/89955e14d41d7e37a439ba806746f632 to your computer and use it in GitHub Desktop.
Sketch for Circuit Playground Express board demonstrating bug in FreeTouch library
/* DMA Stall
This sketch will demonstrate how the FreeTouch library code makes reads to
PTC peripheral registers without the required Read-Synchronization. While
such reads succeed, they stall the AHB-APB Bridge C.
This is normally not an issue - but if there are other accesses going through
that bridge, then they will be stalled as well. A common example is to use the
DMAC to transfer data to the DAC or from the ADC. These peripherals are on
the same bridge (C). Stalls caused by non-synchronized reading seem to be on
the order of 185µs per read. Even if the DMAC is being used to transfer data
at slow audio rates, say 48kHz, this stall is enough to drop 8 samples. Since
FreeTouch makes such reads in a spin loop waiting for the PTC conversion to
be ready, it causes significant distortion to audio data.
This bridge stall can be induced with any peripheral on the C bridge that has
a Read Synchronized register. To induce the stall:
a) the peripheral needs to be clocked with a clock slower than GCLK0
b) make the register read without first performing the READREQ sequence
Note that if the peripheral is clocked via GCLK0 - as most are by default -
then the read made without the READREQ sequence will still stall the bridge,
but it will be for a very short time (docs. imply <8 clocks). This won't
materially affect DMA audio.
The PTC must be clocked at 4MHz, and this is achieved by setting GCLK1
to use the 8MHz clock source and dividing down. FreeTouch sets this up.
We can demonstrate that any peripheral's Read Synchronized register can cause
this by clocking TC3 with the same clock.
------
This sketch will generate a simple triangle waveform out the DAC via DMA. Then
pressing the A button will cycle between these actions performed every 50ms:
1) do nothing
2) make a touch reading via readCap()
3) make 20 reads to a PTC Read-Synchronized register
4~7) make 20 reads to a TC3 Read-Synchronized register as:
4) TC3 clocked slow like PTC, no read request
5) TC3 clocked slow like PTC, with read request
6) TC3 clocked fast with GCLK0, no read request
7) TC3 clocked fast with GCLK0, with read request
Options 2, 3, and 4 will all cause audible distortion in the audio.
------
N.B.: Sketch waits for serial monitor
*/
#include <Adafruit_CircuitPlayground.h>
#include <Adafruit_ZeroDMA.h>
#include <initializer_list>
// *** Forward declarations
// Entry points of the two namespaces defined later in this file, declared here
// so that the top-level setup() and loop() can call them.
namespace Action {
// Select the first action and announce it on the serial port.
void setup();
// Run the currently selected action once.
void perform();
// Advance to the next action, wrapping back to the first after the last.
void selectNext();
}
namespace Audio {
// Bring up the sample timer, the DAC and the DMA job that feeds it.
void setup();
}
// ** Main routines
// Arduino entry point: bring up the board and the serial console, print the
// usage banner, then initialise the action selector and the audio pipeline.
void setup() {
  CircuitPlayground.begin();

  Serial.begin(115200);
  // Block until a serial monitor is attached so the banner is not lost.
  while (!Serial) {
  }

  Serial.println("DMA Stall Demonstration");
  Serial.println(" Button A/Left/D4: Change the 50ms loop action");
  Serial.println(" Button B/Right/D5: Toggle speaker on/off");
  Serial.println();

  // Action first, audio second: same order as the banner describes them.
  Action::setup();
  Audio::setup();
}
// Arduino main loop.
//
//  * Left button:  step to the next loop action, then wait for release.
//  * Right button: toggle the speaker flag (and, on the Circuit Playground M0
//                  build, drive pin 11 accordingly), then wait for release.
//  * Every 50 ms:  run the currently selected action once.
//
// The 50 ms schedule is kept in unsigned arithmetic and compared by
// subtraction, so it keeps working when the millis() counter wraps around.
// Storing millis() in a signed long and comparing absolute values (as this
// function previously did) misbehaves once the counter passes LONG_MAX.
void loop() {
  if (CircuitPlayground.leftButton()) {
    Action::selectNext();
    // Crude debounce: poll until the button is released.
    while (CircuitPlayground.leftButton())
      delay(20);
  }

  if (CircuitPlayground.rightButton()) {
    static bool speakerOn = true;
    speakerOn = !speakerOn;
#ifdef ADAFRUIT_CIRCUITPLAYGROUND_M0
    // NOTE(review): pin 11 is presumably the speaker enable line on this
    // board; confirm against the board variant.
    pinMode(11, OUTPUT);
    digitalWrite(11, speakerOn ? HIGH : LOW);
#endif
    while (CircuitPlayground.rightButton())
      delay(20);
  }

  static unsigned long next_action_time = 0;
  const unsigned long action_period = 50;
  const unsigned long now = millis();

  // (long)(a - b) > 0 means "a is later than b", even across a counter wrap.
  if (static_cast<long>(now - next_action_time) > 0) {
    next_action_time += action_period;
    // If we fell behind (e.g. while waiting on a button), do not try to catch
    // up with a burst of actions; restart the schedule from now.
    if (static_cast<long>(next_action_time - now) <= 0)
      next_action_time = now + action_period;
    Action::perform();
  }
}
//
// ============= ACTIONS =============
//
// These are the seven different things the main loop() can do periodically
// In theory, you'd want none of these to affect the DMA based audio at all,
// since the DMA audio should happen all in the background. However, some of
// these possible actions stall the bridge, and cause DMA to stutter.
//
namespace Action {
// The selectable loop actions, in the order selectNext() cycles through them.
// ACT_BEGIN aliases the first real action and ACT_END is one past the last, so
// the valid range is [ACT_BEGIN, ACT_END).
enum action_t {
  ACT_BEGIN,
  ACT_NONE = ACT_BEGIN,
  ACT_READCAP,
  ACT_PTC_READ,
  ACT_TC3_READ_SLOW_NOSYNC,
  ACT_TC3_READ_SLOW_SYNC,
  ACT_TC3_READ_FAST_NOSYNC,
  ACT_TC3_READ_FAST_SYNC,
  ACT_END
};

// True for the two TC3 actions that run the timer from the fast clock.
bool isTC3Fast(action_t a) {
  switch (a) {
    case ACT_TC3_READ_FAST_NOSYNC:
    case ACT_TC3_READ_FAST_SYNC:
      return true;
    default:
      return false;
  }
}

// True for the two TC3 actions that issue a read request before reading.
bool isTC3Sync(action_t a) {
  switch (a) {
    case ACT_TC3_READ_SLOW_SYNC:
    case ACT_TC3_READ_FAST_SYNC:
      return true;
    default:
      return false;
  }
}
// Spin until TC3 reports that it is no longer busy synchronizing.
inline void syncTC3() {
  while (TC3->COUNT16.STATUS.bit.SYNCBUSY) {
  }
}
// Spin until the clock generator reports that it is no longer busy synchronizing.
inline void syncGCLK() {
  while (GCLK->STATUS.bit.SYNCBUSY) {
  }
}
// Re-clock TC3 from GCLK0 (fast == true) or GCLK1 (fast == false), then reset
// and re-enable the timer so that it is running on the newly selected clock.
// Every register write is followed by a busy-wait on the matching SYNCBUSY flag.
void setupTC3Clock(bool fast) {
PM->APBCMASK.bit.TC3_ = 1; // power timer
TC3->COUNT16.CTRLA.reg &= ~TC_CTRLA_ENABLE; // disable timer
syncTC3();
// Writing only the ID (no CLKEN, no generator) switches this peripheral clock
// off before it is attached to a different generator below.
GCLK->CLKCTRL.reg = GCLK_CLKCTRL_ID(GCM_TCC2_TC3); // disable the clock
syncGCLK();
GCLK->CLKCTRL.reg = (uint16_t)( // reenable clock
GCLK_CLKCTRL_CLKEN
| (fast ? GCLK_CLKCTRL_GEN_GCLK0 : GCLK_CLKCTRL_GEN_GCLK1)
| GCLK_CLKCTRL_ID(GCM_TCC2_TC3)
);
syncGCLK();
TC3->COUNT16.CTRLA.reg = TC_CTRLA_SWRST; // reset timer
syncTC3();
TC3->COUNT16.CTRLA.reg |= TC_CTRLA_ENABLE; // enable timer
syncTC3();
}
// Read TC3's COUNT register, which is a Read-Synchronized register.
//
// sync == false: read it directly. This is the access pattern the sketch
//                exists to demonstrate.
// sync == true:  issue a read request first and wait for the timer to finish
//                synchronizing, then read.
//
// Returns the value read from COUNT.
uint16_t readTC3(bool sync) {
  if (!sync) {
    return TC3->COUNT16.COUNT.reg;
  }

  // 0x10 is presumably the offset of COUNT within the TC register block;
  // confirm against the datasheet.
  TC3->COUNT16.READREQ.reg = TC_READREQ_RREQ | TC_READREQ_ADDR(0x10);
  syncTC3();
  return TC3->COUNT16.COUNT.reg;
}
// Announce the newly selected action on the serial port and do whatever
// one-time preparation it needs. Only the TC3 actions need preparation: the
// timer is re-clocked fast or slow to match the action.
//
// Fixes relative to the earlier version:
//  * the fallback branch was spelled `defualt:`, which the compiler treats as
//    an ordinary (unused) label, so it was never selected by the switch; it is
//    now a real `default:` case;
//  * the messages now name the call and the register that perform() actually
//    uses: readCap() and PTC->CONVCONTROL.
void setupAction(action_t a) {
  switch (a) {
    case ACT_NONE:
      Serial.println("Loop action: none, just playing audio");
      break;

    case ACT_READCAP:
      Serial.println("Loop action: call readCap()");
      break;

    case ACT_PTC_READ:
      Serial.println("Loop action: read the PTC->CONVCONTROL register");
      break;

    case ACT_TC3_READ_SLOW_NOSYNC:
    case ACT_TC3_READ_SLOW_SYNC:
    case ACT_TC3_READ_FAST_NOSYNC:
    case ACT_TC3_READ_FAST_SYNC:
      Serial.printf("Loop action: %s read from TC3, clocked %s\n",
                    isTC3Sync(a) ? "sync" : "no sync",
                    isTC3Fast(a) ? "fast" : "slow");
      setupTC3Clock(isTC3Fast(a));
      break;

    default:
      // Reached only for values outside [ACT_BEGIN, ACT_END).
      Serial.println("Loop action: unknown!");
      break;
  }
}
// The currently selected loop action. Written by setup() and selectNext(),
// read by perform().
action_t action;
// Start with the first action in the list and run its one-time preparation.
void setup() {
  setupAction(action = ACT_BEGIN);
}
void selectNext() {
action = action_t((int)action + 1);
if (action == ACT_END) action = ACT_BEGIN;
setupAction(action);
}
void perform() {
switch (action) {
case ACT_NONE:
break;
case ACT_READCAP:
CircuitPlayground.readCap(A1);
break;
case ACT_PTC_READ:
for (int i = 20; i; --i)
QTOUCH_PTC->CONVCONTROL.reg;
break;
case ACT_TC3_READ_SLOW_NOSYNC:
case ACT_TC3_READ_SLOW_SYNC:
case ACT_TC3_READ_FAST_NOSYNC:
case ACT_TC3_READ_FAST_SYNC:
for (int i = 20; i; --i)
readTC3(isTC3Sync(action));
break;
default:
Serial.println("huh?");
break;
}
}
} // namespace Action
//
// ============= AUDIO =============
//
// This sets up a double buffered audio output to the DAC via DMA. After each
// buffer is sent, the DMA interrupt is used to refill the buffer just sent
// with the next set of samples. A simple decaying tone at 600Hz is generated
// every 1.5 seconds.
//
namespace Audio {
// Desired audio sample rate in Hz.
constexpr float SAMPLE_RATE_TARGET = 48000.0;
// Whole number of CPU clocks per sample (integer division, so it rounds down).
constexpr long SAMPLE_RATE_CPU_DIVISOR = F_CPU / (long)SAMPLE_RATE_TARGET;
// Sample rate actually obtained with that integer divisor; equals the target
// only when F_CPU is an exact multiple of it.
constexpr float SAMPLE_RATE = (float)F_CPU / (float)SAMPLE_RATE_CPU_DIVISOR;
// Configure TC4 as the sample clock: clocked from GCLK0, 16-bit, match-frequency
// mode, no prescaling, with a period of SAMPLE_RATE_CPU_DIVISOR clocks. Each
// register write is followed by a busy-wait on the matching SYNCBUSY flag.
void setupTimer() {
// TC4 is used because it has a WO[] output mappable to a pin on the CPE
pinPeripheral(A7, PIO_TIMER);
// Outputs 1/2 the sample clock on A7, useful for an oscilloscope.
GCLK->CLKCTRL.reg = (uint16_t)(GCLK_CLKCTRL_CLKEN | GCLK_CLKCTRL_GEN_GCLK0 |
GCLK_CLKCTRL_ID(GCM_TC4_TC5));
while (GCLK->STATUS.bit.SYNCBUSY)
;
TC4->COUNT16.CTRLA.reg &= ~TC_CTRLA_ENABLE; // Disable TCx to config it
while (TC4->COUNT16.STATUS.bit.SYNCBUSY)
;
TC4->COUNT16.CTRLA.reg = // Configure timer counter
TC_CTRLA_MODE_COUNT16 | // 16-bit counter mode
TC_CTRLA_WAVEGEN_MFRQ | // Match Frequency mode
TC_CTRLA_PRESCALER_DIV1; // 1:1 Prescale
while (TC4->COUNT16.STATUS.bit.SYNCBUSY)
;
// The compare value is the period minus one.
TC4->COUNT16.CC[0].reg = SAMPLE_RATE_CPU_DIVISOR - 1;
while (TC4->COUNT16.STATUS.bit.SYNCBUSY)
;
TC4->COUNT16.CTRLA.reg |= TC_CTRLA_ENABLE; // Re-enable TCx
while (TC4->COUNT16.STATUS.bit.SYNCBUSY)
;
}
// One DAC sample. Only the low SAMPLE_BITS bits are meaningful.
using sample_t = uint16_t;
const int SAMPLE_BITS = 10; // DAC on SAM D21 is only 10 bits
// Mid-scale code, used as the waveform's zero level (512 for 10 bits).
const sample_t SAMPLE_ZERO = 1 << (SAMPLE_BITS - 1);
// Peak deviation from SAMPLE_ZERO (511 for 10 bits).
const sample_t SAMPLE_UNIT = SAMPLE_ZERO - 1;
// Codes for the waveform's positive and negative peaks (1023 and 1).
const sample_t SAMPLE_POS_ONE = SAMPLE_ZERO + SAMPLE_UNIT;
const sample_t SAMPLE_NEG_ONE = SAMPLE_ZERO - SAMPLE_UNIT;
// Initialise the DAC on A0 by way of the Arduino core, park it at mid-scale,
// then select reference 0 and wait for the DAC to finish synchronizing.
void setupDAC() {
analogWriteResolution(SAMPLE_BITS); // Let the Arduino core initialize the DAC
analogWrite(A0, SAMPLE_ZERO); // rather than hand-coding the full register setup.
DAC->CTRLB.bit.REFSEL = 0; // VMAX = 1.0V
while (DAC->STATUS.bit.SYNCBUSY)
;
}
// Samples in one quarter of the triangle wave's period.
constexpr int TONE_QUARTER_PERIOD_SAMPLES = 20;
// Samples in one full period of the triangle wave.
constexpr int TONE_PERIOD_SAMPLES = 4 * TONE_QUARTER_PERIOD_SAMPLES;
// Integral so it is easy to see and measure on an oscilloscope.
constexpr float TONE_FREQUENCY = SAMPLE_RATE / TONE_PERIOD_SAMPLES; // ~D5
// Samples in one tone burst: 1.5 seconds' worth, truncated to an integer.
constexpr int TONE_DURATION_SAMPLES = SAMPLE_RATE * 1.5;
// Write the next `count` samples of the test signal into `buf`.
//
// The signal is a triangle wave of TONE_PERIOD_SAMPLES samples per cycle with a
// linear decay envelope that restarts every TONE_DURATION_SAMPLES samples. The
// position within the current burst is kept in a function-local static, so
// consecutive calls produce one continuous stream.
//
// buf:   destination; must have room for `count` samples.
// count: number of samples to generate. A null `buf` or a non-positive `count`
//        is ignored and leaves the stream position untouched.
//
// The burst position is wrapped for every sample rather than once per call.
// With a wrap only at the end of the call, a buffer that straddles the end of
// a burst would see n >= TONE_DURATION_SAMPLES, turning the envelope factor
// negative and storing a negative value into an unsigned sample.
void fillAudioSamples(sample_t* buf, int count) {
  static int sample_num = 0;  // position of buf[0] within the current burst

  if (buf == nullptr || count <= 0) return;

  for (int i = 0; i < count; ++i) {
    const int n = (sample_num + i) % TONE_DURATION_SAMPLES;  // position in burst
    const int c = n % TONE_PERIOD_SAMPLES;          // sample number in the cycle
    const int q = c / TONE_QUARTER_PERIOD_SAMPLES;  // which quarter cycle it is
    const int f = c % TONE_QUARTER_PERIOD_SAMPLES;  // fraction through quarter

    long v = SAMPLE_UNIT * f / TONE_QUARTER_PERIOD_SAMPLES;  // ramp, 0..SAMPLE_UNIT

    switch (q) {  // assemble the triangle wave from four ramps
      case 0: v = SAMPLE_ZERO + v;    break;  // rising from zero
      case 1: v = SAMPLE_POS_ONE - v; break;  // falling from the positive peak
      case 2: v = SAMPLE_ZERO - v;    break;  // falling from zero
      case 3: v = SAMPLE_NEG_ONE + v; break;  // rising from the negative peak
    }

    // Linear decay envelope over the length of the burst.
    // NOTE(review): this scales the whole DAC code, SAMPLE_ZERO offset
    // included, so the output drifts toward 0 rather than toward mid-scale as
    // the burst decays. Confirm that is intended.
    v = v * (TONE_DURATION_SAMPLES - n) / TONE_DURATION_SAMPLES;

    buf[i] = (sample_t)v;
  }

  sample_num = (sample_num + count) % TONE_DURATION_SAMPLES;
}
// Number of samples in each of the two DMA buffers.
constexpr int SAMPLE_BUFFER_COUNT = 96;
// The pair of buffers used for double buffering: while one is being sent to
// the DAC, the other is refilled by dmaDoneCallback().
sample_t sample_buffer_a[SAMPLE_BUFFER_COUNT];
sample_t sample_buffer_b[SAMPLE_BUFFER_COUNT];
// DMA callback, registered in setupDMA(). Refills one of the two sample
// buffers with the next stretch of audio, alternating between them on
// successive calls and starting with buffer A.
void dmaDoneCallback(Adafruit_ZeroDMA*) {
  static bool refill_first = true;

  if (refill_first) {
    fillAudioSamples(sample_buffer_a, SAMPLE_BUFFER_COUNT);
  } else {
    fillAudioSamples(sample_buffer_b, SAMPLE_BUFFER_COUNT);
  }

  refill_first = !refill_first;
}
void setupDMA() {
static Adafruit_ZeroDMA dma;
dma.allocate();
dma.setTrigger(TC4_DMAC_ID_OVF);
dma.setAction(DMA_TRIGGER_ACTON_BEAT);
dma.setPriority(DMA_PRIORITY_3); // highest priority for DMAC
for ( auto& buf : { sample_buffer_a, sample_buffer_b }) {
fillAudioSamples(buf, SAMPLE_BUFFER_COUNT);
auto desc = dma.addDescriptor(
buf,
(void *)&DAC->DATABUF.reg,
SAMPLE_BUFFER_COUNT,
DMA_BEAT_SIZE_HWORD,
true,
false
);
desc->BTCTRL.bit.BLOCKACT = DMA_BLOCK_ACTION_INT;
// callback after each block, so we can refill its buffer
}
dma.loop(true);
dma.setCallback(dmaDoneCallback);
dma.startJob();
}
// Bring up the audio path in this order: the sample timer, then the DAC, then
// the DMA job that streams samples to it.
void setup() {
  setupTimer();  // TC4 sample clock
  setupDAC();    // DAC output on A0
  setupDMA();    // starts the looping transfer
}
} // namespace Audio
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment