Skip to content

Instantly share code, notes, and snippets.

@salkinium
Last active April 17, 2021 08:21
Show Gist options
  • Save salkinium/001beaad049b3a58ac21be20b2177398 to your computer and use it in GitHub Desktop.
Save salkinium/001beaad049b3a58ac21be20b2177398 to your computer and use it in GitHub Desktop.
20 port parallel UART via timer-triggered DMA transfers from/to GPIO
/*
* Copyright (c) 2020, Niklas Hauser
*
* This file is part of the ELVA project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
// ----------------------------------------------------------------------------
#include "port_uart.hpp"
#include <modm/architecture/driver/atomic/queue.hpp>
#include <bit>
using namespace modm::platform;
using RxTim = Timer8;
using TxTim = Timer1;
/// Thin wrapper around `GpioSet` whose only job is to offer the per-port
/// `mask()` helper as a public, compile-time callable member.
template< class... Gpios >
class GpioSetPublic : public GpioSet<Gpios...>
{
public:
	/// @param id	index of the GPIO port to query (this file uses 0, 1 and 2)
	/// @return	the value of `GpioSet<Gpios...>::mask(id)` as a 16-bit pin mask
	static constexpr auto
	mask(uint8_t id) -> uint16_t
	{
		return GpioSet<Gpios...>::mask(id);
	}
};
// Receiver Pins
// Receiver Pins
// The software-sampled receive pins, listed in logical port order. The
// commented-out entries are the receive pins that enable() connects to a
// hardware U(S)ART instead, so they are not part of the sampled set.
using RxPins = GpioSetPublic
<
// GpioA3, // => UART2
GpioB3,
GpioA8,
// GpioA10, // => UART1
// GpioC7, // => UART6
GpioA7,
GpioB9,
GpioC9,
GpioC8,
GpioA11,
GpioB15,
GpioB5,
GpioB1,
GpioC1,
// GpioA1, // => UART4
// GpioD2, // => UART5
// GpioC11, // => UART3
GpioC14,
GpioC2,
GpioC3
>;
/* Squashing all GPIO masks into one mask:
0000100110000000 << 13
1000001000101010 << 0
0100001100001110 << 9
1001100000000000000000000
0000000001000001000101010
0100001100001110000000000
1101101101001111000101010
*/
static constexpr uint16_t portMasks[] = {RxPins::mask(0), RxPins::mask(1), RxPins::mask(2)};
static constexpr uint8_t portShifts[] = {13, 0, 9};
static constexpr uint8_t CHANNEL_COUNT{RxPins::width};
static constexpr uint32_t CHANNEL_MASK{
(portMasks[0] << portShifts[0]) |
(portMasks[1] << portShifts[1]) |
(portMasks[2] << portShifts[2])
};
static constexpr uint8_t CHANNEL_WIDTH{modm::leftmostBit(CHANNEL_MASK)};
static_assert(std::popcount(CHANNEL_MASK) == CHANNEL_COUNT, "PortMasks are overlapping!");
template<class IO> constexpr uint8_t _pos()
{
uint8_t channel{0};
const uint8_t pos = portShifts[uint8_t(IO::port)] + IO::pin;
for (uint8_t bit{0}; bit < pos; bit++)
if (CHANNEL_MASK & (1ul << bit)) channel++;
return channel;
}
// Packs one software port into the 16-bit PortPins encoding:
// (RX position in the squashed mask) << 8 | (TX port index) << 4 | TX pin.
template<class TX, class RX> constexpr uint16_t _p()
{
	const auto rx_position = portShifts[uint8_t(RX::port)] + RX::pin;
	const auto tx_port_index = uint8_t(TX::port);
	return (rx_position << 8) | (tx_port_index << 4) | TX::pin;
}
/// Packed description of one logical port.
///
/// Layout of `data`:
///   bits 15..8  position of the RX pin in the squashed channel mask
///   bit  7      set when the port is served by a hardware U(S)ART
///   bits 6..4   index of the TX GPIO port
///   bits 3..0   TX pin number, or the U(S)ART number when bit 7 is set
struct PortPins
{
	const uint16_t data;
	/// @return index of the GPIO port that carries the TX pin
	constexpr uint8_t tx_port() const { return (data & 0x70) >> 4; }
	/// @return TX pin number within its GPIO port
	constexpr uint8_t tx_pin() const { return data & 0xf; }
	/// @return position of the RX pin in the squashed channel mask
	constexpr uint8_t rx_pos() const { return data >> 8; }
	/// @return the hardware U(S)ART number (low nibble) when bit 7 is set, else 0.
	/// The low nibble must not be shifted: shifting a 4-bit value right by 8
	/// always yields 0 and would hide every hardware UART.
	constexpr uint8_t uart() const { return (data & 0x80) ? (data & 0xf) : 0; }
};
// Logical port table, indexed by the port number used throughout PortUart.
// Entries of the form 0x8N have bit 7 set and carry a U(S)ART number N in the
// low nibble; they line up with the ports that enable() connects to
// Usart/Uart peripherals. All other entries are built by _p<TX, RX>() from
// the software TX and RX pins.
static constexpr PortPins ports[] =
{
0x82, // port 0: U(S)ART 2
_p<GpioC4, GpioB3>(),
_p<GpioB10, GpioA8>(),
0x81, // port 3: U(S)ART 1
0x86, // port 4: U(S)ART 6
_p<GpioB6, GpioA7>(),
_p<GpioA6, GpioB9>(),
_p<GpioB8, GpioC9>(),
_p<GpioC5, GpioC8>(),
_p<GpioA12, GpioA11>(),
_p<GpioB14, GpioB15>(),
_p<GpioB4, GpioB5>(),
_p<GpioB2, GpioB1>(),
_p<GpioB0, GpioC1>(),
0x84, // port 14: U(S)ART 4
0x85, // port 15: U(S)ART 5
0x83, // port 16: U(S)ART 3
_p<GpioB7, GpioC14>(),
_p<GpioC15, GpioC2>(),
_p<GpioC0, GpioC3>()
};
// Width in bits of the per-channel bit stream word (a uint64_t in process_rx()).
static constexpr size_t SAMPLE_BUFFER = 64;
// Bits of the stream word kept back from the previous batch; equals the
// 11-bit frame length that process_rx() extracts and masks out.
static constexpr size_t RESERVED = 11;
// Number of new samples captured per DMA buffer and shifted in per batch.
static constexpr size_t SAMPLES = SAMPLE_BUFFER - RESERVED;
/// One receive DMA stream that copies a GPIO port's input data register into
/// two alternating sample buffers of SAMPLES half-words each.
struct DmaRxPort
{
	DMA_Stream_TypeDef *stream;	///< DMA stream registers to program
	GPIO_TypeDef *gpio;			///< GPIO port whose IDR is sampled
	uint8_t channel_prio;		///< value written to the PL field of CR
	uint8_t irq = 0;			///< IRQ number to enable, 0 = no interrupt
	uint8_t irq_prio = 0;		///< NVIC priority used when irq != 0
	modm_aligned(2) uint16_t rx_buffer0[SAMPLES] = {};
	modm_aligned(2) uint16_t rx_buffer1[SAMPLES] = {};

	/// Programs the stream (peripheral address, both memory addresses,
	/// transfer count, control register) and enables it. When `irq` is
	/// non-zero the NVIC line and the transfer-complete interrupt are
	/// enabled as well.
	void
	initialize()
	{
		const bool use_irq = (irq != 0);
		if (use_irq)
		{
			NVIC_SetPriority(IRQn_Type(irq), irq_prio);
			NVIC_EnableIRQ(IRQn_Type(irq));
		}

		stream->PAR = uint32_t(&gpio->IDR);
		stream->M0AR = uint32_t(rx_buffer0);
		stream->M1AR = uint32_t(rx_buffer1);
		stream->NDTR = SAMPLES;

		// channel 7, 0b01 for both memory and peripheral size, memory
		// increment, direction 0b00, circular + double-buffer mode
		const uint32_t config =
			(7 << DMA_SxCR_CHSEL_Pos) |
			(channel_prio << DMA_SxCR_PL_Pos) |
			(0b01 << DMA_SxCR_MSIZE_Pos) |
			(0b01 << DMA_SxCR_PSIZE_Pos) |
			DMA_SxCR_MINC |
			(0b00 << DMA_SxCR_DIR_Pos) |
			DMA_SxCR_CIRC |
			DMA_SxCR_DBM;
		stream->CR = config;
		if (use_irq) stream->CR |= DMA_SxCR_TCIE;
		stream->CR |= DMA_SxCR_EN;
	}

	/// @return rx_buffer0 while the stream's CT flag is set, rx_buffer1
	///         otherwise (presumably the buffer the DMA is not currently
	///         filling — confirm against the DMA double-buffer description).
	uint16_t*
	getBuffer()
	{
		if (stream->CR & DMA_SxCR_CT) return rx_buffer0;
		return rx_buffer1;
	}
};
/// One transmit DMA stream that writes a staged sequence of BSRR words to a
/// GPIO port, one word per bit time, to emit UART frames on several pins
/// at once.
struct DmaTxPort
{
	// 1 start bit, 8 data bits, 1 parity bit, 1 stop bit
	static const size_t BufferSize{11};
	static const size_t TransferSize{BufferSize};
	DMA_Stream_TypeDef *const stream;	///< DMA stream registers to program
	GPIO_TypeDef *const gpio;			///< GPIO port whose BSRR is written
	const uint8_t channel_prio;			///< value written to the PL field of CR
	modm_aligned(4) uint32_t buffer[BufferSize] = {};

	/// Programs the stream to move TransferSize words from `buffer` to the
	/// port's BSRR register and clears the staging buffer. The stream is
	/// not enabled here.
	void
	initialize()
	{
		stream->PAR = uint32_t(&gpio->BSRR);
		stream->M0AR = uint32_t(buffer);
		stream->NDTR = TransferSize;

		// channel 6, 0b10 for both memory and peripheral size, memory
		// increment, direction 0b01
		const uint32_t config =
			(6 << DMA_SxCR_CHSEL_Pos) |
			(channel_prio << DMA_SxCR_PL_Pos) |
			(0b10 << DMA_SxCR_MSIZE_Pos) |
			(0b10 << DMA_SxCR_PSIZE_Pos) |
			DMA_SxCR_MINC |
			(0b01 << DMA_SxCR_DIR_Pos);
		stream->CR = config;
		reset();
	}

	/// Sets the stream's enable bit.
	void
	enable()
	{
		stream->CR |= DMA_SxCR_EN;
	}

	/// Zeroes the whole staging buffer.
	void
	reset()
	{
		for (uint32_t &word : buffer) word = 0;
	}

	/// Merges one frame for `pin` into the staging buffer.
	/// Slot 0 is the start bit (low), slots 1..8 the data bits starting with
	/// the least significant one, slot 9 the parity bit (high when the number
	/// of one bits in `byte` is odd) and slot 10 the stop bit (high).
	/// @param pin	pin number within this GPIO port
	/// @param byte	payload to transmit
	void
	setByte(uint8_t pin, uint8_t byte)
	{
		// BSRR word layout as used here: bit `pin` drives high, bit `pin + 16` drives low
		const uint32_t drive_high = 1ul << pin;
		const uint32_t drive_low = drive_high << 16;

		buffer[0] |= drive_low;

		bool odd_ones{false};
		for (size_t slot{1}; slot <= 8; ++slot)
		{
			const bool is_one = byte & 1;
			byte >>= 1;
			buffer[slot] |= is_one ? drive_high : drive_low;
			odd_ones = (odd_ones != is_one);
		}

		buffer[9] |= odd_ones ? drive_high : drive_low;
		buffer[10] |= drive_high;
	}
};
// Receive streams, one per sampled GPIO port (index 0, 1, 2 = GPIOA, GPIOB, GPIOC).
// Only the last entry names an IRQ; its handler (DMA2_Stream7) runs process_rx()
// for all three ports.
static DmaRxPort dmas_rx[] =
{
{DMA2_Stream2, GPIOA, 0b01}, // TIM8_CH1
{DMA2_Stream3, GPIOB, 0b01}, // TIM8_CH2
// NOTE(review): originally labelled TIM8_CH3, but initializeTimers() enables
// the CC1DE, CC2DE and CC4DE requests of TIM8, so this is presumably TIM8_CH4 —
// confirm against the DMA2 request mapping table.
{DMA2_Stream7, GPIOC, 0b01, DMA2_Stream7_IRQn, 15}, // TIM8_CH4
};
// Transmit streams, one per GPIO port; the index matches PortPins::tx_port()
// (0, 1, 2 = GPIOA, GPIOB, GPIOC). The channel comments agree with the
// CC1DE, CC3DE and CC4DE requests that initializeTimers() enables on TIM1.
static DmaTxPort dmas_tx[] =
{
{DMA2_Stream1, GPIOA, 0b00}, // TIM1_CH1
{DMA2_Stream4, GPIOB, 0b00}, // TIM1_CH4
{DMA2_Stream6, GPIOC, 0b00}, // TIM1_CH3
};
// ----------------------------------------------------------------------------
// Per mask bit: the sampled line level history, newest sample in bit 0.
static modm_fastdata uint64_t stream[CHANNEL_WIDTH];
// Per mask bit: queue of raw received frames, filled by process_rx() and
// drained by PortUart::read().
static modm_fastdata modm::atomic::Queue<uint16_t, 32> data_rx[CHANNEL_WIDTH];
// One bit per logical port, set by PortUart::read() on a framing or parity error.
static modm_fastdata uint32_t has_error{0};
// One bit per mask position (PortPins::rx_pos()) that process_rx() should decode.
static modm_fastdata volatile uint32_t active_mask{0};
// One bit per logical port that has been enabled for software UART operation.
static modm_fastdata volatile uint32_t enable_mask{0};
/// Decodes one batch of SAMPLES parallel GPIO samples into UART frames.
///
/// Step 1 shifts every sample of the three ports into the per-bit `stream`
/// words. Step 2 scans the stream of every active bit for complete 11-bit
/// frames, pushes each frame into that bit's `data_rx` queue and overwrites
/// it with ones so it is not found again. Called from the DMA2_Stream7
/// interrupt handler.
void
process_rx()
{
// Convert the parallel samples into bit streams
// (each port's buffer is chosen by DmaRxPort::getBuffer())
uint16_t *const buffers[] =
{dmas_rx[0].getBuffer(), dmas_rx[1].getBuffer(), dmas_rx[2].getBuffer()};
for (size_t sample{0}; sample < SAMPLES; sample++)
{
uint32_t bit_mask{0};
// squash the GPIO masks into one value
for (size_t ii{0}; ii < 3; ii++)
bit_mask |= uint32_t(buffers[ii][sample] & portMasks[ii]) << portShifts[ii];
// convert the sample into a channel
// Each channel has network order! (requires bit reverse on the payload!)
// The newest sample enters at bit 0, so the oldest sample ends up highest.
for (size_t bit{0}; bit < CHANNEL_WIDTH; bit++, bit_mask >>= 1)
stream[bit] = (stream[bit] << 1) | (bit_mask & 1);
}
// Search the bit stream for a received byte
// `mask` is a snapshot of active_mask, consumed one bit per iteration.
for (uint32_t bit{0}, mask{active_mask}; bit < CHANNEL_WIDTH; bit++, mask >>= 1)
{
if (not (mask & 1)) continue;
while(1)
{
// find start bit in stream
// pos = number of leading one bits of the 64-bit word, i.e. the distance
// from the MSB to the first zero (the `pos == 32` test expects __CLZ to
// yield 32 for an all-zero argument).
uint8_t pos = __CLZ(~uint32_t(stream[bit] >> 32));
if (pos == 32) pos += __CLZ(~uint32_t(stream[bit]));
// Do not bother with incomplete bytes
// An 11-bit frame starting at pos fits into 64 bits only if pos <= 53 == SAMPLES.
if (pos > SAMPLES) break;
// store the received data: sddd'dddd'dpt ... 53bit
// The shift moves the start bit to bit 10 and the stop bit to bit 0 of the
// pushed value; the bits above bit 10 are leftover ones from before the frame.
data_rx[bit].push(uint16_t(stream[bit] >> (SAMPLES-pos)));
// mask out the received data
// 0xffe0... has 11 leading ones; shifted by pos it covers exactly the frame.
stream[bit] |= (0xffe0'0000'0000'0000ull >> pos);
}
}
}
// Profiling counters: accumulated DWT cycles spent in the handler below and
// the number of times it ran.
static modm_fastdata uint64_t cyccnt;
static modm_fastdata uint32_t call_count;
// ~150us = 24-27% CPU utilization
MODM_ISR(DMA2_Stream7)
{
	const uint32_t entered = DWT->CYCCNT;

	// clear the transfer-complete flags of streams 3 and 2, then of stream 7
	DMA2->LIFCR = DMA_LIFCR_CTCIF3 | DMA_LIFCR_CTCIF2;
	DMA2->HIFCR = DMA_HIFCR_CTCIF7;

	process_rx();

	const uint32_t spent = DWT->CYCCNT - entered;
	cyccnt += spent;
	++call_count;
}
namespace elva::signal
{
/// @return writable reference to the cycle counter accumulated by the receive ISR
uint64_t&
PortUart::cycles()
{
	return cyccnt;
}
/// @return writable reference to the number of receive ISR invocations
uint32_t&
PortUart::calls()
{
	return call_count;
}
/// Fetches one received byte from a port.
/// @param port	logical port number (0..19)
/// @param byte	receives the payload on success
/// @return true when a valid byte was stored in `byte`; false when the port is
///         out of range or not enabled, when nothing is queued, or when the
///         queued frame failed the start/stop or parity check (in that case
///         the port's error flag is set and the frame is dropped).
bool
PortUart::read(uint8_t port, uint8_t &byte)
{
	if (port >= 20) return false;
	if (not (enable_mask & (1ul << port))) return false;

	// ports backed by a hardware U(S)ART are forwarded to that driver
	switch(ports[port].uart())
	{
		case 1: return Usart1::read(byte);
		case 2: return Usart2::read(byte);
		case 3: return Usart3::read(byte);
		case 4: return Uart4::read(byte);
		case 5: return Uart5::read(byte);
		case 6: return Usart6::read(byte);
		default: break;
	}

	auto &queue = data_rx[ports[port].rx_pos()];
	if (not queue.isNotEmpty()) return false;

	const uint16_t frame = queue.get();
	queue.pop();

	// bit 10 is the start bit (must be 0), bit 0 the stop bit (must be 1)
	const bool framing_ok = (frame & 0b1'0000'0000'01) == 0b0'0000'0000'01;
	if (framing_ok)
	{
		const uint8_t payload = uint8_t(frame >> 2);
		const bool parity_bit = bool(frame & 0b10);
		const bool odd_ones = bool(modm::bitCount(payload) & 1);
		if (odd_ones == parity_bit)
		{
			// the frame is stored first-received-bit-highest, so reverse it
			byte = modm::bitReverse(payload);
			return true;
		}
	}

	has_error |= (1ul << port);
	return false;
}
bool
PortUart::hasError(uint8_t port)
{ return has_error & (1ul << port); }
void
PortUart::clearError(uint8_t port)
{ has_error &= ~(1ul << port); }
/// Drops every frame queued for `port`. Ports outside the port table are
/// ignored, matching the range check done by read() and write().
/// @param port	logical port number
void
PortUart::discardReceiveBuffer(uint8_t port)
{
	if (port >= sizeof(ports) / sizeof(ports[0])) return;
	const uint8_t bit = ports[port].rx_pos();
	while(data_rx[bit].isNotEmpty()) data_rx[bit].pop();
}
/// @param port	logical port number
/// @return the value of getSize() of the port's receive queue, or 0 for a port
///         outside the port table (matching the range check done by read()
///         and write()).
size_t
PortUart::receiveBufferSize(uint8_t port)
{
	if (port >= sizeof(ports) / sizeof(ports[0])) return 0;
	const uint8_t bit = ports[port].rx_pos();
	return data_rx[bit].getSize();
}
// Transmit state: 0 = nothing staged, 1 = frame(s) staged by write(),
// 2 = transfer started by update() and not yet seen as finished.
static uint8_t tx_status{0};
/// Polls the transfer-complete flag of DMA2 stream 6. When it is set, the
/// flags of streams 1, 6 and 4 are cleared, all transmit buffers are zeroed
/// and the transmit state returns to 0.
/// @return true when a finished transfer was acknowledged, false otherwise
bool check_finished()
{
	const bool done = DMA2->HISR & DMA_HISR_TCIF6;
	if (not done) return false;

	DMA2->LIFCR = DMA_LIFCR_CTCIF1;
	DMA2->HIFCR = DMA_HIFCR_CTCIF6 | DMA_HIFCR_CTCIF4;
	for (auto &tx : dmas_tx) tx.reset();
	tx_status = 0;
	return true;
}
bool
PortUart::write(uint8_t port, uint8_t byte)
{
if (port >= 20 or not (enable_mask & (1ul << port))) return false;
switch(ports[port].uart())
{
case 1: return Usart1::write(byte);
case 2: return Usart2::write(byte);
case 3: return Usart3::write(byte);
case 4: return Uart4::write(byte);
case 5: return Uart5::write(byte);
case 6: return Usart6::write(byte);
default: break;
}
if (tx_status == 2 and not check_finished()) return false;
dmas_tx[ports[port].tx_port()].setByte(ports[port].tx_pin(), byte);
tx_status = 1;
return true;
}
/// Drives the transmit state machine: starts the DMA/timer transfer when
/// write() has staged data, otherwise polls for completion of a transfer.
void
PortUart::update()
{
	if (tx_status != 1)
	{
		check_finished();
		return;
	}

	tx_status = 2;
	for (auto &tx : dmas_tx) tx.enable();
	// repetition counter = number of words per transfer minus one
	TIM1->RCR = DmaTxPort::TransferSize - 1;
	TxTim::setValue(1520);
	TxTim::applyAndReset();
	TxTim::start();
}
// ----------------------------------------------------------------------------
/// Enables the DMA2 clock, initializes all receive and transmit DMA streams
/// and configures both timers. The receive timer (Timer8) is started here;
/// the transmit timer (Timer1) is configured as one-shot and left stopped
/// until update() starts it.
void
PortUart::initializeTimers()
{
// RxPins::setInput();
// TxPins::setOutput(modm::Gpio::High);
Rcc::enable<Peripheral::Dma2>();
for (auto &dma : dmas_rx) dma.initialize();
for (auto &dma : dmas_tx) dma.initialize();
// Receive timer: sets the CC1DE, CC2DE and CC4DE bits of TIM8->DIER.
RxTim::enable();
RxTim::setMode(RxTim::Mode::UpCounter);
TIM8->DIER = TIM_DIER_CC1DE | TIM_DIER_CC2DE | TIM_DIER_CC4DE;
RxTim::setPrescaler(1);
RxTim::setOverflow(1563); // 180MHz / 115200 = 1563
RxTim::applyAndReset();
RxTim::start();
// Transmit timer: sets the CC1DE, CC3DE and CC4DE bits of TIM1->DIER and
// configures output channels 1, 3 and 4.
TxTim::enable();
TxTim::setMode(TxTim::Mode::OneShotUpCounter);
TIM1->DIER = TIM_DIER_CC1DE | TIM_DIER_CC3DE | TIM_DIER_CC4DE;
TxTim::configureOutputChannel(1, TxTim::OutputCompareMode::Pwm2, 1);
TxTim::configureOutputChannel(3, TxTim::OutputCompareMode::Pwm2, 1);
TxTim::configureOutputChannel(4, TxTim::OutputCompareMode::Pwm2, 1);
TxTim::setPrescaler(1);
TxTim::setOverflow(1563); // 180MHz / 115200 = 1563
TxTim::applyAndReset();
}
/// Enables a logical port.
/// Hardware U(S)ART ports only get their RX/TX pins connected. Software ports
/// get the RX pin set to input and the TX pin set to output (initial value 1),
/// are recorded in enable_mask and active_mask, and the receive timer is started.
/// @param port	logical port number (0..19)
/// @return false for an unknown port number, true otherwise
bool
PortUart::enable(uint8_t port)
{
switch(port)
{
// NOTE(review): the hardware U(S)ART cases return before enable_mask is
// updated, while read() and write() reject ports whose enable_mask bit is
// clear — confirm whether these ports are meant to be usable through
// read()/write().
case 0: Usart2::connect<GpioA3::Rx, GpioA2::Tx>(); return true;
case 1: GpioB3::setInput(); GpioC4::setOutput(1); break;
case 2: GpioA8::setInput(); GpioB10::setOutput(1); break;
case 3: Usart1::connect<GpioA10::Rx, GpioA9::Tx>(); return true;
case 4: Usart6::connect<GpioC7::Rx, GpioC6::Tx>(); return true;
case 5: GpioA7::setInput(); GpioB6::setOutput(1); break;
case 6: GpioB9::setInput(); GpioA6::setOutput(1); break;
case 7: GpioC9::setInput(); GpioB8::setOutput(1); break;
case 8: GpioC8::setInput(); GpioC5::setOutput(1); break;
case 9: GpioA11::setInput(); GpioA12::setOutput(1); break;
case 10: GpioB15::setInput(); GpioB14::setOutput(1); break;
case 11: GpioB5::setInput(); GpioB4::setOutput(1); break;
case 12: GpioB1::setInput(); GpioB2::setOutput(1); break;
case 13: GpioC1::setInput(); GpioB0::setOutput(1); break;
case 14: Uart4::connect<GpioA1::Rx, GpioA0::Tx>(); return true;
case 15: Uart5::connect<GpioD2::Rx, GpioC12::Tx>(); return true;
case 16: Usart3::connect<GpioC11::Rx, GpioC10::Tx>(); return true;
case 17: GpioC14::setInput(); GpioB7::setOutput(1); break;
case 18: GpioC2::setInput(); GpioC15::setOutput(1); break;
case 19: GpioC3::setInput(); GpioC0::setOutput(1); break;
default: return false;
}
// Only software ports reach this point.
enable_mask |= (1ul << port);
active_mask |= (1ul << ports[port].rx_pos());
if (active_mask) RxTim::start();
return true;
}
/// Disables a logical port.
/// Hardware U(S)ART ports get their RX/TX pins disconnected. Software ports
/// get both pins set to input, are removed from enable_mask and active_mask,
/// and the receive timer is paused once no software port remains active.
/// @param port	logical port number (0..19)
/// @return false for an unknown port number, true otherwise
bool
PortUart::disable(uint8_t port)
{
switch(port)
{
case 0: GpioA3::disconnect(); GpioA2::disconnect(); return true;
case 1: GpioB3::setInput(); GpioC4::setInput(); break;
case 2: GpioA8::setInput(); GpioB10::setInput(); break;
case 3: GpioA10::disconnect(); GpioA9::disconnect(); return true;
case 4: GpioC7::disconnect(); GpioC6::disconnect(); return true;
case 5: GpioA7::setInput(); GpioB6::setInput(); break;
case 6: GpioB9::setInput(); GpioA6::setInput(); break;
case 7: GpioC9::setInput(); GpioB8::setInput(); break;
case 8: GpioC8::setInput(); GpioC5::setInput(); break;
case 9: GpioA11::setInput(); GpioA12::setInput(); break;
case 10: GpioB15::setInput(); GpioB14::setInput(); break;
case 11: GpioB5::setInput(); GpioB4::setInput(); break;
case 12: GpioB1::setInput(); GpioB2::setInput(); break;
case 13: GpioC1::setInput(); GpioB0::setInput(); break;
case 14: GpioA1::disconnect(); GpioA0::disconnect(); return true;
case 15: GpioD2::disconnect(); GpioC12::disconnect(); return true;
case 16: GpioC11::disconnect(); GpioC10::disconnect(); return true;
case 17: GpioC14::setInput(); GpioB7::setInput(); break;
case 18: GpioC2::setInput(); GpioC15::setInput(); break;
case 19: GpioC3::setInput(); GpioC0::setInput(); break;
default: return false;
}
// Only software ports reach this point.
enable_mask &= ~(1ul << port);
active_mask &= ~(1ul << ports[port].rx_pos());
if (not active_mask) RxTim::pause();
return true;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment