Skip to content

Instantly share code, notes, and snippets.

@jedp
Last active January 9, 2024 05:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jedp/69c0f812d6b18041a01ec9587ab0b474 to your computer and use it in GitHub Desktop.
Save jedp/69c0f812d6b18041a01ec9587ab0b474 to your computer and use it in GitHub Desktop.
SPI DMA LED hacking

Controlling addressable LEDs with STM32 SPI + DMA

Overview

RGB5050 LEDs are programmed by sending a stream of 24-bit GRB packets, MSB first.

The logical 1s and 0s in the packets are encoded on a single line which is sent high or low with these timings:

  • 0 -> 0.3µs high ± 0.15µs, 0.9µs low ± 0.15µs
  • 1 -> 0.6µs high ± 0.15µs, 0.6µs low ± 0.15µs

With a SPI transfer rate of 5Mbps, we can encode 0 and 1 with the following timings:

  • 0 -> 0b100000 (0.2µs high, 1.0µs low)
  • 1 -> 0b111000 (0.6µs high, 0.6µs low)

These are well within spec.

Color encoding

I don't need 24-bit color on my LEDs. So I'm saving space by using an RGB565 color space.

A common way to convert RGB565 to RGB888 is simply to shift the most significant bits right or left to encode or decode. When I tried this, the results looked pretty stair-steppy. In particular, the transitions between the dimmer values were noticeably large.

So here are two lookup tables that map 5- and 6-bit color values onto an 8-bit logarithmic scale, which matches human visual perception much better.

/*
 * Maps a 5-bit channel value (0..31) to an 8-bit intensity (0..255).
 * The steps are large at the low end and shrink towards the top of the range.
 * Used for the red and blue channels of an RGB565 pixel.
 */
uint8_t five_bit_log[32] = {
    /*  0.. 7 */   0,  51,  80, 102, 118, 131, 143, 153,
    /*  8..15 */ 161, 169, 176, 182, 188, 194, 199, 204,
    /* 16..23 */ 208, 212, 216, 220, 224, 227, 230, 233,
    /* 24..31 */ 236, 239, 242, 245, 247, 250, 252, 255,
};

/*
 * Maps a 6-bit channel value (0..63) to an 8-bit intensity (0..255).
 * The steps are large at the low end and shrink towards the top of the range.
 * Used for the green channel of an RGB565 pixel.
 */
uint8_t six_bit_log[64] = {
    /*  0.. 7 */   0,  42,  67,  85,  98, 110, 119, 127,
    /*  8..15 */ 135, 141, 147, 152, 157, 162, 166, 170,
    /* 16..23 */ 174, 177, 180, 184, 187, 189, 192, 195,
    /* 24..31 */ 197, 200, 202, 204, 206, 208, 211, 212,
    /* 32..39 */ 214, 216, 218, 220, 221, 223, 225, 226,
    /* 40..47 */ 228, 229, 231, 232, 233, 235, 236, 237,
    /* 48..55 */ 239, 240, 241, 242, 243, 245, 246, 247,
    /* 56..63 */ 248, 249, 250, 251, 252, 253, 254, 255,
};

Packing bits to ship out

The following function and lookup table take pixels from a framebuffer of 16-bit RGB565 values and render them into a buffer of 8-bit values, in GRB order, to ship out to the LED chain.

/*
 * Expands a 4-bit nibble into the 24 SPI bits that encode it on the LED data line.
 *
 * Every data bit becomes one 6-bit SPI symbol, most significant data bit first:
 *   0 -> 0b100000 (0x20)
 *   1 -> 0b111000 (0x38)
 *
 * Index the table with the nibble value. Only the low 24 bits of each
 * entry carry data; the top byte of the uint32_t is always zero.
 */
static uint32_t color_nibbles[16] = {
        /* 0b0000 */ 0x820820, // 100000 100000 100000 100000
        /* 0b0001 */ 0x820838, // 100000 100000 100000 111000
        /* 0b0010 */ 0x820e20, // 100000 100000 111000 100000
        /* 0b0011 */ 0x820e38, // 100000 100000 111000 111000
        /* 0b0100 */ 0x838820, // 100000 111000 100000 100000
        /* 0b0101 */ 0x838838, // 100000 111000 100000 111000
        /* 0b0110 */ 0x838e20, // 100000 111000 111000 100000
        /* 0b0111 */ 0x838e38, // 100000 111000 111000 111000
        /* 0b1000 */ 0xe20820, // 111000 100000 100000 100000
        /* 0b1001 */ 0xe20838, // 111000 100000 100000 111000
        /* 0b1010 */ 0xe20e20, // 111000 100000 111000 100000
        /* 0b1011 */ 0xe20e38, // 111000 100000 111000 111000
        /* 0b1100 */ 0xe38820, // 111000 111000 100000 100000
        /* 0b1101 */ 0xe38838, // 111000 111000 100000 111000
        /* 0b1110 */ 0xe38e20, // 111000 111000 111000 100000
        /* 0b1111 */ 0xe38e38, // 111000 111000 111000 111000
};

/*
 * Writes the 6 SPI bytes that encode one 8-bit channel value.
 * The high nibble's 24-bit pattern goes out first, then the low nibble's,
 * each pattern most significant byte first.
 */
static void write_channel_bits(uint8_t *dst, uint8_t level) {
        const uint32_t upper = color_nibbles[level >> 4];
        const uint32_t lower = color_nibbles[level & 0xf];

        dst[0] = (upper >> 16) & 0xff;
        dst[1] = (upper >>  8) & 0xff;
        dst[2] = upper & 0xff;
        dst[3] = (lower >> 16) & 0xff;
        dst[4] = (lower >>  8) & 0xff;
        dst[5] = lower & 0xff;
}

/*
 * Renders RGB565 pixels into the SPI byte stream expected by the LED chain.
 *
 * framebuf:   source pixels, one 16-bit RGB565 value each
 *             (red in bits 15..11, green in bits 10..5, blue in bits 4..0).
 * bitbuf:     destination; receives 18 bytes per pixel, so it must have room
 *             for at least 18 * num_pixels bytes.
 * num_pixels: number of pixels to convert.
 *
 * Each channel is first widened to 8 bits through five_bit_log / six_bit_log,
 * then expanded to 6 SPI bytes. Channels are emitted in G, R, B order.
 *
 * Returns num_pixels.
 */
int framebuffer_to_bits(const uint16_t *framebuf, uint8_t *bitbuf, int num_pixels) {
        for (int px = 0; px < num_pixels; px++) {
                const uint16_t rgb565 = framebuf[px];

                // Widen each channel to 8 bits via the lookup tables.
                const uint8_t red   = five_bit_log[(rgb565 >> 11) & 0x1f];
                const uint8_t green = six_bit_log[(rgb565 >> 5) & 0x3f];
                const uint8_t blue  = five_bit_log[rgb565 & 0x1f];

                // 18 output bytes per pixel: 6 for green, 6 for red, 6 for blue.
                uint8_t *out = &bitbuf[18 * px];
                write_channel_bits(out + 0,  green);
                write_channel_bits(out + 6,  red);
                write_channel_bits(out + 12, blue);
        }

        return num_pixels;
}

DMA state machine

In the main loop, we start circular DMA/SPI and repeatedly refill halves of a bitbuffer as packets are sent.

After the last packet of data is sent, we synchronously send some zeros. These are ignored by the RGBs, but have the important side-effect of causing the STM to leave MOSI low. Without writing zeros, the MOSI line stays high, which breaks the LED protocol.

// Number of RGB565 pixels in one frame (the length of the framebuffer array).
#define FRAMEBUFFER_PIXELS (128)
// Number of pixels the SPI bit buffer holds. The state machine refills it
// one half (BITBUFFER_PIXEL_CAPACITY / 2 pixels) at a time.
#define BITBUFFER_PIXEL_CAPACITY (16)
// Size of the SPI bit buffer in bytes.
// NOTE(review): RGB5050_PIXEL_BYTESIZE is not defined in this excerpt.
// framebuffer_to_bits() writes 18 bytes per pixel, so it presumably has to be 18 — confirm.
#define BITBUFFER_TOTAL_BYTES ((RGB5050_PIXEL_BYTESIZE) * (BITBUFFER_PIXEL_CAPACITY))

// States of the main-loop SPI/DMA state machine (stored in spi_dma_state).
enum {
        // Idle: the main loop renders the first buffer-full and starts DMA.
        SPI_DMA_STOPPED,
        // DMA is running and the main loop has nothing to do.
        SPI_DMA_SENDING,
        // Set by HAL_SPI_TxHalfCpltCallback; the main loop decides what to do next.
        SPI_DMA_ON_TX_HALF,
        // The main loop re-renders the first half of the bit buffer.
        SPI_DMA_BUF_FIRST_HALF_SENT,
        // The main loop re-renders the second half of the bit buffer.
        SPI_DMA_BUF_SECOND_HALF_SENT,
        // Every framebuffer pixel has been rendered: stop DMA and send trailing zeros.
        SPI_DMA_DONE_SENDING,
        // Frame finished: wait for the tick interval before going back to SPI_DMA_STOPPED.
        SPI_DMA_AWAIT_NEXT_TICK,
};

// One frame of pixels, one RGB565 value per LED.
uint16_t framebuffer[FRAMEBUFFER_PIXELS] = { 0 };
// SPI byte stream for BITBUFFER_PIXEL_CAPACITY pixels; handed to the circular DMA transfer.
uint8_t bitbuffer[BITBUFFER_TOTAL_BYTES] = { 0 };
// Sent with a blocking transmit after DMA is stopped so that MOSI is left low.
uint8_t zeros[8] = { 0 };

// Number of framebuffer pixels rendered into the bit buffer so far in the current frame
// (equivalently, the index of the next pixel to render).
int framebuffer_pixel_index = 0;
// Current state of the state machine. Written by both the main loop and the SPI callback.
volatile int spi_dma_state = SPI_DMA_STOPPED;

Initially populate the frame buffer with something. For testing, I'm filling it with obvious values of single hues.

  uint8_t r, g, b;
  uint16_t p;
  for (uint8_t i = 0; i < 32; i++) {
          // RGB565 ...
          r = 0;
          g = 0;
          b = 0x1f;
          p = (r << 11) | (g << 5) | b;
          framebuffer[i] = p;
  }
  // etc. for all pixels

The simplified logic of the main loop state machine is:

  1. If STOPPED, fill the bitbuffer and start DMA.
  2. On DMA half-sent callback, refill the half of the buffer that was just sent.
  3. When all data bits have been sent, send a buffer full of zeros.
  4. Stop circular DMA.
  5. WAIT until some specified interval has elapsed (frame rate)
  6. Goto STOPPED
// Main-loop state machine that streams one frame to the LED chain over circular SPI/DMA.
// Evaluated once per main-loop pass. spi_dma_state is also written by
// HAL_SPI_TxHalfCpltCallback, which moves it to SPI_DMA_ON_TX_HALF.
switch (spi_dma_state) {
case SPI_DMA_STOPPED:
        // Set timing flag pin (cleared again once the frame is done).
        HAL_GPIO_WritePin(GPIO_Timing_Flag_GPIO_Port, GPIO_Timing_Flag_Pin, GPIO_PIN_SET);
        // Render to fill the entire bit buffer (both halves), starting at pixel 0.
        // framebuffer_pixel_index becomes the index of the next pixel to render.
        framebuffer_pixel_index = framebuffer_to_bits(framebuffer, bitbuffer, BITBUFFER_PIXEL_CAPACITY);
        // Circular DMA will keep looping over BITBUFFER_TOTAL_BYTES bytes of the bitbuffer.
        HAL_SPI_Transmit_DMA(&hspi1, (uint8_t*) bitbuffer, BITBUFFER_TOTAL_BYTES);
        spi_dma_state = SPI_DMA_SENDING;
        break;

case SPI_DMA_SENDING:
        // DMA is running; wait for the callback to change the state.
        break;

case SPI_DMA_ON_TX_HALF:
        // The callback fired: decide which half of the bit buffer to refill, or finish.
        // NOTE(review): after the initial full render framebuffer_pixel_index is a multiple of
        // BITBUFFER_PIXEL_CAPACITY, so the first event takes the SECOND_HALF_SENT branch and
        // rewrites the second half. Confirm this matches the half the DMA has finished with.
        if (framebuffer_pixel_index == FRAMEBUFFER_PIXELS) {
                // All pixels of the frame have been rendered.
                spi_dma_state = SPI_DMA_DONE_SENDING;
        } else if (framebuffer_pixel_index % BITBUFFER_PIXEL_CAPACITY == 0) {
                // Sent second half of buffer. Back at the start of the buffer.
                spi_dma_state = SPI_DMA_BUF_SECOND_HALF_SENT;
        } else {
                // Completed sending the first half of the buffer.
                spi_dma_state = SPI_DMA_BUF_FIRST_HALF_SENT;
        }
        break;

case SPI_DMA_BUF_FIRST_HALF_SENT:
        // Re-render the first half of the bit buffer with the next half-buffer of pixels.
        framebuffer_pixel_index += framebuffer_to_bits(
                        framebuffer + framebuffer_pixel_index,
                        bitbuffer,
                        BITBUFFER_PIXEL_CAPACITY >> 1);
        spi_dma_state = SPI_DMA_SENDING;
        break;

case SPI_DMA_BUF_SECOND_HALF_SENT:
        // Re-render the second half of the bit buffer with the next half-buffer of pixels.
        framebuffer_pixel_index += framebuffer_to_bits(
                        (uint16_t*) framebuffer + framebuffer_pixel_index,
                        (uint8_t*) bitbuffer + (BITBUFFER_TOTAL_BYTES >> 1),
                        BITBUFFER_PIXEL_CAPACITY >> 1);
        spi_dma_state = SPI_DMA_SENDING;
        break;

case SPI_DMA_DONE_SENDING:
        // Stop circular DMA.
        // Note that we will lose a race condition with DMA and a few extra bits will be transmitted.
        // Circular DMA will also probably send a little extra garbage and leave MOSI high.
        // We don't care because we have already addressed all LEDs and the garbage will just
        // roll off into the ether.
        HAL_SPI_DMAStop(&hspi1);
        // Send zeros (blocking, 8 bytes, timeout of 1 tick) to make MOSI stay low.
        HAL_SPI_Transmit(&hspi1, zeros, 8, 1);
        // Reset timing pin.
        HAL_GPIO_WritePin(GPIO_Timing_Flag_GPIO_Port, GPIO_Timing_Flag_Pin, GPIO_PIN_RESET);
        spi_dma_state = SPI_DMA_AWAIT_NEXT_TICK;
        break;

case SPI_DMA_AWAIT_NEXT_TICK:
        // Control frame rate: start the next frame once more than 1000 ticks have passed
        // since lasttick. (The stated goal is 30fps; the interval actually used is 1000 ticks.)
        // Elapsed time is computed by subtraction so the comparison stays correct when the
        // tick counter wraps around; `lasttick + 1000` would overflow near the wrap point.
        // Assumes lasttick is an unsigned 32-bit tick value like HAL_GetTick()'s result — TODO confirm.
        if (HAL_GetTick() - lasttick > 1000) {
                lasttick = HAL_GetTick();
                spi_dma_state = SPI_DMA_STOPPED;
        }
        break;

default:
        // Unknown state value: leave it untouched.
        break;
}

The SPI callback sets the state to "half buffer sent".

This callback is invoked after either half of the buffer has been sent. So it's the only one we need.

/*
 * SPI transmit half-transfer callback.
 *
 * Only records the event by moving the state machine to SPI_DMA_ON_TX_HALF;
 * the main loop does the actual refilling. The handle is not inspected.
 *
 * NOTE(review): this is the only completion callback in this file. Confirm the HAL
 * invokes it after each half of the circular buffer, as the state machine assumes.
 */
void HAL_SPI_TxHalfCpltCallback(SPI_HandleTypeDef *hspi)
{
        (void) hspi;
        spi_dma_state = SPI_DMA_ON_TX_HALF;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment