Skip to content

Instantly share code, notes, and snippets.

@zephray
Last active March 18, 2022 03:11
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save zephray/cb9340d278ed2ab6eb47398d2ca29b3c to your computer and use it in GitHub Desktop.
Save zephray/cb9340d278ed2ab6eb47398d2ca29b3c to your computer and use it in GitHub Desktop.
Driving EL display with Raspberry Pi Pico's PIO
// Copyright 2021 Wenting Zhang <zephray@outlook.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
#include <stdio.h>
#include <string.h>
#include "pico/stdlib.h"
#include "hardware/pio.h"
#include "hardware/pll.h"
#include "hardware/clocks.h"
#include "hardware/dma.h"
#include "hardware/irq.h"
#include "hardware/structs/pll.h"
#include "hardware/structs/clocks.h"
#include "eldata.pio.h"
#include "font.h"
// GPIO assignments for the EL panel interface.
// Sync and pixel-clock outputs:
const uint VSYNC_PIN = 2;
const uint HSYNC_PIN = 3;
const uint PIXCLK_PIN = 4;
// UD0-3: GPIO 5-8, the 4-bit data bus driven by the UDATA state machine.
// The pins must stay consecutive: el_sm_init() addresses them as UD0_PIN + i
// and maps them as a 4-pin OUT group starting at UD0_PIN.
const uint UD0_PIN = 5;
const uint UD1_PIN = 6;
const uint UD2_PIN = 7;
const uint UD3_PIN = 8;
// LD0-3: GPIO 9-12, the 4-bit data bus driven by the LDATA state machine.
// Same consecutive-pin requirement as UD0-3 (addressed as LD0_PIN + i).
const uint LD0_PIN = 9;
const uint LD1_PIN = 10;
const uint LD2_PIN = 11;
const uint LD3_PIN = 12;
// PIO related
// State machine indices within el_pio: SM0 runs el_udata, SM1 runs el_ldata.
#define EL_UDATA_SM (0)
#define EL_LDATA_SM (1)
// PIO instance hosting both state machines.
PIO el_pio = pio0;
// DMA channels feeding the UDATA / LDATA TX FIFOs; claimed in el_dma_init().
int el_udma_chan, el_ldma_chan;
// Target pixel clock in Hz, used to derive the PIO clock divider.
#define EL_TARGET_PIXCLK (4500000)
// Panel geometry in pixels (1 bit per pixel).
#define SCR_WIDTH (640)
#define SCR_HEIGHT (480)
// 4-bit bus transfers needed to shift out one line.
#define SCR_LINE_TRANSFERS (SCR_WIDTH / 4)
// Bytes per framebuffer line.
#define SCR_STRIDE (SCR_WIDTH / 8)
// 32-bit words per framebuffer line (DMA transfer unit).
#define SCR_STRIDE_WORDS (SCR_WIDTH / 32)
// Lines scanned per frame by each data bus: the two buses each cover half of
// the framebuffer (first half -> UDATA, second half -> LDATA).
#define SCR_REFRESH_LINES (SCR_HEIGHT / 2)

// The DMA setup moves whole 32-bit words and splits the buffer in two equal
// halves, so the geometry must divide evenly.
_Static_assert(SCR_WIDTH % 32 == 0, "SCR_WIDTH must be a multiple of 32");
_Static_assert(SCR_HEIGHT % 2 == 0, "SCR_HEIGHT must be even");

// 1bpp framebuffer. It is accessed as uint32_t by 32-bit DMA transfers, so it
// is explicitly word-aligned; a plain unsigned char array carries no such
// guarantee.
_Alignas(uint32_t) unsigned char framebuf[SCR_STRIDE * SCR_HEIGHT];
// Write `val` into the PIO register `dst` of state machine `sm`.
// The value travels through the TX FIFO: it is queued first, then a PULL and
// a 32-bit OUT to `dst` are injected into the state machine via pio_sm_exec().
static void el_sm_load_reg(uint sm, enum pio_src_dest dst, uint32_t val) {
    // Queue the value before injecting the PULL that consumes it.
    pio_sm_put_blocking(el_pio, sm, val);

    const uint pull_instr = pio_encode_pull(false, false);
    const uint out_instr = pio_encode_out(dst, 32);

    pio_sm_exec(el_pio, sm, pull_instr);
    pio_sm_exec(el_pio, sm, out_instr);
}
// Frame (re)start routine.
// Registered in el_sm_init() as the PIO0_IRQ_0 handler, and also called
// directly from main() to kick off the first frame. It rewinds both DMA
// channels, resets both state machines, reloads their loop counters, then
// restarts DMA and the state machines.
// The order of the steps below matters; do not reorder them casually.
static void el_pio_irq_handler() {
// UDATA reads from the start of the framebuffer, LDATA from its midpoint.
uint32_t *rdptr_ud = (uint32_t *)framebuf;
uint32_t *rdptr_ld = (uint32_t *)(framebuf + SCR_STRIDE * SCR_HEIGHT / 2);
// Rewind the DMA read pointers; the channels are started further down.
dma_channel_set_read_addr(el_udma_chan, rdptr_ud, false);
dma_channel_set_read_addr(el_ldma_chan, rdptr_ld, false);
// Stop both state machines and discard any stale FIFO contents and
// internal state before reprogramming them.
pio_sm_set_enabled(el_pio, EL_UDATA_SM, false);
pio_sm_set_enabled(el_pio, EL_LDATA_SM, false);
pio_sm_clear_fifos(el_pio, EL_UDATA_SM);
pio_sm_clear_fifos(el_pio, EL_LDATA_SM);
pio_sm_restart(el_pio, EL_UDATA_SM);
pio_sm_restart(el_pio, EL_LDATA_SM);
// Load configuration values
// Y (UDATA only) counts lines: el_udata emits one line before its
// y-counted loop, and `jmp y--` runs that loop y + 1 times, hence "- 2".
el_sm_load_reg(EL_UDATA_SM, pio_y, SCR_REFRESH_LINES - 2);
// ISR holds the per-line transfer count; both programs copy it into X at
// the start of every line, and `jmp x--` runs x + 1 times, hence "- 1".
el_sm_load_reg(EL_UDATA_SM, pio_isr, SCR_LINE_TRANSFERS - 1);
el_sm_load_reg(EL_LDATA_SM, pio_isr, SCR_LINE_TRANSFERS - 1);
// Start both DMA channels so the TX FIFOs begin filling.
dma_channel_start(el_udma_chan);
dma_channel_start(el_ldma_chan);
// Clear IRQ flag
// Bit 1 corresponds to the `irq wait 1` raised by el_udata at end of frame.
el_pio->irq = 0x02;
// start SM
// Both state machines are enabled with a single call.
pio_enable_sm_mask_in_sync(el_pio,
(1u << EL_UDATA_SM) | (1u << EL_LDATA_SM));
}
void putpixel(unsigned char *buf, int x, int y, int c) {
if (c)
buf[SCR_STRIDE * y + x / 8] |= 1 << (x % 8);
else
buf[SCR_STRIDE * y + x / 8] &= ~(1 << (x % 8));
}
// Draw character `c` with its top-left corner at (x, y).
// Glyphs are looked up in Font_Ascii_5X7E at 8 bytes per character, starting
// from ASCII 0x20. Seven rows are drawn, each taking the five most
// significant bits of its byte, MSB leftmost. Both lit and unlit glyph pixels
// are written.
void putch(unsigned char *buf, int x, int y, char c) {
    uint8_t *glyph = &(Font_Ascii_5X7E[(c - 0x20) * 8]);

    for (int row = 0; row < 7; row++) {
        const uint8_t bits = glyph[row];
        for (int col = 0; col < 5; col++) {
            // Move the column's bit up to bit 7 and test it.
            putpixel(buf, x + col, y + row, (bits << col) & 0x80);
        }
    }
}
// Draw the NUL-terminated string `str` starting at (x, y), advancing 6 pixels
// horizontally per character. No wrapping is performed.
void putstr(unsigned char *buf, int x, int y, char *str) {
    int offset = 0;

    for (char *p = str; *p != '\0'; p++) {
        putch(buf, x + offset, y, *p);
        offset += 6;
    }
}
static void el_sm_init() {
static uint udata_offset, ldata_offset;
for (int i = 0; i < 4; i++) {
pio_gpio_init(el_pio, UD0_PIN + i);
pio_gpio_init(el_pio, LD0_PIN + i);
}
pio_gpio_init(el_pio, PIXCLK_PIN);
pio_gpio_init(el_pio, HSYNC_PIN);
pio_gpio_init(el_pio, VSYNC_PIN);
pio_sm_set_consecutive_pindirs(el_pio, EL_UDATA_SM, UD0_PIN, 4, true);
pio_sm_set_consecutive_pindirs(el_pio, EL_UDATA_SM, PIXCLK_PIN, 1, true);
pio_sm_set_consecutive_pindirs(el_pio, EL_UDATA_SM, VSYNC_PIN, 1, true);
pio_sm_set_consecutive_pindirs(el_pio, EL_LDATA_SM, LD0_PIN, 4, true);
pio_sm_set_consecutive_pindirs(el_pio, EL_LDATA_SM, HSYNC_PIN, 1, true);
udata_offset = pio_add_program(el_pio, &el_udata_program);
ldata_offset = pio_add_program(el_pio, &el_ldata_program);
printf("EL USM offset: %d, EL LSM offset: %d\n", udata_offset, ldata_offset);
int cycles_per_pclk = 2;
float div = clock_get_hz(clk_sys) / (EL_TARGET_PIXCLK * cycles_per_pclk);
pio_sm_config cu = el_udata_program_get_default_config(udata_offset);
sm_config_set_sideset_pins(&cu, PIXCLK_PIN);
sm_config_set_out_pins(&cu, UD0_PIN, 4);
sm_config_set_set_pins(&cu, VSYNC_PIN, 1);
sm_config_set_fifo_join(&cu, PIO_FIFO_JOIN_TX);
sm_config_set_out_shift(&cu, true, true, 32);
sm_config_set_clkdiv(&cu, div);
pio_sm_init(el_pio, EL_UDATA_SM, udata_offset, &cu);
pio_sm_config cl = el_ldata_program_get_default_config(ldata_offset);
sm_config_set_set_pins(&cl, HSYNC_PIN, 1);
sm_config_set_out_pins(&cl, LD0_PIN, 4);
sm_config_set_fifo_join(&cl, PIO_FIFO_JOIN_TX);
sm_config_set_out_shift(&cl, true, true, 32);
sm_config_set_clkdiv(&cl, div);
pio_sm_init(el_pio, EL_LDATA_SM, ldata_offset, &cl);
el_pio->inte0 = PIO_IRQ0_INTE_SM1_BITS;
irq_set_exclusive_handler(PIO0_IRQ_0, el_pio_irq_handler);
irq_set_enabled(PIO0_IRQ_0, true);
}
static void el_dma_init() {
el_udma_chan = dma_claim_unused_channel(true);
dma_channel_config cu = dma_channel_get_default_config(el_udma_chan);
channel_config_set_transfer_data_size(&cu, DMA_SIZE_32);
channel_config_set_read_increment(&cu, true);
channel_config_set_write_increment(&cu, false);
channel_config_set_dreq(&cu, DREQ_PIO0_TX0 + EL_UDATA_SM);
dma_channel_configure(el_udma_chan, &cu, &el_pio->txf[EL_UDATA_SM], NULL, SCR_STRIDE_WORDS * SCR_REFRESH_LINES, false);
el_ldma_chan = dma_claim_unused_channel(true);
dma_channel_config cl = dma_channel_get_default_config(el_ldma_chan);
channel_config_set_transfer_data_size(&cl, DMA_SIZE_32);
channel_config_set_read_increment(&cl, true);
channel_config_set_write_increment(&cl, false);
channel_config_set_dreq(&cl, DREQ_PIO0_TX0 + EL_LDATA_SM);
dma_channel_configure(el_ldma_chan, &cl, &el_pio->txf[EL_LDATA_SM], NULL, SCR_STRIDE_WORDS * SCR_REFRESH_LINES, false);
}
int main()
{
stdio_init_all();
puts("Plannar EL demo\n");
memset(framebuf, 0x00, SCR_STRIDE * SCR_HEIGHT);
putstr(framebuf, 10, 10, "Hello, world");
for (int i = 0; i < 640; i++) {
putpixel(framebuf, i, 0, 1);
putpixel(framebuf, i, 479, 1);
}
for (int i = 0; i < 480; i++) {
putpixel(framebuf, 0, i, 1);
putpixel(framebuf, 639, i, 1);
}
el_sm_init();
el_dma_init();
puts("Start\n");
el_pio_irq_handler();
const uint LED_PIN = 25;
gpio_init(LED_PIN);
gpio_set_dir(LED_PIN, GPIO_OUT);
while (1) {
gpio_put(LED_PIN, 0);
sleep_ms(500);
gpio_put(LED_PIN, 1);
sleep_ms(500);
}
return 0;
}
; Copyright 2021 Wenting Zhang <zephray@outlook.com>
; License under MIT
; UDATA SM handles UD0-3, PCLK, and VSYNC
; PCLK is mapped to SIDE, VSYNC is mapped to SET, and UD0-3 are mapped to OUT
;
; Register usage (loaded by the CPU before the SM is enabled):
;   ISR = 4-bit transfers per line, minus 1 (copied into X for every line)
;   Y   = number of lines after the first, minus 1
; Frame sequence: first line -> VSYNC pulse -> remaining lines -> IRQ 1 to CPU.
; IRQ 5 is raised at the start of every line to release the LDATA SM.
.program el_udata
.side_set 1
; --- First line of the frame ---
; Release LDATA for this line and load the per-line transfer counter.
irq set 5 side 0
mov x, isr side 0
loop_first_line:
; Two cycles per pixel clock: data out with PCLK high, then PCLK low.
out pins, 4 side 1
jmp x-- loop_first_line side 0
end_first_line:
; Pulse VSYNC after the first line: high for 7 cycles, then low for 10.
; These 17 cycles equal the HSYNC pulse + gap in el_ldata (6 + 11 cycles).
set pins, 1 [6] side 0
set pins, 0 [9] side 0
; --- Remaining lines ---
line_start:
irq set 5 side 0
mov x, isr side 0
loop:
out pins, 4 side 1 ; Output 4 bit data
jmp x-- loop side 0 ; Loop until x hits 0, then wait for next line
loop_end:
; Inter-line gap: 16 cycles here plus the jmp below gives the same 17 cycles
; as the VSYNC sequence above.
nop [15] side 0
jmp y-- line_start side 0
; end of frame, signal CPU
; Raises IRQ 1 and waits on it; the CPU handler (el_pio_irq_handler in the C
; file) reloads the counters and restarts both state machines.
irq wait 1 side 0
; LDATA SM handles LD0-3 and HSYNC
; HSYNC is mapped to SET, and LD0-3 are mapped to OUT
; This program has no side-set; PCLK is generated by the UDATA SM only.
; ISR holds the number of 4-bit transfers per line, minus 1 (loaded by the CPU).
.program el_ldata
; Load the per-line transfer counter, then wait for the UDATA SM to raise
; IRQ 5 at the start of its line.
mov x, isr
wait irq 5
loop:
; Two cycles per 4-bit transfer, the same pace as the UDATA data loop.
out pins, 4
jmp x-- loop
; Pulse HSYNC at the end of the line: high for 6 cycles, then low for 11.
set pins, 1 [5]
set pins, 0 [10]
; NOTE(review): no explicit .wrap is given, so execution presumably wraps
; back to the top of the program for the next line - confirm against pioasm.
@Crest
Copy link

Crest commented Mar 18, 2022

This code can be further improved by using the RP2040 DMA engine channel chaining feature allowing an additional DMA channel to program the two already used in the current version of your program before restarting itself. This change would remove the CPU cores from the last remaining timing constraint I found in your code.

Increasing the frame buffer by at least one line would allow smooth scrolling by changing the DMA start address (probably best done by modifying the DMA control waiting for the vsynced DMA channel to reload the other two channels).

@zephray
Copy link
Author

zephray commented Mar 18, 2022

This code can be further improved by using the RP2040 DMA engine channel chaining feature allowing an additional DMA channel to program the two already used in the current version of your program before restarting itself. This change would remove the CPU cores from the last remaining timing constraint I found in your code.

Thanks for the suggestion. I have also considered that before, but I decided to leave that out for the following reasons:

  • In my application, I need a Vsync interrupt to swap buffers, so some CPU intervention at frame end is always required
  • Interrupt timing on microcontrollers is quite deterministic with correct priority settings, so timing accuracy isn't a concern for me.
  • The measured CPU usage for the interrupt is only 750ns per frame (125MHz core clock). This is negligible for my use case.
  • I am a bit too lazy to figure out the PIO changes required for the state machines to reinitialize themselves

But you are right, it's possible to completely free the CPU core. And I do see it could be favorable:

  • If your system has something that must be serviced immediately and cannot afford to wait 750ns when the two collide, AND the screen also cannot wait for that high-priority work to finish, then it is necessary to eliminate this interrupt
  • If Vsync interrupt is not needed, then there is less reason to keep it.

If you have the code to demo that, it would be great if you could post here.

Increasing the frame buffer by at least one line would allow smooth scrolling by changing the DMA start address (probably best done by modifying the DMA control waiting for the vsynced DMA channel to reload the other two channels).

I have also implemented that in my application. In my case I increased the frame buffer by 32 lines, so I could smoothly scroll through 8x16 text displays (there might be one line of text at the top and one at the bottom, both only partially visible). The code is shown here: https://github.com/zephray/ELTerm/blob/master/firmware/el.c#L81 I was using 3 DMA channels to implement the smooth scrolling (2 chained DMA channels to handle address wrap-around at the end). I didn't find any way to do that with only 2 DMA channels back then, but I might have missed something. Suggestions welcome.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment