Skip to content

Instantly share code, notes, and snippets.

@g3grau
Created October 6, 2023 19:51
Show Gist options
  • Save g3grau/7940a516ae68addd022b07fd2c5a05fe to your computer and use it in GitHub Desktop.
Save g3grau/7940a516ae68addd022b07fd2c5a05fe to your computer and use it in GitHub Desktop.
Fixing SPI Flash access in step22 (https://github.com/fm4dd/gatemate-riscv/issues/2)
### ----------------------------------------------------------- ###
### Local Makefile for building the RISC-V native application. ###
### Requires riscv-toolchain, linker script and firmware_words ###
### hex conversion program. Creates the 'firmware.hex' output ###
### for FPGA upload together with the bitstream. ###
### ###
### This program needs optimizer flag -O2 to fit into 6K BRAM. ###
### ----------------------------------------------------------- ###
TOOLCHAINDIR = /usr/cadtools/riscv-gnu-toolchain
RVLINKSCRIPT=/home/ggrau/projects/FPGAsrc/GateMate/CologneChip/gatemate-riscv/ldscripts-shared/bram.ld
# RVLINKSCRIPT=/home/ggrau/projects/FPGAsrc/GateMate/CologneChip/gatemate-riscv/ldscripts-shared/spiflash1.ld
FW_WORDS_DIR=$(TOOLCHAINDIR)/firmware_words
RV32I_LIBGCC=$(TOOLCHAINDIR)/build/riscv64_multilib/lib64/gcc/riscv64-unknown-elf/11.1.0/rv32i/ilp32/libgcc.a
ASFLAGS= -march=rv32i -mabi=ilp32 -mno-relax
LDFLAGS= -m elf32lriscv -nostdlib --no-relax -T $(RVLINKSCRIPT)
# -O2 for memory optimization
CFLAGS= -march=rv32i -mabi=ilp32 -fno-pic -fno-stack-protector -w -Wl,--no-relax -O2
# -O2
all: firmware.hex
# Step-3: Convert the RISCV elf binary into an readmem() formatted hex file
# -------------------------------------------------------------------------
# 1536*4=6144 -> GG: increased 4x to test gcc without -O2
# check with SOC.v reg[] MEM and start.S (sp)
# RAMSIZE=6144
RAMSIZE=24576
firmware.hex: ST_NICCC.bram.elf
$(FW_WORDS_DIR)/firmware_words $< -ram $(RAMSIZE) -max_addr $(RAMSIZE) -out $@
# Step-2: Link the object files into a RISCV elf binary
# -----------------------------------------------------
ST_NICCC.bram.elf: %.o
# $(TOOLCHAINDIR)/bin/riscv64-unknown-elf-ld start_spiflash1.o putchar.o print.o ST_NICCC.o $(RV32I_LIBGCC) $(LDFLAGS) -o $@
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-ld start.o putchar.o print.o wait.o ST_NICCC.o $(RV32I_LIBGCC) $(LDFLAGS) -o $@
# Step-1: build object files (.o) from assembler source files (.S)
# ----------------------------------------------------------------
%o:
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-as $(ASFLAGS) start.S -o start.o
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-as $(ASFLAGS) start_spiflash1.S -o start_spiflash1.o
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-as $(ASFLAGS) putchar.S -o putchar.o
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-as $(ASFLAGS) wait.S -o wait.o
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-gcc $(CFLAGS) -c print.c -o print.o
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-gcc $(CFLAGS) -c ST_NICCC.c -o ST_NICCC.o
clean:
rm -f *.o *.hex *.elf
.SECONDARY:
.PHONY: all clean
// -------------------------------------------------------
// soc.v FemtoRV Tutorial step22 SPI Flash @20230711 fm4dd
//
// Requires: Gatemate E1 eval board v3.1B
//
// Notes:
// Step 22 creates a separate RISC-V app written in 'C',
// located in the src folder. The app code is loaded
// into block ram, which requires the hex conversion with
// firmware_words. 'make' creates the firmware.hex app
// code before synthesis. We use the $readmemh() function
// to load the app. Additionally, application data is
// written into flash, and retrieved as ROM from the
// RISC-V program.
//
// Code is tested on a Gatemate E1 eval board v3.1B
// E1 onboard user button SW3 is assigned to RESET.
// LEDS[7:0] is assigned to E1 onboard Leds D1..D8.
// TXD/RXD is assigned to USB<->serial converter on PMODB.
//
// How to run: make ST_NICCC, make prog and make test
//
// GG: compiles with -O2 in 6k, but doesn't work in test or flash
// compiles without -O2 using 24k, but still doesn't work .. no output
// NOTE that this is not yet set up to use the Flash as storage!?
//
// GG: SPI signals don't work unless we probe them for LED output
// reg dbg with
//
// -------------------------------------------------------
`default_nettype none
`define CPU_FREQ 10 // RISCV CPU frequency in MHz
// `define ARTY // define board type for SPI mode
`define CCA1 // GG: same as ARTY, just simple SPI so far
module Memory (
input clk,
input [31:0] mem_addr, // address to be read
output reg [31:0] mem_rdata, // data read from memory
input mem_rstrb, // goes high when processor wants to read
input [31:0] mem_wdata, // data to be written
input [3:0] mem_wmask // masks for writing the 4 bytes (1=write byte)
);
// GG: increased 4x to test without gcc -O2
// reg [31:0] MEM [0:1535]; // 1536 4-bytes words = 6 Kb of RAM in total
reg [31:0] MEM [0:6143]; // 6912 4-bytes words = 24 Kb of RAM in total
initial begin
$readmemh("firmware.hex",MEM);
end
wire [10:0] word_addr = mem_addr[12:2];
always @(posedge clk) begin
if(mem_rstrb) begin
mem_rdata <= MEM[word_addr];
end
if(mem_wmask[0]) MEM[word_addr][ 7:0 ] <= mem_wdata[ 7:0 ];
if(mem_wmask[1]) MEM[word_addr][15:8 ] <= mem_wdata[15:8 ];
if(mem_wmask[2]) MEM[word_addr][23:16] <= mem_wdata[23:16];
if(mem_wmask[3]) MEM[word_addr][31:24] <= mem_wdata[31:24];
end
endmodule
module Processor (
input clk,
input resetn,
output [31:0] mem_addr,
input [31:0] mem_rdata,
input mem_rbusy,
output mem_rstrb,
output [31:0] mem_wdata,
output [3:0] mem_wmask
);
// Internal width for addresses.
localparam ADDR_WIDTH=24;
reg [ADDR_WIDTH:0] PC=0; // program counter
reg [31:2] instr; // current instruction
// See the table P. 105 in RISC-V manual
// The 10 RISC-V instructions
wire isALUreg = (instr[6:2] == 5'b01100); // rd <- rs1 OP rs2
wire isALUimm = (instr[6:2] == 5'b00100); // rd <- rs1 OP Iimm
wire isBranch = (instr[6:2] == 5'b11000); // if(rs1 OP rs2) PC<-PC+Bimm
wire isJALR = (instr[6:2] == 5'b11001); // rd <- PC+4; PC<-rs1+Iimm
wire isJAL = (instr[6:2] == 5'b11011); // rd <- PC+4; PC<-PC+Jimm
wire isAUIPC = (instr[6:2] == 5'b00101); // rd <- PC + Uimm
wire isLUI = (instr[6:2] == 5'b01101); // rd <- Uimm
wire isLoad = (instr[6:2] == 5'b00000); // rd <- mem[rs1+Iimm]
wire isStore = (instr[6:2] == 5'b01000); // mem[rs1+Simm] <- rs2
wire isSYSTEM = (instr[6:2] == 5'b11100); // special
// The 5 immediate formats
wire [31:0] Uimm={ instr[31], instr[30:12], {12{1'b0}}};
wire [31:0] Iimm={{21{instr[31]}}, instr[30:20]};
/* verilator lint_off UNUSED */ // MSBs of SBJimms are not used by addr adder.
wire [31:0] Simm={{21{instr[31]}}, instr[30:25],instr[11:7]};
wire [31:0] Bimm={{20{instr[31]}}, instr[7],instr[30:25],instr[11:8],1'b0};
wire [31:0] Jimm={{12{instr[31]}}, instr[19:12],instr[20],instr[30:21],1'b0};
/* verilator lint_on UNUSED */
// Destination registers
wire [4:0] rdId = instr[11:7];
// function codes
wire [2:0] funct3 = instr[14:12];
wire [6:0] funct7 = instr[31:25];
// The registers bank
reg [31:0] RegisterBank [0:31];
reg [31:0] rs1; // value of source
reg [31:0] rs2; // registers.
wire [31:0] writeBackData; // data to be written to rd
wire writeBackEn; // asserted if data should be written to rd
`ifdef BENCH
integer i;
initial begin
for(i=0; i<32; ++i) begin
RegisterBank[i] = 0;
end
end
`endif
// The ALU
wire [31:0] aluIn1 = rs1;
wire [31:0] aluIn2 = isALUreg | isBranch ? rs2 : Iimm;
wire [4:0] shamt = isALUreg ? rs2[4:0] : instr[24:20]; // shift amount
// The adder is used by both arithmetic instructions and JALR.
wire [31:0] aluPlus = aluIn1 + aluIn2;
// Use a single 33 bits subtract to do subtraction and all comparisons
// (trick borrowed from swapforth/J1)
wire [32:0] aluMinus = {1'b1, ~aluIn2} + {1'b0,aluIn1} + 33'b1;
wire LT = (aluIn1[31] ^ aluIn2[31]) ? aluIn1[31] : aluMinus[32];
wire LTU = aluMinus[32];
wire EQ = (aluMinus[31:0] == 0);
// Flip a 32 bit word. Used by the shifter (a single shifter for
// left and right shifts, saves silicium !)
function [31:0] flip32;
input [31:0] x;
flip32 = {x[ 0], x[ 1], x[ 2], x[ 3], x[ 4], x[ 5], x[ 6], x[ 7],
x[ 8], x[ 9], x[10], x[11], x[12], x[13], x[14], x[15],
x[16], x[17], x[18], x[19], x[20], x[21], x[22], x[23],
x[24], x[25], x[26], x[27], x[28], x[29], x[30], x[31]};
endfunction
wire [31:0] shifter_in = (funct3 == 3'b001) ? flip32(aluIn1) : aluIn1;
/* verilator lint_off WIDTH */
wire [31:0] shifter =
$signed({instr[30] & aluIn1[31], shifter_in}) >>> aluIn2[4:0];
/* verilator lint_on WIDTH */
wire [31:0] leftshift = flip32(shifter);
// ADD/SUB/ADDI:
// funct7[5] is 1 for SUB and 0 for ADD. We need also to test instr[5]
// to make the difference with ADDI
//
// SRLI/SRAI/SRL/SRA:
// funct7[5] is 1 for arithmetic shift (SRA/SRAI) and
// 0 for logical shift (SRL/SRLI)
reg [31:0] aluOut;
always @(*) begin
case(funct3)
3'b000: aluOut = (funct7[5] & instr[5]) ? aluMinus[31:0] : aluPlus;
3'b001: aluOut = leftshift;
3'b010: aluOut = {31'b0, LT};
3'b011: aluOut = {31'b0, LTU};
3'b100: aluOut = (aluIn1 ^ aluIn2);
3'b101: aluOut = shifter;
3'b110: aluOut = (aluIn1 | aluIn2);
3'b111: aluOut = (aluIn1 & aluIn2);
endcase
end
// The predicate for branch instructions
reg takeBranch;
always @(*) begin
case(funct3)
3'b000: takeBranch = EQ;
3'b001: takeBranch = !EQ;
3'b100: takeBranch = LT;
3'b101: takeBranch = !LT;
3'b110: takeBranch = LTU;
3'b111: takeBranch = !LTU;
default: takeBranch = 1'b0;
endcase
end
// Address computation
/* verilator lint_off WIDTH */
// An adder used to compute branch address, JAL address and AUIPC.
// branch->PC+Bimm AUIPC->PC+Uimm JAL->PC+Jimm
// Equivalent to PCplusImm = PC + (isJAL ? Jimm : isAUIPC ? Uimm : Bimm)
// Note: doing so with ADDR_WIDTH < 32, AUIPC may fail in
// some RISC-V compliance tests because one can is supposed to use
// it to generate arbitrary 32-bit values (and not only addresses).
wire [ADDR_WIDTH-1:0] PCplusImm = PC + ( instr[3] ? Jimm[31:0] :
instr[4] ? Uimm[31:0] :
Bimm[31:0] );
wire [ADDR_WIDTH-1:0] PCplus4 = PC+4;
wire [ADDR_WIDTH-1:0] nextPC = ((isBranch && takeBranch) || isJAL) ? PCplusImm :
isJALR ? {aluPlus[31:1],1'b0} :
PCplus4;
wire [ADDR_WIDTH-1:0] loadstore_addr = rs1 + (isStore ? Simm : Iimm);
// register write back
assign writeBackData = (isJAL || isJALR) ? PCplus4 :
isLUI ? Uimm :
isAUIPC ? PCplusImm :
isLoad ? LOAD_data :
aluOut;
/* verilator lint_on WIDTH */
// Load
// All memory accesses are aligned on 32 bits boundary. For this
// reason, we need some circuitry that does unaligned halfword
// and byte load/store, based on:
// - funct3[1:0]: 00->byte 01->halfword 10->word
// - mem_addr[1:0]: indicates which byte/halfword is accessed
wire mem_byteAccess = funct3[1:0] == 2'b00;
wire mem_halfwordAccess = funct3[1:0] == 2'b01;
wire [15:0] LOAD_halfword =
loadstore_addr[1] ? mem_rdata[31:16] : mem_rdata[15:0];
wire [7:0] LOAD_byte =
loadstore_addr[0] ? LOAD_halfword[15:8] : LOAD_halfword[7:0];
// LOAD, in addition to funct3[1:0], LOAD depends on:
// - funct3[2] (instr[14]): 0->do sign expansion 1->no sign expansion
wire LOAD_sign =
!funct3[2] & (mem_byteAccess ? LOAD_byte[7] : LOAD_halfword[15]);
wire [31:0] LOAD_data =
mem_byteAccess ? {{24{LOAD_sign}}, LOAD_byte} :
mem_halfwordAccess ? {{16{LOAD_sign}}, LOAD_halfword} :
mem_rdata ;
// Store
// ------------------------------------------------------------------------
assign mem_wdata[ 7: 0] = rs2[7:0];
assign mem_wdata[15: 8] = loadstore_addr[0] ? rs2[7:0] : rs2[15: 8];
assign mem_wdata[23:16] = loadstore_addr[1] ? rs2[7:0] : rs2[23:16];
assign mem_wdata[31:24] = loadstore_addr[0] ? rs2[7:0] :
loadstore_addr[1] ? rs2[15:8] : rs2[31:24];
// The memory write mask:
// 1111 if writing a word
// 0011 or 1100 if writing a halfword
// (depending on loadstore_addr[1])
// 0001, 0010, 0100 or 1000 if writing a byte
// (depending on loadstore_addr[1:0])
wire [3:0] STORE_wmask =
mem_byteAccess ?
(loadstore_addr[1] ?
(loadstore_addr[0] ? 4'b1000 : 4'b0100) :
(loadstore_addr[0] ? 4'b0010 : 4'b0001)
) :
mem_halfwordAccess ?
(loadstore_addr[1] ? 4'b1100 : 4'b0011) :
4'b1111;
// The state machine
localparam FETCH_INSTR = 0;
localparam WAIT_INSTR = 1;
localparam EXECUTE = 2;
localparam WAIT_DATA = 3;
reg [1:0] state = FETCH_INSTR;
always @(posedge clk) begin
if(!resetn) begin
PC <= 0;
state <= FETCH_INSTR;
end else begin
if(writeBackEn && rdId != 0) begin
RegisterBank[rdId] <= writeBackData;
end
case(state)
FETCH_INSTR: begin
state <= WAIT_INSTR;
end
WAIT_INSTR: begin
instr <= mem_rdata[31:2];
rs1 <= RegisterBank[mem_rdata[19:15]];
rs2 <= RegisterBank[mem_rdata[24:20]];
state <= EXECUTE;
end
EXECUTE: begin
if(!isSYSTEM) begin
/* verilator lint_off WIDTH */
PC <= nextPC;
/* verilator lint_on WIDTH */
end
state <= isLoad ? WAIT_DATA : FETCH_INSTR;
`ifdef BENCH
if(isSYSTEM) $finish();
`endif
end
WAIT_DATA: begin
if(!mem_rbusy) begin
state <= FETCH_INSTR;
end
end
endcase
end
end
assign writeBackEn = (state==EXECUTE && !isBranch && !isStore) ||
(state==WAIT_DATA) ;
/* verilator lint_off WIDTH */
assign mem_addr = (state == WAIT_INSTR || state == FETCH_INSTR) ?
PC : loadstore_addr ;
/* verilator lint_on WIDTH */
assign mem_rstrb = (state == FETCH_INSTR || (state == EXECUTE & isLoad));
assign mem_wmask = {4{(state == EXECUTE) & isStore}} & STORE_wmask;
endmodule
module SOC (
input CLK, // system clock
input RESET,// reset button
output [7:0] LEDS, // system LEDs
input RXD, // UART receive
output TXD, // UART transmit
// GG: added SPI signals
output SPIFLASH_CLK, // SPI flash clock
output SPIFLASH_CS_N, // SPI flash chip select (active low)
// inout SPIFLASH_MOSI, // SPI flash IO pins // GG: change IO to
// inout SPIFLASH_MISO // SPI flash IO pins
output SPIFLASH_MOSI, // SPI flash IO pins // output and
input SPIFLASH_MISO // SPI flash IO pins // input
);
wire clk;
wire resetn;
wire [31:0] mem_addr;
wire [31:0] mem_rdata;
wire mem_rbusy;
wire mem_rstrb;
wire [31:0] mem_wdata;
wire [3:0] mem_wmask;
Processor CPU(
.clk(clk),
.resetn(resetn),
.mem_addr(mem_addr),
.mem_rdata(mem_rdata),
.mem_rstrb(mem_rstrb),
.mem_rbusy(mem_rbusy),
.mem_wdata(mem_wdata),
.mem_wmask(mem_wmask)
);
wire [31:0] RAM_rdata;
wire [29:0] mem_wordaddr = mem_addr[31:2];
wire isSPIFlash = mem_addr[23]; // 1xxxx 0x800000
wire isIO = mem_addr[23:22] == 2'b01; // 01xxx
wire isRAM = !(mem_addr[23] | mem_addr[22]); // 00xxx
wire mem_wstrb = |mem_wmask;
Memory RAM(
.clk(clk),
.mem_addr(mem_addr),
.mem_rdata(RAM_rdata),
.mem_rstrb(isRAM & mem_rstrb),
.mem_wdata(mem_wdata),
.mem_wmask({4{isRAM}}&mem_wmask)
);
wire [31:0] SPIFlash_rdata;
wire SPIFlash_rbusy;
MappedSPIFlash SPIFlash(
.clk(clk),
.word_address(mem_wordaddr[19:0]),
.rdata(SPIFlash_rdata),
.rstrb(isSPIFlash & mem_rstrb),
.rbusy(SPIFlash_rbusy),
.CLK(SPIFLASH_CLK),
.CS_N(SPIFLASH_CS_N),
.MOSI(SPIFLASH_MOSI),
.MISO(SPIFLASH_MISO)
);
// Memory-mapped IO in IO page, 1-hot addressing in word address.
localparam IO_LEDS_bit = 0; // W five leds
localparam IO_UART_DAT_bit = 1; // W data to send (8 bits)
localparam IO_UART_CNTL_bit = 2; // R status. bit 9: busy sending
reg [4:0] leds;
always @(posedge clk) begin
if(isIO & mem_wstrb & mem_wordaddr[IO_LEDS_bit]) begin
leds <= mem_wdata[4:0];
// $display("Value sent to LEDS: %b %d %d",mem_wdata,mem_wdata,$signed(mem_wdata));
end
end
// assign {LEDS[4:0], LEDS[7:5]} = {~leds, 3'b111};
assign LEDS[4:0] = ~leds; // connected to VDD, invert logic
// **************************************************************
// GG: the magic SPI Flash fix: without, CS#=0 and MISO=0
// with, everything works just fine!
// (simulation with Flash model still doesn't work)
// w/o the reg/always block, the demo may start somehwere in the middle and the printf doesn't execute?
reg dbg[2:0];
always @(posedge clk) begin
dbg[2] <= SPIFLASH_CS_N;
dbg[1] <= SPIFLASH_MOSI;
dbg[0] <= SPIFLASH_MISO;
end
// reg only, no assign -> no output to the terminal at all ?!
// with dbg, always and LED assign we get the full expected output! Who can explain?
assign LEDS[7] = dbg[2];
assign LEDS[6] = dbg[1];
assign LEDS[5] = dbg[0];
// **************************************************************
wire uart_valid = isIO & mem_wstrb & mem_wordaddr[IO_UART_DAT_bit];
wire uart_ready;
corescore_emitter_uart #(
.clk_freq_hz(`CPU_FREQ*1000000),
.baud_rate(1000000)
) UART(
.i_clk(clk),
.i_rst(!resetn),
.i_data(mem_wdata[7:0]),
.i_valid(uart_valid),
.o_ready(uart_ready),
.o_uart_tx(TXD)
);
wire [31:0] IO_rdata =
mem_wordaddr[IO_UART_CNTL_bit] ? { 22'b0, !uart_ready, 9'b0}
: 32'b0;
assign mem_rdata = isRAM ? RAM_rdata :
isSPIFlash ? SPIFlash_rdata :
IO_rdata ;
assign mem_rbusy = SPIFlash_rbusy;
assign LEDS[4:0] = leds;
// assign LEDS[7:5] = 3'b000;
`ifdef BENCH
always @(posedge clk) begin
if(uart_valid) begin
$write("%c", mem_wdata[7:0] );
$fflush(32'h8000_0001);
end
end
`endif
// Gearbox and reset circuitry.
Clockworks CW(
.CLK(CLK),
.RESET(~RESET),
.clk(clk),
.resetn(resetn)
);
endmodule
// femtorv32, a minimalistic RISC-V RV32I core
// (minus SYSTEM and FENCE that are not implemented)
//
// Bruno Levy, 2020-2021
// Matthias Koch, 2021
//
// This file: driver for SPI Flash, projected in memory space (readonly)
// used in step22..24 only
//
// TODO: go faster with XIP mode and dummy cycles customization
// - send write enable command (06h)
// - send write volatile config register command (08h REG)
// REG=dummy_cycles[7:4]=4'b0100 XIP[3]=1'b1 reserved[2]=1'b0 wrap[1:0]=2'b11
// (4 dummy cycles, works at up to 90 MHz according to datasheet)
//
// DataSheets:
// https://media-www.micron.com/-/media/client/global/documents/products/data-sheet/nor-flash/serial-nor/n25q/n25q_32mb_3v_65nm.pdf?rev=27fc6016fc5249adb4bb8f221e72b395
// https://www.winbond.com/resource-files/w25q128jv%20spi%20revc%2011162016.pdf (not the same chip, mostly compatible, datasheet is easier to read)
// The one on the ULX3S: https://www.issi.com/WW/pdf/25LP-WP128F.pdf
// this one supports quad-SPI mode, IO0=SI, IO1=SO, IO2=WP, IO3=Hold/Reset
// There are four versions (from slowest to fastest)
//
// Version (used command) | cycles per 32-bits read | Specificity |
// ----------------------------------------------------------|-----------------------|
// SPI_FLASH_READ | 64 slow (50 MHz) | Standard |
// SPI_FLASH_FAST_READ | 72 fast (100 MHz) | Uses dummy cycles |
// SPI_FLASH_FAST_READ_DUAL_OUTPUT | 56 fast | Reverts MOSI |
// SPI_FLASH_FAST_READ_DUAL_IO | 44 fast | Reverts MISO and MOSI |
// One can go even faster by configuring number of dummy cycles (can save up to 4 cycles per read)
// and/or using XIP mode (that just requires the address to be sent, saves 16 cycles per 32-bits read)
// (I tried both without success). This may require another mechanism to change configuration register.
//
// Most chips support a QUAD IO mode, using four bidirectional pins,
// however, is not possible because the IO2 and IO3 pins
// are not wired on the IceStick (one may solder a tiny wire and plug it
// to a GPIO pin but I haven't soldering skills for things of that size !!)
// It is a pity, because one could go really fast with these pins !
// Macros to select version and number of dummy cycles based on the board.
`ifdef ICE_STICK
`endif
`ifdef ICE_BREAKER
`endif
`ifdef ULX3S
`endif
`ifdef ARTY
`endif
`ifdef CCA1
`define SPI_FLASH_READ
// `define SPI_FLASH_FAST_READ_DUAL_IO
`define SPI_FLASH_CONFIGURED
`endif
`ifndef SPI_FLASH_DUMMY_CLOCKS
`define SPI_FLASH_DUMMY_CLOCKS 8
`endif
`ifndef SPI_FLASH_CONFIGURED // Default: using slowest / simplest mode (command $03)
`define SPI_FLASH_READ
`endif
/********************************************************************************************************************************/
// GG: this is not an atomic SPI read (1 byte), it reads 4 bytes!
// GG: increased address range for 64Mbit
`ifdef SPI_FLASH_READ
module MappedSPIFlash(
input wire clk, // system clock
input wire rstrb, // read strobe
input wire [21:0] word_address, // address of the 32bit-word to be read 19->21
output wire [31:0] rdata, // data read
output wire rbusy, // asserted if busy receiving data
// SPI flash pins
output wire CLK, // clock
output reg CS_N, // chip select negated (active low)
output wire MOSI, // master out slave in (data to be sent to flash)
input wire MISO // master in slave out (data received from flash)
);
reg [5:0] snd_bitcount;
reg [31:0] cmd_addr;
reg [5:0] rcv_bitcount;
reg [31:0] rcv_data;
wire sending = (snd_bitcount != 0);
wire receiving = (rcv_bitcount != 0);
wire busy = sending | receiving;
assign rbusy = !CS_N;
assign MOSI = cmd_addr[31]; // data will be shifted out
initial CS_N = 1'b1;
assign CLK = !CS_N && !clk; // CLK needs to be inverted (sample on posedge, shift of negedge)
// and needs to be disabled when not sending/receiving (&& !CS_N).
// since least significant bytes are read first, we need to swizzle...
// GG: why? that statement seems to be wrong! both A and D have MSB first
assign rdata = {rcv_data[7:0],rcv_data[15:8],rcv_data[23:16],rcv_data[31:24]};
always @(posedge clk) begin
if(rstrb) begin
CS_N <= 1'b0;
// GG: read byte sequence: 8'h03 + 24'addr
// 2 LSB 0 make 4-byte-alignment, but 20bit addr limit to 1MB!
// debug: MISO remains Z, word_address is initially 0x40000
// cmd_addr <= {8'h03, 2'b00,word_address[19:0], 2'b00};
cmd_addr <= {8'h03, word_address[21:0], 2'b00};
snd_bitcount <= 6'd32;
$strobe("SPI TX starts at t=%d cmd_addr=%h", $time, cmd_addr); // do NOT use $display!
end else begin
if(sending) begin
if(snd_bitcount == 1) begin
// $display("SPI TX starts at t=%d cmd_addr=%h", $time, cmd_addr);
rcv_bitcount <= 6'd32;
end
snd_bitcount <= snd_bitcount - 6'd1;
cmd_addr <= {cmd_addr[30:0],1'b1};
end
if(receiving) begin
if(rcv_bitcount == 6'd32) $strobe("SPI RX starts at t=%d",$time);
rcv_bitcount <= rcv_bitcount - 6'd1;
rcv_data <= {rcv_data[30:0],MISO};
if(rcv_bitcount == 6'd0) $strobe(" RX done at t=%d rcv_data=%x",$time, rcv_data);
end
if(!busy) begin
CS_N <= 1'b1;
// $strobe(" CS_N=1 "); // is getting executed most of the time
end
end
end
endmodule
`endif
/********************************************************************************************************************************/
`ifdef SPI_FLASH_FAST_READ
module MappedSPIFlash(
input wire clk, // system clock
input wire rstrb, // read strobe
input wire [19:0] word_address, // address of the word to be read
output wire [31:0] rdata, // data read
output wire rbusy, // asserted if busy receiving data
// SPI flash pins
output wire CLK, // clock
output reg CS_N, // chip select negated (active low)
output wire MOSI, // master out slave in (data to be sent to flash)
input wire MISO // master in slave out (data received from flash)
);
reg [5:0] snd_bitcount;
reg [31:0] cmd_addr;
reg [5:0] rcv_bitcount;
reg [31:0] rcv_data;
wire sending = (snd_bitcount != 0);
wire receiving = (rcv_bitcount != 0);
wire busy = sending | receiving;
assign rbusy = !CS_N;
assign MOSI = cmd_addr[31];
initial CS_N = 1'b1;
assign CLK = !CS_N && !clk;
// since least significant bytes are read first, we need to swizzle...
assign rdata = {rcv_data[7:0],rcv_data[15:8],rcv_data[23:16],rcv_data[31:24]};
always @(posedge clk) begin
if(rstrb) begin
CS_N <= 1'b0;
cmd_addr <= {8'h0b, 2'b00,word_address[19:0], 2'b00};
snd_bitcount <= 6'd40; // TODO: check dummy clocks
end else begin
if(sending) begin
if(snd_bitcount == 1) begin
rcv_bitcount <= 6'd32;
end
snd_bitcount <= snd_bitcount - 6'd1;
cmd_addr <= {cmd_addr[30:0],1'b1};
end
if(receiving) begin
rcv_bitcount <= rcv_bitcount - 6'd1;
rcv_data <= {rcv_data[30:0],MISO};
end
if(!busy) begin
CS_N <= 1'b1;
end
end
end
endmodule
`endif
/********************************************************************************************************************************/
`ifdef SPI_FLASH_FAST_READ_DUAL_OUTPUT
module MappedSPIFlash(
input wire clk, // system clock
input wire rstrb, // read strobe
input wire [19:0] word_address, // address of the word to be read
output wire [31:0] rdata, // data read
output wire rbusy, // asserted if busy receiving data
// SPI flash pins
output wire CLK, // clock
output reg CS_N, // chip select negated (active low)
inout wire MOSI, // master out slave in (data to be sent to flash)
input wire MISO // master in slave out (data received from flash)
);
wire MOSI_out;
wire MOSI_in;
wire MOSI_oe;
assign MOSI = MOSI_oe ? MOSI_out : 1'bZ;
assign MOSI_in = MOSI;
reg [5:0] snd_bitcount;
reg [31:0] cmd_addr;
reg [5:0] rcv_bitcount;
reg [31:0] rcv_data;
wire sending = (snd_bitcount != 0);
wire receiving = (rcv_bitcount != 0);
wire busy = sending | receiving;
assign rbusy = !CS_N;
assign MOSI_oe = !receiving;
assign MOSI_out = sending && cmd_addr[31];
initial CS_N = 1'b1;
assign CLK = !CS_N && !clk;
// since least significant bytes are read first, we need to swizzle...
assign rdata = {rcv_data[7:0],rcv_data[15:8],rcv_data[23:16],rcv_data[31:24]};
always @(posedge clk) begin
if(rstrb) begin
CS_N <= 1'b0;
cmd_addr <= {8'h3b, 2'b00,word_address[19:0], 2'b00};
snd_bitcount <= 6'd40; // TODO: check dummy clocks
end else begin
if(sending) begin
if(snd_bitcount == 1) begin
rcv_bitcount <= 6'd32;
end
snd_bitcount <= snd_bitcount - 6'd1;
cmd_addr <= {cmd_addr[30:0],1'b1};
end
if(receiving) begin
rcv_bitcount <= rcv_bitcount - 6'd2;
rcv_data <= {rcv_data[29:0],MISO,MOSI_in};
end
if(!busy) begin
CS_N <= 1'b1;
end
end
end
endmodule
`endif
/********************************************************************************************************************************/
`ifdef SPI_FLASH_FAST_READ_DUAL_IO
/*
module MappedSPIFlash(
input wire clk, // system clock
input wire rstrb, // read strobe
input wire [19:0] word_address, // address to be read
output wire [31:0] rdata, // data read
output wire rbusy, // asserted if busy receiving data
output wire CLK, // clock
output reg CS_N, // chip select negated (active low)
inout wire [1:0] IO // two bidirectional IO pins
);
reg [4:0] clock_cnt; // send/receive clock, 2 bits per clock (dual IO)
reg [39:0] shifter; // used for sending and receiving
reg dir; // 1 if sending, 0 otherwise
wire busy = (clock_cnt != 0);
wire sending = (dir && busy);
wire receiving = (!dir && busy);
assign rbusy = !CS_N;
// The two data pins IO0 (=MOSI) and IO1 (=MISO) used in bidirectional mode.
reg IO_oe = 1'b1;
wire [1:0] IO_out = shifter[39:38];
wire [1:0] IO_in = IO;
assign IO = IO_oe ? IO_out : 2'bZZ;
initial CS_N = 1'b1;
assign CLK = !CS_N && !clk;
// since least significant bytes are read first, we need to swizzle...
assign rdata={shifter[7:0],shifter[15:8],shifter[23:16],shifter[31:24]};
// Duplicates the bits (used because when sending command, dual IO is
// not active yet, and I do not want to have a separate shifter for
// the command and for the args...).
function [15:0] bbyyttee;
input [7:0] x;
begin
bbyyttee = {
x[7],x[7],x[6],x[6],x[5],x[5],x[4],x[4],
x[3],x[3],x[2],x[2],x[1],x[1],x[0],x[0]
};
end
endfunction
always @(posedge clk) begin
if(rstrb) begin
CS_N <= 1'b0;
IO_oe <= 1'b1;
dir <= 1'b1;
shifter <= {bbyyttee(8'hbb), 2'b00, word_address[19:0], 2'b00};
clock_cnt <= 5'd20 + `SPI_FLASH_DUMMY_CLOCKS; // cmd: 8 clocks address: 12 clocks + dummy clocks
end else begin
if(busy) begin
shifter <= {shifter[37:0], (receiving ? IO_in : 2'b11)};
clock_cnt <= clock_cnt - 5'd1;
if(dir && clock_cnt == 1) begin
clock_cnt <= 5'd16; // 32 bits, 2 bits per clock
IO_oe <= 1'b0;
dir <= 1'b0;
end
end else begin
CS_N <= 1'b1;
end
end
end
endmodule
*/
// 04/02/2021 This version optimized by Matthias Koch
module MappedSPIFlash(
input wire clk, // system clock
input wire rstrb, // read strobe
input wire [19:0] word_address, // read address
output wire [31:0] rdata, // data read
output wire rbusy, // asserted if busy receiving data
output wire CLK, // clock
output wire CS_N, // chip select negated (active low)
inout wire [1:0] IO // two bidirectional IO pins
);
reg [6:0] clock_cnt; // send/receive clock, 2 bits per clock (dual IO)
reg [39:0] shifter; // used for sending and receiving
wire busy = ~clock_cnt[6];
assign CS_N = clock_cnt[6];
assign rbusy = busy;
assign CLK = busy & !clk; // CLK needs to be disabled when not active.
// Since least significant bytes are read first, we need to swizzle...
assign rdata={shifter[7:0],shifter[15:8],shifter[23:16],shifter[31:24]};
// The two data pins IO0 (=MOSI) and IO1 (=MISO) used in bidirectional mode.
wire [1:0] IO_out = shifter[39:38];
wire [1:0] IO_in = IO;
assign IO = clock_cnt > 7'd15 ? IO_out : 2'bZZ;
// assign IO = |clock_cnt[5:4] ? IO_out : 2'bZZ; // optimized version of the line above
always @(posedge clk) begin
if(rstrb) begin
shifter <= {16'hCFCF, 2'b00, word_address[19:0], 2'b00}; // 16'hCFCF is 8'hbb with bits doubled
clock_cnt <= 7'd43; // cmd: 8 clocks address: 12 clocks dummy: 8 clocks. data: 16 clocks, 2 bits per clock
end else begin
if(busy) begin
shifter <= {shifter[37:0], IO_in};
clock_cnt <= clock_cnt - 7'd1;
end
end
end
endmodule
`endif
// ...
int main() {
printf("Debugging Flash\n");
printf("Reading data\n"); // printing the memory content, compare with hexdump..
spi_reset();
for(i = 0; i<32; i++) {
B = next_spi_byte();
printf(" %d:b=%x ",i, B);
if(i%8 == 7)
printf("\n");
}
printf("\n");
for(int i=0; i<10; ++i) { // comment out for simulation (30s per call)
wait();
}
// ... rest of the ST_NICCC.c file
}
.equ IO_BASE, 0x400000
# FLASH_BASE 0x800000
.section .text
.globl start
start:
li gp,IO_BASE
# GG: increased 4x
# li sp,0x1800
li sp,0x6000
call main
ebreak
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment