Created
October 6, 2023 19:51
-
-
Save g3grau/7940a516ae68addd022b07fd2c5a05fe to your computer and use it in GitHub Desktop.
Fixing SPI Flash access in step22 (https://github.com/fm4dd/gatemate-riscv/issues/2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### ----------------------------------------------------------- ### | |
### Local Makefile for building the RISC-V native application. ### | |
### Requires riscv-toolchain, linker script and firmware_words ### | |
### hex conversion program. Creates the 'firmware.hex' output ### | |
### for FPGA upload together with the bitstream. ### | |
### ### | |
### This program needs optimizer flag -O2 to fit into 6K BRAM. ### | |
### ----------------------------------------------------------- ### | |
TOOLCHAINDIR = /usr/cadtools/riscv-gnu-toolchain | |
RVLINKSCRIPT=/home/ggrau/projects/FPGAsrc/GateMate/CologneChip/gatemate-riscv/ldscripts-shared/bram.ld | |
# RVLINKSCRIPT=/home/ggrau/projects/FPGAsrc/GateMate/CologneChip/gatemate-riscv/ldscripts-shared/spiflash1.ld | |
FW_WORDS_DIR=$(TOOLCHAINDIR)/firmware_words | |
RV32I_LIBGCC=$(TOOLCHAINDIR)/build/riscv64_multilib/lib64/gcc/riscv64-unknown-elf/11.1.0/rv32i/ilp32/libgcc.a | |
ASFLAGS= -march=rv32i -mabi=ilp32 -mno-relax | |
LDFLAGS= -m elf32lriscv -nostdlib --no-relax -T $(RVLINKSCRIPT) | |
# -O2 for memory optimization | |
CFLAGS= -march=rv32i -mabi=ilp32 -fno-pic -fno-stack-protector -w -Wl,--no-relax -O2 | |
# -O2 | |
all: firmware.hex | |
# Step-3: Convert the RISCV elf binary into an readmem() formatted hex file | |
# ------------------------------------------------------------------------- | |
# 1536*4=6144 -> GG: increased 4x to test gcc without -O2 | |
# check with SOC.v reg[] MEM and start.S (sp) | |
# RAMSIZE=6144 | |
RAMSIZE=24576 | |
firmware.hex: ST_NICCC.bram.elf | |
$(FW_WORDS_DIR)/firmware_words $< -ram $(RAMSIZE) -max_addr $(RAMSIZE) -out $@ | |
# Step-2: Link the object files into a RISCV elf binary | |
# ----------------------------------------------------- | |
ST_NICCC.bram.elf: %.o | |
# $(TOOLCHAINDIR)/bin/riscv64-unknown-elf-ld start_spiflash1.o putchar.o print.o ST_NICCC.o $(RV32I_LIBGCC) $(LDFLAGS) -o $@ | |
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-ld start.o putchar.o print.o wait.o ST_NICCC.o $(RV32I_LIBGCC) $(LDFLAGS) -o $@ | |
# Step-1: build object files (.o) from assembler source files (.S) | |
# ---------------------------------------------------------------- | |
%o: | |
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-as $(ASFLAGS) start.S -o start.o | |
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-as $(ASFLAGS) start_spiflash1.S -o start_spiflash1.o | |
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-as $(ASFLAGS) putchar.S -o putchar.o | |
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-as $(ASFLAGS) wait.S -o wait.o | |
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-gcc $(CFLAGS) -c print.c -o print.o | |
$(TOOLCHAINDIR)/bin/riscv64-unknown-elf-gcc $(CFLAGS) -c ST_NICCC.c -o ST_NICCC.o | |
clean: | |
rm -f *.o *.hex *.elf | |
.SECONDARY: | |
.PHONY: all clean |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ------------------------------------------------------- | |
// soc.v FemtoRV Tutorial step22 SPI Flash @20230711 fm4dd | |
// | |
// Requires: Gatemate E1 eval board v3.1B | |
// | |
// Notes: | |
// Step 22 creates a separate RISC-V app written in 'C', | |
// located in the src folder. The app code is loaded | |
// into block ram, which requires the hex conversion with | |
// firmware_words. 'make' creates the firmware.hex app | |
// code before synthesis. We use the $readmemh() function | |
// to load the app. Additionally, application data is | |
// written into flash, and retrieved as ROM from the | |
// RISC-V program. | |
// | |
// Code is tested on a Gatemate E1 eval board v3.1B | |
// E1 onboard user button SW3 is assigned to RESET. | |
// LEDS[7:0] is assigned to E1 onboard Leds D1..D8. | |
// TXD/RXD is assigned to USB<->serial converter on PMODB. | |
// | |
// How to run: make ST_NICCC, make prog and make test | |
// | |
// GG: compiles with -O2 in 6k, but doesn't work in test or flash | |
// compiles without -O2 using 24k, but still doesn't work .. no output | |
// NOTE that this is not yet set up to use the Flash as storage!? | |
// | |
// GG: SPI signals don't work unless we probe them for LED output | |
// reg dbg with | |
// | |
// ------------------------------------------------------- | |
`default_nettype none | |
`define CPU_FREQ 10 // RISCV CPU frequency in MHz | |
// `define ARTY // define board type for SPI mode | |
`define CCA1 // GG: same as ARTY, just simple SPI so far | |
module Memory ( | |
input clk, | |
input [31:0] mem_addr, // address to be read | |
output reg [31:0] mem_rdata, // data read from memory | |
input mem_rstrb, // goes high when processor wants to read | |
input [31:0] mem_wdata, // data to be written | |
input [3:0] mem_wmask // masks for writing the 4 bytes (1=write byte) | |
); | |
// GG: increased 4x to test without gcc -O2 | |
// reg [31:0] MEM [0:1535]; // 1536 4-bytes words = 6 Kb of RAM in total | |
reg [31:0] MEM [0:6143]; // 6912 4-bytes words = 24 Kb of RAM in total | |
initial begin | |
$readmemh("firmware.hex",MEM); | |
end | |
wire [10:0] word_addr = mem_addr[12:2]; | |
always @(posedge clk) begin | |
if(mem_rstrb) begin | |
mem_rdata <= MEM[word_addr]; | |
end | |
if(mem_wmask[0]) MEM[word_addr][ 7:0 ] <= mem_wdata[ 7:0 ]; | |
if(mem_wmask[1]) MEM[word_addr][15:8 ] <= mem_wdata[15:8 ]; | |
if(mem_wmask[2]) MEM[word_addr][23:16] <= mem_wdata[23:16]; | |
if(mem_wmask[3]) MEM[word_addr][31:24] <= mem_wdata[31:24]; | |
end | |
endmodule | |
module Processor ( | |
input clk, | |
input resetn, | |
output [31:0] mem_addr, | |
input [31:0] mem_rdata, | |
input mem_rbusy, | |
output mem_rstrb, | |
output [31:0] mem_wdata, | |
output [3:0] mem_wmask | |
); | |
// Internal width for addresses. | |
localparam ADDR_WIDTH=24; | |
reg [ADDR_WIDTH:0] PC=0; // program counter | |
reg [31:2] instr; // current instruction | |
// See the table P. 105 in RISC-V manual | |
// The 10 RISC-V instructions | |
wire isALUreg = (instr[6:2] == 5'b01100); // rd <- rs1 OP rs2 | |
wire isALUimm = (instr[6:2] == 5'b00100); // rd <- rs1 OP Iimm | |
wire isBranch = (instr[6:2] == 5'b11000); // if(rs1 OP rs2) PC<-PC+Bimm | |
wire isJALR = (instr[6:2] == 5'b11001); // rd <- PC+4; PC<-rs1+Iimm | |
wire isJAL = (instr[6:2] == 5'b11011); // rd <- PC+4; PC<-PC+Jimm | |
wire isAUIPC = (instr[6:2] == 5'b00101); // rd <- PC + Uimm | |
wire isLUI = (instr[6:2] == 5'b01101); // rd <- Uimm | |
wire isLoad = (instr[6:2] == 5'b00000); // rd <- mem[rs1+Iimm] | |
wire isStore = (instr[6:2] == 5'b01000); // mem[rs1+Simm] <- rs2 | |
wire isSYSTEM = (instr[6:2] == 5'b11100); // special | |
// The 5 immediate formats | |
wire [31:0] Uimm={ instr[31], instr[30:12], {12{1'b0}}}; | |
wire [31:0] Iimm={{21{instr[31]}}, instr[30:20]}; | |
/* verilator lint_off UNUSED */ // MSBs of SBJimms are not used by addr adder. | |
wire [31:0] Simm={{21{instr[31]}}, instr[30:25],instr[11:7]}; | |
wire [31:0] Bimm={{20{instr[31]}}, instr[7],instr[30:25],instr[11:8],1'b0}; | |
wire [31:0] Jimm={{12{instr[31]}}, instr[19:12],instr[20],instr[30:21],1'b0}; | |
/* verilator lint_on UNUSED */ | |
// Destination registers | |
wire [4:0] rdId = instr[11:7]; | |
// function codes | |
wire [2:0] funct3 = instr[14:12]; | |
wire [6:0] funct7 = instr[31:25]; | |
// The registers bank | |
reg [31:0] RegisterBank [0:31]; | |
reg [31:0] rs1; // value of source | |
reg [31:0] rs2; // registers. | |
wire [31:0] writeBackData; // data to be written to rd | |
wire writeBackEn; // asserted if data should be written to rd | |
`ifdef BENCH | |
integer i; | |
initial begin | |
for(i=0; i<32; ++i) begin | |
RegisterBank[i] = 0; | |
end | |
end | |
`endif | |
// The ALU | |
wire [31:0] aluIn1 = rs1; | |
wire [31:0] aluIn2 = isALUreg | isBranch ? rs2 : Iimm; | |
wire [4:0] shamt = isALUreg ? rs2[4:0] : instr[24:20]; // shift amount | |
// The adder is used by both arithmetic instructions and JALR. | |
wire [31:0] aluPlus = aluIn1 + aluIn2; | |
// Use a single 33 bits subtract to do subtraction and all comparisons | |
// (trick borrowed from swapforth/J1) | |
wire [32:0] aluMinus = {1'b1, ~aluIn2} + {1'b0,aluIn1} + 33'b1; | |
wire LT = (aluIn1[31] ^ aluIn2[31]) ? aluIn1[31] : aluMinus[32]; | |
wire LTU = aluMinus[32]; | |
wire EQ = (aluMinus[31:0] == 0); | |
// Flip a 32 bit word. Used by the shifter (a single shifter for | |
// left and right shifts, saves silicium !) | |
function [31:0] flip32; | |
input [31:0] x; | |
flip32 = {x[ 0], x[ 1], x[ 2], x[ 3], x[ 4], x[ 5], x[ 6], x[ 7], | |
x[ 8], x[ 9], x[10], x[11], x[12], x[13], x[14], x[15], | |
x[16], x[17], x[18], x[19], x[20], x[21], x[22], x[23], | |
x[24], x[25], x[26], x[27], x[28], x[29], x[30], x[31]}; | |
endfunction | |
wire [31:0] shifter_in = (funct3 == 3'b001) ? flip32(aluIn1) : aluIn1; | |
/* verilator lint_off WIDTH */ | |
wire [31:0] shifter = | |
$signed({instr[30] & aluIn1[31], shifter_in}) >>> aluIn2[4:0]; | |
/* verilator lint_on WIDTH */ | |
wire [31:0] leftshift = flip32(shifter); | |
// ADD/SUB/ADDI: | |
// funct7[5] is 1 for SUB and 0 for ADD. We need also to test instr[5] | |
// to make the difference with ADDI | |
// | |
// SRLI/SRAI/SRL/SRA: | |
// funct7[5] is 1 for arithmetic shift (SRA/SRAI) and | |
// 0 for logical shift (SRL/SRLI) | |
reg [31:0] aluOut; | |
always @(*) begin | |
case(funct3) | |
3'b000: aluOut = (funct7[5] & instr[5]) ? aluMinus[31:0] : aluPlus; | |
3'b001: aluOut = leftshift; | |
3'b010: aluOut = {31'b0, LT}; | |
3'b011: aluOut = {31'b0, LTU}; | |
3'b100: aluOut = (aluIn1 ^ aluIn2); | |
3'b101: aluOut = shifter; | |
3'b110: aluOut = (aluIn1 | aluIn2); | |
3'b111: aluOut = (aluIn1 & aluIn2); | |
endcase | |
end | |
// The predicate for branch instructions | |
reg takeBranch; | |
always @(*) begin | |
case(funct3) | |
3'b000: takeBranch = EQ; | |
3'b001: takeBranch = !EQ; | |
3'b100: takeBranch = LT; | |
3'b101: takeBranch = !LT; | |
3'b110: takeBranch = LTU; | |
3'b111: takeBranch = !LTU; | |
default: takeBranch = 1'b0; | |
endcase | |
end | |
// Address computation | |
/* verilator lint_off WIDTH */ | |
// An adder used to compute branch address, JAL address and AUIPC. | |
// branch->PC+Bimm AUIPC->PC+Uimm JAL->PC+Jimm | |
// Equivalent to PCplusImm = PC + (isJAL ? Jimm : isAUIPC ? Uimm : Bimm) | |
// Note: doing so with ADDR_WIDTH < 32, AUIPC may fail in | |
// some RISC-V compliance tests because one can is supposed to use | |
// it to generate arbitrary 32-bit values (and not only addresses). | |
wire [ADDR_WIDTH-1:0] PCplusImm = PC + ( instr[3] ? Jimm[31:0] : | |
instr[4] ? Uimm[31:0] : | |
Bimm[31:0] ); | |
wire [ADDR_WIDTH-1:0] PCplus4 = PC+4; | |
wire [ADDR_WIDTH-1:0] nextPC = ((isBranch && takeBranch) || isJAL) ? PCplusImm : | |
isJALR ? {aluPlus[31:1],1'b0} : | |
PCplus4; | |
wire [ADDR_WIDTH-1:0] loadstore_addr = rs1 + (isStore ? Simm : Iimm); | |
// register write back | |
assign writeBackData = (isJAL || isJALR) ? PCplus4 : | |
isLUI ? Uimm : | |
isAUIPC ? PCplusImm : | |
isLoad ? LOAD_data : | |
aluOut; | |
/* verilator lint_on WIDTH */ | |
// Load | |
// All memory accesses are aligned on 32 bits boundary. For this | |
// reason, we need some circuitry that does unaligned halfword | |
// and byte load/store, based on: | |
// - funct3[1:0]: 00->byte 01->halfword 10->word | |
// - mem_addr[1:0]: indicates which byte/halfword is accessed | |
wire mem_byteAccess = funct3[1:0] == 2'b00; | |
wire mem_halfwordAccess = funct3[1:0] == 2'b01; | |
wire [15:0] LOAD_halfword = | |
loadstore_addr[1] ? mem_rdata[31:16] : mem_rdata[15:0]; | |
wire [7:0] LOAD_byte = | |
loadstore_addr[0] ? LOAD_halfword[15:8] : LOAD_halfword[7:0]; | |
// LOAD, in addition to funct3[1:0], LOAD depends on: | |
// - funct3[2] (instr[14]): 0->do sign expansion 1->no sign expansion | |
wire LOAD_sign = | |
!funct3[2] & (mem_byteAccess ? LOAD_byte[7] : LOAD_halfword[15]); | |
wire [31:0] LOAD_data = | |
mem_byteAccess ? {{24{LOAD_sign}}, LOAD_byte} : | |
mem_halfwordAccess ? {{16{LOAD_sign}}, LOAD_halfword} : | |
mem_rdata ; | |
// Store | |
// ------------------------------------------------------------------------ | |
assign mem_wdata[ 7: 0] = rs2[7:0]; | |
assign mem_wdata[15: 8] = loadstore_addr[0] ? rs2[7:0] : rs2[15: 8]; | |
assign mem_wdata[23:16] = loadstore_addr[1] ? rs2[7:0] : rs2[23:16]; | |
assign mem_wdata[31:24] = loadstore_addr[0] ? rs2[7:0] : | |
loadstore_addr[1] ? rs2[15:8] : rs2[31:24]; | |
// The memory write mask: | |
// 1111 if writing a word | |
// 0011 or 1100 if writing a halfword | |
// (depending on loadstore_addr[1]) | |
// 0001, 0010, 0100 or 1000 if writing a byte | |
// (depending on loadstore_addr[1:0]) | |
wire [3:0] STORE_wmask = | |
mem_byteAccess ? | |
(loadstore_addr[1] ? | |
(loadstore_addr[0] ? 4'b1000 : 4'b0100) : | |
(loadstore_addr[0] ? 4'b0010 : 4'b0001) | |
) : | |
mem_halfwordAccess ? | |
(loadstore_addr[1] ? 4'b1100 : 4'b0011) : | |
4'b1111; | |
// The state machine | |
localparam FETCH_INSTR = 0; | |
localparam WAIT_INSTR = 1; | |
localparam EXECUTE = 2; | |
localparam WAIT_DATA = 3; | |
reg [1:0] state = FETCH_INSTR; | |
always @(posedge clk) begin | |
if(!resetn) begin | |
PC <= 0; | |
state <= FETCH_INSTR; | |
end else begin | |
if(writeBackEn && rdId != 0) begin | |
RegisterBank[rdId] <= writeBackData; | |
end | |
case(state) | |
FETCH_INSTR: begin | |
state <= WAIT_INSTR; | |
end | |
WAIT_INSTR: begin | |
instr <= mem_rdata[31:2]; | |
rs1 <= RegisterBank[mem_rdata[19:15]]; | |
rs2 <= RegisterBank[mem_rdata[24:20]]; | |
state <= EXECUTE; | |
end | |
EXECUTE: begin | |
if(!isSYSTEM) begin | |
/* verilator lint_off WIDTH */ | |
PC <= nextPC; | |
/* verilator lint_on WIDTH */ | |
end | |
state <= isLoad ? WAIT_DATA : FETCH_INSTR; | |
`ifdef BENCH | |
if(isSYSTEM) $finish(); | |
`endif | |
end | |
WAIT_DATA: begin | |
if(!mem_rbusy) begin | |
state <= FETCH_INSTR; | |
end | |
end | |
endcase | |
end | |
end | |
assign writeBackEn = (state==EXECUTE && !isBranch && !isStore) || | |
(state==WAIT_DATA) ; | |
/* verilator lint_off WIDTH */ | |
assign mem_addr = (state == WAIT_INSTR || state == FETCH_INSTR) ? | |
PC : loadstore_addr ; | |
/* verilator lint_on WIDTH */ | |
assign mem_rstrb = (state == FETCH_INSTR || (state == EXECUTE & isLoad)); | |
assign mem_wmask = {4{(state == EXECUTE) & isStore}} & STORE_wmask; | |
endmodule | |
module SOC ( | |
input CLK, // system clock | |
input RESET,// reset button | |
output [7:0] LEDS, // system LEDs | |
input RXD, // UART receive | |
output TXD, // UART transmit | |
// GG: added SPI signals | |
output SPIFLASH_CLK, // SPI flash clock | |
output SPIFLASH_CS_N, // SPI flash chip select (active low) | |
// inout SPIFLASH_MOSI, // SPI flash IO pins // GG: change IO to | |
// inout SPIFLASH_MISO // SPI flash IO pins | |
output SPIFLASH_MOSI, // SPI flash IO pins // output and | |
input SPIFLASH_MISO // SPI flash IO pins // input | |
); | |
wire clk; | |
wire resetn; | |
wire [31:0] mem_addr; | |
wire [31:0] mem_rdata; | |
wire mem_rbusy; | |
wire mem_rstrb; | |
wire [31:0] mem_wdata; | |
wire [3:0] mem_wmask; | |
Processor CPU( | |
.clk(clk), | |
.resetn(resetn), | |
.mem_addr(mem_addr), | |
.mem_rdata(mem_rdata), | |
.mem_rstrb(mem_rstrb), | |
.mem_rbusy(mem_rbusy), | |
.mem_wdata(mem_wdata), | |
.mem_wmask(mem_wmask) | |
); | |
wire [31:0] RAM_rdata; | |
wire [29:0] mem_wordaddr = mem_addr[31:2]; | |
wire isSPIFlash = mem_addr[23]; // 1xxxx 0x800000 | |
wire isIO = mem_addr[23:22] == 2'b01; // 01xxx | |
wire isRAM = !(mem_addr[23] | mem_addr[22]); // 00xxx | |
wire mem_wstrb = |mem_wmask; | |
Memory RAM( | |
.clk(clk), | |
.mem_addr(mem_addr), | |
.mem_rdata(RAM_rdata), | |
.mem_rstrb(isRAM & mem_rstrb), | |
.mem_wdata(mem_wdata), | |
.mem_wmask({4{isRAM}}&mem_wmask) | |
); | |
wire [31:0] SPIFlash_rdata; | |
wire SPIFlash_rbusy; | |
MappedSPIFlash SPIFlash( | |
.clk(clk), | |
.word_address(mem_wordaddr[19:0]), | |
.rdata(SPIFlash_rdata), | |
.rstrb(isSPIFlash & mem_rstrb), | |
.rbusy(SPIFlash_rbusy), | |
.CLK(SPIFLASH_CLK), | |
.CS_N(SPIFLASH_CS_N), | |
.MOSI(SPIFLASH_MOSI), | |
.MISO(SPIFLASH_MISO) | |
); | |
// Memory-mapped IO in IO page, 1-hot addressing in word address. | |
localparam IO_LEDS_bit = 0; // W five leds | |
localparam IO_UART_DAT_bit = 1; // W data to send (8 bits) | |
localparam IO_UART_CNTL_bit = 2; // R status. bit 9: busy sending | |
reg [4:0] leds; | |
always @(posedge clk) begin | |
if(isIO & mem_wstrb & mem_wordaddr[IO_LEDS_bit]) begin | |
leds <= mem_wdata[4:0]; | |
// $display("Value sent to LEDS: %b %d %d",mem_wdata,mem_wdata,$signed(mem_wdata)); | |
end | |
end | |
// assign {LEDS[4:0], LEDS[7:5]} = {~leds, 3'b111}; | |
assign LEDS[4:0] = ~leds; // connected to VDD, invert logic | |
// ************************************************************** | |
// GG: the magic SPI Flash fix: without, CS#=0 and MISO=0 | |
// with, everything works just fine! | |
// (simulation with Flash model still doesn't work) | |
// w/o the reg/always block, the demo may start somehwere in the middle and the printf doesn't execute? | |
reg dbg[2:0]; | |
always @(posedge clk) begin | |
dbg[2] <= SPIFLASH_CS_N; | |
dbg[1] <= SPIFLASH_MOSI; | |
dbg[0] <= SPIFLASH_MISO; | |
end | |
// reg only, no assign -> no output to the terminal at all ?! | |
// with dbg, always and LED assign we get the full expected output! Who can explain? | |
assign LEDS[7] = dbg[2]; | |
assign LEDS[6] = dbg[1]; | |
assign LEDS[5] = dbg[0]; | |
// ************************************************************** | |
wire uart_valid = isIO & mem_wstrb & mem_wordaddr[IO_UART_DAT_bit]; | |
wire uart_ready; | |
corescore_emitter_uart #( | |
.clk_freq_hz(`CPU_FREQ*1000000), | |
.baud_rate(1000000) | |
) UART( | |
.i_clk(clk), | |
.i_rst(!resetn), | |
.i_data(mem_wdata[7:0]), | |
.i_valid(uart_valid), | |
.o_ready(uart_ready), | |
.o_uart_tx(TXD) | |
); | |
wire [31:0] IO_rdata = | |
mem_wordaddr[IO_UART_CNTL_bit] ? { 22'b0, !uart_ready, 9'b0} | |
: 32'b0; | |
assign mem_rdata = isRAM ? RAM_rdata : | |
isSPIFlash ? SPIFlash_rdata : | |
IO_rdata ; | |
assign mem_rbusy = SPIFlash_rbusy; | |
assign LEDS[4:0] = leds; | |
// assign LEDS[7:5] = 3'b000; | |
`ifdef BENCH | |
always @(posedge clk) begin | |
if(uart_valid) begin | |
$write("%c", mem_wdata[7:0] ); | |
$fflush(32'h8000_0001); | |
end | |
end | |
`endif | |
// Gearbox and reset circuitry. | |
Clockworks CW( | |
.CLK(CLK), | |
.RESET(~RESET), | |
.clk(clk), | |
.resetn(resetn) | |
); | |
endmodule |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// femtorv32, a minimalistic RISC-V RV32I core | |
// (minus SYSTEM and FENCE that are not implemented) | |
// | |
// Bruno Levy, 2020-2021 | |
// Matthias Koch, 2021 | |
// | |
// This file: driver for SPI Flash, projected in memory space (readonly) | |
// used in step22..24 only | |
// | |
// TODO: go faster with XIP mode and dummy cycles customization | |
// - send write enable command (06h) | |
// - send write volatile config register command (08h REG) | |
// REG=dummy_cycles[7:4]=4'b0100 XIP[3]=1'b1 reserved[2]=1'b0 wrap[1:0]=2'b11 | |
// (4 dummy cycles, works at up to 90 MHz according to datasheet) | |
// | |
// DataSheets: | |
// https://media-www.micron.com/-/media/client/global/documents/products/data-sheet/nor-flash/serial-nor/n25q/n25q_32mb_3v_65nm.pdf?rev=27fc6016fc5249adb4bb8f221e72b395 | |
// https://www.winbond.com/resource-files/w25q128jv%20spi%20revc%2011162016.pdf (not the same chip, mostly compatible, datasheet is easier to read) | |
// The one on the ULX3S: https://www.issi.com/WW/pdf/25LP-WP128F.pdf | |
// this one supports quad-SPI mode, IO0=SI, IO1=SO, IO2=WP, IO3=Hold/Reset | |
// There are four versions (from slowest to fastest) | |
// | |
// Version (used command) | cycles per 32-bits read | Specificity | | |
// ----------------------------------------------------------|-----------------------| | |
// SPI_FLASH_READ | 64 slow (50 MHz) | Standard | | |
// SPI_FLASH_FAST_READ | 72 fast (100 MHz) | Uses dummy cycles | | |
// SPI_FLASH_FAST_READ_DUAL_OUTPUT | 56 fast | Reverts MOSI | | |
// SPI_FLASH_FAST_READ_DUAL_IO | 44 fast | Reverts MISO and MOSI | | |
// One can go even faster by configuring number of dummy cycles (can save up to 4 cycles per read) | |
// and/or using XIP mode (that just requires the address to be sent, saves 16 cycles per 32-bits read) | |
// (I tried both without success). This may require another mechanism to change configuration register. | |
// | |
// Most chips support a QUAD IO mode, using four bidirectional pins, | |
// however, is not possible because the IO2 and IO3 pins | |
// are not wired on the IceStick (one may solder a tiny wire and plug it | |
// to a GPIO pin but I haven't soldering skills for things of that size !!) | |
// It is a pity, because one could go really fast with these pins ! | |
// Macros to select version and number of dummy cycles based on the board. | |
`ifdef ICE_STICK | |
`endif | |
`ifdef ICE_BREAKER | |
`endif | |
`ifdef ULX3S | |
`endif | |
`ifdef ARTY | |
`endif | |
`ifdef CCA1 | |
`define SPI_FLASH_READ | |
// `define SPI_FLASH_FAST_READ_DUAL_IO | |
`define SPI_FLASH_CONFIGURED | |
`endif | |
`ifndef SPI_FLASH_DUMMY_CLOCKS | |
`define SPI_FLASH_DUMMY_CLOCKS 8 | |
`endif | |
`ifndef SPI_FLASH_CONFIGURED // Default: using slowest / simplest mode (command $03) | |
`define SPI_FLASH_READ | |
`endif | |
/********************************************************************************************************************************/ | |
// GG: this is not an atomic SPI read (1 byte), it reads 4 bytes! | |
// GG: increased address range for 64Mbit | |
`ifdef SPI_FLASH_READ | |
module MappedSPIFlash( | |
input wire clk, // system clock | |
input wire rstrb, // read strobe | |
input wire [21:0] word_address, // address of the 32bit-word to be read 19->21 | |
output wire [31:0] rdata, // data read | |
output wire rbusy, // asserted if busy receiving data | |
// SPI flash pins | |
output wire CLK, // clock | |
output reg CS_N, // chip select negated (active low) | |
output wire MOSI, // master out slave in (data to be sent to flash) | |
input wire MISO // master in slave out (data received from flash) | |
); | |
reg [5:0] snd_bitcount; | |
reg [31:0] cmd_addr; | |
reg [5:0] rcv_bitcount; | |
reg [31:0] rcv_data; | |
wire sending = (snd_bitcount != 0); | |
wire receiving = (rcv_bitcount != 0); | |
wire busy = sending | receiving; | |
assign rbusy = !CS_N; | |
assign MOSI = cmd_addr[31]; // data will be shifted out | |
initial CS_N = 1'b1; | |
assign CLK = !CS_N && !clk; // CLK needs to be inverted (sample on posedge, shift of negedge) | |
// and needs to be disabled when not sending/receiving (&& !CS_N). | |
// since least significant bytes are read first, we need to swizzle... | |
// GG: why? that statement seems to be wrong! both A and D have MSB first | |
assign rdata = {rcv_data[7:0],rcv_data[15:8],rcv_data[23:16],rcv_data[31:24]}; | |
always @(posedge clk) begin | |
if(rstrb) begin | |
CS_N <= 1'b0; | |
// GG: read byte sequence: 8'h03 + 24'addr | |
// 2 LSB 0 make 4-byte-alignment, but 20bit addr limit to 1MB! | |
// debug: MISO remains Z, word_address is initially 0x40000 | |
// cmd_addr <= {8'h03, 2'b00,word_address[19:0], 2'b00}; | |
cmd_addr <= {8'h03, word_address[21:0], 2'b00}; | |
snd_bitcount <= 6'd32; | |
$strobe("SPI TX starts at t=%d cmd_addr=%h", $time, cmd_addr); // do NOT use $display! | |
end else begin | |
if(sending) begin | |
if(snd_bitcount == 1) begin | |
// $display("SPI TX starts at t=%d cmd_addr=%h", $time, cmd_addr); | |
rcv_bitcount <= 6'd32; | |
end | |
snd_bitcount <= snd_bitcount - 6'd1; | |
cmd_addr <= {cmd_addr[30:0],1'b1}; | |
end | |
if(receiving) begin | |
if(rcv_bitcount == 6'd32) $strobe("SPI RX starts at t=%d",$time); | |
rcv_bitcount <= rcv_bitcount - 6'd1; | |
rcv_data <= {rcv_data[30:0],MISO}; | |
if(rcv_bitcount == 6'd0) $strobe(" RX done at t=%d rcv_data=%x",$time, rcv_data); | |
end | |
if(!busy) begin | |
CS_N <= 1'b1; | |
// $strobe(" CS_N=1 "); // is getting executed most of the time | |
end | |
end | |
end | |
endmodule | |
`endif | |
/********************************************************************************************************************************/ | |
`ifdef SPI_FLASH_FAST_READ | |
module MappedSPIFlash( | |
input wire clk, // system clock | |
input wire rstrb, // read strobe | |
input wire [19:0] word_address, // address of the word to be read | |
output wire [31:0] rdata, // data read | |
output wire rbusy, // asserted if busy receiving data | |
// SPI flash pins | |
output wire CLK, // clock | |
output reg CS_N, // chip select negated (active low) | |
output wire MOSI, // master out slave in (data to be sent to flash) | |
input wire MISO // master in slave out (data received from flash) | |
); | |
reg [5:0] snd_bitcount; | |
reg [31:0] cmd_addr; | |
reg [5:0] rcv_bitcount; | |
reg [31:0] rcv_data; | |
wire sending = (snd_bitcount != 0); | |
wire receiving = (rcv_bitcount != 0); | |
wire busy = sending | receiving; | |
assign rbusy = !CS_N; | |
assign MOSI = cmd_addr[31]; | |
initial CS_N = 1'b1; | |
assign CLK = !CS_N && !clk; | |
// since least significant bytes are read first, we need to swizzle... | |
assign rdata = {rcv_data[7:0],rcv_data[15:8],rcv_data[23:16],rcv_data[31:24]}; | |
always @(posedge clk) begin | |
if(rstrb) begin | |
CS_N <= 1'b0; | |
cmd_addr <= {8'h0b, 2'b00,word_address[19:0], 2'b00}; | |
snd_bitcount <= 6'd40; // TODO: check dummy clocks | |
end else begin | |
if(sending) begin | |
if(snd_bitcount == 1) begin | |
rcv_bitcount <= 6'd32; | |
end | |
snd_bitcount <= snd_bitcount - 6'd1; | |
cmd_addr <= {cmd_addr[30:0],1'b1}; | |
end | |
if(receiving) begin | |
rcv_bitcount <= rcv_bitcount - 6'd1; | |
rcv_data <= {rcv_data[30:0],MISO}; | |
end | |
if(!busy) begin | |
CS_N <= 1'b1; | |
end | |
end | |
end | |
endmodule | |
`endif | |
/********************************************************************************************************************************/ | |
`ifdef SPI_FLASH_FAST_READ_DUAL_OUTPUT | |
module MappedSPIFlash( | |
input wire clk, // system clock | |
input wire rstrb, // read strobe | |
input wire [19:0] word_address, // address of the word to be read | |
output wire [31:0] rdata, // data read | |
output wire rbusy, // asserted if busy receiving data | |
// SPI flash pins | |
output wire CLK, // clock | |
output reg CS_N, // chip select negated (active low) | |
inout wire MOSI, // master out slave in (data to be sent to flash) | |
input wire MISO // master in slave out (data received from flash) | |
); | |
wire MOSI_out; | |
wire MOSI_in; | |
wire MOSI_oe; | |
assign MOSI = MOSI_oe ? MOSI_out : 1'bZ; | |
assign MOSI_in = MOSI; | |
reg [5:0] snd_bitcount; | |
reg [31:0] cmd_addr; | |
reg [5:0] rcv_bitcount; | |
reg [31:0] rcv_data; | |
wire sending = (snd_bitcount != 0); | |
wire receiving = (rcv_bitcount != 0); | |
wire busy = sending | receiving; | |
assign rbusy = !CS_N; | |
assign MOSI_oe = !receiving; | |
assign MOSI_out = sending && cmd_addr[31]; | |
initial CS_N = 1'b1; | |
assign CLK = !CS_N && !clk; | |
// since least significant bytes are read first, we need to swizzle... | |
assign rdata = {rcv_data[7:0],rcv_data[15:8],rcv_data[23:16],rcv_data[31:24]}; | |
always @(posedge clk) begin | |
if(rstrb) begin | |
CS_N <= 1'b0; | |
cmd_addr <= {8'h3b, 2'b00,word_address[19:0], 2'b00}; | |
snd_bitcount <= 6'd40; // TODO: check dummy clocks | |
end else begin | |
if(sending) begin | |
if(snd_bitcount == 1) begin | |
rcv_bitcount <= 6'd32; | |
end | |
snd_bitcount <= snd_bitcount - 6'd1; | |
cmd_addr <= {cmd_addr[30:0],1'b1}; | |
end | |
if(receiving) begin | |
rcv_bitcount <= rcv_bitcount - 6'd2; | |
rcv_data <= {rcv_data[29:0],MISO,MOSI_in}; | |
end | |
if(!busy) begin | |
CS_N <= 1'b1; | |
end | |
end | |
end | |
endmodule | |
`endif | |
/********************************************************************************************************************************/ | |
`ifdef SPI_FLASH_FAST_READ_DUAL_IO | |
/* | |
module MappedSPIFlash( | |
input wire clk, // system clock | |
input wire rstrb, // read strobe | |
input wire [19:0] word_address, // address to be read | |
output wire [31:0] rdata, // data read | |
output wire rbusy, // asserted if busy receiving data | |
output wire CLK, // clock | |
output reg CS_N, // chip select negated (active low) | |
inout wire [1:0] IO // two bidirectional IO pins | |
); | |
reg [4:0] clock_cnt; // send/receive clock, 2 bits per clock (dual IO) | |
reg [39:0] shifter; // used for sending and receiving | |
reg dir; // 1 if sending, 0 otherwise | |
wire busy = (clock_cnt != 0); | |
wire sending = (dir && busy); | |
wire receiving = (!dir && busy); | |
assign rbusy = !CS_N; | |
// The two data pins IO0 (=MOSI) and IO1 (=MISO) used in bidirectional mode. | |
reg IO_oe = 1'b1; | |
wire [1:0] IO_out = shifter[39:38]; | |
wire [1:0] IO_in = IO; | |
assign IO = IO_oe ? IO_out : 2'bZZ; | |
initial CS_N = 1'b1; | |
assign CLK = !CS_N && !clk; | |
// since least significant bytes are read first, we need to swizzle... | |
assign rdata={shifter[7:0],shifter[15:8],shifter[23:16],shifter[31:24]}; | |
// Duplicates the bits (used because when sending command, dual IO is | |
// not active yet, and I do not want to have a separate shifter for | |
// the command and for the args...). | |
function [15:0] bbyyttee; | |
input [7:0] x; | |
begin | |
bbyyttee = { | |
x[7],x[7],x[6],x[6],x[5],x[5],x[4],x[4], | |
x[3],x[3],x[2],x[2],x[1],x[1],x[0],x[0] | |
}; | |
end | |
endfunction | |
always @(posedge clk) begin | |
if(rstrb) begin | |
CS_N <= 1'b0; | |
IO_oe <= 1'b1; | |
dir <= 1'b1; | |
shifter <= {bbyyttee(8'hbb), 2'b00, word_address[19:0], 2'b00}; | |
clock_cnt <= 5'd20 + `SPI_FLASH_DUMMY_CLOCKS; // cmd: 8 clocks address: 12 clocks + dummy clocks | |
end else begin | |
if(busy) begin | |
shifter <= {shifter[37:0], (receiving ? IO_in : 2'b11)}; | |
clock_cnt <= clock_cnt - 5'd1; | |
if(dir && clock_cnt == 1) begin | |
clock_cnt <= 5'd16; // 32 bits, 2 bits per clock | |
IO_oe <= 1'b0; | |
dir <= 1'b0; | |
end | |
end else begin | |
CS_N <= 1'b1; | |
end | |
end | |
end | |
endmodule | |
*/ | |
// 04/02/2021 This version optimized by Matthias Koch | |
module MappedSPIFlash( | |
input wire clk, // system clock | |
input wire rstrb, // read strobe | |
input wire [19:0] word_address, // read address | |
output wire [31:0] rdata, // data read | |
output wire rbusy, // asserted if busy receiving data | |
output wire CLK, // clock | |
output wire CS_N, // chip select negated (active low) | |
inout wire [1:0] IO // two bidirectional IO pins | |
); | |
reg [6:0] clock_cnt; // send/receive clock, 2 bits per clock (dual IO) | |
reg [39:0] shifter; // used for sending and receiving | |
wire busy = ~clock_cnt[6]; | |
assign CS_N = clock_cnt[6]; | |
assign rbusy = busy; | |
assign CLK = busy & !clk; // CLK needs to be disabled when not active. | |
// Since least significant bytes are read first, we need to swizzle... | |
assign rdata={shifter[7:0],shifter[15:8],shifter[23:16],shifter[31:24]}; | |
// The two data pins IO0 (=MOSI) and IO1 (=MISO) used in bidirectional mode. | |
wire [1:0] IO_out = shifter[39:38]; | |
wire [1:0] IO_in = IO; | |
assign IO = clock_cnt > 7'd15 ? IO_out : 2'bZZ; | |
// assign IO = |clock_cnt[5:4] ? IO_out : 2'bZZ; // optimized version of the line above | |
always @(posedge clk) begin | |
if(rstrb) begin | |
shifter <= {16'hCFCF, 2'b00, word_address[19:0], 2'b00}; // 16'hCFCF is 8'hbb with bits doubled | |
clock_cnt <= 7'd43; // cmd: 8 clocks address: 12 clocks dummy: 8 clocks. data: 16 clocks, 2 bits per clock | |
end else begin | |
if(busy) begin | |
shifter <= {shifter[37:0], IO_in}; | |
clock_cnt <= clock_cnt - 7'd1; | |
end | |
end | |
end | |
endmodule | |
`endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ... | |
int main() { | |
printf("Debugging Flash\n"); | |
printf("Reading data\n"); // printing the memory content, compare with hexdump.. | |
spi_reset(); | |
for(i = 0; i<32; i++) { | |
B = next_spi_byte(); | |
printf(" %d:b=%x ",i, B); | |
if(i%8 == 7) | |
printf("\n"); | |
} | |
printf("\n"); | |
for(int i=0; i<10; ++i) { // comment out for simulation (30s per call) | |
wait(); | |
} | |
// ... rest of the ST_NICCC.c file | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.equ IO_BASE, 0x400000 | |
# FLASH_BASE 0x800000 | |
.section .text | |
.globl start | |
start: | |
li gp,IO_BASE | |
# GG: increased 4x | |
# li sp,0x1800 | |
li sp,0x6000 | |
call main | |
ebreak |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment