Skip to content

Commit 5959c04

Browse files
committed
feat(tomasulo): add LR/SC/AMO atomics with reservation tracking
Implement RISC-V atomic memory operations in the Tomasulo OoO pipeline:
- LR.W: Load-Reserved acquires its reservation via the Load Queue and commits normally, without requiring an SQ-empty stall.
- SC.W: Store-Conditional resolves via the CDB (not done-at-dispatch), gated on ROB head + SQ committed-empty so that the reservation state is final; checks both reservation validity and address match per spec.
- AMO (AMOSWAP/ADD/XOR/AND/OR/MIN/MAX/MINU/MAXU): serialized at ROB head with SQ-empty; read-modify-write through the memory adapter.
- Reservation invalidation on SQ write completion (snoop) ensures SC correctness for intervening stores.
- AMO write completion invalidates the L0 cache to prevent stale data.
- FP64 forwarding edge-case coverage for mixed-width store-to-load.

RTL: load_queue, store_queue, reorder_buffer, tomasulo_wrapper, riscv_pkg
Tests: 162 cocotb tests passing (LQ 38, SQ 37, ROB 43, wrapper 44)
1 parent 823be9e commit 5959c04

File tree

21 files changed

+2981
-269
lines changed

21 files changed

+2981
-269
lines changed

hw/rtl/cpu_and_mem/cpu/riscv_pkg.sv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,6 +1478,9 @@ package riscv_pkg;
14781478
logic is_fp;
14791479
mem_size_e size;
14801480
logic sign_ext;
1481+
logic is_lr; // Load-reserved
1482+
logic is_amo; // AMO instruction
1483+
instr_op_e amo_op; // AMO operation type
14811484
} lq_alloc_req_t;
14821485

14831486
// LQ address update (from address calculation)
@@ -1486,6 +1489,7 @@ package riscv_pkg;
14861489
logic [ReorderBufferTagWidth-1:0] rob_tag;
14871490
logic [XLEN-1:0] address;
14881491
logic is_mmio;
1492+
logic [XLEN-1:0] amo_rs2; // AMO rs2 operand value
14891493
} lq_addr_update_t;
14901494

14911495
// ---------------------------------------------------------------------------
@@ -1515,6 +1519,7 @@ package riscv_pkg;
15151519
logic [ReorderBufferTagWidth-1:0] rob_tag;
15161520
logic is_fp;
15171521
mem_size_e size;
1522+
logic is_sc; // Store-conditional
15181523
} sq_alloc_req_t;
15191524

15201525
// SQ address update (from address calculation)

hw/rtl/cpu_and_mem/cpu/tomasulo/load_queue/load_queue.sv

Lines changed: 198 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,27 @@ module load_queue #(
8989
// =========================================================================
9090
input logic [riscv_pkg::ReorderBufferTagWidth-1:0] i_rob_head_tag,
9191

92+
// =========================================================================
93+
// Reservation Register (LR/SC support)
94+
// =========================================================================
95+
output logic o_reservation_valid,
96+
output logic [riscv_pkg::XLEN-1:0] o_reservation_addr,
97+
input logic i_sc_clear_reservation,
98+
input logic i_reservation_snoop_invalidate,
99+
100+
// =========================================================================
101+
// SQ Committed-Empty (for AMO issue gating; LR only requires ROB head)
102+
// =========================================================================
103+
input logic i_sq_committed_empty,
104+
105+
// =========================================================================
106+
// AMO Memory Write Interface
107+
// =========================================================================
108+
output logic o_amo_mem_write_en,
109+
output logic [riscv_pkg::XLEN-1:0] o_amo_mem_write_addr,
110+
output logic [riscv_pkg::XLEN-1:0] o_amo_mem_write_data,
111+
input logic i_amo_mem_write_done,
112+
92113
// =========================================================================
93114
// Flush
94115
// =========================================================================
@@ -158,12 +179,31 @@ module load_queue #(
158179
logic [DEPTH-1:0] lq_issued;
159180
logic [DEPTH-1:0] lq_data_valid;
160181
logic [DEPTH-1:0] lq_forwarded;
182+
logic [DEPTH-1:0] lq_is_lr;
183+
logic [DEPTH-1:0] lq_is_amo;
161184

162185
// Per-entry multi-bit fields
163186
logic [ReorderBufferTagWidth-1:0] lq_rob_tag[DEPTH];
164187
logic [XLEN-1:0] lq_address[DEPTH];
165188
riscv_pkg::mem_size_e lq_size[DEPTH];
166189
logic [FLEN-1:0] lq_data[DEPTH];
190+
riscv_pkg::instr_op_e lq_amo_op[DEPTH];
191+
logic [XLEN-1:0] lq_amo_rs2[DEPTH];
192+
193+
// Reservation register (LR/SC)
194+
logic reservation_valid;
195+
logic [XLEN-1:0] reservation_addr;
196+
assign o_reservation_valid = reservation_valid;
197+
assign o_reservation_addr = reservation_addr;
198+
199+
// AMO FSM
200+
typedef enum logic {
201+
AMO_IDLE,
202+
AMO_WRITE_ACTIVE
203+
} amo_state_e;
204+
amo_state_e amo_state;
205+
logic [XLEN-1:0] amo_old_value;
206+
logic [IdxWidth-1:0] amo_entry_idx;
167207

168208
// ===========================================================================
169209
// Internal Signals
@@ -193,6 +233,33 @@ module load_queue #(
193233
// Head advancement target (scans past all contiguous invalid entries)
194234
logic [PtrWidth-1:0] head_advance_target;
195235

236+
// ===========================================================================
237+
// AMO ALU (combinational)
238+
// ===========================================================================
239+
function automatic logic [XLEN-1:0] amo_compute(
240+
input riscv_pkg::instr_op_e op, input logic [XLEN-1:0] old_val, input logic [XLEN-1:0] rs2);
241+
case (op)
242+
riscv_pkg::AMOSWAP_W: amo_compute = rs2;
243+
riscv_pkg::AMOADD_W: amo_compute = old_val + rs2;
244+
riscv_pkg::AMOXOR_W: amo_compute = old_val ^ rs2;
245+
riscv_pkg::AMOAND_W: amo_compute = old_val & rs2;
246+
riscv_pkg::AMOOR_W: amo_compute = old_val | rs2;
247+
riscv_pkg::AMOMIN_W: amo_compute = ($signed(old_val) < $signed(rs2)) ? old_val : rs2;
248+
riscv_pkg::AMOMAX_W: amo_compute = ($signed(old_val) > $signed(rs2)) ? old_val : rs2;
249+
riscv_pkg::AMOMINU_W: amo_compute = (old_val < rs2) ? old_val : rs2;
250+
riscv_pkg::AMOMAXU_W: amo_compute = (old_val > rs2) ? old_val : rs2;
251+
default: amo_compute = old_val;
252+
endcase
253+
endfunction
254+
255+
// AMO write interface signals
256+
logic amo_write_pending;
257+
logic [XLEN-1:0] amo_new_value;
258+
259+
// AMO cache invalidation: invalidate L0 cache when AMO write completes
260+
logic amo_cache_inv;
261+
assign amo_cache_inv = (amo_state == AMO_WRITE_ACTIVE) && i_amo_mem_write_done;
262+
196263
// ===========================================================================
197264
// Count, Full, Empty
198265
// ===========================================================================
@@ -244,9 +311,14 @@ module load_queue #(
244311
issue_cdb_idx = scan_idx[i];
245312
end
246313
// Phase B: Memory issue candidate
314+
// LR/AMO require ROB head (like MMIO); AMO also needs SQ committed-empty
247315
if (!issue_mem_found && lq_addr_valid[scan_idx[i]]
248316
&& !lq_issued[scan_idx[i]]
249-
&& !lq_data_valid[scan_idx[i]]) begin
317+
&& !lq_data_valid[scan_idx[i]]
318+
&& (!lq_is_lr[scan_idx[i]] || (lq_rob_tag[scan_idx[i]] == i_rob_head_tag))
319+
&& (!lq_is_amo[scan_idx[i]]
320+
|| (lq_rob_tag[scan_idx[i]] == i_rob_head_tag && i_sq_committed_empty))
321+
) begin
250322
issue_mem_found = 1'b1;
251323
issue_mem_idx = scan_idx[i];
252324
end
@@ -289,7 +361,8 @@ module load_queue #(
289361
logic sq_do_forward;
290362

291363
assign sq_can_issue = o_sq_check_valid && i_sq_all_older_addrs_known && !i_sq_forward.match;
292-
assign sq_do_forward = o_sq_check_valid && i_sq_forward.can_forward && !lq_is_mmio[issue_mem_idx];
364+
assign sq_do_forward = o_sq_check_valid && i_sq_forward.can_forward
365+
&& !lq_is_mmio[issue_mem_idx] && !lq_is_lr[issue_mem_idx] && !lq_is_amo[issue_mem_idx];
293366

294367
always_comb begin
295368
o_mem_read_en = 1'b0;
@@ -356,14 +429,24 @@ module load_queue #(
356429
.i_fill_addr (cache_fill_addr),
357430
.i_fill_data (cache_fill_data),
358431

359-
// Invalidation (from SQ, external)
360-
.i_invalidate_valid(i_cache_invalidate_valid),
361-
.i_invalidate_addr (i_cache_invalidate_addr),
432+
// Invalidation (from SQ or AMO write completion)
433+
.i_invalidate_valid(i_cache_invalidate_valid || amo_cache_inv),
434+
.i_invalidate_addr (amo_cache_inv ? lq_address[amo_entry_idx] : i_cache_invalidate_addr),
362435

363436
// Flush
364437
.i_flush_all(i_flush_all)
365438
);
366439

440+
// AMO serialization (ROB head + SQ committed-empty) guarantees these
441+
// two invalidation sources are mutually exclusive.
442+
`ifndef ICARUS
443+
// synopsys translate_off
444+
assert property (@(posedge i_clk) disable iff (!i_rst_n)
445+
!(i_cache_invalidate_valid && amo_cache_inv))
446+
else $error("BUG: SQ and AMO cache invalidation fired simultaneously");
447+
// synopsys translate_on
448+
`endif
449+
367450
// Cache-hit fast path signal: Phase B candidate hits L0 cache
368451
// AND SQ disambiguation confirms no conflicting store (sq_can_issue)
369452
// AND it's a word-sized or byte/half non-FP load (not FLD — FLD needs
@@ -373,7 +456,9 @@ module load_queue #(
373456
assign cache_hit_fast_path = sq_can_issue
374457
&& cache_lookup_hit
375458
&& !(lq_is_fp[issue_mem_idx] && lq_size[issue_mem_idx] == riscv_pkg::MEM_SIZE_DOUBLE)
376-
&& !lq_is_mmio[issue_mem_idx];
459+
&& !lq_is_mmio[issue_mem_idx]
460+
&& !lq_is_lr[issue_mem_idx]
461+
&& !lq_is_amo[issue_mem_idx];
377462

378463
// Load unit for cache hit path: feed cache data through load unit
379464
// for byte/half extraction.
@@ -411,9 +496,35 @@ module load_queue #(
411496
end
412497
end
413498

414-
assign cache_fill_valid = i_mem_read_valid && mem_outstanding && lq_valid[issued_idx];
415-
assign cache_fill_addr = cache_fill_actual_addr;
416-
assign cache_fill_data = i_mem_read_data;
499+
assign cache_fill_valid = i_mem_read_valid && mem_outstanding && lq_valid[issued_idx]
500+
&& !lq_is_lr[issued_idx] && !lq_is_amo[issued_idx];
501+
assign cache_fill_addr = cache_fill_actual_addr;
502+
assign cache_fill_data = i_mem_read_data;
503+
504+
// AMO write interface: compute new value combinationally from outstanding AMO read
505+
always_comb begin
506+
amo_write_pending = 1'b0;
507+
amo_new_value = '0;
508+
o_amo_mem_write_en = 1'b0;
509+
o_amo_mem_write_addr = '0;
510+
o_amo_mem_write_data = '0;
511+
512+
if (amo_state == AMO_WRITE_ACTIVE) begin
513+
// Maintain write request until done
514+
o_amo_mem_write_en = 1'b1;
515+
o_amo_mem_write_addr = lq_address[amo_entry_idx];
516+
o_amo_mem_write_data =
517+
amo_compute(lq_amo_op[amo_entry_idx], amo_old_value, lq_amo_rs2[amo_entry_idx]);
518+
end else if (i_mem_read_valid && mem_outstanding && lq_valid[issued_idx]
519+
&& lq_is_amo[issued_idx]) begin
520+
// AMO read just arrived: start write in the same cycle
521+
amo_write_pending = 1'b1;
522+
amo_new_value = amo_compute(lq_amo_op[issued_idx], i_mem_read_data, lq_amo_rs2[issued_idx]);
523+
o_amo_mem_write_en = 1'b1;
524+
o_amo_mem_write_addr = lq_address[issued_idx];
525+
o_amo_mem_write_data = amo_new_value;
526+
end
527+
end
417528

418529
// Drive load unit inputs from the entry awaiting response (memory path)
419530
always_comb begin
@@ -524,34 +635,48 @@ module load_queue #(
524635

525636
always_ff @(posedge i_clk or negedge i_rst_n) begin
526637
if (!i_rst_n) begin
527-
head_ptr <= '0;
528-
tail_ptr <= '0;
529-
lq_valid <= '0;
530-
lq_is_fp <= '0;
531-
lq_addr_valid <= '0;
532-
lq_sign_ext <= '0;
533-
lq_is_mmio <= '0;
534-
lq_fp64_phase <= '0;
535-
lq_issued <= '0;
536-
lq_data_valid <= '0;
537-
lq_forwarded <= '0;
538-
mem_outstanding <= 1'b0;
539-
issued_idx <= '0;
638+
head_ptr <= '0;
639+
tail_ptr <= '0;
640+
lq_valid <= '0;
641+
lq_is_fp <= '0;
642+
lq_addr_valid <= '0;
643+
lq_sign_ext <= '0;
644+
lq_is_mmio <= '0;
645+
lq_fp64_phase <= '0;
646+
lq_issued <= '0;
647+
lq_data_valid <= '0;
648+
lq_forwarded <= '0;
649+
lq_is_lr <= '0;
650+
lq_is_amo <= '0;
651+
mem_outstanding <= 1'b0;
652+
issued_idx <= '0;
653+
reservation_valid <= 1'b0;
654+
reservation_addr <= '0;
655+
amo_state <= AMO_IDLE;
656+
amo_old_value <= '0;
657+
amo_entry_idx <= '0;
540658
end else if (i_flush_all) begin
541659
// Full flush: reset everything
542-
head_ptr <= '0;
543-
tail_ptr <= '0;
544-
lq_valid <= '0;
545-
lq_is_fp <= '0;
546-
lq_addr_valid <= '0;
547-
lq_sign_ext <= '0;
548-
lq_is_mmio <= '0;
549-
lq_fp64_phase <= '0;
550-
lq_issued <= '0;
551-
lq_data_valid <= '0;
552-
lq_forwarded <= '0;
553-
mem_outstanding <= 1'b0;
554-
issued_idx <= '0;
660+
head_ptr <= '0;
661+
tail_ptr <= '0;
662+
lq_valid <= '0;
663+
lq_is_fp <= '0;
664+
lq_addr_valid <= '0;
665+
lq_sign_ext <= '0;
666+
lq_is_mmio <= '0;
667+
lq_fp64_phase <= '0;
668+
lq_issued <= '0;
669+
lq_data_valid <= '0;
670+
lq_forwarded <= '0;
671+
lq_is_lr <= '0;
672+
lq_is_amo <= '0;
673+
mem_outstanding <= 1'b0;
674+
issued_idx <= '0;
675+
reservation_valid <= 1'b0;
676+
reservation_addr <= '0;
677+
amo_state <= AMO_IDLE;
678+
amo_old_value <= '0;
679+
amo_entry_idx <= '0;
555680
end else begin
556681

557682
// -----------------------------------------------------------------
@@ -588,6 +713,10 @@ module load_queue #(
588713
lq_data_valid[tail_idx] <= 1'b0;
589714
lq_data[tail_idx] <= '0;
590715
lq_forwarded[tail_idx] <= 1'b0;
716+
lq_is_lr[tail_idx] <= i_alloc.is_lr;
717+
lq_is_amo[tail_idx] <= i_alloc.is_amo;
718+
lq_amo_op[tail_idx] <= i_alloc.amo_op;
719+
lq_amo_rs2[tail_idx] <= '0;
591720
tail_ptr <= tail_ptr + PtrWidth'(1);
592721
end
593722

@@ -600,6 +729,7 @@ module load_queue #(
600729
lq_addr_valid[i] <= 1'b1;
601730
lq_address[i] <= i_addr_update.address;
602731
lq_is_mmio[i] <= i_addr_update.is_mmio;
732+
lq_amo_rs2[i] <= i_addr_update.amo_rs2;
603733
end
604734
end
605735
end
@@ -649,7 +779,23 @@ module load_queue #(
649779
// Flushed entry — drain stale response
650780
mem_outstanding <= 1'b0;
651781
end else if (i_mem_read_valid && mem_outstanding && lq_valid[issued_idx]) begin
652-
if (lq_is_fp[issued_idx] &&
782+
if (lq_is_amo[issued_idx]) begin
783+
// AMO: latch old value, start write phase (don't set data_valid yet)
784+
amo_old_value <= i_mem_read_data;
785+
amo_entry_idx <= issued_idx;
786+
amo_state <= AMO_WRITE_ACTIVE;
787+
mem_outstanding <= 1'b0;
788+
end else if (lq_is_lr[issued_idx]) begin
789+
// LR: normal data capture + set reservation
790+
lq_data[issued_idx][XLEN-1:0] <= lu_data_out;
791+
if (FLEN > XLEN) begin
792+
lq_data[issued_idx][FLEN-1:XLEN] <= '0;
793+
end
794+
lq_data_valid[issued_idx] <= 1'b1;
795+
mem_outstanding <= 1'b0;
796+
reservation_valid <= 1'b1;
797+
reservation_addr <= lq_address[issued_idx];
798+
end else if (lq_is_fp[issued_idx] &&
653799
lq_size[issued_idx] == riscv_pkg::MEM_SIZE_DOUBLE &&
654800
!lq_fp64_phase[issued_idx]) begin
655801
// FLD phase 0: store low word, reset issued, advance to phase 1
@@ -675,6 +821,22 @@ module load_queue #(
675821
end
676822
end
677823

824+
// -----------------------------------------------------------------
825+
// AMO Write Completion: latch old value as result, invalidate cache
826+
// -----------------------------------------------------------------
827+
if (amo_state == AMO_WRITE_ACTIVE && i_amo_mem_write_done) begin
828+
lq_data[amo_entry_idx] <= {{(FLEN - XLEN) {1'b0}}, amo_old_value};
829+
lq_data_valid[amo_entry_idx] <= 1'b1;
830+
amo_state <= AMO_IDLE;
831+
end
832+
833+
// -----------------------------------------------------------------
834+
// Reservation clear (must stay after the LR set above: the later non-blocking assignment makes clear win over a same-cycle set)
835+
// -----------------------------------------------------------------
836+
if (i_sc_clear_reservation || i_reservation_snoop_invalidate) begin
837+
reservation_valid <= 1'b0;
838+
end
839+
678840
// -----------------------------------------------------------------
679841
// Entry Freeing + Head Advancement
680842
// -----------------------------------------------------------------

0 commit comments

Comments
 (0)