diff --git a/src/main/scala/coupledL2/Directory.scala b/src/main/scala/coupledL2/Directory.scala index 67003d269..82a9dd0eb 100644 --- a/src/main/scala/coupledL2/Directory.scala +++ b/src/main/scala/coupledL2/Directory.scala @@ -137,6 +137,7 @@ class Directory(implicit p: Parameters) extends L2Module { val tagWen = io.tagWReq.valid val metaWen = io.metaWReq.valid val replacerWen = WireInit(false.B) + val replacerWen_s4 = RegNext(replacerWen) // val tagArray = Module(new SRAMTemplate(UInt(tagBits.W), sets, ways, singlePort = true)) private val mbist = p(L2ParamKey).hasMbist @@ -308,7 +309,7 @@ class Directory(implicit p: Parameters) extends L2Module { dontTouch(metaArray.io) dontTouch(tagArray.io) - io.read.ready := !io.metaWReq.valid && !io.tagWReq.valid && !replacerWen + io.read.ready := !io.metaWReq.valid && !io.tagWReq.valid && !replacerWen_s4 /* ======!! Replacement logic !!====== */ /* ====== Read, choose replaceWay ====== */ @@ -347,6 +348,14 @@ class Directory(implicit p: Parameters) extends L2Module { val updateRefill = refillReqValid_s3 && !refillRetry // update replacer when A/C hit or refill replacerWen := updateHit || updateRefill + val hit_s4 = RegInit(false.B) + val set_s4 = RegInit(0.U(setBits.W)) + val way_s4 = RegInit(0.U(wayBits.W)) + when(replacerWen) { + hit_s4 := hit_s3 + set_s4 := set_s3 + way_s4 := way_s3 + } // hit-Promotion, miss-Insertion for RRIP // origin-bit marks whether the data_block is reused @@ -356,10 +365,10 @@ class Directory(implicit p: Parameters) extends L2Module { val origin_bits_hold = Wire(Vec(ways, Bool())) origin_bits_hold := HoldUnless(origin_bits_r, RegNext(io.read.fire, false.B)) origin_bit_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, hit_s3, false.B), - Mux(resetFinish, req_s3.set, resetIdx), - UIntToOH(way_s3) + !resetFinish || replacerWen_s4, + Mux(resetFinish, hit_s4, false.B), + Mux(resetFinish, set_s4, resetIdx), + UIntToOH(way_s4) ) val rrip_req_type = WireInit(0.U(4.W)) // [3]: 0-firstuse, 1-reuse; @@ -374,12 +383,13 @@ class Directory(implicit p: Parameters) extends L2Module { private val mbistPl = MbistPipeline.PlaceMbistPipeline(1, "L2Directory", mbist) if(cacheParams.replacement == "srrip"){ val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3, hit_s3, inv, rrip_req_type) + val next_state_s4 = RegNext(next_state_s3) val repl_init = Wire(Vec(ways, UInt(2.W))) repl_init.foreach(_ := 2.U(2.W)) replacer_sram_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, next_state_s3, repl_init.asUInt), - Mux(resetFinish, set_s3, resetIdx), + !resetFinish || replacerWen_s4, + Mux(resetFinish, next_state_s4, repl_init.asUInt), + Mux(resetFinish, set_s4, resetIdx), 1.U ) @@ -407,21 +417,23 @@ class Directory(implicit p: Parameters) extends L2Module { Mux(PSEL(9)===0.U, false.B, true.B))) // false.B - srrip, true.B - brrip val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3, hit_s3, inv, repl_type, rrip_req_type) + val next_state_s4 = RegNext(next_state_s3) val repl_init = Wire(Vec(ways, UInt(2.W))) repl_init.foreach(_ := 2.U(2.W)) replacer_sram_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, next_state_s3, repl_init.asUInt), - Mux(resetFinish, set_s3, resetIdx), + !resetFinish || replacerWen_s4, + Mux(resetFinish, next_state_s4, repl_init.asUInt), + Mux(resetFinish, set_s4, resetIdx), 1.U ) } else { val next_state_s3 = repl.get_next_state(repl_state_s3, way_s3) + val next_state_s4 = RegNext(next_state_s3) replacer_sram_opt.get.io.w( - !resetFinish || replacerWen, - Mux(resetFinish, next_state_s3, 0.U), - Mux(resetFinish, set_s3, resetIdx), + !resetFinish || replacerWen_s4, + Mux(resetFinish, next_state_s4, 0.U), + Mux(resetFinish, set_s4, resetIdx), 1.U ) } diff --git a/src/main/scala/coupledL2/tl2chi/MainPipe.scala b/src/main/scala/coupledL2/tl2chi/MainPipe.scala index 9636189bf..11662873a 100644 --- a/src/main/scala/coupledL2/tl2chi/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2chi/MainPipe.scala @@ -583,12 +583,15 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes Mux(mshr_req_s3, req_s3.way, dirResult_s3.way) ) - io.metaWReq.valid := !resetFinish || task_s3.valid && ( + // dir write signals in s3 + val metaWReq_s3 = Wire(Valid(new MetaWrite())) + val tagWReq_s3 = Wire(Valid(new TagWrite())) + metaWReq_s3.valid := !resetFinish || task_s3.valid && ( metaW_valid_s3_a || metaW_valid_s3_b || metaW_valid_s3_c || metaW_valid_s3_mshr || metaW_valid_s3_cmo - ) - io.metaWReq.bits.set := Mux(resetFinish, req_s3.set, resetIdx) - io.metaWReq.bits.wayOH := Mux(resetFinish, UIntToOH(metaW_way), Fill(cacheParams.ways, true.B)) - io.metaWReq.bits.wmeta := Mux( + ) + metaWReq_s3.bits.set := Mux(resetFinish, req_s3.set, resetIdx) + metaWReq_s3.bits.wayOH := Mux(resetFinish, UIntToOH(metaW_way), Fill(cacheParams.ways, true.B)) + metaWReq_s3.bits.wmeta := Mux( resetFinish, ParallelPriorityMux( Seq(metaW_valid_s3_a, metaW_valid_s3_b, metaW_valid_s3_c, metaW_valid_s3_mshr, metaW_valid_s3_cmo), @@ -596,11 +599,10 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes ), MetaEntry() ) - - io.tagWReq.valid := task_s3.valid && req_s3.tagWen && mshr_refill_s3 && !retry - io.tagWReq.bits.set := req_s3.set - io.tagWReq.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way) - io.tagWReq.bits.wtag := req_s3.tag + tagWReq_s3.valid := task_s3.valid && req_s3.tagWen && mshr_refill_s3 && !retry + tagWReq_s3.bits.set := req_s3.set + tagWReq_s3.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way) + tagWReq_s3.bits.wtag := req_s3.tag sink_resp_s3_b_metaWen := metaW_valid_s3_b sink_resp_s3_b_meta := metaW_s3_b @@ -723,6 +725,9 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes /* ======== Stage 4 ======== */ val task_s4 = RegInit(0.U.asTypeOf(Valid(new TaskBundle()))) + val taskWDir_s4 = RegInit(0.U.asTypeOf(Valid(new TaskBundle()))) + val metaWReq_s4 = RegInit(0.U.asTypeOf(Valid(new MetaWrite()))) + val tagWReq_s4 = RegInit(0.U.asTypeOf(Valid(new TagWrite()))) val data_unready_s4 = RegInit(false.B) val data_s4 = Reg(UInt((blockBytes * 8).W)) val ren_s4 = RegInit(false.B) @@ -758,6 +763,18 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes l2Error_s4 := l2Error_s3 } + taskWDir_s4.valid := task_s3.valid && (metaWReq_s3.valid || tagWReq_s3.valid) + when (task_s3.valid || !resetFinish) { + taskWDir_s4.bits := source_req_s3 + metaWReq_s4 := metaWReq_s3 + tagWReq_s4 := tagWReq_s3 + } + + io.metaWReq.valid := metaWReq_s4.valid && (taskWDir_s4.valid || RegNext(!resetFinish)) + io.metaWReq.bits := metaWReq_s4.bits + io.tagWReq.valid := tagWReq_s4.valid && (taskWDir_s4.valid || RegNext(!resetFinish)) + io.tagWReq.bits := tagWReq_s4.bits + // for reqs that CANNOT give response in MainPipe, but needs to write releaseBuf/refillBuf // we cannot drop them at s3, we must let them go to s4/s5 val chnl_fire_s4 = d_s4.fire || txreq_s4.fire || txrsp_s4.fire || txdat_s4.fire @@ -876,17 +893,20 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes io.toReqBuf(0) := task_s2.valid && s23Block('a', task_s2.bits) io.toReqBuf(1) := task_s3.valid && s23Block('a', task_s3.bits) - io.toReqArb.blockC_s1 := task_s2.valid && s23Block('c', task_s2.bits) + io.toReqArb.blockC_s1 := task_s2.valid && s23Block('c', task_s2.bits) || + task_s3.valid && s23Block('c', task_s3.bits) && metaWReq_s3.valid io.toReqArb.blockB_s1 := task_s2.valid && bBlock(task_s2.bits) || task_s3.valid && bBlock(task_s3.bits) || task_s4.valid && bBlock(task_s4.bits, tag = true) || + taskWDir_s4.valid && bBlock(taskWDir_s4.bits, tag = true) || task_s5.valid && bBlock(task_s5.bits, tag = true) io.toReqArb.blockA_s1 := false.B - io.toReqArb.blockG_s1 := task_s2.valid && s23Block('g', task_s2.bits) + io.toReqArb.blockG_s1 := task_s2.valid && s23Block('g', task_s2.bits) || + task_s3.valid && s23Block('g', task_s3.bits) && metaWReq_s3.valid /* ======== Pipeline Status ======== */ require(io.status_vec_toD.size == 3) diff --git a/src/main/scala/coupledL2/tl2tl/MainPipe.scala b/src/main/scala/coupledL2/tl2tl/MainPipe.scala index 605ca022b..febcc34cb 100644 --- a/src/main/scala/coupledL2/tl2tl/MainPipe.scala +++ b/src/main/scala/coupledL2/tl2tl/MainPipe.scala @@ -390,10 +390,13 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { val metaW_way = Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, // grant always use replResp way Mux(mshr_req_s3, req_s3.way, dirResult_s3.way)) - io.metaWReq.valid := !resetFinish || task_s3.valid && (metaW_valid_s3_a || metaW_valid_s3_b || metaW_valid_s3_c || metaW_valid_s3_mshr) - io.metaWReq.bits.set := Mux(resetFinish, req_s3.set, resetIdx) - io.metaWReq.bits.wayOH := Mux(resetFinish, UIntToOH(metaW_way), Fill(cacheParams.ways, true.B)) - io.metaWReq.bits.wmeta := Mux( + // dir write signals in s3 + val metaWReq_s3 = Wire(Valid(new MetaWrite())) + val tagWReq_s3 = Wire(Valid(new TagWrite())) + metaWReq_s3.valid := !resetFinish || task_s3.valid && (metaW_valid_s3_a || metaW_valid_s3_b || metaW_valid_s3_c || metaW_valid_s3_mshr) + metaWReq_s3.bits.set := Mux(resetFinish, req_s3.set, resetIdx) + metaWReq_s3.bits.wayOH := Mux(resetFinish, UIntToOH(metaW_way), Fill(cacheParams.ways, true.B)) + metaWReq_s3.bits.wmeta := Mux( resetFinish, ParallelPriorityMux( Seq(metaW_valid_s3_a, metaW_valid_s3_b, metaW_valid_s3_c, metaW_valid_s3_mshr), @@ -401,11 +404,10 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { ), MetaEntry() ) - - io.tagWReq.valid := task_s3.valid && req_s3.tagWen && mshr_refill_s3 && !retry - io.tagWReq.bits.set := req_s3.set - io.tagWReq.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way) - io.tagWReq.bits.wtag := req_s3.tag + tagWReq_s3.valid := task_s3.valid && req_s3.tagWen && mshr_refill_s3 && !retry + tagWReq_s3.bits.set := req_s3.set + tagWReq_s3.bits.way := Mux(mshr_refill_s3 && req_s3.replTask, io.replResp.bits.way, req_s3.way) + tagWReq_s3.bits.wtag := req_s3.tag /* ======== Interact with Channels (C & D) ======== */ // do not need s4 & s5 @@ -462,6 +464,9 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { /* ======== Stage 4 ======== */ val task_s4 = RegInit(0.U.asTypeOf(Valid(new TaskBundle()))) + val taskWDir_s4 = RegInit(0.U.asTypeOf(Valid(new TaskBundle()))) + val metaWReq_s4 = RegInit(0.U.asTypeOf(Valid(new MetaWrite()))) + val tagWReq_s4 = RegInit(0.U.asTypeOf(Valid(new TagWrite()))) val data_unready_s4 = RegInit(false.B) val data_s4 = Reg(UInt((blockBytes * 8).W)) val ren_s4 = RegInit(false.B) @@ -486,6 +491,18 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { l2Error_s4 := l2Error_s3 } + taskWDir_s4.valid := task_s3.valid && (metaWReq_s3.valid || tagWReq_s3.valid) + when (task_s3.valid || !resetFinish) { + taskWDir_s4.bits := source_req_s3 + metaWReq_s4 := metaWReq_s3 + tagWReq_s4 := tagWReq_s3 + } + + io.metaWReq.valid := metaWReq_s4.valid && (taskWDir_s4.valid || RegNext(!resetFinish)) + io.metaWReq.bits := metaWReq_s4.bits + io.tagWReq.valid := tagWReq_s4.valid && (taskWDir_s4.valid || RegNext(!resetFinish)) + io.tagWReq.bits := tagWReq_s4.bits + // A-alias-Acquire should send neither C nor D // val isC_s4 = task_s4.bits.opcode(2, 1) === Release(2, 1) && task_s4.bits.fromA && !RegNext(cache_alias, false.B) || // task_s4.bits.opcode(2, 1) === ProbeAck(2, 1) && task_s4.bits.fromB @@ -594,17 +611,20 @@ class MainPipe(implicit p: Parameters) extends L2Module with HasPerfEvents { io.toReqBuf(0) := task_s2.valid && s23Block('a', task_s2.bits) io.toReqBuf(1) := task_s3.valid && s23Block('a', task_s3.bits) - io.toReqArb.blockC_s1 := task_s2.valid && s23Block('c', task_s2.bits) + io.toReqArb.blockC_s1 := task_s2.valid && s23Block('c', task_s2.bits) || + task_s3.valid && s23Block('c', task_s3.bits) && metaWReq_s3.valid io.toReqArb.blockB_s1 := task_s2.valid && bBlock(task_s2.bits) || task_s3.valid && bBlock(task_s3.bits) || task_s4.valid && bBlock(task_s4.bits, tag = true) || + taskWDir_s4.valid && bBlock(taskWDir_s4.bits, tag = true) || task_s5.valid && bBlock(task_s5.bits, tag = true) io.toReqArb.blockA_s1 := false.B - io.toReqArb.blockG_s1 := task_s2.valid && s23Block('g', task_s2.bits) + io.toReqArb.blockG_s1 := task_s2.valid && s23Block('g', task_s2.bits) || + task_s3.valid && s23Block('g', task_s3.bits) && metaWReq_s3.valid /* ======== Pipeline Status ======== */ require(io.status_vec_toD.size == 3) io.status_vec_toD(0).valid := task_s3.valid && Mux(