Skip to content

Commit 0d20557

Browse files
authored
fix(AsyncBridge): restore performance considering async depth is 4 (OpenXiangShan#472)
* fix(AsyncBridge): restore performance considering async depth is 4 * For the rx channel, enhance the async bridge by adding a front-end shadow buffer (16 entries) to decouple local processing from asynchronous latency and to provide instant credit return to the downstream module (CMN). * For the tx channel, use more than 4 L-Credits in CoupledL2 to cover the lcrdv synchronization delay from the DownStream CHI to CoupledL2. * fix: add an option to determine the reset value of the tx credit
1 parent 3c9525f commit 0d20557

File tree

3 files changed

+131
-23
lines changed

3 files changed

+131
-23
lines changed

src/main/scala/coupledL2/L2Param.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ case class L2Param(
102102
prefetch: Seq[PrefetchParameters] = Nil,
103103
// L2 Flush
104104
enableL2Flush: Boolean = false,
105+
// AsyncBridge
106+
enableCHIAsyncBridge: Option[Boolean] = None,
105107
// Performance analysis
106108
enablePerf: Boolean = true,
107109
// RollingDB

src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala

Lines changed: 110 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,49 @@ class AsyncPortIO(
5050
val tx = new AsyncDownwardsLinkIO(params)
5151
val rx = Flipped(new AsyncUpwardsLinkIO(params))
5252
}
53+
/*
54+
* This module enhances the standard async bridge by adding a front-end shadow buffer
55+
* to decouple local processing from asynchronous latency and provide instant credit
56+
* return to the sending side (the DownStream CMN on the rx path)
57+
*
58+
* rx: DownStream(CMN) → [Shadow Buffer (16)] → [AsyncQueueSink (4)] → [AsyncQueueSource (4)] → Upstream (L2)
59+
* ↑
60+
* Instant Credit return
61+
*
62+
* tx: UpStream(L2) → [Shadow Buffer (16)] → [AsyncQueueSource (4)] → [AsyncQueueSink (4)] → Downstream (CMN)
63+
* ↑ ↑
64+
* CHI Credit + over Credit(4) | Credit manager generates back-pressure
65+
*
66+
*/
67+
/*
 * Source side of an asynchronous crossing for one CHI channel, with a shadow buffer
 * placed in front of the AsyncQueueSource:
 *
 *   chn.flit -> Queue(shadowDepth, flow) -> AsyncQueueSource(params) -> async bundle
 */
object ToAsyncBundleWithBuf {
  /*
   * chn         : channel whose flitv/flit are captured into the shadow buffer.
   * params      : parameters of the AsyncQueueSource (default depth = 4).
   * name        : optional suffix for the suggested instance names
   *               ("shadowBuffer_<name>", "asyncQSource_<name>") and the assert message prefix.
   * shadowDepth : number of entries of the shadow buffer (default 16, the previously
   *               hard-coded value).
   *
   * Returns (async side of the AsyncQueueSource, ready signal seen on the shadow buffer's
   * dequeue port). The second element is the dequeue-side ready, i.e. it is driven by the
   * AsyncQueueSource enqueue port; it is NOT the shadow buffer's enq.ready.
   */
  def channel[T <: Data](
    chn: ChannelIO[T],
    params: AsyncQueueParams = AsyncQueueParams(depth = 4),
    name: Option[String] = None,
    shadowDepth: Int = 16
  ): (Data, Bool) = {
    require(shadowDepth > 0, s"shadowDepth must be positive, got $shadowDepth")

    /*
      1. Shadow buffer (flow mode: an incoming flit can be dequeued in the same cycle
         when the buffer is empty, so no latency is added in the common case)
     */
    val shadow_buffer = Module(new Queue(chiselTypeOf(chn.flit), shadowDepth, pipe = false, flow = true))
    name.foreach(n => shadow_buffer.suggestName("shadowBuffer_" + n))
    shadow_buffer.io.enq.valid := chn.flitv
    shadow_buffer.io.enq.bits := chn.flit

    /*
      2. Expose the dequeue-side ready so the caller can use it for credit management.
         enq.ready is not fed back to the sender here; an enqueue attempt while the
         buffer is full is only caught by the assertion below.
     */
    val deqReady = shadow_buffer.io.deq.ready
    dontTouch(deqReady)
    assert(!chn.flitv || shadow_buffer.io.enq.ready, s"${name.getOrElse("ToAsyncBundle")}: Shadow buffer overflow!")

    /*
      3. AsyncQueueSource fed by the shadow buffer
     */
    val source = Module(new AsyncQueueSource(chiselTypeOf(chn.flit), params))
    name.foreach(n => source.suggestName("asyncQSource_" + n))
    source.io.enq <> shadow_buffer.io.deq

    (source.io.async, deqReady)
  }
}
5496
object ToAsyncBundle {
5597
def channel[T <: Data](
5698
chn: ChannelIO[T],
@@ -81,15 +123,16 @@ object FromAsyncBundle {
81123
def channel(
82124
async: AsyncBundle[UInt],
83125
params: AsyncQueueParams = AsyncQueueParams(),
84-
name: Option[String] = None
126+
name: Option[String] = None,
127+
lcrdvReady: Option[Bool]= None
85128
) = {
86129
val gen = chiselTypeOf(async.mem.head)
87130
val out = Wire(new ChannelIO(gen))
88131
val sink = Module(new AsyncQueueSink(gen, params))
89132
if (name.isDefined) { sink.suggestName("asyncQSink_" + name.get) }
90133
sink.io.async <> async
91-
sink.io.deq.ready := true.B
92-
out.flitv := sink.io.deq.valid
134+
sink.io.deq.ready := lcrdvReady.getOrElse(true.B)
135+
out.flitv := sink.io.deq.valid & sink.io.deq.ready
93136
out.flit := sink.io.deq.bits
94137
// flitpend and lcrdv are assigned independently
95138
out.flitpend := DontCare
@@ -121,9 +164,13 @@ class CHIAsyncBridgeSource(params: AsyncQueueParams = AsyncQueueParams())(implic
121164
val resetFinish = Output(Bool())
122165
})
123166

124-
io.async.tx.req.flit <> ToAsyncBundle.channel(io.enq.tx.req, params, Some("txreq_flit"))
125-
io.async.tx.rsp.flit <> ToAsyncBundle.channel(io.enq.tx.rsp, params, Some("txrsp_flit"))
126-
io.async.tx.dat.flit <> ToAsyncBundle.channel(io.enq.tx.dat, params, Some("txdat_flit"))
167+
val async_tx_req = ToAsyncBundleWithBuf.channel(io.enq.tx.req, params, Some("txreq_flit"))
168+
val async_tx_rsp = ToAsyncBundleWithBuf.channel(io.enq.tx.rsp, params, Some("txrsp_flit"))
169+
val async_tx_dat = ToAsyncBundleWithBuf.channel(io.enq.tx.dat, params, Some("txdat_flit"))
170+
171+
io.async.tx.req.flit <> async_tx_req._1
172+
io.async.tx.rsp.flit <> async_tx_rsp._1
173+
io.async.tx.dat.flit <> async_tx_dat._1
127174

128175
io.enq.tx.req.lcrdv <> FromAsyncBundle.bitPulse(io.async.tx.req.lcrdv, params, Some("txreq_lcrdv"))
129176
io.enq.tx.rsp.lcrdv <> FromAsyncBundle.bitPulse(io.async.tx.rsp.lcrdv, params, Some("txrsp_lcrdv"))
@@ -187,16 +234,21 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
187234
val resetFinish = Output(Bool())
188235
})
189236

190-
io.deq.tx.req <> FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"))
191-
io.deq.tx.rsp <> FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"))
192-
io.deq.tx.dat <> FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"))
237+
val txreq_lcrdvReady = Wire(Bool())
238+
val txrsp_lcrdvReady = Wire(Bool())
239+
val txdat_lcrdvReady = Wire(Bool())
240+
io.deq.tx.req <> FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"), Some(txreq_lcrdvReady))
241+
io.deq.tx.rsp <> FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"), Some(txrsp_lcrdvReady))
242+
io.deq.tx.dat <> FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"), Some(txdat_lcrdvReady))
193243

194244
io.async.tx.req.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.req.lcrdv, params, Some("txreq_lcrdv"))
195245
io.async.tx.rsp.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.rsp.lcrdv, params, Some("txrsp_lcrdv"))
196246
io.async.tx.dat.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.dat.lcrdv, params, Some("txdat_lcrdv"))
197247

198-
io.async.rx.rsp.flit <> ToAsyncBundle.channel(io.deq.rx.rsp, params, Some("rxrsp_flit"))
199-
io.async.rx.dat.flit <> ToAsyncBundle.channel(io.deq.rx.dat, params, Some("rxdat_flit"))
248+
val async_rx_rsp = ToAsyncBundleWithBuf.channel(io.deq.rx.rsp, params, Some("rxrsp_flit"))
249+
val async_rx_dat = ToAsyncBundleWithBuf.channel(io.deq.rx.dat, params, Some("rxdat_flit"))
250+
io.async.rx.rsp.flit <> async_rx_rsp._1
251+
io.async.rx.dat.flit <> async_rx_dat._1
200252
io.async.rx.snp.flit <> ToAsyncBundle.channel(io.deq.rx.snp, params, Some("rxsnp_flit"))
201253

202254
io.deq.rx.rsp.lcrdv <> FromAsyncBundle.bitPulse(io.async.rx.rsp.lcrdv, params, Some("rxrsp_lcrdv"))
@@ -239,6 +291,52 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
239291
resetFinish := resetFinishCounter >= RESET_FINISH_MAX.U
240292
io.resetFinish := resetFinish
241293
}
294+
/*
295+
Duplicate the Link Monitor tx/rx state FSM using the deq.rx/deq.tx link-active signals, which are output to the DownStream CHI
296+
*/
297+
val txState = RegInit(LinkStates.STOP)
298+
val rxState = RegInit(LinkStates.STOP)
299+
300+
Seq(txState, rxState).zip(MixedVecInit(Seq(io.deq.tx, io.deq.rx))).foreach { case (state, link) =>
301+
state := MuxLookup(Cat(link.linkactivereq, link.linkactiveack), LinkStates.STOP)(Seq(
302+
Cat(true.B, false.B) -> LinkStates.ACTIVATE,
303+
Cat(true.B, true.B) -> LinkStates.RUN,
304+
Cat(false.B, true.B) -> LinkStates.DEACTIVATE,
305+
Cat(false.B, false.B) -> LinkStates.STOP
306+
))
307+
}
308+
309+
/*
310+
For rx channel, add l-credit manager module to generate lcrdv inside bridge
311+
a. Try to use io.deq.rx as LCredit interface to output lcrdv right after rx flit received.
312+
b. Try to generate io.deq.rx.dat.lcrdv and io.deq.rx.rsp.lcrdv as instant credit return
313+
c. rxsnp is excluded from this scheme and still uses the lcrdv generated in CoupledL2, since snoops may be blocked unpredictably
314+
*/
315+
val rxrspDeact, rxdatDeact = Wire(Bool())
316+
val rxin = WireInit(0.U asTypeOf(Flipped(new DecoupledPortIO()))) //fake Decoupled IO to provide ready
317+
rxin.rx.rsp.ready := async_rx_rsp._2
318+
rxin.rx.dat.ready := async_rx_dat._2
319+
LCredit2Decoupled(io.deq.rx.rsp, rxin.rx.rsp, LinkState(rxState), rxrspDeact, Some("rxrsp"), 15, false)
320+
LCredit2Decoupled(io.deq.rx.dat, rxin.rx.dat, LinkState(rxState), rxdatDeact, Some("rxdat"), 15, false)
321+
/*
322+
For tx channel, add l-credit manager module to generate 'ready' to block tx flit to DownStream CHI
323+
a. The maximum number of L-Credits in tx channel is 4 inside bridge
324+
b. Use more than 4 L-Credits in CoupledL2 to cover the lcrdv synchronization delay from DownStream CHI to CoupledL2
325+
*/
326+
val txin = WireInit(0.U asTypeOf(Flipped(new DecoupledPortIO()))) //fake Decoupled IO to provide flitv
327+
val txout = WireInit(0.U asTypeOf(new PortIO))//fake LCredit IO to provide lcrdv
328+
txout.tx.req.lcrdv := io.deq.tx.req.lcrdv
329+
txout.tx.rsp.lcrdv := io.deq.tx.rsp.lcrdv
330+
txout.tx.dat.lcrdv := io.deq.tx.dat.lcrdv
331+
txin.tx.req.valid := io.deq.tx.req.flitv
332+
txin.tx.rsp.valid := io.deq.tx.rsp.flitv
333+
txin.tx.dat.valid := io.deq.tx.dat.flitv
334+
Decoupled2LCredit(txin.tx.req, txout.tx.req, LinkState(txState), Some("txreq"))
335+
Decoupled2LCredit(txin.tx.rsp, txout.tx.rsp, LinkState(txState), Some("txrsp"))
336+
Decoupled2LCredit(txin.tx.dat, txout.tx.dat, LinkState(txState), Some("txdat"))
337+
txreq_lcrdvReady := txin.tx.req.ready
338+
txrsp_lcrdvReady := txin.tx.rsp.ready
339+
txdat_lcrdvReady := txin.tx.dat.ready
242340

243341
dontTouch(io)
244-
}
342+
}

src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import chisel3.util._
2222
import org.chipsalliance.cde.config.Parameters
2323
import utility._
2424
import coupledL2.L2Module
25+
import coupledL2.HasCoupledL2Parameters
2526

2627
class ChannelIO[+T <: Data](gen: T) extends Bundle {
2728
// Flit Pending. Early indication that a flit might be transmitted in the following cycle
@@ -208,9 +209,9 @@ class LCredit2Decoupled[T <: Bundle](
208209
QueuePerf(size = lcreditNum, utilization = queue.io.count, full = queue.io.count === lcreditNum.U)
209210
} else {
210211
val lcreditReturn = WireInit(false.B)
211-
lcreditOut := (lcreditPool > 0.U) && enableLCredit
212+
lcreditOut := (lcreditPool > 0.U) && enableLCredit && io.out.ready
212213

213-
assert(!accept || io.out.ready)
214+
//assert(!accept || io.out.ready) /* disabled: this module is reused in cases where upstream may not always be ready to accept */
214215

215216
io.out.valid := accept && !lcreditReturn
216217
var lsb = 0
@@ -254,7 +255,10 @@ object LCredit2Decoupled {
254255
}
255256
}
256257

257-
class Decoupled2LCredit[T <: Bundle](gen: T)(implicit p: Parameters) extends Module {
258+
class Decoupled2LCredit[T <: Bundle](
259+
gen: T,
260+
overlcreditNum: Option[Int] = None,
261+
)(implicit val p: Parameters) extends Module with HasCoupledL2Parameters {
258262
val io = IO(new Bundle() {
259263
val in = Flipped(DecoupledIO(gen.cloneType))
260264
val out = ChannelIO(gen.cloneType)
@@ -270,15 +274,18 @@ class Decoupled2LCredit[T <: Bundle](gen: T)(implicit p: Parameters) extends Mod
270274

271275
// The maximum number of L-Credits that a receiver can provide is 15.
272276
val lcreditsMax = 15
273-
val lcreditPool = RegInit(0.U(log2Up(lcreditsMax).W))
277+
val enableCHIAsync = cacheParams.enableCHIAsyncBridge.getOrElse(false)
278+
val overlcreditVal = if(enableCHIAsync) overlcreditNum.getOrElse(0) else 0
279+
val lcreditsMaxAll = lcreditsMax + overlcreditVal
280+
val lcreditPool = RegInit(overlcreditVal.U(log2Up(lcreditsMaxAll+1).W))
274281

275-
val returnLCreditValid = !io.in.valid && state === LinkStates.DEACTIVATE && lcreditPool =/= 0.U
282+
val returnLCreditValid = !io.in.valid && state === LinkStates.DEACTIVATE && lcreditPool =/= overlcreditVal.U
276283
val flitv = io.in.fire || returnLCreditValid
277284

278285
when (acceptLCredit) {
279286
when (!flitv) {
280287
lcreditPool := lcreditPool + 1.U
281-
assert(lcreditPool + 1.U =/= 0.U, "L-Credit pool overflow")
288+
assert(lcreditPool < lcreditsMaxAll.U, "L-Credit pool overflow")
282289
}
283290
}.otherwise {
284291
when (flitv) {
@@ -304,9 +311,10 @@ object Decoupled2LCredit {
304311
left: DecoupledIO[T],
305312
right: ChannelIO[T],
306313
state: LinkState,
307-
suggestName: Option[String] = None
314+
suggestName: Option[String] = None,
315+
overlcreditNum: Option[Int] = None
308316
)(implicit p: Parameters): Unit = {
309-
val mod = Module(new Decoupled2LCredit(left.bits.cloneType))
317+
val mod = Module(new Decoupled2LCredit(left.bits.cloneType, overlcreditNum))
310318
suggestName.foreach(name => mod.suggestName(s"Decoupled2LCredit_${name}"))
311319

312320
mod.io.in <> left
@@ -339,9 +347,9 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes {
339347
/* IO assignment */
340348
val rxsnpDeact, rxrspDeact, rxdatDeact = Wire(Bool())
341349
val rxDeact = rxsnpDeact && rxrspDeact && rxdatDeact
342-
Decoupled2LCredit(setSrcID(io.in.tx.req, io.nodeID), io.out.tx.req, LinkState(txState), Some("txreq"))
343-
Decoupled2LCredit(setSrcID(io.in.tx.rsp, io.nodeID), io.out.tx.rsp, LinkState(txState), Some("txrsp"))
344-
Decoupled2LCredit(setSrcID(io.in.tx.dat, io.nodeID), io.out.tx.dat, LinkState(txState), Some("txdat"))
350+
Decoupled2LCredit(setSrcID(io.in.tx.req, io.nodeID), io.out.tx.req, LinkState(txState), Some("txreq"), Some(8))
351+
Decoupled2LCredit(setSrcID(io.in.tx.rsp, io.nodeID), io.out.tx.rsp, LinkState(txState), Some("txrsp"), Some(8))
352+
Decoupled2LCredit(setSrcID(io.in.tx.dat, io.nodeID), io.out.tx.dat, LinkState(txState), Some("txdat"), Some(8))
345353
LCredit2Decoupled(io.out.rx.snp, io.in.rx.snp, LinkState(rxState), rxsnpDeact, Some("rxsnp"))
346354
LCredit2Decoupled(io.out.rx.rsp, io.in.rx.rsp, LinkState(rxState), rxrspDeact, Some("rxrsp"), 15, false)
347355
LCredit2Decoupled(io.out.rx.dat, io.in.rx.dat, LinkState(rxState), rxdatDeact, Some("rxdat"), 15, false)

0 commit comments

Comments
 (0)