@@ -50,7 +50,49 @@ class AsyncPortIO(
5050 val tx = new AsyncDownwardsLinkIO (params)
5151 val rx = Flipped (new AsyncUpwardsLinkIO (params))
5252}
53+ /*
54+ * This module enhances the standard async bridge by adding a front-end shadow buffer
55+ * to decouple local processing from asynchronous latency and provide instant credit
56+ * return to upstream modules
57+ *
58+ * rx: DownStream(CMN) → [Shadow Buffer (16)] → [AsyncQueueSink (4)] → [AsyncQueueSource (4)] → Upstream (L2)
59+ * ↑
60+ * Instant Credit return
61+ *
62+ * tx: UpStream(L2) → [Shadow Buffer (16)] → [AsyncQueueSource (4)] → [AsyncQueueSink (4)] → Downstream (CMN)
63+ * ↑ ↑
64+ * CHI Credit + over-credit (4) | Credit manager generates back-pressure
65+ *
66+ */
67+ object ToAsyncBundleWithBuf { // Builds: ChannelIO flit -> shadow Queue -> AsyncQueueSource, and also returns the shadow queue's deq.ready
68+ def channel [T <: Data ](
69+ chn : ChannelIO [T ],
70+ params : AsyncQueueParams = AsyncQueueParams (depth = 4 ),
71+ name : Option [String ] = None
72+ ): (Data , Bool ) = { // returns (source.io.async, shadow_buffer.io.deq.ready)
73+ /*
74+ 1. Shadow buffer: a 16-entry Queue created with flow = true and pipe = false (flow mode chosen for low latency). Its enq.valid / enq.bits are driven directly from chn.flitv / chn.flit; enq.ready is not fed back to chn here and is only checked by the assert below. When `name` is given it is used as a suffix for the instance name.
75+ */
76+ val shadow_buffer = Module (new Queue (chiselTypeOf(chn.flit), 16 , flow = true , pipe = false ))
77+ if (name.isDefined) { shadow_buffer.suggestName(" shadowBuffer_" + name.get) }
78+ shadow_buffer.io.enq.valid := chn.flitv
79+ shadow_buffer.io.enq.bits := chn.flit
80+ /*
81+ 2. Capture the shadow buffer's deq.ready and hand it back to the caller as the second tuple element. Intended use per the original note: on the rx channel (CMN->L2), lcrdv is sent right after a flit enters the shadow buffer if there is space. NOTE(review): the credit generation itself is done by the caller, not in this function -- confirm against the call site. The assert fires when flitv is high while enq.ready is low, i.e. a flit arrives that the shadow buffer cannot accept.
82+ */
83+ val deqReady = shadow_buffer.io.deq.ready
84+ dontTouch(deqReady) // keep this signal from being optimized away
85+ assert(! chn.flitv || shadow_buffer.io.enq.ready, s " ${name.getOrElse(" ToAsyncBundle" )}: Shadow buffer overflow! " )
86+ /*
87+ 3. AsyncQueueSource built from `params` (default depth = 4). Its enq port is bulk-connected to the shadow buffer's deq port, so the shadow buffer drains into the async crossing. The async side of the source is the first element of the returned tuple.
88+ */
89+ val source = Module (new AsyncQueueSource (chiselTypeOf(chn.flit), params))
90+ if (name.isDefined) { source.suggestName(" asyncQSource_" + name.get) }
91+ source.io.enq <> shadow_buffer.io.deq
5392
93+ (source.io.async, deqReady)
94+ }
95+ }
5496object ToAsyncBundle {
5597 def channel [T <: Data ](
5698 chn : ChannelIO [T ],
@@ -81,15 +123,16 @@ object FromAsyncBundle {
81123 def channel (
82124 async : AsyncBundle [UInt ],
83125 params : AsyncQueueParams = AsyncQueueParams (),
84- name : Option [String ] = None
126+ name : Option [String ] = None ,
127+ lcrdvReady : Option [Bool ]= None
85128 ) = {
86129 val gen = chiselTypeOf(async.mem.head)
87130 val out = Wire (new ChannelIO (gen))
88131 val sink = Module (new AsyncQueueSink (gen, params))
89132 if (name.isDefined) { sink.suggestName(" asyncQSink_" + name.get) }
90133 sink.io.async <> async
91- sink.io.deq.ready := true .B
92- out.flitv := sink.io.deq.valid
134+ sink.io.deq.ready := lcrdvReady.getOrElse( true .B )
135+ out.flitv := sink.io.deq.valid & sink.io.deq.ready
93136 out.flit := sink.io.deq.bits
94137 // flitpend and lcrdv are assigned independently
95138 out.flitpend := DontCare
@@ -121,9 +164,13 @@ class CHIAsyncBridgeSource(params: AsyncQueueParams = AsyncQueueParams())(implic
121164 val resetFinish = Output (Bool ())
122165 })
123166
124- io.async.tx.req.flit <> ToAsyncBundle .channel(io.enq.tx.req, params, Some (" txreq_flit" ))
125- io.async.tx.rsp.flit <> ToAsyncBundle .channel(io.enq.tx.rsp, params, Some (" txrsp_flit" ))
126- io.async.tx.dat.flit <> ToAsyncBundle .channel(io.enq.tx.dat, params, Some (" txdat_flit" ))
167+ val async_tx_req = ToAsyncBundleWithBuf .channel(io.enq.tx.req, params, Some (" txreq_flit" ))
168+ val async_tx_rsp = ToAsyncBundleWithBuf .channel(io.enq.tx.rsp, params, Some (" txrsp_flit" ))
169+ val async_tx_dat = ToAsyncBundleWithBuf .channel(io.enq.tx.dat, params, Some (" txdat_flit" ))
170+
171+ io.async.tx.req.flit <> async_tx_req._1
172+ io.async.tx.rsp.flit <> async_tx_rsp._1
173+ io.async.tx.dat.flit <> async_tx_dat._1
127174
128175 io.enq.tx.req.lcrdv <> FromAsyncBundle .bitPulse(io.async.tx.req.lcrdv, params, Some (" txreq_lcrdv" ))
129176 io.enq.tx.rsp.lcrdv <> FromAsyncBundle .bitPulse(io.async.tx.rsp.lcrdv, params, Some (" txrsp_lcrdv" ))
@@ -187,16 +234,21 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
187234 val resetFinish = Output (Bool ())
188235 })
189236
190- io.deq.tx.req <> FromAsyncBundle .channel(io.async.tx.req.flit, params, Some (" txreq_flit" ))
191- io.deq.tx.rsp <> FromAsyncBundle .channel(io.async.tx.rsp.flit, params, Some (" txrsp_flit" ))
192- io.deq.tx.dat <> FromAsyncBundle .channel(io.async.tx.dat.flit, params, Some (" txdat_flit" ))
237+ val txreq_lcrdvReady = Wire (Bool ())
238+ val txrsp_lcrdvReady = Wire (Bool ())
239+ val txdat_lcrdvReady = Wire (Bool ())
240+ io.deq.tx.req <> FromAsyncBundle .channel(io.async.tx.req.flit, params, Some (" txreq_flit" ), Some (txreq_lcrdvReady))
241+ io.deq.tx.rsp <> FromAsyncBundle .channel(io.async.tx.rsp.flit, params, Some (" txrsp_flit" ), Some (txrsp_lcrdvReady))
242+ io.deq.tx.dat <> FromAsyncBundle .channel(io.async.tx.dat.flit, params, Some (" txdat_flit" ), Some (txdat_lcrdvReady))
193243
194244 io.async.tx.req.lcrdv <> ToAsyncBundle .bitPulse(io.deq.tx.req.lcrdv, params, Some (" txreq_lcrdv" ))
195245 io.async.tx.rsp.lcrdv <> ToAsyncBundle .bitPulse(io.deq.tx.rsp.lcrdv, params, Some (" txrsp_lcrdv" ))
196246 io.async.tx.dat.lcrdv <> ToAsyncBundle .bitPulse(io.deq.tx.dat.lcrdv, params, Some (" txdat_lcrdv" ))
197247
198- io.async.rx.rsp.flit <> ToAsyncBundle .channel(io.deq.rx.rsp, params, Some (" rxrsp_flit" ))
199- io.async.rx.dat.flit <> ToAsyncBundle .channel(io.deq.rx.dat, params, Some (" rxdat_flit" ))
248+ val async_rx_rsp = ToAsyncBundleWithBuf .channel(io.deq.rx.rsp, params, Some (" rxrsp_flit" ))
249+ val async_rx_dat = ToAsyncBundleWithBuf .channel(io.deq.rx.dat, params, Some (" rxdat_flit" ))
250+ io.async.rx.rsp.flit <> async_rx_rsp._1
251+ io.async.rx.dat.flit <> async_rx_dat._1
200252 io.async.rx.snp.flit <> ToAsyncBundle .channel(io.deq.rx.snp, params, Some (" rxsnp_flit" ))
201253
202254 io.deq.rx.rsp.lcrdv <> FromAsyncBundle .bitPulse(io.async.rx.rsp.lcrdv, params, Some (" rxrsp_lcrdv" ))
@@ -239,6 +291,52 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
239291 resetFinish := resetFinishCounter >= RESET_FINISH_MAX .U
240292 io.resetFinish := resetFinish
241293 }
294+ /*
295+ Duplicate the Link Monitor tx/rx state FSM using the deq.tx / deq.rx link-active signals that are output to the DownStream CHI
296+ */
297+ val txState = RegInit (LinkStates .STOP )
298+ val rxState = RegInit (LinkStates .STOP )
299+
300+ Seq (txState, rxState).zip(MixedVecInit (Seq (io.deq.tx, io.deq.rx))).foreach { case (state, link) =>
301+ state := MuxLookup (Cat (link.linkactivereq, link.linkactiveack), LinkStates .STOP )(Seq (
302+ Cat (true .B , false .B ) -> LinkStates .ACTIVATE ,
303+ Cat (true .B , true .B ) -> LinkStates .RUN ,
304+ Cat (false .B , true .B ) -> LinkStates .DEACTIVATE ,
305+ Cat (false .B , false .B ) -> LinkStates .STOP
306+ ))
307+ }
308+
309+ /*
310+ For rx channel, add l-credit manager module to generate lcrdv inside bridge
311+ a. Try to use io.deq.rx as LCredit interface to output lcrdv right after rx flit received.
312+ b. Try to generate io.deq.rx.dat.lcrdv and io.deq.rx.rsp.lcrdv as instant credit return
313+ c. rxsnp is not covered by this scheme and still uses the lcrdv generated in CoupledL2, since snoops may be blocked unpredictably
314+ */
315+ val rxrspDeact, rxdatDeact = Wire (Bool ())
316+ val rxin = WireInit (0 .U asTypeOf(Flipped (new DecoupledPortIO ()))) // fake Decoupled IO to provide ready
317+ rxin.rx.rsp.ready := async_rx_rsp._2
318+ rxin.rx.dat.ready := async_rx_dat._2
319+ LCredit2Decoupled (io.deq.rx.rsp, rxin.rx.rsp, LinkState (rxState), rxrspDeact, Some (" rxrsp" ), 15 , false )
320+ LCredit2Decoupled (io.deq.rx.dat, rxin.rx.dat, LinkState (rxState), rxdatDeact, Some (" rxdat" ), 15 , false )
321+ /*
322+ For tx channel, add l-credit manager module to generate 'ready' to block tx flit to DownStream CHI
323+ a. The maximum number of L-Credits in tx channel is 4 inside bridge
324+ b. Use more than 4 L-Credits in CoupledL2 to cover the lcrdv sync delay from DownStream CHI to CoupledL2
325+ */
326+ val txin = WireInit (0 .U asTypeOf(Flipped (new DecoupledPortIO ()))) // fake Decoupled IO to provide flitv
327+ val txout = WireInit (0 .U asTypeOf(new PortIO ))// fake LCredit IO to provide lcrdv
328+ txout.tx.req.lcrdv := io.deq.tx.req.lcrdv
329+ txout.tx.rsp.lcrdv := io.deq.tx.rsp.lcrdv
330+ txout.tx.dat.lcrdv := io.deq.tx.dat.lcrdv
331+ txin.tx.req.valid := io.deq.tx.req.flitv
332+ txin.tx.rsp.valid := io.deq.tx.rsp.flitv
333+ txin.tx.dat.valid := io.deq.tx.dat.flitv
334+ Decoupled2LCredit (txin.tx.req, txout.tx.req, LinkState (txState), Some (" txreq" ))
335+ Decoupled2LCredit (txin.tx.rsp, txout.tx.rsp, LinkState (txState), Some (" txrsp" ))
336+ Decoupled2LCredit (txin.tx.dat, txout.tx.dat, LinkState (txState), Some (" txdat" ))
337+ txreq_lcrdvReady := txin.tx.req.ready
338+ txrsp_lcrdvReady := txin.tx.rsp.ready
339+ txdat_lcrdvReady := txin.tx.dat.ready
242340
243341 dontTouch(io)
244- }
342+ }
0 commit comments