Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 127 additions & 12 deletions src/main/scala/coupledL2/tl2chi/chi/AsyncBridge.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,51 @@ class AsyncPortIO(
val rx = Flipped(new AsyncUpwardsLinkIO(params))
}

/*
* This module enhances the standard async bridge by adding a front-end shadow buffer
* to decouple local processing from asynchronous latency and provide instant credit
* return to upstream modules
*
* Upstream → [Shadow Buffer (16)] → [AsyncQueueSource (4)] → Async Domain → Downstream
* ↑ ↑
* Instant Credit return Async Credit
*
* The shadow buffer allows immediate credit return to upstream for flits accepted into
* the shadow buffer, while actual credit recovery happens asynchronously when downstream
* processes the flits.
*/
object ToAsyncBundleWithBuf {
def channel[T <: Data](
chn: ChannelIO[T],
params: AsyncQueueParams = AsyncQueueParams(depth = 4),
name: Option[String] = None
): (Data, Bool) = {
/*
1. Shadow Buffer (depth=16, flow mode for low latency)
*/
val shadow_buffer = Module(new Queue(chiselTypeOf(chn.flit), 16, flow = true, pipe = false))
if (name.isDefined) { shadow_buffer.suggestName("shadowBuffer_" + name.get) }
shadow_buffer.io.enq.valid := chn.flitv
shadow_buffer.io.enq.bits := chn.flit
/*
2. For rx channel (CMN->L2), send out lcrdv right after a flit entering Shadow buffer(enqFire)
*/
val enqReady = shadow_buffer.io.enq.ready
val enqFire = enqReady & chn.flitv
dontTouch(enqFire)
assert(!chn.flitv || shadow_buffer.io.enq.ready, s"${name.getOrElse("ToAsyncBundle")}: Shadow buffer overflow!")
/*
3. AsyncQueueSource (depth=4)
*/
val source = Module(new AsyncQueueSource(chiselTypeOf(chn.flit), params))
if (name.isDefined) { source.suggestName("asyncQSource_" + name.get) }
source.io.enq <> shadow_buffer.io.deq

(source.io.async, enqFire)
}
}


object ToAsyncBundle {
def channel[T <: Data](
chn: ChannelIO[T],
Expand Down Expand Up @@ -81,15 +126,16 @@ object FromAsyncBundle {
def channel(
async: AsyncBundle[UInt],
params: AsyncQueueParams = AsyncQueueParams(),
name: Option[String] = None
name: Option[String] = None,
lcrdvReady: Option[Bool]= None
) = {
val gen = chiselTypeOf(async.mem.head)
val out = Wire(new ChannelIO(gen))
val sink = Module(new AsyncQueueSink(gen, params))
if (name.isDefined) { sink.suggestName("asyncQSink_" + name.get) }
sink.io.async <> async
sink.io.deq.ready := true.B
out.flitv := sink.io.deq.valid
sink.io.deq.ready := lcrdvReady.getOrElse(true.B)
out.flitv := sink.io.deq.valid & sink.io.deq.ready
out.flit := sink.io.deq.bits
// flitpend and lcrdv are assigned independently
out.flitpend := DontCare
Expand Down Expand Up @@ -121,9 +167,17 @@ class CHIAsyncBridgeSource(params: AsyncQueueParams = AsyncQueueParams())(implic
val resetFinish = Output(Bool())
})

io.async.tx.req.flit <> ToAsyncBundle.channel(io.enq.tx.req, params, Some("txreq_flit"))
io.async.tx.rsp.flit <> ToAsyncBundle.channel(io.enq.tx.rsp, params, Some("txrsp_flit"))
io.async.tx.dat.flit <> ToAsyncBundle.channel(io.enq.tx.dat, params, Some("txdat_flit"))
val async_tx_req = ToAsyncBundleWithBuf.channel(io.enq.tx.req, params, Some("txreq_flit"))
val async_tx_rsp = ToAsyncBundleWithBuf.channel(io.enq.tx.rsp, params, Some("txrsp_flit"))
val async_tx_dat = ToAsyncBundleWithBuf.channel(io.enq.tx.dat, params, Some("txdat_flit"))

io.async.tx.req.flit <> async_tx_req._1
io.async.tx.rsp.flit <> async_tx_rsp._1
io.async.tx.dat.flit <> async_tx_dat._1

// io.async.tx.req.flit <> ToAsyncBundle.channel(io.enq.tx.req, params, Some("txreq_flit"))
// io.async.tx.rsp.flit <> ToAsyncBundle.channel(io.enq.tx.rsp, params, Some("txrsp_flit"))
// io.async.tx.dat.flit <> ToAsyncBundle.channel(io.enq.tx.dat, params, Some("txdat_flit"))

io.enq.tx.req.lcrdv <> FromAsyncBundle.bitPulse(io.async.tx.req.lcrdv, params, Some("txreq_lcrdv"))
io.enq.tx.rsp.lcrdv <> FromAsyncBundle.bitPulse(io.async.tx.rsp.lcrdv, params, Some("txrsp_lcrdv"))
Expand Down Expand Up @@ -187,16 +241,25 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
val resetFinish = Output(Bool())
})

io.deq.tx.req <> FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"))
io.deq.tx.rsp <> FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"))
io.deq.tx.dat <> FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"))
val txreq_lcrdvReady = Wire(Bool())
val txrsp_lcrdvReady = Wire(Bool())
val txdat_lcrdvReady = Wire(Bool())
io.deq.tx.req <> FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"), Some(txreq_lcrdvReady))
io.deq.tx.rsp <> FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"), Some(txrsp_lcrdvReady))
io.deq.tx.dat <> FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"), Some(txdat_lcrdvReady))
// io.deq.tx.req <> FromAsyncBundle.channel(io.async.tx.req.flit, params, Some("txreq_flit"))
// io.deq.tx.rsp <> FromAsyncBundle.channel(io.async.tx.rsp.flit, params, Some("txrsp_flit"))
// io.deq.tx.dat <> FromAsyncBundle.channel(io.async.tx.dat.flit, params, Some("txdat_flit"))

io.async.tx.req.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.req.lcrdv, params, Some("txreq_lcrdv"))
io.async.tx.rsp.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.rsp.lcrdv, params, Some("txrsp_lcrdv"))
io.async.tx.dat.lcrdv <> ToAsyncBundle.bitPulse(io.deq.tx.dat.lcrdv, params, Some("txdat_lcrdv"))

io.async.rx.rsp.flit <> ToAsyncBundle.channel(io.deq.rx.rsp, params, Some("rxrsp_flit"))
io.async.rx.dat.flit <> ToAsyncBundle.channel(io.deq.rx.dat, params, Some("rxdat_flit"))
val async_rx_rsp = ToAsyncBundleWithBuf.channel(io.deq.rx.rsp, params, Some("rxrsp_flit"))
val async_rx_dat = ToAsyncBundleWithBuf.channel(io.deq.rx.dat, params, Some("rxdat_flit"))
io.async.rx.rsp.flit <> async_rx_rsp._1
io.async.rx.dat.flit <> async_rx_dat._1

io.async.rx.snp.flit <> ToAsyncBundle.channel(io.deq.rx.snp, params, Some("rxsnp_flit"))

io.deq.rx.rsp.lcrdv <> FromAsyncBundle.bitPulse(io.async.rx.rsp.lcrdv, params, Some("rxrsp_lcrdv"))
Expand Down Expand Up @@ -240,5 +303,57 @@ class CHIAsyncBridgeSink(params: AsyncQueueParams = AsyncQueueParams())(implicit
io.resetFinish := resetFinish
}


/*
Duplicate Link Monitor tx/rx state FSM by using deq.rx deq.tx active signals which outuput to DownStream CHI
*/
val txState = RegInit(LinkStates.STOP)
val rxState = RegInit(LinkStates.STOP)

Seq(txState, rxState).zip(MixedVecInit(Seq(io.deq.tx, io.deq.rx))).foreach { case (state, link) =>
state := MuxLookup(Cat(link.linkactivereq, link.linkactiveack), LinkStates.STOP)(Seq(
Cat(true.B, false.B) -> LinkStates.ACTIVATE,
Cat(true.B, true.B) -> LinkStates.RUN,
Cat(false.B, true.B) -> LinkStates.DEACTIVATE,
Cat(false.B, false.B) -> LinkStates.STOP
))
}
/*
For rx channel, add l-credit manager module to generate lcrdv inside bridge
a. Try to use io.deq.rx as LCredit interface to output lcrdv right after rx flit received.
b. rxsnp is not in this practice and still use lcrdv generated in CoupledL2 since snoop may be unpredictablely blocked
*/
val rxrspDeact, rxdatDeact = Wire(Bool())
val rxin = WireInit(0.U asTypeOf(Flipped(new DecoupledPortIO()))) //fake Decoupled IO to provide ready
rxin.rx.rsp.ready := async_rx_rsp._2
rxin.rx.dat.ready := async_rx_dat._2
LCredit2Decoupled(io.deq.rx.rsp, rxin.rx.rsp, LinkState(rxState), rxrspDeact, Some("rxrsp"), 15, false)
LCredit2Decoupled(io.deq.rx.dat, rxin.rx.dat, LinkState(rxState), rxdatDeact, Some("rxdat"), 15, false)


/*
For tx channel, add l-credit manager module to generate 'ready' to block tx flit to DownStream CHI
a. The maximum number of L-Credits in tx channel is 4 inside bridge
b. Use L-Credits number more than 4 in CoupledL2 to cover lcrdv sync delay from DownStream CHI to CoupledL2
*/
val txin = WireInit(0.U asTypeOf(Flipped(new DecoupledPortIO()))) //fake Decoupled IO to provide flitv
val txout = WireInit(0.U asTypeOf(new PortIO))//fake LCredit IO to provide lcrdv
txout.tx.req.lcrdv := io.deq.tx.req.lcrdv
txout.tx.rsp.lcrdv := io.deq.tx.rsp.lcrdv
txout.tx.dat.lcrdv := io.deq.tx.dat.lcrdv

txin.tx.req.valid := io.deq.tx.req.flitv
txin.tx.rsp.valid := io.deq.tx.rsp.flitv
txin.tx.dat.valid := io.deq.tx.dat.flitv

Decoupled2LCredit(txin.tx.req, txout.tx.req, LinkState(txState), Some("txreq"))
Decoupled2LCredit(txin.tx.rsp, txout.tx.rsp, LinkState(txState), Some("txrsp"))
Decoupled2LCredit(txin.tx.dat, txout.tx.dat, LinkState(txState), Some("txdat"))

txreq_lcrdvReady := txin.tx.req.ready
txrsp_lcrdvReady := txin.tx.rsp.ready
txdat_lcrdvReady := txin.tx.dat.ready

dontTouch(io)
}

}
35 changes: 25 additions & 10 deletions src/main/scala/coupledL2/tl2chi/chi/LinkLayer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,16 @@ object LCredit2Decoupled {
}
}

class Decoupled2LCredit[T <: Bundle](gen: T)(implicit p: Parameters) extends Module {
/**
* overlcreditNum: Additional credits beyond CHI protocol maximum (15)
* In async systems, credit return suffers round-trip latency through the async bridge.
* This parameter "pre-allocates" credits so L2 can send immediately, with credits
* replenished slowly as CMN processes flits and returns credits through async path.
*/
class Decoupled2LCredit[T <: Bundle](
gen: T,
overlcreditNum: Option[Int] = None,
)(implicit p: Parameters) extends Module {
val io = IO(new Bundle() {
val in = Flipped(DecoupledIO(gen.cloneType))
val out = ChannelIO(gen.cloneType)
Expand All @@ -268,17 +277,22 @@ class Decoupled2LCredit[T <: Bundle](gen: T)(implicit p: Parameters) extends Mod
val disableLCredit = state === LinkStates.STOP
val acceptLCredit = out.lcrdv && !disableLCredit

// The maximum number of L-Credits that a receiver can provide is 15.
/* Total available L-credits = CHI protocol maximum (15) + overlcreditNum
* Initial credit pool = overlcreditNum
*/
val lcreditsMax = 15
val lcreditPool = RegInit(0.U(log2Up(lcreditsMax).W))
val overlcreditVal = overlcreditNum.getOrElse(0)
val lcreditsMaxAll = lcreditsMax + overlcreditVal
val lcreditPool = RegInit(overlcreditVal.U(log2Up(lcreditsMaxAll+1).W))

val returnLCreditValid = !io.in.valid && state === LinkStates.DEACTIVATE && lcreditPool =/= 0.U
// val returnLCreditValid = !io.in.valid && state === LinkStates.DEACTIVATE && lcreditPool =/= 0.U
val returnLCreditValid = !io.in.valid && state === LinkStates.DEACTIVATE && lcreditPool =/= overlcreditVal.U
val flitv = io.in.fire || returnLCreditValid

when (acceptLCredit) {
when (!flitv) {
lcreditPool := lcreditPool + 1.U
assert(lcreditPool + 1.U =/= 0.U, "L-Credit pool overflow")
assert(lcreditPool < lcreditsMaxAll.U, "L-Credit pool overflow")
}
}.otherwise {
when (flitv) {
Expand All @@ -304,9 +318,10 @@ object Decoupled2LCredit {
left: DecoupledIO[T],
right: ChannelIO[T],
state: LinkState,
suggestName: Option[String] = None
suggestName: Option[String] = None,
overlcreditNum: Option[Int] = None
)(implicit p: Parameters): Unit = {
val mod = Module(new Decoupled2LCredit(left.bits.cloneType))
val mod = Module(new Decoupled2LCredit(left.bits.cloneType, overlcreditNum))
suggestName.foreach(name => mod.suggestName(s"Decoupled2LCredit_${name}"))

mod.io.in <> left
Expand Down Expand Up @@ -339,9 +354,9 @@ class LinkMonitor(implicit p: Parameters) extends L2Module with HasCHIOpcodes {
/* IO assignment */
val rxsnpDeact, rxrspDeact, rxdatDeact = Wire(Bool())
val rxDeact = rxsnpDeact && rxrspDeact && rxdatDeact
Decoupled2LCredit(setSrcID(io.in.tx.req, io.nodeID), io.out.tx.req, LinkState(txState), Some("txreq"))
Decoupled2LCredit(setSrcID(io.in.tx.rsp, io.nodeID), io.out.tx.rsp, LinkState(txState), Some("txrsp"))
Decoupled2LCredit(setSrcID(io.in.tx.dat, io.nodeID), io.out.tx.dat, LinkState(txState), Some("txdat"))
Decoupled2LCredit(setSrcID(io.in.tx.req, io.nodeID), io.out.tx.req, LinkState(txState), Some("txreq"), Some(4))
Decoupled2LCredit(setSrcID(io.in.tx.rsp, io.nodeID), io.out.tx.rsp, LinkState(txState), Some("txrsp"), Some(4))
Decoupled2LCredit(setSrcID(io.in.tx.dat, io.nodeID), io.out.tx.dat, LinkState(txState), Some("txdat"), Some(4))
LCredit2Decoupled(io.out.rx.snp, io.in.rx.snp, LinkState(rxState), rxsnpDeact, Some("rxsnp"))
LCredit2Decoupled(io.out.rx.rsp, io.in.rx.rsp, LinkState(rxState), rxrspDeact, Some("rxrsp"), 15, false)
LCredit2Decoupled(io.out.rx.dat, io.in.rx.dat, LinkState(rxState), rxdatDeact, Some("rxdat"), 15, false)
Expand Down