Skip to content

Commit

Permalink
feat(CMO): merged CMO requests into TileLink A Channel (#282)
Browse files Browse the repository at this point in the history
* Removed 'hasCMO' parameter.

* Removed all SinkCMO related modules.

* Merged CMO requests into the TileLink A channel using an extended 4-bit opcode and custom encodings.  
 See OpenXiangShan/rocket-chip#17.

* Added support for **WriteCleanFull**.

* Expanded all opcode decoding to be compatible with new 4-bit opcode encoding.

* Enabled the datapath `mp_release` -> `ReleaseBuffer` -> `DataStorage`.

* Reused `mp_release` for CMO-derived **WriteBackFull**, **WriteCleanFull** and **Evict**.

* Reused `mp_acquire` for CMO-derived **CleanShared**, **CleanInvalid** and **MakeInvalid**.

* Bumped OpenXiangShan/rocket-chip.
  • Loading branch information
Kumonda221-CrO3 authored Nov 30, 2024
1 parent d66cd85 commit c4ce81f
Show file tree
Hide file tree
Showing 19 changed files with 216 additions and 284 deletions.
2 changes: 0 additions & 2 deletions src/main/scala/coupledL2/BaseSlice.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ abstract class BaseSliceIO[T_OUT <: BaseOuterBundle](implicit p: Parameters) ext
// val msStatus = topDownOpt.map(_ => Vec(mshrsAll, ValidIO(new MSHRStatus)))
val dirResult = topDownOpt.map(_ => ValidIO(new DirResult))
val latePF = topDownOpt.map(_ => Output(Bool()))
val cmoReq = Flipped(DecoupledIO(new CMOReq()))
val cmoResp = DecoupledIO(new CMOResp())
}

abstract class BaseSlice[T_OUT <: BaseOuterBundle](implicit p: Parameters) extends L2Module with HasPerfEvents {
Expand Down
25 changes: 10 additions & 15 deletions src/main/scala/coupledL2/Common.scala
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class TaskBundle(implicit p: Parameters) extends L2Bundle
val vaddr = vaddrBitsOpt.map(_ => UInt(vaddrBitsOpt.get.W)) // vaddr passed by client cache
// from L1 load miss require
val isKeyword = isKeywordBitsOpt.map(_ => Bool())
val opcode = UInt(3.W) // type of the task operation
val opcode = UInt(4.W) // type of the task operation
val param = UInt(3.W)
val size = UInt(msgSizeBits.W)
val sourceId = UInt(sourceIdBits.W) // tilelink sourceID
Expand All @@ -81,9 +81,6 @@ class TaskBundle(implicit p: Parameters) extends L2Bundle
val useProbeData = Bool() // data source, true for ReleaseBuf and false for RefillBuf
val mshrRetry = Bool() // is retry task for mshr conflict

// For CMO request
val cmoTask = Bool()

// For Intent
val fromL2pft = prefetchOpt.map(_ => Bool()) // Is the prefetch req from L2(BOP) or from L1 prefetch?
// If true, MSHR should send an ack to L2 prefetcher.
Expand All @@ -106,6 +103,9 @@ class TaskBundle(implicit p: Parameters) extends L2Bundle
// for Release to read refillBuf and write to DS
val replTask = Bool()

// for CMO
val cmoTask = Bool()

// for TopDown Monitor (# TopDown)
val reqSource = UInt(MemReqSource.reqSourceBits.W)

Expand Down Expand Up @@ -207,12 +207,17 @@ class MSHRInfo(implicit p: Parameters) extends L2Bundle with HasTLChannelBits {
val mergeA = Bool() // whether the mshr already merge an acquire(avoid alias merge)

val w_grantfirst = Bool()
val s_release = Bool()
val s_refill = Bool()
val s_cmoresp = Bool()
val w_releaseack = Bool()
val w_replResp = Bool()
val w_rprobeacklast = Bool()

val replaceData = Bool() // If there is a replace, WriteBackFull or Evict

// exclude Release toB for nested snoop of releases
val releaseToB = Bool()
}

class RespInfoBundle(implicit p: Parameters) extends L2Bundle
Expand Down Expand Up @@ -254,7 +259,7 @@ class FSMState(implicit p: Parameters) extends L2Bundle {
// val s_grantack = Bool() // respond grantack downwards, moved to GrantBuf
// val s_triggerprefetch = prefetchOpt.map(_ => Bool())
val s_retry = Bool() // need retry when conflict
val s_cmoresp = Bool() // resp upwards for finishing cmo inst
val s_cmoresp = Bool() // resp upwards for finishing CMO transactions

// wait
val w_rprobeackfirst = Bool()
Expand Down Expand Up @@ -328,16 +333,6 @@ class L2ToL1Hint(implicit p: Parameters) extends Bundle {
val isKeyword = Bool() // miss entry keyword
}

// custom l2 - l1 CMO inst req
class CMOReq(implicit p: Parameters) extends Bundle {
val opcode = UInt(3.W) // 0-cbo.clean, 1-cbo.flush, 2-cbo.inval, 3-cbo.zero
val address = UInt(64.W)
}
// custom l2 - l1 CMO inst resp(ack)
class CMOResp(implicit p: Parameters) extends Bundle {
val address = UInt(64.W)
}

// custom l2 - l1 tlb
// FIXME lyq: Tlbcmd and TlbExceptionBundle, how to use L1 corresponding bundles?
object TlbCmd {
Expand Down
36 changes: 1 addition & 35 deletions src/main/scala/coupledL2/CoupledL2.scala
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ trait HasCoupledL2Parameters {
def hasTPPrefetcher = prefetchers.exists(_.isInstanceOf[TPParameters])
def hasPrefetchBit = prefetchers.exists(_.hasPrefetchBit) // !! TODO.test this
def hasPrefetchSrc = prefetchers.exists(_.hasPrefetchSrc)
def hasCMO = cacheParams.hasCMO
def topDownOpt = if(cacheParams.elaboratedTopDown) Some(true) else None

def enableHintGuidedGrant = true
Expand Down Expand Up @@ -195,7 +194,7 @@ trait HasCoupledL2Parameters {

def odOpGen(r: UInt) = {
val grantOp = GrantData
val opSeq = Seq(AccessAck, AccessAck, AccessAckData, AccessAckData, AccessAckData, HintAck, grantOp, Grant)
val opSeq = Seq(AccessAck, AccessAck, AccessAckData, AccessAckData, AccessAckData, HintAck, grantOp, Grant, 0.U, 0.U, 0.U, 0.U, CBOAck, CBOAck, CBOAck)
val opToA = VecInit(opSeq)(r)
opToA
}
Expand Down Expand Up @@ -226,9 +225,6 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
val pf_recv_node: Option[BundleBridgeSink[PrefetchRecv]] =
if(hasReceiver) Some(BundleBridgeSink(Some(() => new PrefetchRecv))) else None

val cmo_sink_node = if(hasCMO) Some(BundleBridgeSink(Some(() => DecoupledIO(new CMOReq)))) else None
val cmo_source_node = if(hasCMO) Some(BundleBridgeSource(Some(() => DecoupledIO(new CMOResp)))) else None

val managerPortParams = (m: TLSlavePortParameters) => TLSlavePortParameters.v1(
m.managers.map { m =>
m.v2copy(
Expand Down Expand Up @@ -379,8 +375,6 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
val releaseSourceD = Wire(Vec(banks, Bool()))
val allCanFire = (RegNextN(!hintFire, sliceAhead) && RegNextN(!hintFire, sliceAhead + 1)) || Cat(releaseSourceD).orR

val cmoSinkReady = Wire(Vec(node.in.length, Bool()))

val slices = node.in.zip(node.out).zipWithIndex.map {
case (((in, edgeIn), (out, edgeOut)), i) =>
require(in.params.dataBits == out.params.dataBits)
Expand Down Expand Up @@ -449,22 +443,6 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
s.tlb_req.resp.ready := true.B
}

cmo_sink_node match {
case Some(x) =>
val cmoReq = Wire(DecoupledIO(new CMOReq))
val bankSelect = bank_eq(x.in.head._1.bits.address >> offsetBits, i, bankBits)
cmoReq.valid := x.in.head._1.valid && bankSelect
cmoReq.bits := x.in.head._1.bits
PipelineConnect(cmoReq, slice.io.cmoReq, slice.io.cmoReq.ready, false.B, false.B)
cmoSinkReady(i) := cmoReq.ready && bankSelect
case None =>
slice.io.cmoReq.valid := false.B
slice.io.cmoReq.bits.opcode := 0.U
slice.io.cmoReq.bits.address := 0.U
slice.io.cmoResp.ready := false.B
cmoSinkReady(i) := false.B
}

slice
}

Expand All @@ -474,18 +452,6 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
}.flatten
generatePerfEvent()

cmo_sink_node match {
case Some(x) =>
x.in.head._1.ready := cmoSinkReady.orR
case None =>
}

cmo_source_node match {
case Some(x) =>
fastArb(slices.map(_.io.cmoResp), x.out.head._1, Some("cmo_resp"))
case None =>
}

// Refill hint
if (enableHintGuidedGrant) {
// for timing consideration, hint should latch one cycle before sending to L1
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/coupledL2/GrantBuffer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -262,15 +262,15 @@ class GrantBuffer(implicit p: Parameters) extends L2Module {

// =========== record unreceived GrantAck ===========
// Addrs with Grant sent and GrantAck not received
when (io.d_task.fire && (dtaskOpcode(2, 1) === Grant(2, 1) || io.d_task.bits.task.mergeA)) {
when (io.d_task.fire && (dtaskOpcode === Grant || dtaskOpcode === GrantData || io.d_task.bits.task.mergeA)) {
// choose an empty entry
val entry = inflightGrant(inflight_insertIdx)
entry.valid := true.B
entry.bits.set := io.d_task.bits.task.set
entry.bits.tag := io.d_task.bits.task.tag
}
val inflight_full = Cat(inflightGrant.map(_.valid)).andR
assert(!(inflight_full & (io.d_task.fire && (dtaskOpcode(2, 1) === Grant(2, 1) || io.d_task.bits.task.mergeA))), "inflightGrant entries overflow")
assert(!(inflight_full & (io.d_task.fire && (dtaskOpcode === Grant || dtaskOpcode === GrantData || io.d_task.bits.task.mergeA))), "inflightGrant entries overflow")

// report status to SourceB to block same-addr Probe
io.grantStatus zip inflightGrant foreach {
Expand Down
2 changes: 0 additions & 2 deletions src/main/scala/coupledL2/L2Param.scala
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,6 @@ case class L2Param(
elaboratedTopDown: Boolean = true,
// env
FPGAPlatform: Boolean = false,
// CMO
hasCMO: Boolean = false,

// Network layer SAM
sam: Seq[(AddressSet, Int)] = Seq(AddressSet.everything -> 0)
Expand Down
28 changes: 14 additions & 14 deletions src/main/scala/coupledL2/RequestArb.scala
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ class RequestArb(implicit p: Parameters) extends L2Module
val sinkB = Flipped(DecoupledIO(new TaskBundle))
val sinkC = Flipped(DecoupledIO(new TaskBundle))
val mshrTask = Flipped(DecoupledIO(new TaskBundle))
val cmoTask = if (hasCMO) Some(Flipped(DecoupledIO(new TaskBundle))) else None

/* read/write directory */
val dirRead_s1 = DecoupledIO(new DirRead()) // To directory, read meta/tag
Expand Down Expand Up @@ -94,7 +93,8 @@ class RequestArb(implicit p: Parameters) extends L2Module
val mshr_task_s1 = RegInit(0.U.asTypeOf(Valid(new TaskBundle())))

val s1_needs_replRead = mshr_task_s1.valid && mshr_task_s1.bits.fromA && mshr_task_s1.bits.replTask && (
mshr_task_s1.bits.opcode(2, 1) === Grant(2, 1) ||
mshr_task_s1.bits.opcode === Grant ||
mshr_task_s1.bits.opcode === GrantData ||
mshr_task_s1.bits.opcode === AccessAckData ||
mshr_task_s1.bits.opcode === HintAck && mshr_task_s1.bits.dsWen
)
Expand Down Expand Up @@ -151,17 +151,9 @@ class RequestArb(implicit p: Parameters) extends L2Module
chnl_task_s1.valid := io.dirRead_s1.ready && sinkValids.orR && resetFinish
chnl_task_s1.bits := ParallelPriorityMux(sinkValids, Seq(C_task, B_task, A_task))

// put CMO at the lowest priority because it goes in at s1
val cmo_task_s1 = Wire(Valid(new TaskBundle()))
cmo_task_s1.valid := (if (io.cmoTask.isDefined) io.dirRead_s1.ready && io.cmoTask.get.valid && resetFinish else false.B)
cmo_task_s1.bits := (if (io.cmoTask.isDefined) io.cmoTask.get.bits else 0.U.asTypeOf(new TaskBundle))
if (io.cmoTask.isDefined) {
io.cmoTask.get.ready := io.dirRead_s1.ready && resetFinish && s2_ready && !mshr_task_s1.valid && !chnl_task_s1.valid
}

// mshr_task_s1 is s1_[reg]
// task_s1 is [wire] to s2_reg
val task_s1 = Mux(mshr_task_s1.valid, mshr_task_s1, Mux(chnl_task_s1.valid, chnl_task_s1, cmo_task_s1))
val task_s1 = Mux(mshr_task_s1.valid, mshr_task_s1, chnl_task_s1)
val s1_to_s2_valid = task_s1.valid && !mshr_replRead_stall

s1_cango := task_s1.valid && !mshr_replRead_stall
Expand All @@ -172,7 +164,7 @@ class RequestArb(implicit p: Parameters) extends L2Module

/* Meta read request */
// ^ only sinkA/B/C tasks need to read directory
io.dirRead_s1.valid := s2_ready && (chnl_task_s1.valid && !mshr_task_s1.valid || s1_needs_replRead && !io.fromMainPipe.blockG_s1 || cmo_task_s1.valid)
io.dirRead_s1.valid := s2_ready && (chnl_task_s1.valid && !mshr_task_s1.valid || s1_needs_replRead && !io.fromMainPipe.blockG_s1)
io.dirRead_s1.bits.set := task_s1.bits.set
io.dirRead_s1.bits.tag := task_s1.bits.tag
// invalid way which causes mshr_retry
Expand Down Expand Up @@ -227,10 +219,11 @@ class RequestArb(implicit p: Parameters) extends L2Module
task_s2.bits.chiOpcode.get === Evict
)
} else {
task_s2.bits.opcode(2, 1) === Release(2, 1)
task_s2.bits.opcode === Release ||
task_s2.bits.opcode === ReleaseData
})
io.refillBufRead_s2.valid := mshrTask_s2 && (
task_s2.bits.fromB && task_s2.bits.opcode(2, 1) === ProbeAck(2, 1) && task_s2.bits.replTask || // ???
task_s2.bits.fromB && (task_s2.bits.opcode === ProbeAck || task_s2.bits.opcode === ProbeAckData) && task_s2.bits.replTask || // ???
releaseRefillData ||
mshrTask_s2_a_upwards && !task_s2.bits.useProbeData)
io.refillBufRead_s2.bits.id := task_s2.bits.mshrId
Expand All @@ -248,6 +241,12 @@ class RequestArb(implicit p: Parameters) extends L2Module
val dctNeedData = if (enableCHI) {
task_s2.bits.toTXDAT && task_s2.bits.chiOpcode.get === CompData
} else false.B
val cmoNeedData = if (enableCHI) {
task_s2.bits.toTXREQ && task_s2.bits.cmoTask && (
task_s2.bits.chiOpcode.get === WriteCleanFull ||
task_s2.bits.chiOpcode.get === WriteBackFull
)
} else false.B
val snpHitReleaseNeedData = if (enableCHI) {
!mshrTask_s2 && task_s2.bits.fromB && task_s2.bits.snpHitReleaseWithData
} else false.B
Expand All @@ -256,6 +255,7 @@ class RequestArb(implicit p: Parameters) extends L2Module
releaseNeedData ||
snoopNeedData ||
dctNeedData ||
cmoNeedData ||
mshrTask_s2_a_upwards && task_s2.bits.useProbeData,
task_s2.valid && snpHitReleaseNeedData
)
Expand Down
6 changes: 4 additions & 2 deletions src/main/scala/coupledL2/SinkA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class SinkA(implicit p: Parameters) extends L2Module {
val prefetchReq = prefetchOpt.map(_ => Flipped(DecoupledIO(new PrefetchReq)))
val task = DecoupledIO(new TaskBundle)
})
assert(!(io.a.valid && io.a.bits.opcode(2, 1) === 0.U), "no Put")
assert(!(io.a.valid && (io.a.bits.opcode === PutFullData ||
io.a.bits.opcode === PutPartialData)),
"no Put");

def fromTLAtoTaskBundle(a: TLBundleA): TaskBundle = {
val task = Wire(new TaskBundle)
Expand Down Expand Up @@ -129,7 +131,7 @@ class SinkA(implicit p: Parameters) extends L2Module {
// Performance counters
// num of reqs
XSPerfAccumulate("sinkA_req", io.task.fire)
XSPerfAccumulate("sinkA_acquire_req", io.a.fire && io.a.bits.opcode(2, 1) === AcquireBlock(2, 1))
XSPerfAccumulate("sinkA_acquire_req", io.a.fire && (io.a.bits.opcode === AcquirePerm || io.a.bits.opcode === AcquireBlock))
XSPerfAccumulate("sinkA_acquireblock_req", io.a.fire && io.a.bits.opcode === AcquireBlock)
XSPerfAccumulate("sinkA_acquireperm_req", io.a.fire && io.a.bits.opcode === AcquirePerm)
XSPerfAccumulate("sinkA_get_req", io.a.fire && io.a.bits.opcode === Get)
Expand Down
74 changes: 0 additions & 74 deletions src/main/scala/coupledL2/SinkCMO.scala

This file was deleted.

Loading

0 comments on commit c4ce81f

Please sign in to comment.