Skip to content

Commit 2e46566

Browse files
committed
cpu: remove legacy resolve update path
Collapse resolved-stage BTB training onto the packet-based full resolve path, remove the old resolvedCFI/resolveQueue helper chain, and reinterpret config intent around full-resolve participation versus commit fallback.

Change-Id: I777f46178bc7b1ce891bb2242cefff0b166e48a2
1 parent 7a06f85 commit 2e46566

File tree

13 files changed

+29
-248
lines changed

13 files changed

+29
-248
lines changed

configs/example/idealkmhv3.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def setKmhV3IdealParams(args, system):
8383

8484
# branch predictor
8585
if args.bp_type == 'DecoupledBPUWithBTB':
86+
cpu.enableFullResolveTrain = False
8687
cpu.branchPred.ftq_size = 64
8788
cpu.branchPred.fsq_size = 64
8889
# cpu.branchPred.microtage.enabled = False

configs/example/kmhv3.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,17 +93,14 @@ def setKmhV3Params(args, system):
9393

9494
# branch predictor
9595
if args.bp_type == 'DecoupledBPUWithBTB':
96+
cpu.enableFullResolveTrain = True
9697
cpu.branchPred.ftq_size = 64
9798
cpu.branchPred.fsq_size = 64
9899

99100
if args.btb_tage_upper_bound:
100101
cpu.branchPred.tage = BTBTAGEUpperBound(
101102
usePathHashHistory=True)
102103

103-
cpu.branchPred.mbtb.resolvedUpdate = True
104-
cpu.branchPred.tage.resolvedUpdate = True
105-
cpu.branchPred.ittage.resolvedUpdate = True
106-
107104
cpu.branchPred.ubtb.enabled = True
108105
cpu.branchPred.abtb.enabled = True
109106
cpu.branchPred.microtage.enabled = True

src/cpu/o3/BaseO3CPU.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,9 +248,7 @@ def support_take_over(cls):
248248
"Branch Predictor")
249249
resolveQueueSize = Param.Unsigned(16, "Number of entries in the branch resolution queue")
250250
enableFullResolveTrain = Param.Bool(True,
251-
"Enable packet-based resolve training rollout plumbing")
252-
enableLegacyResolveUpdate = Param.Bool(True,
253-
"Enable legacy PC-only resolve update")
251+
"Train eligible BTB components from full resolve packets instead of commit fallback")
254252
needsTSO = Param.Bool(False, "Enable TSO Memory model")
255253

256254
scheduler = Param.Scheduler("")

src/cpu/o3/comm.hh

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -197,12 +197,6 @@ struct SquashVersion
197197
SquashVersion() : version(0) {}
198198
};
199199

200-
struct ResolveQueueEntry
201-
{
202-
uint64_t resolvedFTQId;
203-
std::vector<uint64_t> resolvedInstPC;
204-
};
205-
206200
/** Struct that defines all backwards communication. */
207201
struct TimeStruct
208202
{
@@ -239,14 +233,6 @@ struct TimeStruct
239233
StallReason lqHeadStallReason;
240234
StallReason sqHeadStallReason;
241235

242-
struct ResolvedCFIEntry
243-
{
244-
uint64_t ftqId;
245-
uint64_t pc;
246-
};
247-
/** Resolved control-flow PCs produced this cycle (fetch buffers/merges). */
248-
std::vector<ResolvedCFIEntry> resolvedCFIs; // *F
249-
250236
struct ResolveTrainEntry
251237
{
252238
uint64_t ftqId;

src/cpu/o3/fetch.cc

Lines changed: 0 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ Fetch::Fetch(CPU *_cpu, const BaseO3CPUParams &params)
115115
branchPred(nullptr),
116116
resolveQueueSize(params.resolveQueueSize),
117117
enableFullResolveTrain(params.enableFullResolveTrain),
118-
enableLegacyResolveUpdate(params.enableLegacyResolveUpdate),
119118
decodeToFetchDelay(params.decodeToFetchDelay),
120119
renameToFetchDelay(params.renameToFetchDelay),
121120
iewToFetchDelay(params.iewToFetchDelay),
@@ -285,16 +284,6 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
285284
statistics::units::Count, statistics::units::Cycle>::get(),
286285
"Frontend Bandwidth Bound",
287286
frontendBound - frontendLatencyBound),
288-
ADD_STAT(resolveQueueFullEvents, statistics::units::Count::get(),
289-
"Number of events the resolve queue becomes full"),
290-
ADD_STAT(resolveEnqueueFailEvent, statistics::units::Count::get(),
291-
"Number of times an entry could not be enqueued to the resolve queue"),
292-
ADD_STAT(resolveDequeueCount, statistics::units::Count::get(),
293-
"Number of times an entry is dequeued from the resolve queue"),
294-
ADD_STAT(resolveEnqueueCount, statistics::units::Count::get(),
295-
"Number of times an entry is enqueued to the resolve queue"),
296-
ADD_STAT(resolveQueueOccupancy, statistics::units::Count::get(),
297-
"Number of entries in the resolve queue"),
298287
ADD_STAT(fullResolveEntriesReceived, statistics::units::Count::get(),
299288
"Number of full resolve entries received by fetch"),
300289
ADD_STAT(fullResolveEntriesMerged, statistics::units::Count::get(),
@@ -387,10 +376,6 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
387376
.flags(statistics::total);
388377
frontendBandwidthBound
389378
.flags(statistics::total);
390-
resolveEnqueueCount
391-
.init(1, 8, 1);
392-
resolveQueueOccupancy
393-
.init(0, 32, 1);
394379
fullResolveEntriesReceived
395380
.prereq(fullResolveEntriesReceived);
396381
fullResolveEntriesMerged
@@ -1517,70 +1502,6 @@ Fetch::handleIEWSignals()
15171502
return;
15181503
}
15191504

1520-
auto &incoming = fromIEW->iewInfo->resolvedCFIs;
1521-
1522-
if (!enableLegacyResolveUpdate) {
1523-
for (ThreadID tid = 0; tid < numThreads; ++tid) {
1524-
fromIEW->iewInfo[tid].resolvedCFIs.clear();
1525-
}
1526-
}
1527-
1528-
if (enableLegacyResolveUpdate) {
1529-
const bool had_pending_resolve = !resolveQueue.empty();
1530-
uint8_t enqueueSize = fromIEW->iewInfo->resolvedCFIs.size();
1531-
uint8_t enqueueCount = 0;
1532-
1533-
if (resolveQueueSize && resolveQueue.size() > resolveQueueSize - 4) {
1534-
fetchStats.resolveQueueFullEvents++;
1535-
fetchStats.resolveEnqueueFailEvent += enqueueSize;
1536-
} else {
1537-
1538-
for (const auto &resolved : incoming) {
1539-
bool merged = false;
1540-
for (auto &queued : resolveQueue) {
1541-
if (queued.resolvedFTQId == resolved.ftqId) {
1542-
queued.resolvedInstPC.push_back(resolved.pc);
1543-
merged = true;
1544-
break;
1545-
}
1546-
}
1547-
1548-
if (merged) {
1549-
continue;
1550-
}
1551-
1552-
ResolveQueueEntry new_entry;
1553-
new_entry.resolvedFTQId = resolved.ftqId;
1554-
new_entry.resolvedInstPC.push_back(resolved.pc);
1555-
resolveQueue.push_back(std::move(new_entry));
1556-
enqueueCount++;
1557-
}
1558-
fetchStats.resolveEnqueueCount.sample(enqueueCount);
1559-
}
1560-
1561-
fetchStats.resolveQueueOccupancy.sample(resolveQueue.size());
1562-
1563-
// Process only entries that were already pending before this cycle.
1564-
// This preserves a cycle of separation between IEW producing resolved
1565-
// CFIs and fetch consuming them as predictor resolved updates.
1566-
if (had_pending_resolve && !resolveQueue.empty()) {
1567-
auto &entry = resolveQueue.front();
1568-
unsigned int stream_id = entry.resolvedFTQId;
1569-
dbpbtb->prepareResolveUpdateEntries(stream_id, 0);
1570-
for (const auto resolvedInstPC : entry.resolvedInstPC) {
1571-
dbpbtb->markCFIResolved(stream_id, resolvedInstPC, 0);
1572-
}
1573-
bool success = dbpbtb->resolveUpdate(stream_id, 0);
1574-
if (success) {
1575-
dbpbtb->notifyResolveSuccess();
1576-
resolveQueue.pop_front();
1577-
fetchStats.resolveDequeueCount++;
1578-
} else {
1579-
dbpbtb->notifyResolveFailure();
1580-
}
1581-
}
1582-
}
1583-
15841505
if (!enableFullResolveTrain) {
15851506
for (ThreadID tid = 0; tid < numThreads; ++tid) {
15861507
fromIEW->iewInfo[tid].resolveTrainEntries.clear();

src/cpu/o3/fetch.hh

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -465,8 +465,7 @@ class Fetch
465465
*/
466466
bool handleCommitSignals(ThreadID tid);
467467

468-
/** Handles iew signals including resolved cfi, mark their btb entries
469-
* and train predictors if they are configured to update in resolve stage.
468+
/** Handles IEW signals for full resolve-train predictor updates.
470469
*/
471470
void handleIEWSignals();
472471

@@ -639,12 +638,6 @@ class Fetch
639638
/** Enable packet-based resolve training rollout plumbing. */
640639
const bool enableFullResolveTrain;
641640

642-
/** Keep legacy PC-only resolve updates enabled. */
643-
const bool enableLegacyResolveUpdate;
644-
645-
/** FIFO storing resolve entries waiting for BPU training. */
646-
std::deque<ResolveQueueEntry> resolveQueue;
647-
648641
/** FIFO storing aggregated full resolve-train entries. */
649642
std::deque<ResolveTrainQueueEntry> resolveTrainQueue;
650643

@@ -1129,17 +1122,6 @@ class Fetch
11291122
statistics::Formula frontendLatencyBound;
11301123
/** Frontend Bandwidth Bound */
11311124
statistics::Formula frontendBandwidthBound;
1132-
/** Stat for total cycles the resolve queue is full. */
1133-
statistics::Scalar resolveQueueFullEvents;
1134-
/** Stat for total number of resolve enqueue fail events. */
1135-
statistics::Scalar resolveEnqueueFailEvent;
1136-
1137-
/** Stat for total number of resolve dequeue events. */
1138-
statistics::Scalar resolveDequeueCount;
1139-
/** Stat for total number of resolve enqueue events. */
1140-
statistics::Distribution resolveEnqueueCount;
1141-
/** Stat for entry occupancy distribution of the resolve queue. */
1142-
statistics::Distribution resolveQueueOccupancy;
11431125
/** Full resolve entries observed at fetch. */
11441126
statistics::Scalar fullResolveEntriesReceived;
11451127
/** Full resolve entries merged with an existing target. */

src/cpu/o3/iew.cc

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,14 +1442,6 @@ IEW::SquashCheckAfterExe(DynInstPtr inst)
14421442
is_control && !loadNotExecuted && inst->mispredicted();
14431443

14441444
if (is_control) {
1445-
if (params.enableLegacyResolveUpdate) {
1446-
auto &resolved_cfis = toFetch->iewInfo[tid].resolvedCFIs;
1447-
TimeStruct::IewComm::ResolvedCFIEntry entry;
1448-
entry.ftqId = inst->getFtqId();
1449-
entry.pc = inst->getPC();
1450-
resolved_cfis.push_back(entry);
1451-
}
1452-
14531445
if (params.enableFullResolveTrain) {
14541446
auto &resolve_entries = toFetch->iewInfo[tid].resolveTrainEntries;
14551447
TimeStruct::IewComm::ResolveTrainEntry entry;
@@ -1556,10 +1548,10 @@ IEW::executeInsts()
15561548
// @todo This doesn't actually work anymore, we should fix it.
15571549
// printAvailableInsts();
15581550

1559-
// Clear resolvedFSQId and resolvedInstPC since they are already handled in frontend
1560-
ThreadID tid = *activeThreads->begin();
1561-
toFetch->iewInfo[tid].resolvedCFIs.clear();
1562-
toFetch->iewInfo[tid].resolveTrainEntries.clear();
1551+
// Clear resolve-train entries before producing this cycle's updates.
1552+
for (ThreadID active_tid : *activeThreads) {
1553+
toFetch->iewInfo[active_tid].resolveTrainEntries.clear();
1554+
}
15631555

15641556
// Execute/writeback any instructions that are available.
15651557
int insts_to_execute = fromIssue->size;

src/cpu/pred/BranchPredictor.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -968,7 +968,8 @@ class TimedBaseBTBPredictor(SimObject):
968968
blockSize = Param.Unsigned(Parent.predictWidth, "Block size in bytes")
969969
predictWidth = Param.Unsigned(Parent.predictWidth, "Maximum range in bytes that a single prediction can cover")
970970
numDelay = Param.Unsigned(1000, "Number of bubbles to put on a prediction")
971-
resolvedUpdate = Param.Bool(False, "Enable resolved update, no need to wait until commit")
971+
resolvedUpdate = Param.Bool(False,
972+
"Train from full resolve packets instead of commit fallback")
972973
enabled = Param.Bool(True, "Enable this predictor component")
973974

974975
class MBTB(TimedBaseBTBPredictor):
@@ -982,6 +983,8 @@ class MBTB(TimedBaseBTBPredictor):
982983
numThreads = Param.Unsigned(1, "Number of threads")
983984
numWays = Param.Unsigned(4, "Number of ways per set") # for 2 SRAMs, 4 ways per SRAM
984985
numDelay = 2
986+
resolvedUpdate = Param.Bool(Parent.enableFullResolveTrain,
987+
"Train MBTB from full resolve packets instead of commit fallback")
985988
blockSize = 32 # max 64 byte block, 32 byte aligned
986989
# MBTB is always half-aligned - no parameter needed
987990
victimCacheSize = Param.Unsigned(0, "Number of entries in the victim cache")
@@ -1059,6 +1062,8 @@ class BTBTAGE(TimedBaseBTBPredictor):
10591062
numBanks = Param.Unsigned(4, "Number of banks for bank conflict simulation")
10601063
enableBankConflict = Param.Bool(False, "Enable bank conflict simulation")
10611064
numDelay = 2
1065+
resolvedUpdate = Param.Bool(Parent.enableFullResolveTrain,
1066+
"Train BTBTAGE from full resolve packets instead of commit fallback")
10621067

10631068
class BTBTAGEUpperBound(BTBTAGE):
10641069
type = 'BTBTAGEUpperBound'
@@ -1114,6 +1119,8 @@ class BTBITTAGE(TimedBaseBTBPredictor):
11141119
maxHistLen = Param.Unsigned(970, "The length of history passed from DBP")
11151120
numTablesToAlloc = Param.Unsigned(1,"The number of table to allocated each time")
11161121
numDelay = 2
1122+
resolvedUpdate = Param.Bool(Parent.enableFullResolveTrain,
1123+
"Train BTBITTAGE from full resolve packets instead of commit fallback")
11171124

11181125
class BTBMGSC(TimedBaseBTBPredictor):
11191126
type = 'BTBMGSC'
@@ -1207,6 +1214,4 @@ class DecoupledBPUWithBTB(BranchPredictor):
12071214
bpDBSwitches = VectorParam.String([], "Enable which traces in the form of database")
12081215
resolveBlockThreshold = Param.Unsigned(8, "Consecutive resolve dequeue failures before blocking prediction once")
12091216
enableFullResolveTrain = Param.Bool(Parent.enableFullResolveTrain,
1210-
"Enable packet-based resolve training rollout plumbing")
1211-
enableLegacyResolveUpdate = Param.Bool(Parent.enableLegacyResolveUpdate,
1212-
"Enable legacy PC-only resolve update")
1217+
"Train eligible BTB components from full resolve packets instead of commit fallback")

src/cpu/pred/btb/btb_ittage.cc

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -223,17 +223,12 @@ BTBITTAGE::update(const FetchTarget &stream)
223223
all_entries_to_update.push_back(stream.updateNewBTBEntry);
224224
}
225225

226-
// // only update indirect branches that are not returns
227-
if (getResolvedUpdate()) {
228-
auto remove_it =
229-
std::remove_if(all_entries_to_update.begin(), all_entries_to_update.end(),
230-
[](const BTBEntry &e) { return !(e.isIndirect && !e.isReturn && e.resolved); });
231-
all_entries_to_update.erase(remove_it, all_entries_to_update.end());
232-
} else {
233-
auto remove_it = std::remove_if(all_entries_to_update.begin(), all_entries_to_update.end(),
234-
[](const BTBEntry &e) { return !(e.isIndirect && !e.isReturn); });
235-
all_entries_to_update.erase(remove_it, all_entries_to_update.end());
236-
}
226+
auto remove_it = std::remove_if(all_entries_to_update.begin(),
227+
all_entries_to_update.end(),
228+
[](const BTBEntry &e) {
229+
return !(e.isIndirect && !e.isReturn);
230+
});
231+
all_entries_to_update.erase(remove_it, all_entries_to_update.end());
237232

238233
// get tage predictions from meta
239234
// TODO: use component idx

src/cpu/pred/btb/btb_tage.cc

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -423,16 +423,9 @@ BTBTAGE::prepareUpdateEntries(const FetchTarget &stream) {
423423
all_entries.push_back(potential_new_entry);
424424
}
425425

426-
// Filter: only keep conditional branches that are not always taken
427-
if (getResolvedUpdate()) {
428-
auto remove_it = std::remove_if(all_entries.begin(), all_entries.end(),
429-
[](const BTBEntry &e) { return !(e.isCond && !e.alwaysTaken && e.resolved); });
430-
all_entries.erase(remove_it, all_entries.end());
431-
} else {
432-
auto remove_it = std::remove_if(all_entries.begin(), all_entries.end(),
433-
[](const BTBEntry &e) { return !(e.isCond && !e.alwaysTaken); });
434-
all_entries.erase(remove_it, all_entries.end());
435-
}
426+
auto remove_it = std::remove_if(all_entries.begin(), all_entries.end(),
427+
[](const BTBEntry &e) { return !(e.isCond && !e.alwaysTaken); });
428+
all_entries.erase(remove_it, all_entries.end());
436429

437430
return all_entries;
438431
}

0 commit comments

Comments
 (0)