Skip to content

Commit f95836f

Browse files
committed
cpu-o3: add 2Fetch features
- Introduce support for the 2Fetch feature in the branch predictor and the Fetch stage. - Extend the FetchTargetQueue so the next FTQ entry can be inspected and supplied for 2Fetch. - When two fetch blocks fall within the same 64-byte fetch buffer, both can now be fetched in the same cycle. Change-Id: I3b112cc844c485d81cea1f4ed0ff221cb37d2782
1 parent 39cd6af commit f95836f

File tree

9 files changed

+262
-9
lines changed

9 files changed

+262
-9
lines changed

src/cpu/o3/fetch.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2070,7 +2070,7 @@ Fetch::performInstructionFetch(ThreadID tid)
20702070
// Main instruction fetch loop - process until fetch width or other limits
20712071
StallReason stall = StallReason::NoStall;
20722072
while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize &&
2073-
!predictedBranch && !ftqEmpty() && !waitForVsetvl) {
2073+
!shouldStopFetchThisCycle(predictedBranch)) {
20742074

20752075
// Check memory needs and supply bytes to decoder if required
20762076
stall = checkMemoryNeeds(tid, pc_state, curMacroop);

src/cpu/o3/fetch.hh

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -514,8 +514,21 @@ class Fetch
514514
/** Profile the reasons of fetch stall. */
515515
void profileStall(ThreadID tid);
516516

517-
518-
bool ftqEmpty() { return isDecoupledFrontend() && usedUpFetchTargets; }
517+
/**
518+
* Decide whether fetch should stop for this cycle based on frontend mode.
519+
* - Decoupled frontend stops when the current FTQ supply is exhausted.
520+
* - Non-decoupled frontend stops when a branch is predicted taken.
521+
*/
522+
bool shouldStopFetchThisCycle(bool predictedBranch)
523+
{
524+
if (waitForVsetvl) {
525+
return true;
526+
}
527+
if (isDecoupledFrontend()) {
528+
return usedUpFetchTargets;
529+
}
530+
return predictedBranch;
531+
}
519532

520533
/** Set the reasons of all fetch stalls. */
521534
void setAllFetchStalls(StallReason stall);

src/cpu/o3/fetch.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,16 @@ bool isFTBPred() { return branchPred->isFTB(); }
341341
bool isBTBPred() { return branchPred->isBTB(); } // 主要使用的预测器类型
342342

343343
// Decide whether fetch should stop for this cycle
344-
bool ftqEmpty() { return isDecoupledFrontend() && usedUpFetchTargets; }
344+
bool shouldStopFetchThisCycle(bool predictedBranch)
345+
{
346+
if (waitForVsetvl) {
347+
return true;
348+
}
349+
if (isDecoupledFrontend()) {
350+
return usedUpFetchTargets;
351+
}
352+
return predictedBranch;
353+
}
345354
```
346355
347356
### DecoupledBPUWithBTB 工作流程:
@@ -573,7 +582,7 @@ void fetch(bool &status_change) {
573582
void performInstructionFetch(ThreadID tid, Addr fetch_addr, bool &status_change) {
574583
// 主循环: 处理直到fetch宽度或其他限制
575584
while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize &&
576-
!predictedBranch && !ftqEmpty() && !waitForVsetvl) {
585+
!shouldStopFetchThisCycle(predictedBranch)) {
577586

578587
// 1. 检查内存需求并供给decoder
579588
stall = checkMemoryNeeds(tid, this_pc, curMacroop);

src/cpu/pred/BranchPredictor.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,3 +1181,7 @@ class DecoupledBPUWithBTB(BranchPredictor):
11811181
enableLoopPredictor = Param.Bool(False, "Use loop predictor to predict loop exit")
11821182
enableJumpAheadPredictor = Param.Bool(False, "Use jump ahead predictor to skip no-need-to-predict blocks")
11831183
resolveBlockThreshold = Param.Unsigned(8, "Consecutive resolve dequeue failures before blocking prediction once")
1184+
1185+
enable2Taken = Param.Bool(False, "Enable 2taken feature")
1186+
enable2Fetch = Param.Bool(False, "Enable 2fetch feature")
1187+
maxFetchBytesPerCycle = Param.Unsigned(64, "Maximum fetch bytes per cycle for 2fetch")

src/cpu/pred/btb/decoupled_bpred.cc

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "cpu/pred/btb/decoupled_bpred.hh"
22

3+
#include <algorithm>
34
#include <array>
45

56
#include "base/debug_helper.hh"
@@ -27,6 +28,7 @@ DecoupledBPUWithBTB::DecoupledBPUWithBTB(const DecoupledBPUWithBTBParams &p)
2728
enableLoopBuffer(p.enableLoopBuffer),
2829
enableLoopPredictor(p.enableLoopPredictor),
2930
enableJumpAheadPredictor(p.enableJumpAheadPredictor),
31+
enable2Taken(p.enable2Taken),
3032
fetchTargetQueue(p.ftq_size),
3133
fetchStreamQueueSize(p.fsq_size),
3234
predictWidth(p.predictWidth),
@@ -45,7 +47,9 @@ DecoupledBPUWithBTB::DecoupledBPUWithBTB(const DecoupledBPUWithBTBParams &p)
4547
numStages(p.numStages),
4648
historyManager(16), // TODO: fix this
4749
resolveBlockThreshold(p.resolveBlockThreshold),
48-
dbpBtbStats(this, p.numStages, p.fsq_size, maxInstsNum)
50+
dbpBtbStats(this, p.numStages, p.fsq_size, maxInstsNum),
51+
enable2Fetch(p.enable2Fetch),
52+
maxFetchBytesPerCycle(p.maxFetchBytesPerCycle)
4953
{
5054
if (bpDBSwitches.size() > 0) {
5155
initDB();
@@ -160,7 +164,7 @@ DecoupledBPUWithBTB::tick()
160164
// Clear each predictor's output
161165
for (int i = 0; i < numStages; i++) {
162166
predsOfEachStage[i].btbEntries.clear();
163-
}
167+
}
164168
}
165169

166170
if (bpuState == BpuState::PREDICTION_OUTSTANDING && numOverrideBubbles > 0) {
@@ -436,7 +440,20 @@ DecoupledBPUWithBTB::decoupledPredict(const StaticInstPtr &inst,
436440
// Increment instruction counter for current FTQ entry
437441
currentFtqEntryInstNum++;
438442
if (run_out_of_this_entry) {
443+
// Check if 2fetch is enabled, not fetched first FTQ yet, and if we can extend to the next FTQ
444+
// NEW: 2Fetch extension check - before processing completion
445+
dbpBtbStats.fetch2Attempts++;
446+
if (enable2Fetch && !has1Fetched && canExtendToNextFTQ(pc, target_to_fetch)) {
447+
DPRINTF(DecoupleBP, "2Fetch: extending to next FTQ in same cycle\n");
448+
has1Fetched = true;
449+
processFetchTargetCompletion(target_to_fetch);
450+
extendToNextFTQ(pc);
451+
// first fetchBlock is always taken, do not run out of FTQ now
452+
return std::make_pair(true, false);
453+
}
454+
439455
processFetchTargetCompletion(target_to_fetch);
456+
has1Fetched = false; // reset 2fetch flag
440457
}
441458

442459
DPRINTF(DecoupleBP, "Predict it %staken to %#lx\n", taken ? "" : "not ",
@@ -1372,6 +1389,93 @@ DecoupledBPUWithBTB::recoverHistoryForSquash(
13721389
}
13731390

13741391

1392+
// NEW: 2Fetch support methods implementation
1393+
1394+
/**
1395+
* @brief Check if we can extend to next FTQ entry for 2fetch
1396+
*
1397+
* @param current_pc Current program counter
1398+
* @param current_ftq Current FTQ entry that is being completed
1399+
* @return true if 2fetch extension is possible
1400+
*/
1401+
bool
1402+
DecoupledBPUWithBTB::canExtendToNextFTQ(const PCStateBase &current_pc, const FtqEntry &current_ftq)
1403+
{
1404+
// Early exit if 2fetch is disabled
1405+
if (!enable2Fetch) {
1406+
return false;
1407+
}
1408+
1409+
if (!current_ftq.taken) {
1410+
DPRINTF(DecoupleBP, "2Fetch rejected: current FTQ is not taken\n");
1411+
dbpBtbStats.fetch2FirstNotTaken++;
1412+
return false;
1413+
}
1414+
1415+
// Check if next FTQ entry is available
1416+
if (!fetchTargetQueue.hasNext()) {
1417+
DPRINTF(DecoupleBP, "2Fetch rejected: no next FTQ entry available\n");
1418+
dbpBtbStats.fetch2NoNextFTQ++;
1419+
return false;
1420+
}
1421+
1422+
// Get next FTQ entry (without consuming it)
1423+
const auto &next_ftq = fetchTargetQueue.peekNext();
1424+
// current_ftq is passed as parameter
1425+
1426+
// Check if current PC is the jump target of the next FTQ start
1427+
if (current_pc.instAddr() != next_ftq.startPC) {
1428+
DPRINTF(DecoupleBP, "2Fetch rejected: PC %#x not at next FTQ start %#x\n",
1429+
current_pc.instAddr(), next_ftq.startPC);
1430+
dbpBtbStats.fetch2FirstNotAtStart++;
1431+
return false;
1432+
}
1433+
1434+
// Check if both FTQs fit in maxFetchBytesPerCycle window
1435+
Addr span = next_ftq.endPC - current_ftq.startPC;
1436+
if (span > maxFetchBytesPerCycle) {
1437+
DPRINTF(DecoupleBP, "2Fetch rejected: span %d exceeds %d bytes\n",
1438+
span, maxFetchBytesPerCycle);
1439+
dbpBtbStats.fetch2SpanTooLarge++;
1440+
return false;
1441+
}
1442+
1443+
DPRINTF(DecoupleBP, "2Fetch enabled: extending to next FTQ [%#x, %#x), total span: %d bytes\n",
1444+
next_ftq.startPC, next_ftq.endPC, span);
1445+
return true;
1446+
}
1447+
1448+
/**
1449+
* @brief Extend to process next FTQ entry for 2fetch
1450+
*
1451+
* @param pc Program counter reference to update
1452+
* @param seqNum Sequence number
1453+
* @param tid Thread ID
1454+
* @param currentLoopIter Current loop iteration
1455+
*/
1456+
void
1457+
DecoupledBPUWithBTB::extendToNextFTQ(PCStateBase &pc)
1458+
{
1459+
// Move to next FTQ entry
1460+
fetchTargetQueue.advance();
1461+
currentFtqEntryInstNum = 0; // Reset instruction counter for new FTQ
1462+
1463+
// Get the new FTQ entry
1464+
const auto &target_to_fetch = fetchTargetQueue.getTarget();
1465+
1466+
DPRINTF(DecoupleBP, "Processing extended FTQ entry: [%#x, %#x)\n",
1467+
target_to_fetch.startPC, target_to_fetch.endPC);
1468+
1469+
// Set PC to start of new FTQ
1470+
auto &rpc = pc.as<GenericISA::PCStateWithNext>();
1471+
rpc.pc(target_to_fetch.startPC);
1472+
rpc.npc(target_to_fetch.startPC + 4);
1473+
rpc.uReset();
1474+
1475+
// Record successful 2fetch
1476+
dbpBtbStats.fetch2Successes++;
1477+
}
1478+
13751479
} // namespace btb_pred
13761480

13771481
} // namespace branch_prediction

src/cpu/pred/btb/decoupled_bpred.hh

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ class DecoupledBPUWithBTB : public BPredUnit
8282
JumpAheadPredictor jap;
8383
bool enableJumpAheadPredictor{false};
8484

85+
// 2taken feature support
86+
bool enable2Taken{true}; // Default enabled
87+
8588
private:
8689
std::string _name;
8790

@@ -377,6 +380,14 @@ class DecoupledBPUWithBTB : public BPredUnit
377380
statistics::Scalar s3PredWrongIttage;
378381
statistics::Scalar s3PredWrongRas;
379382

383+
// NEW: 2Fetch statistics
384+
statistics::Scalar fetch2Attempts; ///< Number of 2fetch attempts
385+
statistics::Scalar fetch2Successes; ///< Number of successful 2fetch cycles
386+
statistics::Scalar fetch2SpanTooLarge; ///< Rejected due to span > maxFetchBytes
387+
statistics::Scalar fetch2NoNextFTQ; ///< Rejected due to no next FTQ entry
388+
statistics::Scalar fetch2FirstNotTaken; ///< Rejected due to current FTQ is not taken
389+
statistics::Scalar fetch2FirstNotAtStart; ///< Rejected due to current PC is not at next FTQ start
390+
380391
DBPBTBStats(statistics::Group* parent, unsigned numStages, unsigned fsqSize, unsigned maxInstsNum);
381392
} dbpBtbStats;
382393

@@ -999,6 +1010,36 @@ class DecoupledBPUWithBTB : public BPredUnit
9991010
*/
10001011
int currentFtqEntryInstNum{0};
10011012

1013+
// NEW: 2Fetch support variables
1014+
/**
1015+
* @brief Enable 2fetch capability
1016+
*/
1017+
bool enable2Fetch{true};
1018+
1019+
/**
1020+
* @brief Whether fetched first FTQ
1021+
*/
1022+
bool has1Fetched{false};
1023+
1024+
/**
1025+
* @brief Maximum fetch bytes per cycle for 2fetch
1026+
*/
1027+
unsigned maxFetchBytesPerCycle{64};
1028+
1029+
// NEW: 2Fetch support methods
1030+
/**
1031+
* @brief Check if we can extend to next FTQ entry for 2fetch
1032+
*
1033+
* @param current_pc Current program counter
1034+
* @param current_ftq Current FTQ entry that is being completed
1035+
* @return true if extension to next FTQ is possible
1036+
*/
1037+
bool canExtendToNextFTQ(const PCStateBase &current_pc, const FtqEntry &current_ftq);
1038+
1039+
// Extend processing to next FTQ entry for 2fetch
1040+
1041+
void extendToNextFTQ(PCStateBase &pc);
1042+
10021043
/**
10031044
* @brief Dump statistics on program exit
10041045
*

src/cpu/pred/btb/decoupled_bpred_stats.cc

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -505,8 +505,13 @@ DecoupledBPUWithBTB::DBPBTBStats::DBPBTBStats(
505505
ADD_STAT(s3PredWrongMbtb, statistics::units::Count::get(), "S3pred wrong blame mbtb "),
506506
ADD_STAT(s3PredWrongTage, statistics::units::Count::get(), "S3pred wrong blame tage "),
507507
ADD_STAT(s3PredWrongIttage, statistics::units::Count::get(), "S3pred wrong blame ittage "),
508-
ADD_STAT(s3PredWrongRas, statistics::units::Count::get(), "S3pred wrong blame ras ")
509-
508+
ADD_STAT(s3PredWrongRas, statistics::units::Count::get(), "S3pred wrong blame ras "),
509+
ADD_STAT(fetch2Attempts, statistics::units::Count::get(), "Number of 2fetch attempts"),
510+
ADD_STAT(fetch2Successes, statistics::units::Count::get(), "Number of successful 2fetch cycles"),
511+
ADD_STAT(fetch2SpanTooLarge, statistics::units::Count::get(), "Rejected due to span > maxFetchBytes"),
512+
ADD_STAT(fetch2NoNextFTQ, statistics::units::Count::get(), "Rejected due to no next FTQ entry"),
513+
ADD_STAT(fetch2FirstNotTaken, statistics::units::Count::get(), "Rejected due to current FTQ is not taken"),
514+
ADD_STAT(fetch2FirstNotAtStart, statistics::units::Count::get(), "Rejected due to PC is != next FTQ start")
510515
{
511516
predsOfEachStage.init(numStages);
512517
commitPredsFromEachStage.init(numStages+1);

src/cpu/pred/btb/fetch_target_queue.cc

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,60 @@ FetchTargetQueue::resetPC(Addr new_pc)
264264
fetchTargetEnqState.pc = new_pc;
265265
}
266266

267+
// NEW: 2Fetch support methods implementation
268+
269+
/**
270+
* @brief Check if there is a next available FTQ entry
271+
*
272+
* @return true if next FTQ entry is available
273+
*/
274+
bool
275+
FetchTargetQueue::hasNext() const
276+
{
277+
// Check if there's an entry with ID = fetchDemandTargetId + 1
278+
auto next_it = ftq.find(fetchDemandTargetId + 1);
279+
return next_it != ftq.end();
280+
}
281+
282+
/**
283+
* @brief Peek at the next FTQ entry without consuming it
284+
*
285+
* @return Reference to the next FTQ entry
286+
*/
287+
const FtqEntry&
288+
FetchTargetQueue::peekNext() const
289+
{
290+
assert(hasNext());
291+
auto next_it = ftq.find(fetchDemandTargetId + 1);
292+
return next_it->second;
293+
}
294+
295+
/**
296+
* @brief Advance to the next FTQ entry without dequeuing current one
297+
*
298+
* Used for 2fetch when we want to process the next entry
299+
* while keeping the current one active
300+
*/
301+
void
302+
FetchTargetQueue::advance()
303+
{
304+
// Already moved to next target ID in processFetchTargetCompletion
305+
// Update supply state to point to new target
306+
auto next_it = ftq.find(fetchDemandTargetId);
307+
if (next_it != ftq.end()) {
308+
supplyFetchTargetState.valid = true;
309+
supplyFetchTargetState.targetId = fetchDemandTargetId;
310+
supplyFetchTargetState.entry = &(next_it->second);
311+
312+
DPRINTF(DecoupleBP,
313+
"Advanced to next FTQ entry: ID %lu, PC [%#lx, %#lx)\n",
314+
fetchDemandTargetId, next_it->second.startPC, next_it->second.endPC);
315+
} else {
316+
supplyFetchTargetState.valid = false;
317+
supplyFetchTargetState.entry = nullptr;
318+
}
319+
}
320+
267321
} // namespace btb_pred
268322

269323
} // namespace branch_prediction

src/cpu/pred/btb/fetch_target_queue.hh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,29 @@ class FetchTargetQueue
157157
*/
158158
void finishCurrentFetchTarget();
159159

160+
// NEW: 2Fetch support methods
161+
/**
162+
* @brief Check if there is a next available FTQ entry
163+
*
164+
* @return true if next FTQ entry is available
165+
*/
166+
bool hasNext() const;
167+
168+
/**
169+
* @brief Peek at the next FTQ entry without consuming it
170+
*
171+
* @return Reference to the next FTQ entry
172+
*/
173+
const FtqEntry& peekNext() const;
174+
175+
/**
176+
* @brief Advance to the next FTQ entry without dequeuing current one
177+
*
178+
* Used for 2fetch when we want to process the next entry
179+
* while keeping the current one active
180+
*/
181+
void advance();
182+
160183
/**
161184
* @brief Try to supply fetch with a target matching the demand PC
162185
*

0 commit comments

Comments
 (0)