Skip to content

Commit 4fc39e5

Browse files
committed
cpu-o3: using reverse ordered tick & refactor the stalls logic
Change-Id: I9b599a4e0d704215ad1a3bf543dbd075384fe1f4
1 parent a2215bd commit 4fc39e5

File tree

17 files changed

+711
-2250
lines changed

17 files changed

+711
-2250
lines changed

src/cpu/o3/comm.hh

Lines changed: 14 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -219,29 +219,17 @@ struct TimeStruct
219219
StallReason blockReason;
220220
};
221221

222-
DecodeComm decodeInfo[MaxThreads];
222+
DecodeComm decodeInfo[MaxThreads]; // decode to fetch
223223

224224
struct RenameComm
225225
{
226226
StallReason blockReason;
227227
};
228228

229-
RenameComm renameInfo[MaxThreads];
229+
RenameComm renameInfo[MaxThreads]; // rename to decode
230230

231231
struct IewComm
232232
{
233-
// Also eventually include skid buffer space.
234-
unsigned freeLQEntries;
235-
unsigned freeSQEntries;
236-
unsigned dispatchedToLQ;
237-
unsigned dispatchedToSQ;
238-
239-
unsigned ldstqCount;
240-
241-
unsigned dispatched;
242-
bool usedIQ;
243-
bool usedLSQ;
244-
245233
StallReason robHeadStallReason;
246234
StallReason blockReason;
247235
StallReason lqHeadStallReason;
@@ -256,7 +244,7 @@ struct TimeStruct
256244
std::vector<ResolvedCFIEntry> resolvedCFIs; // *F
257245
};
258246

259-
IewComm iewInfo[MaxThreads];
247+
IewComm iewInfo[MaxThreads]; // iew to rename, fetch
260248

261249
struct CommitComm
262250
{
@@ -304,9 +292,6 @@ struct TimeStruct
304292
uint64_t squashedTargetId; // F
305293
unsigned squashedLoopIter; // F
306294

307-
/// Tell Rename how many free entries it has in the ROB
308-
unsigned freeROBEntries; // *R
309-
310295
bool isTrapSquash;
311296
bool squash; // *F, D, R, I
312297
bool robSquashing; // *F, D, R, I
@@ -336,16 +321,20 @@ struct TimeStruct
336321

337322
};
338323

339-
CommitComm commitInfo[MaxThreads];
324+
CommitComm commitInfo[MaxThreads];// commit to iew, rename, fetch
325+
};
340326

341-
bool decodeBlock[MaxThreads];
342-
bool decodeUnblock[MaxThreads];
343-
bool renameBlock[MaxThreads];
344-
bool renameUnblock[MaxThreads];
345-
bool iewBlock[MaxThreads];
346-
bool iewUnblock[MaxThreads];
327+
328+
/**
 * Per-thread stall (back-pressure) flags exchanged between adjacent
 * pipeline stages. Each flag is named after the stage being blocked; the
 * trailing comment on each member gives "signalling stage to blocked stage".
 * NOTE(review): presumably these are meant to be observed in the same cycle
 * they are written, relying on the stages being ticked from commit back to
 * fetch -- confirm against the stage tick() implementations.
 */
struct StallSignals
329+
{
330+
331+
bool blockFetch[MaxThreads];// decode to fetch
332+
bool blockDecode[MaxThreads];// rename to decode
333+
bool blockRename[MaxThreads];// iew to rename (when IEW stalls, rename is stalled for all threads)
334+
bool blockIEW[MaxThreads];// commit to iew
347335
};
348336

337+
349338
} // namespace o3
350339
} // namespace gem5
351340

src/cpu/o3/commit.cc

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ Commit::Commit(CPU *_cpu, branch_prediction::BPredUnit *_bp, const BaseO3CPUPara
129129
"--debug-start=%llu --debug-end=%llu\n",
130130
lastCommitCycle, cpu->curCycle(),
131131
cpu->cyclesToTicks(Cycles(lastCommitCycle - 200)),
132-
cpu->cyclesToTicks(Cycles(lastCommitCycle + 50)));
132+
cpu->cyclesToTicks(Cycles(lastCommitCycle + 200)));
133133
}
134134
cpu->schedule(this->stuckCheckEvent, cpu->clockEdge(Cycles(40010)));
135135
}, "CommitStuckCheckEvent"),
@@ -165,7 +165,9 @@ Commit::Commit(CPU *_cpu, branch_prediction::BPredUnit *_bp, const BaseO3CPUPara
165165
}
166166
}
167167

168-
for (ThreadID tid = 0; tid < MaxThreads; tid++) {
168+
assert(renameToROBDelay == 1);
169+
170+
for (ThreadID tid = 0; tid < numThreads; tid++) {
169171
commitStatus[tid] = Idle;
170172
changedROBNumEntries[tid] = false;
171173
trapSquash[tid] = false;
@@ -181,6 +183,7 @@ Commit::Commit(CPU *_cpu, branch_prediction::BPredUnit *_bp, const BaseO3CPUPara
181183
htmStarts[tid] = 0;
182184
htmStops[tid] = 0;
183185
traceCommitIndex[tid] = 0;
186+
fixedbuffer[tid] = boost::circular_buffer<DynInstPtr>(renameWidth);
184187
}
185188
interrupt = NoFault;
186189

@@ -455,7 +458,6 @@ Commit::startupStage()
455458
// Broadcast the initial ROB state (in use, and empty) for every thread.
456459
for (ThreadID tid = 0; tid < numThreads; tid++) {
457460
toIEW->commitInfo[tid].usedROB = true;
458-
toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
459461
toIEW->commitInfo[tid].emptyROB = true;
460462
}
461463

@@ -1114,7 +1116,6 @@ Commit::commit()
11141116

11151117
if (changedROBNumEntries[tid]) {
11161118
toIEW->commitInfo[tid].usedROB = true;
1117-
toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
11181119

11191120
wroteToTimeBuffer = true;
11201121
changedROBNumEntries[tid] = false;
@@ -1136,7 +1137,6 @@ Commit::commit()
11361137
checkEmptyROB[tid] = false;
11371138
toIEW->commitInfo[tid].usedROB = true;
11381139
toIEW->commitInfo[tid].emptyROB = true;
1139-
toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
11401140
wroteToTimeBuffer = true;
11411141
}
11421142

@@ -1236,7 +1236,9 @@ Commit::commitInsts()
12361236
if (commit_success) {
12371237
cpu->perfCCT->updateInstPos(head_inst->seqNum, PerfRecord::AtCommit);
12381238
cpu->perfCCT->commitMeta(head_inst->seqNum);
1239-
head_inst->printDisassemblyAndResult(cpu->name());
1239+
1240+
DPRINTF(CommitTrace, "CT: %s\n", head_inst->genDisassembly());
1241+
12401242
if (ismispred) {
12411243
ismispred = false;
12421244
stats.recovery_bubble += (cpu->curCycle() - lastCommitCycle) * renameWidth;
@@ -1784,12 +1786,46 @@ Commit::getInsts()
17841786
{
17851787
DPRINTF(Commit, "Getting instructions from Rename stage.\n");
17861788

1787-
// Read any renamed instructions and place them into the ROB.
1788-
int insts_to_process = std::min((int)renameWidth, fromRename->size);
1789+
int insts_from_rename = fromRename->size;
1790+
ThreadID tid = insts_from_rename > 0 ? fromRename->insts[0]->threadNumber : -1;
1791+
if (tid != -1) assert(fixedbuffer[tid].empty());
1792+
for (int i = 0; i < insts_from_rename; ++i) {
1793+
const DynInstPtr &inst = fromRename->insts[i];
1794+
assert(inst->threadNumber == tid);
1795+
if (localSquashVer.largerThan(inst->getVersion())) {
1796+
inst->setSquashed();
1797+
}
1798+
fixedbuffer[tid].push_back(inst);
1799+
}
17891800

1801+
// check threads stall & status
1802+
tid = InvalidThreadID;
1803+
for (int i = 0; i < numThreads; i++) {
1804+
bool block = (rob->getMaxEntries(i) - rob->getThreadEntries(i) < fixedbuffer[i].size());
1805+
bool active = !block && !fixedbuffer[i].empty();
1806+
1807+
stallSig->blockIEW[i] = block;
1808+
if (active) {
1809+
if (tid == InvalidThreadID) tid = i;
1810+
else {
1811+
// if there are multiple active threads, must exhaust all threads first
1812+
// to avoid starvation of other threads and also avoid resource conflict
1813+
stallSig->blockIEW[tid] = true;
1814+
stallSig->blockIEW[i] = true;
1815+
// Traced under the Commit flag, like the other messages in getInsts().
DPRINTF(Commit, "Multiple active threads detected, blocking all threads\n");
1816+
}
1817+
}
1818+
}
1819+
if (tid == InvalidThreadID) {
1820+
DPRINTF(Commit, "No instructions from Rename stage.\n");
1821+
return;
1822+
}
1823+
1824+
// Read any renamed instructions and place them into the ROB.
1825+
int insts_to_process = fixedbuffer[tid].size();
17901826
for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
1791-
const DynInstPtr &inst = fromRename->insts[inst_num];
1792-
ThreadID tid = inst->threadNumber;
1827+
// Hold the instruction by value: the pop_front() that follows destroys the
// buffer's element, so a reference obtained from front() would dangle for
// the remainder of this loop iteration.
const DynInstPtr inst = fixedbuffer[tid].front();
1828+
fixedbuffer[tid].pop_front();
17931829

17941830
if (localSquashVer.largerThan(inst->getVersion())) {
17951831
inst->setSquashed();
@@ -1814,6 +1850,11 @@ Commit::getInsts()
18141850
tid, inst->seqNum, inst->pcState());
18151851
}
18161852
}
1853+
1854+
if (!fixedbuffer[tid].empty()) {
1855+
stallSig->blockIEW[tid] = true;
1856+
DPRINTF(Commit, "Not all instructions from Rename stage could be processed, blocking thread %i\n", tid);
1857+
}
18171858
}
18181859

18191860

@@ -1875,10 +1916,7 @@ Commit::updateComInstStats(const DynInstPtr &inst)
18751916
cpu->instDone(tid, inst);
18761917
}
18771918

1878-
//
18791919
// Control Instructions
1880-
//
1881-
//
18821920
if (inst->isControl()) {
18831921
bool mispred = inst->mispredicted();
18841922
std::unique_ptr<PCStateBase> tmp_next_pc(inst->pcState().clone());

src/cpu/o3/commit.hh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ private:
144144
uint64_t totalCount = 0;
145145
};
146146

147-
class Commit
147+
class Commit
148148
{
149149
public:
150150
/** Overall commit status. Used to determine if the CPU can deschedule
@@ -176,6 +176,10 @@ private:
176176
/** Per-thread status. */
177177
ThreadStatus commitStatus[MaxThreads];
178178

179+
boost::circular_buffer<DynInstPtr> fixedbuffer[MaxThreads];
180+
181+
StallSignals* stallSig;
182+
179183
bool robSquashHolding{false};
180184
/** Commit policy used in SMT mode. */
181185
CommitPolicy commitPolicy;
@@ -230,6 +234,8 @@ private:
230234

231235
void setDecodeStage(Decode *decode_stage);
232236

237+
/** Sets the pointer to the stall-signal block; commit writes its
 *  blockIEW flags through this pointer. The object is not copied.
 */
void setStallSignals(StallSignals* stall_signals) { stallSig = stall_signals; }
238+
233239
/** The pointer to the IEW stage. Used solely to ensure that
234240
* various events (traps, interrupts, syscalls) do not occur until
235241
* all stores have written back.

src/cpu/o3/cpu.cc

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,10 @@ CPU::CPU(const BaseO3CPUParams &params)
114114
isa(numThreads, NULL),
115115

116116
timeBuffer(params.backComSize, params.forwardComSize),
117-
fetchQueue(params.backComSize, params.forwardComSize),
118-
decodeQueue(params.backComSize, params.forwardComSize),
119-
renameQueue(params.backComSize, params.forwardComSize),
120-
iewQueue(params.backComSize, params.forwardComSize),
117+
fetchTimebuffer(params.backComSize, params.forwardComSize),
118+
decodeTimebuffer(params.backComSize, params.forwardComSize),
119+
renameTimebuffer(params.backComSize, params.forwardComSize),
120+
iewTimebuffer(params.backComSize, params.forwardComSize),
121121
activityRec(name(), NumStages,
122122
params.backComSize + params.forwardComSize,
123123
params.activity),
@@ -179,23 +179,29 @@ CPU::CPU(const BaseO3CPUParams &params)
179179
commit.setTimeBuffer(&timeBuffer);
180180

181181
// Also setup each of the stages' queues.
182-
fetch.setFetchQueue(&fetchQueue);
183-
decode.setFetchQueue(&fetchQueue);
184-
commit.setFetchQueue(&fetchQueue);
185-
decode.setDecodeQueue(&decodeQueue);
186-
rename.setDecodeQueue(&decodeQueue);
187-
rename.setRenameQueue(&renameQueue);
188-
iew.setRenameQueue(&renameQueue);
189-
iew.setIEWQueue(&iewQueue);
190-
commit.setIEWQueue(&iewQueue);
191-
commit.setRenameQueue(&renameQueue);
182+
fetch.setFetchQueue(&fetchTimebuffer);
183+
decode.setFetchQueue(&fetchTimebuffer);
184+
commit.setFetchQueue(&fetchTimebuffer);
185+
decode.setDecodeQueue(&decodeTimebuffer);
186+
rename.setDecodeQueue(&decodeTimebuffer);
187+
rename.setRenameQueue(&renameTimebuffer);
188+
iew.setRenameQueue(&renameTimebuffer);
189+
iew.setIEWQueue(&iewTimebuffer);
190+
commit.setIEWQueue(&iewTimebuffer);
191+
commit.setRenameQueue(&renameTimebuffer);
192192

193193
decode.setFetchStage(&fetch);
194194
commit.setIEWStage(&iew);
195195
commit.setDecodeStage(&decode);
196196
rename.setIEWStage(&iew);
197197
rename.setCommitStage(&commit);
198198

199+
fetch.setStallSignals(&stallSignals);
200+
decode.setStallSignals(&stallSignals);
201+
rename.setStallSignals(&stallSignals);
202+
iew.setStallSignals(&stallSignals);
203+
commit.setStallSignals(&stallSignals);
204+
199205
ThreadID active_threads;
200206
if (FullSystem) {
201207
active_threads = 1;
@@ -572,24 +578,19 @@ CPU::tick()
572578
// activity = false;
573579

574580
// Tick each of the stages in reverse pipeline order (commit first, fetch last).
575-
fetch.tick();
576-
577-
decode.tick();
578-
579-
rename.tick();
580-
581-
iew.tick();
582581

583582
commit.tick();
583+
iew.tick();
584+
rename.tick();
585+
decode.tick();
586+
fetch.tick();
584587

585-
// Now advance the time buffers
588+
fetchTimebuffer.advance();
589+
decodeTimebuffer.advance();
590+
renameTimebuffer.advance();
591+
iewTimebuffer.advance();
586592
timeBuffer.advance();
587593

588-
fetchQueue.advance();
589-
decodeQueue.advance();
590-
renameQueue.advance();
591-
iewQueue.advance();
592-
593594
activityRec.advance();
594595

595596
if (removeInstsThisCycle) {
@@ -855,10 +856,10 @@ CPU::removeThread(ThreadID tid)
855856
// Flush out any old data from the time buffers.
856857
for (int i = 0; i < timeBuffer.getSize(); ++i) {
857858
timeBuffer.advance();
858-
fetchQueue.advance();
859-
decodeQueue.advance();
860-
renameQueue.advance();
861-
iewQueue.advance();
859+
fetchTimebuffer.advance();
860+
decodeTimebuffer.advance();
861+
renameTimebuffer.advance();
862+
iewTimebuffer.advance();
862863
}
863864

864865
assert(iew.ldstQueue.getCount(tid) == 0);
@@ -978,10 +979,10 @@ CPU::drain()
978979
// test in isCpuDrained().
979980
for (int i = 0; i < timeBuffer.getSize(); ++i) {
980981
timeBuffer.advance();
981-
fetchQueue.advance();
982-
decodeQueue.advance();
983-
renameQueue.advance();
984-
iewQueue.advance();
982+
fetchTimebuffer.advance();
983+
decodeTimebuffer.advance();
984+
renameTimebuffer.advance();
985+
iewTimebuffer.advance();
985986
}
986987

987988
drainSanityCheck();
@@ -1759,7 +1760,7 @@ CPU::readArchIntReg(int reg_idx, ThreadID tid)
17591760
PhysRegIdPtr phys_reg =
17601761
commitRenameMap[tid].lookup(RegId(IntRegClass, reg_idx)).PhyReg();
17611762

1762-
DPRINTF(Commit, "Get map: x%i -> p%i\n", reg_idx, phys_reg->flatIndex());
1763+
DPRINTF(Scoreboard, "Get map: x%i -> p%i\n", reg_idx, phys_reg->flatIndex());
17631764

17641765
return regFile.getReg(phys_reg);
17651766
}
@@ -1770,7 +1771,7 @@ CPU::readArchFloatReg(int reg_idx, ThreadID tid)
17701771
cpuStats.fpRegfileReads++;
17711772
PhysRegIdPtr phys_reg =
17721773
commitRenameMap[tid].lookup(RegId(FloatRegClass, reg_idx)).PhyReg();
1773-
DPRINTF(Commit, "Get map: f%i -> p%i\n", reg_idx, phys_reg->flatIndex());
1774+
DPRINTF(Scoreboard, "Get map: f%i -> p%i\n", reg_idx, phys_reg->flatIndex());
17741775

17751776
return regFile.getReg(phys_reg);
17761777
}
@@ -1781,7 +1782,7 @@ CPU::readArchVecReg(int reg_idx, uint64_t *val,ThreadID tid)
17811782
cpuStats.vecRegfileReads++;
17821783
PhysRegIdPtr phys_reg =
17831784
commitRenameMap[tid].lookup(RegId(VecRegClass, reg_idx)).PhyReg();
1784-
DPRINTF(Commit, "Get map: v%i -> p%i\n", reg_idx, phys_reg->flatIndex());
1785+
DPRINTF(Scoreboard, "Get map: v%i -> p%i\n", reg_idx, phys_reg->flatIndex());
17851786

17861787
regFile.getReg(phys_reg, val);
17871788
}

0 commit comments

Comments
 (0)