Skip to content

Commit aef9710

Browse files
committed
cpu-o3: stabilize fs-smt shared-memory execution
Change-Id: Iacbf424326a2c391b327b01f49d9795e9a6c5fcb
1 parent 3566e2d commit aef9710

File tree

12 files changed

+1306
-111
lines changed

12 files changed

+1306
-111
lines changed

src/cpu/base.cc

Lines changed: 347 additions & 44 deletions
Large diffs are not rendered by default.

src/cpu/base.hh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,20 @@ class BaseCPU : public ClockedObject
148148
uint8_t data[16] = {};
149149
};
150150

151+
struct RecentVisibleSharedStore
152+
{
153+
bool valid = false;
154+
ThreadID tid = InvalidThreadID;
155+
Addr addr = 0;
156+
size_t size = 0;
157+
InstSeqNum seq = 0;
158+
uint8_t data[16] = {};
159+
};
160+
151161
std::vector<std::deque<RecentCommittedStore>> recentCommittedStores;
162+
std::deque<RecentVisibleSharedStore> recentVisibleSharedStores;
163+
std::vector<uint8_t> sharedReplayCatchupBudget;
164+
std::vector<bool> syncVisibleStoreReplayArmed;
152165

153166
const unsigned IntRegIndexBase = 0;
154167
const unsigned FPRegIndexBase = 32;
@@ -790,6 +803,21 @@ class BaseCPU : public ClockedObject
790803
void difftestStep(ThreadID tid, InstSeqNum seq);
791804

792805
void recordCommittedStore(ThreadID tid, const o3::DynInstPtr &inst);
806+
void recordVisibleSharedStore(ThreadID tid, Addr addr, size_t size,
807+
InstSeqNum seq, const uint8_t *data);
808+
809+
virtual bool findPendingStoreValue(ThreadID tid, Addr addr, size_t size,
810+
InstSeqNum seq, uint8_t *data) const
811+
{
812+
return false;
813+
}
814+
815+
virtual bool findPendingLocalStoreValue(ThreadID tid, Addr addr,
816+
size_t size, InstSeqNum seq,
817+
uint8_t *data) const
818+
{
819+
return false;
820+
}
793821

794822
inline bool difftestEnabled() const { return enableDifftest; }
795823

@@ -824,6 +852,18 @@ class BaseCPU : public ClockedObject
824852
uint8_t *getGoldenMemPtr() { return goldenMemPtr; }
825853

826854
gem5::GoldenGloablMem *goldenMemManager() { return _goldenMemManager; }
855+
void syncVisibleSharedStoreToDiffRefs(Addr paddr, size_t size);
856+
void armSharedReplayCatchup(ThreadID tid);
857+
void armSyncVisibleStoreReplay(ThreadID tid)
858+
{
859+
syncVisibleStoreReplayArmed.at(tid) = true;
860+
}
861+
bool consumeSyncVisibleStoreReplay(ThreadID tid)
862+
{
863+
bool armed = syncVisibleStoreReplayArmed.at(tid);
864+
syncVisibleStoreReplayArmed.at(tid) = false;
865+
return armed;
866+
}
827867

828868
void checkL1DRefill(Addr paddr, const uint8_t *refill_data, size_t size);
829869
};

src/cpu/difftest.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,7 @@ void
174174
NemuProxy::initState(int coreid, uint8_t *golden_mem)
175175
{
176176
if (multiCore) {
177-
warn("Setting mhartid to %d\n", coreid);
178177
setHartId(coreid);
179-
warn("Setting gmaddr to %#lx\n", (uint64_t) golden_mem);
180178
nemuPutGmaddr(golden_mem);
181179
}
182180
}

src/cpu/o3/commit.cc

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,6 +1001,33 @@ Commit::commit()
10011001
std::list<ThreadID>::iterator end = activeThreads->end();
10021002

10031003
int num_squashing_threads = 0;
1004+
auto hasYoungerInflight = [&](ThreadID tid, InstSeqNum squashed_seq,
1005+
bool include_squash_inst) {
1006+
const InstSeqNum cutoff =
1007+
include_squash_inst ? squashed_seq - 1 : squashed_seq;
1008+
1009+
if (cutoff <= youngestSeqNum[tid]) {
1010+
return true;
1011+
}
1012+
1013+
for (const auto &inst : fixedbuffer[tid]) {
1014+
if (inst && !inst->isSquashed() && inst->seqNum > cutoff) {
1015+
return true;
1016+
}
1017+
}
1018+
1019+
for (int i = 0; i < fromRename->size; ++i) {
1020+
const auto &inst = fromRename->insts[i];
1021+
if (!inst || inst->threadNumber != tid || inst->isSquashed()) {
1022+
continue;
1023+
}
1024+
if (inst->seqNum > cutoff) {
1025+
return true;
1026+
}
1027+
}
1028+
1029+
return false;
1030+
};
10041031

10051032
while (threads != end) {
10061033
ThreadID tid = *threads++;
@@ -1030,11 +1057,14 @@ Commit::commit()
10301057
// Squashed sequence number must be older than youngest valid
10311058
// instruction in the ROB. This prevents squashes from younger
10321059
// instructions overriding squashes from older instructions.
1060+
const bool has_younger_inflight =
1061+
hasYoungerInflight(tid, fromIEW->squashedSeqNum[tid],
1062+
fromIEW->includeSquashInst[tid]);
10331063
DPRINTF(Commit, "fromIEW->squash %d, commitStatus %d, fromIEW->squashedSeqNum %d, youngestSeqNum %d\n",
10341064
fromIEW->squash[tid], commitStatus[tid], fromIEW->squashedSeqNum[tid], youngestSeqNum[tid]);
10351065
if (fromIEW->squash[tid] &&
10361066
commitStatus[tid] != TrapPending &&
1037-
fromIEW->squashedSeqNum[tid] <= youngestSeqNum[tid]) {
1067+
has_younger_inflight) {
10381068

10391069
if (fromIEW->mispredictInst[tid]) {
10401070
DPRINTF(Commit,
@@ -1212,9 +1242,9 @@ Commit::commitInsts()
12121242
continue;
12131243
}
12141244

1215-
while (num_committed < commit_width &&
1216-
num_committed_per_thread[commit_thread] <
1217-
commit_width_per_thread[commit_thread]) {
1245+
while (num_committed < commit_width &&
1246+
num_committed_per_thread[commit_thread] <
1247+
commit_width_per_thread[commit_thread]) {
12181248
// hardware transactional memory
12191249
// If executing within a transaction,
12201250
// need to handle interrupts specially
@@ -1453,6 +1483,11 @@ Commit::commitInsts()
14531483

14541484
}
14551485

1486+
if (head_inst->isReadBarrier() ||
1487+
head_inst->isWriteBarrier()) {
1488+
cpu->armSyncVisibleStoreReplay(tid);
1489+
}
1490+
14561491
if (cpu->difftestEnabled()) {
14571492
diffInst(tid, head_inst);
14581493
}
@@ -1651,7 +1686,7 @@ Commit::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
16511686
// race with the TLB invalidation.
16521687
if ((head_inst->isMemRef() || head_inst->isReturn() ||
16531688
head_inst->isReadBarrier() || head_inst->isWriteBarrier()) &&
1654-
(inst_num > 0 || !iewStage->flushStores(tid))) {
1689+
(inst_num > 0 || !iewStage->flushStores(tid, head_inst->seqNum))) {
16551690
DPRINTF(Commit,
16561691
"[tid:%i] [sn:%llu] "
16571692
"Waiting for all stores to writeback.\n",
@@ -1705,7 +1740,7 @@ Commit::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
17051740

17061741
if (inst_fault != NoFault) {
17071742
traceLogInstFault(head_inst, inst_fault);
1708-
if (!iewStage->flushStores(tid) || inst_num > 0) {
1743+
if (!iewStage->flushStores(tid, head_inst->seqNum) || inst_num > 0) {
17091744
DPRINTF(Commit,
17101745
"[tid:%i] [sn:%llu] "
17111746
"Stores outstanding, fault must wait.\n",

src/cpu/o3/cpu.hh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,19 @@ class CPU : public BaseCPU
433433

434434
uint32_t getIQInsts() { return iew.getIQInsts(); }
435435

436+
bool findPendingStoreValue(ThreadID tid, Addr addr, size_t size,
437+
InstSeqNum seq, uint8_t *data) const override
438+
{
439+
return iew.findPendingStoreValue(tid, addr, size, seq, data);
440+
}
441+
442+
bool findPendingLocalStoreValue(ThreadID tid, Addr addr, size_t size,
443+
InstSeqNum seq,
444+
uint8_t *data) const override
445+
{
446+
return iew.findPendingLocalStoreValue(tid, addr, size, seq, data);
447+
}
448+
436449
/**
437450
* Return the oldest in-flight instruction sequence number.
438451
* If there are no in-flight instructions, returns the maximum value

src/cpu/o3/decode.cc

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,13 @@ Decode::fetchInstsValid()
255255
void
256256
Decode::selfSquash(const DynInstPtr &inst, ThreadID tid)
257257
{
258+
if (tid == 1 && inst->seqNum >= 182805 && inst->seqNum <= 182808) {
259+
warn("DECODE-SQUASH tid=%d seq=%llu pc=%#lx pred_taken=%d pred_tgt=%#lx "
260+
"ftq=%u\n",
261+
tid, static_cast<unsigned long long>(inst->seqNum),
262+
inst->pcState().instAddr(), inst->readPredTaken(),
263+
inst->readPredTarg().instAddr(), inst->getFtqId());
264+
}
258265
DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch "
259266
"prediction detected at decode.\n", tid, inst->seqNum);
260267

@@ -718,29 +725,42 @@ Decode::decodeInsts(ThreadID tid)
718725
pred.pc(), pred.npc(), t.pc(), t.npc());
719726
inst->setPredTarg(t);
720727
}
721-
if (*target != inst->readPredTarg()) {
728+
bool wrong_uncond_direction =
729+
inst->isUncondCtrl() && !inst->readPredTaken();
730+
bool wrong_target = *target != inst->readPredTarg();
731+
if (wrong_uncond_direction || wrong_target) {
722732
++stats.branchMispred;
723733

724-
RiscvISA::PCState cpTarget = target->clone()->as<RiscvISA::PCState>();
725-
RiscvISA::PCState cpPredTarget = inst->readPredTarg().clone()->as<RiscvISA::PCState>();
734+
RiscvISA::PCState cpTarget =
735+
target->clone()->as<RiscvISA::PCState>();
736+
RiscvISA::PCState cpPredTarget =
737+
inst->readPredTarg().clone()->as<RiscvISA::PCState>();
726738

727-
if (cpTarget.instAddr() != cpPredTarget.instAddr() && cpTarget.npc() == cpPredTarget.npc()) {
739+
if (wrong_target &&
740+
cpTarget.instAddr() != cpPredTarget.instAddr() &&
741+
cpTarget.npc() == cpPredTarget.npc()) {
728742
++stats.mispredictedByPC;
729-
} else if (cpTarget.instAddr() == cpPredTarget.instAddr() && cpTarget.npc() != cpPredTarget.npc()) {
743+
} else if (wrong_target &&
744+
cpTarget.instAddr() == cpPredTarget.instAddr() &&
745+
cpTarget.npc() != cpPredTarget.npc()) {
730746
++stats.mispredictedByNPC;
731747
}
732748

733749
// Might want to set some sort of boolean and just do
734750
// a check at the end
751+
if (wrong_uncond_direction) {
752+
inst->setPredTaken(true);
753+
}
735754
selfSquash(inst, inst->threadNumber);
736755

737756
decode_stalls.push(StallReason::InstMisPred);
738757
breakDecode = StallReason::InstMisPred;
739758

740759
DPRINTF(Decode,
741760
"[tid:%i] [sn:%llu] Updating predictions:"
742-
" Wrong predicted target: %s PredPC: %s\n",
743-
tid, inst->seqNum, inst->readPredTarg(), *target);
761+
" wrong_dir=%d wrong_target=%d PredPC: %s Actual: %s\n",
762+
tid, inst->seqNum, wrong_uncond_direction,
763+
wrong_target, inst->readPredTarg(), *target);
744764
// The micro-PC after an instruction-level branch should be 0
745765
inst->setPredTarg(*target);
746766
break;

src/cpu/o3/iew.hh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,23 @@ class IEW
260260
* the store queue or the store buffer to write back to.
261261
*/
262262
bool flushStores(ThreadID tid) { return ldstQueue.flushStores(tid); }
263+
bool flushStores(ThreadID tid, InstSeqNum seq_num)
264+
{
265+
return ldstQueue.flushStores(tid, seq_num);
266+
}
267+
268+
bool findPendingStoreValue(ThreadID tid, Addr addr, size_t size,
269+
InstSeqNum seq, uint8_t *data) const
270+
{
271+
return ldstQueue.findPendingStoreValue(tid, addr, size, seq, data);
272+
}
273+
274+
bool findPendingLocalStoreValue(ThreadID tid, Addr addr, size_t size,
275+
InstSeqNum seq, uint8_t *data) const
276+
{
277+
return ldstQueue.findPendingLocalStoreValue(tid, addr, size, seq,
278+
data);
279+
}
263280

264281
/** Check if we need to squash after a load/store/branch is executed. */
265282
void SquashCheckAfterExe(DynInstPtr inst);

0 commit comments

Comments
 (0)