Skip to content

Commit d461056

Browse files
committed
cpu-o3: fix smt framework
1 parent 51fa60b commit d461056

File tree

21 files changed

+157
-85
lines changed

21 files changed

+157
-85
lines changed

configs/common/Options.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -344,16 +344,14 @@ def addCommonOptions(parser, configure_xiangshan=False):
344344
"that are present under any of the roots. If not given, dump all "
345345
"stats. ")
346346

347+
parser.add_argument("--smt", action="store_true", default=False,
348+
help=""" RISCV SMT support, which requires multitThread-supported gcpt restore and diff-ref-so""")
349+
347350
if configure_xiangshan:
348351
return
349352
# Following options are not available in XiangShan
350353

351354
parser.add_argument("--checker", action="store_true")
352-
parser.add_argument("--smt", action="store_true", default=False,
353-
help="""
354-
Only used if multiple programs are specified. If true,
355-
then the number of threads per cpu is same as the
356-
number of programs.""")
357355
parser.add_argument(
358356
"--elastic-trace-en", action="store_true",
359357
help="""Enable capture of data dependency and instruction

configs/common/xiangshan.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -443,7 +443,12 @@ def _finish_xiangshan_system(args, test_sys, TestCPUClass, ruby):
443443
test_sys.cpu = [TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i)
444444
for i in range(np)]
445445
# Configure MMU for trace-aware FS mode
446+
if args.smt:
447+
test_sys.multi_thread = True
448+
446449
for cpu in test_sys.cpu:
450+
if args.smt:
451+
cpu.numThreads = 2
447452
cpu.mmu.pma_checker = PMAChecker(
448453
uncacheable=[AddrRange(0, size=0x80000000)])
449454
cpu.mmu.functional = args.functional_tlb

src/cpu/o3/comm.hh

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -187,14 +187,23 @@ struct SquashVersion
187187
return (version + 1) % versionLimit;
188188
}
189189
bool largerThan(uint8_t other) const {
190-
bool larger = version > other && version - other <= maxInflightSquash;
191-
bool wrapped_larger =
192-
version + versionLimit > other &&
193-
version + versionLimit - other <= maxInflightSquash;
194-
if (!(larger || wrapped_larger || (version == other))) {
190+
const uint8_t distance = (version + versionLimit - other) % versionLimit;
191+
if (distance == 0) {
192+
return false;
193+
}
194+
195+
if (distance <= maxInflightSquash) {
196+
return true;
197+
}
198+
199+
if (versionLimit - distance <= maxInflightSquash) {
200+
return false;
201+
}
202+
203+
if (version != other) {
195204
panic("SquashVersion: %d, other: %d\n", version, other);
196205
}
197-
return larger || wrapped_larger;
206+
return false;
198207
}
199208
void update(uint8_t v) {
200209
version = v;
@@ -205,6 +214,7 @@ struct SquashVersion
205214

206215
struct ResolveQueueEntry
207216
{
217+
ThreadID resolvedTid;
208218
uint64_t resolvedFTQId;
209219
std::vector<uint64_t> resolvedInstPC;
210220
};

src/cpu/o3/commit.cc

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1970,10 +1970,10 @@ Commit::squashInflightAndUpdateVersion(ThreadID tid)
19701970

19711971
fixedbuffer[tid].clear();
19721972

1973-
localSquashVer.update(localSquashVer.nextVersion());
1974-
toIEW->commitInfo[tid].squashVersion = localSquashVer;
1973+
localSquashVer[tid].update(localSquashVer[tid].nextVersion());
1974+
toIEW->commitInfo[tid].squashVersion = localSquashVer[tid];
19751975
DPRINTF(Commit, "Updating squash version to %u\n",
1976-
localSquashVer.getVersion());
1976+
localSquashVer[tid].getVersion());
19771977
}
19781978

19791979
void
@@ -1994,7 +1994,9 @@ Commit::markCompletedInsts()
19941994
fromIEW->insts[inst_num]->setCanCommit();
19951995
auto &inst = fromIEW->insts[inst_num];
19961996

1997-
panic_if(!rob->findInst(0, inst->seqNum), "[sn:%llu] Committed instruction not found in ROB",
1997+
panic_if(!rob->findInst(inst->threadNumber, inst->seqNum),
1998+
"[tid:%i] [sn:%llu] Committed instruction not found in ROB",
1999+
inst->threadNumber,
19982000
inst->seqNum);
19992001
}
20002002
}

src/cpu/o3/commit.hh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -430,7 +430,7 @@ class Commit
430430
/** Wire to read information from rename queue. */
431431
TimeBuffer<RenameStruct>::wire fromRename;
432432

433-
SquashVersion localSquashVer;
433+
SquashVersion localSquashVer[MaxThreads];
434434

435435
public:
436436
/** ROB interface. */

src/cpu/o3/cpu.cc

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -134,13 +134,6 @@ CPU::CPU(const BaseO3CPUParams &params)
134134
cpuStats(this),
135135
valuePred(params.valuePred)
136136
{
137-
fatal_if(FullSystem && params.numThreads > 1,
138-
"SMT is not supported in O3 in full system mode currently.");
139-
140-
fatal_if(!FullSystem && params.numThreads < params.workload.size(),
141-
"More workload items (%d) than threads (%d) on CPU %s.",
142-
params.workload.size(), params.numThreads, name());
143-
144137
if (!params.switched_out) {
145138
_status = Running;
146139
} else {
@@ -205,7 +198,10 @@ CPU::CPU(const BaseO3CPUParams &params)
205198

206199
ThreadID active_threads;
207200
if (FullSystem) {
208-
active_threads = 1;
201+
// FS-SMT still uses one shared workload/system image, but the O3 core
202+
// must provision per-thread architectural state for every hardware
203+
// thread context exposed by the CPU.
204+
active_threads = numThreads;
209205
} else {
210206
active_threads = params.workload.size();
211207

@@ -282,9 +278,7 @@ CPU::CPU(const BaseO3CPUParams &params)
282278

283279
for (ThreadID tid = 0; tid < numThreads; ++tid) {
284280
if (FullSystem) {
285-
// SMT is not supported in FS mode yet.
286-
assert(numThreads == 1);
287-
thread[tid] = new ThreadState(this, 0, NULL);
281+
thread[tid] = new ThreadState(this, tid, NULL);
288282
} else {
289283
if (tid < params.workload.size()) {
290284
DPRINTF(O3CPU, "Workload[%i] process is %#x", tid,

src/cpu/o3/decode.cc

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -401,7 +401,7 @@ Decode::moveInstsToBuffer()
401401
for (int i = 0; i < insts_from_fetch; ++i) {
402402
const DynInstPtr &inst = stallBuffer.front();
403403
assert(tid == inst->threadNumber);
404-
if (localSquashVer.largerThan(inst->getVersion())) {
404+
if (localSquashVer[tid].largerThan(inst->getVersion())) {
405405
inst->setSquashed();
406406
}
407407
assert(!fixedbuffer[inst->threadNumber].full());
@@ -419,9 +419,10 @@ Decode::checkSquash()
419419
DPRINTF(Decode, "[tid:%i] Squashing instructions due to squash "
420420
"from commit.\n", i);
421421
squash(i);
422-
localSquashVer.update(fromCommit->commitInfo[i].squashVersion.getVersion());
422+
localSquashVer[i].update(
423+
fromCommit->commitInfo[i].squashVersion.getVersion());
423424
DPRINTF(Decode, "Updating squash version to %u\n",
424-
localSquashVer.getVersion());
425+
localSquashVer[i].getVersion());
425426
}
426427
}
427428
}

src/cpu/o3/decode.hh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -293,7 +293,7 @@ class Decode
293293

294294
void setAllStalls(StallReason decodeStall);
295295

296-
SquashVersion localSquashVer;
296+
SquashVersion localSquashVer[MaxThreads];
297297
};
298298

299299
} // namespace o3

src/cpu/o3/fetch.cc

Lines changed: 38 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -656,8 +656,8 @@ Fetch::processCacheCompletion(PacketPtr pkt)
656656
}
657657

658658
// Verify fetchBufferPC alignment with the supplying FSQ entry.
659-
if (threads[tid].valid && dbpbtb->ftqHasFetching(0)) {
660-
const auto &stream = dbpbtb->ftqFetchingTarget(0);
659+
if (threads[tid].valid && dbpbtb->ftqHasFetching(tid)) {
660+
const auto &stream = dbpbtb->ftqFetchingTarget(tid);
661661
if (threads[tid].startPC != stream.startPC) {
662662
panic("fetchBufferPC %#x should be aligned with FSQ startPC %#x",
663663
threads[tid].startPC, stream.startPC);
@@ -793,7 +793,7 @@ Fetch::lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &next_pc)
793793
// Decoupled+BTB-only: compute next PC directly from the supplying FSQ entry.
794794
ThreadID tid = inst->threadNumber;
795795
assert(dbpbtb);
796-
assert(dbpbtb->ftqHasFetching(0));
796+
assert(dbpbtb->ftqHasFetching(tid));
797797
const auto &stream = dbpbtb->ftqFetchingTarget(tid);
798798

799799
const Addr curr_pc = next_pc.instAddr();
@@ -1002,7 +1002,7 @@ Fetch::handleTranslationFault(ThreadID tid, const RequestPtr &mem_req, const Fau
10021002
// We will use a nop in order to carry the fault.
10031003
DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
10041004
fetch_pc, fetch_pc, false);
1005-
instruction->setVersion(localSquashVer);
1005+
instruction->setVersion(localSquashVer[tid]);
10061006
instruction->setNotAnInst();
10071007

10081008
instruction->setPredTarg(fetch_pc);
@@ -1522,35 +1522,42 @@ Fetch::handleIEWSignals()
15221522
return;
15231523
}
15241524

1525-
auto &incoming = fromIEW->iewInfo->resolvedCFIs;
15261525
const bool had_pending_resolve = !resolveQueue.empty();
1527-
uint8_t enqueueSize = fromIEW->iewInfo->resolvedCFIs.size();
15281526
uint8_t enqueueCount = 0;
1527+
uint8_t enqueueSize = 0;
1528+
1529+
for (ThreadID tid = 0; tid < numThreads; ++tid) {
1530+
enqueueSize += fromIEW->iewInfo[tid].resolvedCFIs.size();
1531+
}
15291532

15301533
if (resolveQueueSize && resolveQueue.size() > resolveQueueSize - 4) {
15311534
fetchStats.resolveQueueFullEvents++;
15321535
fetchStats.resolveEnqueueFailEvent += enqueueSize;
15331536
} else {
1537+
for (ThreadID tid = 0; tid < numThreads; ++tid) {
1538+
auto &incoming = fromIEW->iewInfo[tid].resolvedCFIs;
1539+
for (const auto &resolved : incoming) {
1540+
bool merged = false;
1541+
for (auto &queued : resolveQueue) {
1542+
if (queued.resolvedTid == tid &&
1543+
queued.resolvedFTQId == resolved.ftqId) {
1544+
queued.resolvedInstPC.push_back(resolved.pc);
1545+
merged = true;
1546+
break;
1547+
}
1548+
}
15341549

1535-
for (const auto &resolved : incoming) {
1536-
bool merged = false;
1537-
for (auto &queued : resolveQueue) {
1538-
if (queued.resolvedFTQId == resolved.ftqId) {
1539-
queued.resolvedInstPC.push_back(resolved.pc);
1540-
merged = true;
1541-
break;
1550+
if (merged) {
1551+
continue;
15421552
}
1543-
}
15441553

1545-
if (merged) {
1546-
continue;
1554+
ResolveQueueEntry new_entry;
1555+
new_entry.resolvedTid = tid;
1556+
new_entry.resolvedFTQId = resolved.ftqId;
1557+
new_entry.resolvedInstPC.push_back(resolved.pc);
1558+
resolveQueue.push_back(std::move(new_entry));
1559+
enqueueCount++;
15471560
}
1548-
1549-
ResolveQueueEntry new_entry;
1550-
new_entry.resolvedFTQId = resolved.ftqId;
1551-
new_entry.resolvedInstPC.push_back(resolved.pc);
1552-
resolveQueue.push_back(std::move(new_entry));
1553-
enqueueCount++;
15541561
}
15551562
fetchStats.resolveEnqueueCount.sample(enqueueCount);
15561563
}
@@ -1562,12 +1569,13 @@ Fetch::handleIEWSignals()
15621569
// and fetch consuming them as predictor resolved updates.
15631570
if (had_pending_resolve && !resolveQueue.empty()) {
15641571
auto &entry = resolveQueue.front();
1572+
ThreadID tid = entry.resolvedTid;
15651573
unsigned int stream_id = entry.resolvedFTQId;
1566-
dbpbtb->prepareResolveUpdateEntries(stream_id, 0);
1574+
dbpbtb->prepareResolveUpdateEntries(stream_id, tid);
15671575
for (const auto resolvedInstPC : entry.resolvedInstPC) {
1568-
dbpbtb->markCFIResolved(stream_id, resolvedInstPC, 0);
1576+
dbpbtb->markCFIResolved(stream_id, resolvedInstPC, tid);
15691577
}
1570-
bool success = dbpbtb->resolveUpdate(stream_id, 0);
1578+
bool success = dbpbtb->resolveUpdate(stream_id, tid);
15711579
if (success) {
15721580
dbpbtb->notifyResolveSuccess();
15731581
resolveQueue.pop_front();
@@ -1612,8 +1620,10 @@ Fetch::handleCommitSignals(ThreadID tid)
16121620
squash(*fromCommit->commitInfo[tid].pc, squash_seq,
16131621
squash_inst, tid);
16141622

1615-
localSquashVer.update(fromCommit->commitInfo[tid].squashVersion.getVersion());
1616-
DPRINTF(Fetch, "Updating squash version to %u\n", localSquashVer.getVersion());
1623+
localSquashVer[tid].update(
1624+
fromCommit->commitInfo[tid].squashVersion.getVersion());
1625+
DPRINTF(Fetch, "Updating squash version to %u\n",
1626+
localSquashVer[tid].getVersion());
16171627

16181628
auto mispred_inst = fromCommit->commitInfo[tid].mispredictInst;
16191629

@@ -1924,7 +1934,7 @@ Fetch::processSingleInstruction(ThreadID tid, PCStateBase &pc,
19241934
tid, waitForVsetvl);
19251935
}
19261936

1927-
instruction->setVersion(localSquashVer);
1937+
instruction->setVersion(localSquashVer[tid]);
19281938
ppFetch->notify(instruction);
19291939
numInst++;
19301940

src/cpu/o3/fetch.hh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1126,7 +1126,7 @@ class Fetch
11261126
statistics::Scalar traceMetaCleanupCommitCalls;
11271127
} fetchStats;
11281128

1129-
SquashVersion localSquashVer;
1129+
SquashVersion localSquashVer[MaxThreads];
11301130

11311131
public:
11321132
const FetchStatGroup &getFetchStats() { return fetchStats; }

0 commit comments

Comments
 (0)