Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions configs/example/kmhv3.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def setKmhV3Params(args, system):

cpu.branchPred.mbtb.resolvedUpdate = True
cpu.branchPred.tage.resolvedUpdate = True
cpu.branchPred.tage.enableBankConflict = False

cpu.branchPred.ubtb.enabled = True
cpu.branchPred.abtb.enabled = False
Expand Down
8 changes: 7 additions & 1 deletion src/cpu/o3/comm.hh
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,13 @@ struct TimeStruct
StallReason lqHeadStallReason;
StallReason sqHeadStallReason;

std::vector<ResolveQueueEntry> resolveQueue; // *F
/**
 * One resolved control-flow instruction reported from IEW to fetch.
 *
 * Members carry default initializers so that a default-constructed entry
 * never holds indeterminate values before its fields are assigned.
 */
struct ResolvedCFIEntry
{
    /** FSQ id of the resolved instruction (filled from inst->getFsqId()). */
    uint64_t fsqId = 0;
    /** PC of the resolved instruction (filled from inst->getPC()). */
    uint64_t pc = 0;
};
/** Resolved control-flow PCs produced this cycle (fetch buffers/merges). */
std::vector<ResolvedCFIEntry> resolvedCFIs; // *F
};

IewComm iewInfo[MaxThreads];
Expand Down
45 changes: 39 additions & 6 deletions src/cpu/o3/fetch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Fetch::Fetch(CPU *_cpu, const BaseO3CPUParams &params)
: fetchPolicy(params.smtFetchPolicy),
cpu(_cpu),
branchPred(nullptr),
resolveQueueSize(params.resolveQueueSize),
decodeToFetchDelay(params.decodeToFetchDelay),
renameToFetchDelay(params.renameToFetchDelay),
iewToFetchDelay(params.iewToFetchDelay),
Expand Down Expand Up @@ -249,7 +250,13 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
ADD_STAT(frontendBandwidthBound, statistics::units::Rate<
statistics::units::Count, statistics::units::Cycle>::get(),
"Frontend Bandwidth Bound",
frontendBound - frontendLatencyBound)
frontendBound - frontendLatencyBound),
ADD_STAT(resolveQueueFullCycles, statistics::units::Count::get(),
"Number of cycles the resolve queue is full"),
ADD_STAT(resolveQueueFullEvents, statistics::units::Count::get(),
"Number of events the resolve queue becomes full"),
ADD_STAT(resolveEnqueueFailEvent, statistics::units::Count::get(),
"Number of times an entry could not be enqueued to the resolve queue")
{
icacheStallCycles
.prereq(icacheStallCycles);
Expand Down Expand Up @@ -1494,17 +1501,43 @@ Fetch::handleIEWSignals()
return;
}

// iterate resolved stream_id and PC value from ResolveQueue
for (auto entry : fromIEW->iewInfo->resolveQueue) {
auto &incoming = fromIEW->iewInfo->resolvedCFIs;

for (const auto &resolved : incoming) {
bool merged = false;
for (auto &queued : resolveQueue) {
if (queued.resolvedFSQId == resolved.fsqId) {
queued.resolvedInstPC.push_back(resolved.pc);
merged = true;
break;
}
}

if (merged) {
continue;
}

if (resolveQueueSize && resolveQueue.size() >= resolveQueueSize) {
fetchStats.resolveQueueFullEvents++;
continue;
}

ResolveQueueEntry new_entry;
new_entry.resolvedFSQId = resolved.fsqId;
new_entry.resolvedInstPC.push_back(resolved.pc);
resolveQueue.push_back(std::move(new_entry));
}

if (!resolveQueue.empty()) {
auto &entry = resolveQueue.front();
unsigned int stream_id = entry.resolvedFSQId;
dbpbtb->prepareResolveUpdateEntries(stream_id);
for (uint64_t &resolvedInstPC : entry.resolvedInstPC) {
for (const auto resolvedInstPC : entry.resolvedInstPC) {
dbpbtb->markCFIResolved(stream_id, resolvedInstPC);
}
dbpbtb->resolveUpdate(stream_id);
resolveQueue.pop_front();
}

return;
}

bool
Expand Down
13 changes: 13 additions & 0 deletions src/cpu/o3/fetch.hh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#define __CPU_O3_FETCH_HH__

#include <cstring>
#include <deque>
#include <utility>

#include "arch/generic/decoder.hh"
Expand Down Expand Up @@ -612,6 +613,12 @@ class Fetch

branch_prediction::btb_pred::DecoupledBPUWithBTB *dbpbtb;

/** Maximum number of resolve entries buffered in fetch before training. */
const unsigned resolveQueueSize;

/** FIFO storing resolve entries waiting for BPU training. */
std::deque<ResolveQueueEntry> resolveQueue;

/** PC of each thread. */
std::unique_ptr<PCStateBase> pc[MaxThreads];

Expand Down Expand Up @@ -1095,6 +1102,12 @@ class Fetch
statistics::Formula frontendLatencyBound;
/** Frontend Bandwidth Bound */
statistics::Formula frontendBandwidthBound;
/** Stat for total cycles the resolve queue is full. */
statistics::Scalar resolveQueueFullCycles;
/** Stat for the total number of times the resolve queue becomes full. */
statistics::Scalar resolveQueueFullEvents;
/** Stat for total number of resolve enqueue fail events. */
statistics::Scalar resolveEnqueueFailEvent;
} fetchStats;

SquashVersion localSquashVer;
Expand Down
47 changes: 7 additions & 40 deletions src/cpu/o3/iew.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ IEW::IEW(CPU *_cpu, const BaseO3CPUParams &params)
wbWidth(params.wbWidth),
enableStoreSetTrain(params.enable_storeSet_train),
numThreads(params.numThreads),
resolveQueueSize(params.resolveQueueSize),
iewStats(cpu)
{
if (wbWidth > MaxWidth)
Expand Down Expand Up @@ -181,13 +180,6 @@ IEW::IEWStats::IEWStats(CPU *cpu)
"Number of branches that were predicted taken incorrectly"),
ADD_STAT(predictedNotTakenIncorrect, statistics::units::Count::get(),
"Number of branches that were predicted not taken incorrectly"),
ADD_STAT(resolveQueueFullCycles, statistics::units::Count::get(),
"Number of cycles the resolve queue is full"),
ADD_STAT(resolveQueueFullEvents, statistics::units::Count::get(),
"Number of events the resolve queue becomes full"),
ADD_STAT(resolveEnqueueFailEvent, statistics::units::Count::get(),
"Number of times an instruction could not be enqueued to the "
"resolve queue"),
ADD_STAT(branchMispredicts, statistics::units::Count::get(),
"Number of branch mispredicts detected at execute",
predictedTakenIncorrect + predictedNotTakenIncorrect),
Expand Down Expand Up @@ -1572,31 +1564,12 @@ IEW::SquashCheckAfterExe(DynInstPtr inst)
{
ThreadID tid = inst->threadNumber;

uint64_t fsqId = inst->getFsqId();
uint64_t pc = inst->getPC();
bool found = false;
for (auto &entry : resolveQueue) {
if (entry.resolvedFSQId == fsqId) {
entry.resolvedInstPC.push_back(pc);
found = true;
}
}

if (!found && resolveQueue.size() < resolveQueueSize) {
ResolveQueueEntry newEntry;
newEntry.resolvedFSQId = fsqId;
newEntry.resolvedInstPC.push_back(pc);
resolveQueue.push_back(newEntry);
if (resolveQueue.size() == resolveQueueSize) {
iewStats.resolveQueueFullEvents++;
}
}

if (resolveQueue.size() >= resolveQueueSize) {
if (!found) {
iewStats.resolveEnqueueFailEvent++;
}
iewStats.resolveQueueFullCycles++;
if (inst->isControl()) {
auto &resolved_cfis = toFetch->iewInfo[tid].resolvedCFIs;
TimeStruct::IewComm::ResolvedCFIEntry entry;
entry.fsqId = inst->getFsqId();
entry.pc = inst->getPC();
resolved_cfis.push_back(entry);
}

if (!fetchRedirect[tid] ||
Expand Down Expand Up @@ -1694,7 +1667,7 @@ IEW::executeInsts()

// Clear the resolved CFI entries since they are already handled in the frontend
ThreadID tid = *activeThreads->begin();
toFetch->iewInfo[tid].resolveQueue.clear();
toFetch->iewInfo[tid].resolvedCFIs.clear();

// Execute/writeback any instructions that are available.
int insts_to_execute = fromIssue->size;
Expand Down Expand Up @@ -1812,12 +1785,6 @@ IEW::executeInsts()
}
}

if (!resolveQueue.empty()) {
ResolveQueueEntry entry = resolveQueue.front();
resolveQueue.erase(resolveQueue.begin());
toFetch->iewInfo[tid].resolveQueue.push_back(entry);
}

ldstQueue.executePipeSx();

// Update and record activity if we processed any instructions.
Expand Down
9 changes: 0 additions & 9 deletions src/cpu/o3/iew.hh
Original file line number Diff line number Diff line change
Expand Up @@ -495,9 +495,6 @@ class IEW
/** Maximum size of the skid buffer. */
unsigned skidBufferMax;

unsigned resolveQueueSize;
std::vector<ResolveQueueEntry> resolveQueue;

struct IEWStats : public statistics::Group
{
IEWStats(CPU *cpu);
Expand Down Expand Up @@ -530,12 +527,6 @@ class IEW
statistics::Scalar predictedTakenIncorrect;
/** Stat for total number of incorrect predicted not taken branches. */
statistics::Scalar predictedNotTakenIncorrect;
/** Stat for total cycles the resolve queue is full. */
statistics::Scalar resolveQueueFullCycles;
/** Stat for total events of the resolve queue becomes full. */
statistics::Scalar resolveQueueFullEvents;
/** Stat for total number of enqueue fail events. */
statistics::Scalar resolveEnqueueFailEvent;
/** Stat for total number of mispredicted branches detected at
* execute. */
statistics::Formula branchMispredicts;
Expand Down