Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/cpu/o3/comm.hh
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,13 @@ struct TimeStruct
StallReason lqHeadStallReason;
StallReason sqHeadStallReason;

std::vector<ResolveQueueEntry> resolveQueue; // *F
/**
 * One resolved control-flow instruction reported by IEW to fetch.
 *
 * IEW fills an entry from the executed instruction (getFsqId() / getPC())
 * and fetch groups entries that share the same fsqId before training.
 * Members are zero-initialized so a default-constructed entry never
 * carries indeterminate values.
 */
struct ResolvedCFIEntry
{
    /** FSQ id of the resolved instruction, as returned by getFsqId(). */
    uint64_t fsqId = 0;
    /** PC of the resolved instruction, as returned by getPC(). */
    uint64_t pc = 0;
};
/** Resolved control-flow PCs produced this cycle (fetch buffers/merges). */
std::vector<ResolvedCFIEntry> resolvedCFIs; // *F
};

IewComm iewInfo[MaxThreads];
Expand Down
45 changes: 39 additions & 6 deletions src/cpu/o3/fetch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Fetch::Fetch(CPU *_cpu, const BaseO3CPUParams &params)
: fetchPolicy(params.smtFetchPolicy),
cpu(_cpu),
branchPred(nullptr),
resolveQueueSize(params.resolveQueueSize),
decodeToFetchDelay(params.decodeToFetchDelay),
renameToFetchDelay(params.renameToFetchDelay),
iewToFetchDelay(params.iewToFetchDelay),
Expand Down Expand Up @@ -249,7 +250,13 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
ADD_STAT(frontendBandwidthBound, statistics::units::Rate<
statistics::units::Count, statistics::units::Cycle>::get(),
"Frontend Bandwidth Bound",
frontendBound - frontendLatencyBound)
frontendBound - frontendLatencyBound),
ADD_STAT(resolveQueueFullCycles, statistics::units::Count::get(),
"Number of cycles the resolve queue is full"),
ADD_STAT(resolveQueueFullEvents, statistics::units::Count::get(),
"Number of events the resolve queue becomes full"),
ADD_STAT(resolveEnqueueFailEvent, statistics::units::Count::get(),
"Number of times an entry could not be enqueued to the resolve queue")
{
icacheStallCycles
.prereq(icacheStallCycles);
Expand Down Expand Up @@ -1494,17 +1501,43 @@ Fetch::handleIEWSignals()
return;
}

// iterate resolved stream_id and PC value from ResolveQueue
for (auto entry : fromIEW->iewInfo->resolveQueue) {
auto &incoming = fromIEW->iewInfo->resolvedCFIs;

for (const auto &resolved : incoming) {
bool merged = false;
for (auto &queued : resolveQueue) {
if (queued.resolvedFSQId == resolved.fsqId) {
queued.resolvedInstPC.push_back(resolved.pc);
merged = true;
break;
}
}

if (merged) {
continue;
}

if (resolveQueueSize && resolveQueue.size() >= resolveQueueSize) {
fetchStats.resolveQueueFullEvents++;
continue;
}

ResolveQueueEntry new_entry;
new_entry.resolvedFSQId = resolved.fsqId;
new_entry.resolvedInstPC.push_back(resolved.pc);
resolveQueue.push_back(std::move(new_entry));
}

if (!resolveQueue.empty()) {
auto &entry = resolveQueue.front();
unsigned int stream_id = entry.resolvedFSQId;
dbpbtb->prepareResolveUpdateEntries(stream_id);
for (uint64_t &resolvedInstPC : entry.resolvedInstPC) {
for (const auto resolvedInstPC : entry.resolvedInstPC) {
dbpbtb->markCFIResolved(stream_id, resolvedInstPC);
}
dbpbtb->resolveUpdate(stream_id);
resolveQueue.pop_front();
}

return;
}

bool
Expand Down
13 changes: 13 additions & 0 deletions src/cpu/o3/fetch.hh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#define __CPU_O3_FETCH_HH__

#include <cstring>
#include <deque>
#include <utility>

#include "arch/generic/decoder.hh"
Expand Down Expand Up @@ -612,6 +613,12 @@ class Fetch

branch_prediction::btb_pred::DecoupledBPUWithBTB *dbpbtb;

/** Maximum number of resolve entries buffered in fetch before training. */
const unsigned resolveQueueSize;

/** FIFO storing resolve entries waiting for BPU training. */
std::deque<ResolveQueueEntry> resolveQueue;

/** PC of each thread. */
std::unique_ptr<PCStateBase> pc[MaxThreads];

Expand Down Expand Up @@ -1095,6 +1102,12 @@ class Fetch
statistics::Formula frontendLatencyBound;
/** Frontend Bandwidth Bound */
statistics::Formula frontendBandwidthBound;
/** Stat for total cycles the resolve queue is full. */
statistics::Scalar resolveQueueFullCycles;
/** Stat for the total number of times the resolve queue becomes full. */
statistics::Scalar resolveQueueFullEvents;
/** Stat for total number of resolve enqueue fail events. */
statistics::Scalar resolveEnqueueFailEvent;
} fetchStats;

SquashVersion localSquashVer;
Expand Down
47 changes: 7 additions & 40 deletions src/cpu/o3/iew.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ IEW::IEW(CPU *_cpu, const BaseO3CPUParams &params)
wbWidth(params.wbWidth),
enableStoreSetTrain(params.enable_storeSet_train),
numThreads(params.numThreads),
resolveQueueSize(params.resolveQueueSize),
iewStats(cpu)
{
if (wbWidth > MaxWidth)
Expand Down Expand Up @@ -181,13 +180,6 @@ IEW::IEWStats::IEWStats(CPU *cpu)
"Number of branches that were predicted taken incorrectly"),
ADD_STAT(predictedNotTakenIncorrect, statistics::units::Count::get(),
"Number of branches that were predicted not taken incorrectly"),
ADD_STAT(resolveQueueFullCycles, statistics::units::Count::get(),
"Number of cycles the resolve queue is full"),
ADD_STAT(resolveQueueFullEvents, statistics::units::Count::get(),
"Number of events the resolve queue becomes full"),
ADD_STAT(resolveEnqueueFailEvent, statistics::units::Count::get(),
"Number of times an instruction could not be enqueued to the "
"resolve queue"),
ADD_STAT(branchMispredicts, statistics::units::Count::get(),
"Number of branch mispredicts detected at execute",
predictedTakenIncorrect + predictedNotTakenIncorrect),
Expand Down Expand Up @@ -1572,31 +1564,12 @@ IEW::SquashCheckAfterExe(DynInstPtr inst)
{
ThreadID tid = inst->threadNumber;

uint64_t fsqId = inst->getFsqId();
uint64_t pc = inst->getPC();
bool found = false;
for (auto &entry : resolveQueue) {
if (entry.resolvedFSQId == fsqId) {
entry.resolvedInstPC.push_back(pc);
found = true;
}
}

if (!found && resolveQueue.size() < resolveQueueSize) {
ResolveQueueEntry newEntry;
newEntry.resolvedFSQId = fsqId;
newEntry.resolvedInstPC.push_back(pc);
resolveQueue.push_back(newEntry);
if (resolveQueue.size() == resolveQueueSize) {
iewStats.resolveQueueFullEvents++;
}
}

if (resolveQueue.size() >= resolveQueueSize) {
if (!found) {
iewStats.resolveEnqueueFailEvent++;
}
iewStats.resolveQueueFullCycles++;
if (inst->isControl()) {
auto &resolved_cfis = toFetch->iewInfo[tid].resolvedCFIs;
TimeStruct::IewComm::ResolvedCFIEntry entry;
entry.fsqId = inst->getFsqId();
entry.pc = inst->getPC();
resolved_cfis.push_back(entry);
}

if (!fetchRedirect[tid] ||
Expand Down Expand Up @@ -1694,7 +1667,7 @@ IEW::executeInsts()

// Clear resolvedCFIs since they are already handled in frontend
ThreadID tid = *activeThreads->begin();
toFetch->iewInfo[tid].resolveQueue.clear();
toFetch->iewInfo[tid].resolvedCFIs.clear();

// Execute/writeback any instructions that are available.
int insts_to_execute = fromIssue->size;
Expand Down Expand Up @@ -1812,12 +1785,6 @@ IEW::executeInsts()
}
}

if (!resolveQueue.empty()) {
ResolveQueueEntry entry = resolveQueue.front();
resolveQueue.erase(resolveQueue.begin());
toFetch->iewInfo[tid].resolveQueue.push_back(entry);
}

ldstQueue.executePipeSx();

// Update and record activity if we processed any instructions.
Expand Down
9 changes: 0 additions & 9 deletions src/cpu/o3/iew.hh
Original file line number Diff line number Diff line change
Expand Up @@ -495,9 +495,6 @@ class IEW
/** Maximum size of the skid buffer. */
unsigned skidBufferMax;

unsigned resolveQueueSize;
std::vector<ResolveQueueEntry> resolveQueue;

struct IEWStats : public statistics::Group
{
IEWStats(CPU *cpu);
Expand Down Expand Up @@ -530,12 +527,6 @@ class IEW
statistics::Scalar predictedTakenIncorrect;
/** Stat for total number of incorrect predicted not taken branches. */
statistics::Scalar predictedNotTakenIncorrect;
/** Stat for total cycles the resolve queue is full. */
statistics::Scalar resolveQueueFullCycles;
/** Stat for total events of the resolve queue becomes full. */
statistics::Scalar resolveQueueFullEvents;
/** Stat for total number of enqueue fail events. */
statistics::Scalar resolveEnqueueFailEvent;
/** Stat for total number of mispredicted branches detected at
* execute. */
statistics::Formula branchMispredicts;
Expand Down