Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 99 additions & 123 deletions src/cpu/o3/fetch.cc

Large diffs are not rendered by default.

148 changes: 73 additions & 75 deletions src/cpu/o3/fetch.hh
Original file line number Diff line number Diff line change
Expand Up @@ -514,11 +514,6 @@ class Fetch
/** Set the reasons of all fetch stalls. */
void setAllFetchStalls(StallReason stall);

/** Select the thread to fetch from.
* @return Thread ID to fetch from, or InvalidThreadID if none available
*/
ThreadID selectFetchThread();

/** Check decoupled frontend (FTQ) availability.
* @param tid Thread ID
* @return true if frontend is ready for fetch, false otherwise
Expand Down Expand Up @@ -614,7 +609,7 @@ class Fetch
std::unique_ptr<TraceFetch> traceFetch;

/** PC of each thread. */
std::unique_ptr<PCStateBase> pc[MaxThreads];
// std::unique_ptr<PCStateBase> pc[MaxThreads];

/** Macroop of each thread. */
StaticInstPtr macroop[MaxThreads];
Expand Down Expand Up @@ -670,72 +665,6 @@ class Fetch
/** Cache block size. */
unsigned int cacheBlkSize;

/**
* Fetch buffer structure to encapsulate instruction fetch data.
* Encapsulates buffer data, PC tracking, validity state, and size.
* Designed to prepare for 2fetch implementation with potential multi-stream support.
*/
struct FetchBuffer
{
/** Pointer to the fetch data buffer */
uint8_t *data;

/** PC of the first instruction loaded into the fetch buffer */
Addr startPC;

/** Whether the fetch buffer data is valid */
bool valid;

/** Size of the fetch buffer in bytes. Set by Fetch class during init. */
unsigned size;

/** Constructor initializes buffer with default size */
FetchBuffer() : data(nullptr), startPC(0), valid(false), size(0) {
}

/** Destructor is not needed as Fetch class manages memory */
~FetchBuffer() {
}

/** Reset buffer state */
void reset() {
valid = false;
startPC = 0;
// No need to clear data as it will be overwritten
}

/** Check if a PC is within the current buffer range */
bool contains(Addr pc) const {
return valid && (pc >= startPC) && (pc < startPC + size);
}

/** Get offset of PC within the buffer */
unsigned getOffset(Addr pc) const {
assert(contains(pc));
return pc - startPC;
}

/** Set buffer data and update metadata */
void setData(Addr pc, const uint8_t* src_data, unsigned bytes_copied) {
startPC = pc;
valid = true;
memcpy(data, src_data, bytes_copied);
}

/** Get end PC of the buffer */
Addr getEndPC() const {
return startPC + size;
}
};

/** Fetch buffer for each thread */
FetchBuffer fetchBuffer[MaxThreads];

/** The size of the fetch buffer in bytes. Default is 66 bytes,
* make sure we could decode tail 4bytes if it is in [62, 66)
*/
unsigned fetchBufferSize;

// Constants for misaligned fetch handling
static constexpr unsigned CACHE_LINE_SIZE_BYTES = 64;

Expand Down Expand Up @@ -908,8 +837,77 @@ class Fetch
}
};

/** Cache request for each thread, replacing multiple redundant state variables */
CacheRequest cacheReq[MaxThreads];
/** The size of the fetch buffer in bytes. Default is 66 bytes,
* make sure we could decode tail 4bytes if it is in [62, 66)
*/
unsigned fetchBufferSize;

/**
 * Fetch buffer structure to encapsulate instruction fetch data.
 * Encapsulates buffer data, PC tracking, validity state, and size.
 * Designed to prepare for 2fetch implementation with potential
 * multi-stream support.
 */
struct FetchBuffer
{
    /** Fetch PC state of this thread's stream (replaces the former
     *  per-thread Fetch::pc[] array; the PC now travels with each
     *  per-thread buffer). */
    std::unique_ptr<PCStateBase> fetchpc;

    /** Outstanding cache access state for this thread (replaces the
     *  former per-thread Fetch::cacheReq[] array). */
    CacheRequest cacheReq;

    /** Pointer to the fetch data buffer (storage allocated and owned
     *  by the Fetch stage, not by this struct). */
    uint8_t *data;

    /** PC of the first instruction loaded into the fetch buffer */
    Addr startPC;

    /** Whether the fetch buffer data is valid */
    bool valid;

    /** Size of the fetch buffer in bytes. Set by Fetch class during init. */
    unsigned size;

    /** Constructor initializes buffer with default (empty) state;
     *  'data' stays null until the owning Fetch stage allocates it. */
    FetchBuffer() : data(nullptr), startPC(0), valid(false), size(0) {
    }

    /** Destructor is not needed as Fetch class manages memory */
    ~FetchBuffer() {
    }

    /** Reset buffer state (invalidate contents without freeing storage). */
    void reset() {
        valid = false;
        startPC = 0;
        // No need to clear data as it will be overwritten
    }

    /** Check if a PC is within the current buffer range
     *  [startPC, startPC + size); always false while invalid. */
    bool contains(Addr pc) const {
        return valid && (pc >= startPC) && (pc < startPC + size);
    }

    /** Get offset of PC within the buffer; caller must ensure
     *  contains(pc) holds (asserted). */
    unsigned getOffset(Addr pc) const {
        assert(contains(pc));
        return pc - startPC;
    }

    /** Set buffer data and update metadata.
     *  NOTE(review): assumes 'data' was already allocated by Fetch and
     *  that bytes_copied <= size — not checked here; confirm callers. */
    void setData(Addr pc, const uint8_t* src_data, unsigned bytes_copied) {
        startPC = pc;
        valid = true;
        memcpy(data, src_data, bytes_copied);
    }

    /** Get end PC of the buffer (one past the last valid byte). */
    Addr getEndPC() const {
        return startPC + size;
    }
};

/** Fetch buffer for each thread */
FetchBuffer threads[MaxThreads];

// /** Cache request for each thread, replacing multiple redundant state variables */
// CacheRequest cacheReq[MaxThreads];

/** The size of the fetch queue in micro-ops */
unsigned fetchQueueSize;
Expand Down Expand Up @@ -951,7 +949,7 @@ class Fetch

// Decoupled+BTB-only: fetch consumes the supplying FSQ entry directly.
// If no head is available, fetch stalls (no extra "supply" state machine).
bool ftqEmpty() const { return !dbpbtb || !dbpbtb->ftqHasHead(); }
bool ftqEmpty(ThreadID tid) const { return !dbpbtb || !dbpbtb->ftqHasFetching(tid); }

// Number of dynamic instructions fetched within the current FTQ entry.
// Used to explicitly notify the BPU when an entry is consumed (Phase5 prep).
Expand Down
130 changes: 130 additions & 0 deletions src/cpu/o3/smt_sched.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#ifndef __GEM5_O3_SMT_SCHED_HH__
#define __GEM5_O3_SMT_SCHED_HH__


#include <vector>

#include <boost/circular_buffer.hpp>

#include "base/types.hh"
#include "cpu/o3/limits.hh"

namespace gem5
{

namespace o3
{

/**
 * Per-thread instruction counter.
 * Can be embedded in the FTQ, ROB, LSQ, IQ, etc. to track how many
 * instructions each thread currently occupies in that structure.
 */
class InstsCounter
{
    /** Per-thread counts, zero-initialized. */
    uint64_t counter[MaxThreads] = {};

  public:
    /** Read the count for thread @p tid. */
    uint64_t getCounter(ThreadID tid) { return counter[tid]; }

    /** Overwrite the count for thread @p tid with @p value. */
    void setCounter(ThreadID tid, uint64_t value) { counter[tid] = value; }
};

/**
 * Abstract base class for SMT thread-selection policies.
 * Derived classes implement getThread() to pick the next thread.
 */
class SMTScheduler
{
  protected:
    /** Number of hardware threads to arbitrate among. */
    int numThreads;

  public:
    SMTScheduler(int numThreads) : numThreads(numThreads) {}

    /** Polymorphic base: virtual destructor so deleting a derived
     *  scheduler through a base pointer is well defined. */
    virtual ~SMTScheduler() = default;

    /** Select the next thread to service.
     *  Pure virtual: the previous declaration had no definition anywhere,
     *  which would fail at link time (missing vtable/symbol) if the base
     *  were ever instantiated or its getThread() called. */
    virtual ThreadID getThread() = 0;
};


/**
 * Instruction-count (ICOUNT) SMT scheduler: always picks the thread
 * with the fewest counted instructions; ties go to the lowest tid.
 */
class ICountScheduler : public SMTScheduler
{
  protected:
    /** Non-owning pointer to the shared per-thread instruction counts. */
    InstsCounter* counter;

  public:
    ICountScheduler(int numThreads, InstsCounter* counter)
        : SMTScheduler(numThreads), counter(counter) {}

    ThreadID getThread() override
    {
        // Argmin over per-thread counts; strict '<' keeps the lowest
        // tid on ties, matching the established policy.
        ThreadID best = 0;
        uint64_t bestCount = counter->getCounter(best);
        for (ThreadID tid = 1; tid < numThreads; ++tid) {
            const uint64_t c = counter->getCounter(tid);
            if (c < bestCount) {
                bestCount = c;
                best = tid;
            }
        }
        return best;
    }
};

/**
 * ICOUNT scheduler whose decisions take effect after a fixed delay,
 * modelling latency between the scheduling decision and its use.
 * The pipeline is seeded round-robin, so the first `delay` picks cycle
 * through the threads before ICOUNT decisions reach the output.
 */
class DelayedICountScheduler : public ICountScheduler
{
  protected:
    /** FIFO of in-flight decisions; holds exactly `delay` entries. */
    boost::circular_buffer<ThreadID> timebuffer;

  public:
    DelayedICountScheduler(int numThreads, InstsCounter* counter, int delay)
        : ICountScheduler(numThreads, counter)
    {
        timebuffer.set_capacity(delay);
        // Pre-fill round-robin so the first `delay` picks are defined.
        for (int i = 0; i < delay; ++i) {
            timebuffer.push_back(i % numThreads);
        }
    }

    ThreadID getThread() override
    {
        // Robustness fix: with delay == 0 the buffer is empty, and
        // front()/pop_front() on an empty boost::circular_buffer are
        // undefined behaviour. Degenerate to plain (immediate) ICOUNT.
        if (timebuffer.empty()) {
            return ICountScheduler::getThread();
        }
        // Emit the oldest queued decision and enqueue a fresh one.
        ThreadID selectedTid = timebuffer.front();
        timebuffer.pop_front();
        timebuffer.push_back(ICountScheduler::getThread());
        return selectedTid;
    }
};


/**
 * Multi-level priority SMT scheduler.
 * Walks the counters from highest to lowest priority; the first level
 * whose counts actually distinguish the threads decides the pick via
 * argmin. Falls back to thread 0 when every level is a complete tie.
 */
class MultiPrioritySched : public SMTScheduler
{
  private:
    /** Non-owning counters, ordered highest -> lowest priority. */
    std::vector<InstsCounter*> counter;

  public:
    // priority: highest -> lowest
    MultiPrioritySched(int numThreads, std::initializer_list<InstsCounter*> counters)
        : SMTScheduler(numThreads), counter(counters) {}

    ThreadID getThread() override
    {
        for (size_t i = 0; i < counter.size(); ++i) {
            ThreadID selectedTid = 0;
            uint64_t minCount = counter[i]->getCounter(0);
            // 'distinct' becomes true as soon as this level's counts are
            // not all equal, i.e. the level can discriminate threads.
            // Bug fix: the old 'set' flag was only raised when some
            // tid > 0 strictly beat thread 0, so a level where thread 0
            // held the unique minimum (e.g. counts [3,5,5]) was silently
            // skipped and a lower-priority level decided instead.
            bool distinct = false;
            for (ThreadID tid = 1; tid < numThreads; ++tid) {
                const uint64_t count = counter[i]->getCounter(tid);
                if (count != minCount) {
                    distinct = true;
                }
                if (count < minCount) {
                    minCount = count;
                    selectedTid = tid;
                }
            }
            if (distinct) {
                return selectedTid;
            }
        }
        // Every priority level tied across all threads: default pick.
        return 0;
    }
};



}}
#endif
16 changes: 8 additions & 8 deletions src/cpu/o3/trace/TraceFetch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -332,10 +332,10 @@ TraceFetch::initTraceMode()
return false;
}

std::unique_ptr<PCStateBase> tracePC(fetch.pc[0]->clone());
std::unique_ptr<PCStateBase> tracePC(fetch.threads[0].fetchpc->clone());
auto& riscv_pc = tracePC->as<RiscvISA::PCState>();
riscv_pc.set(firstInstr.getPC());
set(fetch.pc[0], *tracePC);
set(fetch.threads[0].fetchpc, *tracePC);
fetch.cpu->pcState(*tracePC, 0);

auto* tc0 = fetch.cpu->getContext(0);
Expand All @@ -358,12 +358,12 @@ TraceFetch::initTraceMode()
if (tc0) {
DPRINTF(Fetch,
"Trace mode: fetch PC = 0x%llx, cpu PC = 0x%llx, TC PC = 0x%llx\n",
fetch.pc[0]->instAddr(), fetch.cpu->pcState(0).instAddr(),
fetch.threads[0].fetchpc->instAddr(), fetch.cpu->pcState(0).instAddr(),
tc0->pcState().instAddr());
} else {
DPRINTF(Fetch,
"Trace mode: fetch PC = 0x%llx, cpu PC = 0x%llx (no TC)\n",
fetch.pc[0]->instAddr(), fetch.cpu->pcState(0).instAddr());
fetch.threads[0].fetchpc->instAddr(), fetch.cpu->pcState(0).instAddr());
}

if (fetch.branchPred) {
Expand Down Expand Up @@ -487,8 +487,8 @@ TraceFetch::supplyTraceToDecoder(ThreadID tid, const PCStateBase &this_pc,
auto *dec_ptr = fetch.decoder[tid];
memcpy(dec_ptr->moreBytesPtr(), &machInst, sizeof(machInst));
fetch.decoder[tid]->moreBytes(this_pc, instrPC);
fetch.fetchBuffer[tid].startPC = instrPC;
fetch.fetchBuffer[tid].valid = true;
fetch.threads[tid].startPC = instrPC;
fetch.threads[tid].valid = true;
DPRINTF(Fetch, "[tid:%i] Trace on-demand: %s at PC=0x%llx\n",
tid, tag, (unsigned long long)instrPC);
}
Expand Down Expand Up @@ -536,8 +536,8 @@ TraceFetch::chooseWrongPathNopSize(ThreadID tid, Addr pc)
bool taken = false;
if (fetch.isBTBPred()) {
assert(fetch.dbpbtb);
if (fetch.dbpbtb->ftqHasHead()) {
const auto &stream = fetch.dbpbtb->ftqHead();
if (fetch.dbpbtb->ftqHasFetching(tid)) {
const auto &stream = fetch.dbpbtb->ftqFetchingTarget(tid);
block_end = stream.predEndPC;
taken_pc = stream.predBranchInfo.pc;
taken = stream.predTaken;
Expand Down
1 change: 1 addition & 0 deletions src/cpu/pred/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ Source('btb/btb_mgsc.cc')
Source('btb/folded_hist.cc')
Source('btb/ras.cc')
Source('btb/btb_ubtb.cc')
Source('btb/ftq.cc')
# Source('btb/uras.cc')
Source('general_arch_db.cc')
DebugFlag('FreeList')
Expand Down
1 change: 1 addition & 0 deletions src/cpu/pred/btb/common.hh
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ struct FetchTarget
*/
struct FullBTBPrediction
{
ThreadID tid;
Addr bbStart;
std::vector<BTBEntry> btbEntries; // for BTB, only assigned when hit, sorted by inst order
// for conditional branch predictors, mapped with lowest bits of branches
Expand Down
Loading
Loading