Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 61 additions & 23 deletions src/cpu/o3/issue_queue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
#define POPINST(x) \
do { \
assert(instNum != 0); \
assert(opNum[x->opClass()] != 0); \
opNum[x->opClass()]--; \
assert((*instNumClassify[x->opClass()]) != 0); \
(*instNumClassify[x->opClass()])--; \
instNum--; \
selector->deallocate(x); \
} while (0)
Expand Down Expand Up @@ -68,7 +68,7 @@ IssuePort::IssuePort(const IssuePortParams& params) : SimObject(params), rp(para
{
for (auto it0 : params.fu) {
for (auto it1 : it0->opDescList) {
mask.set(it1->opClass);
opbits.set(it1->opClass);
}
}
}
Expand Down Expand Up @@ -200,7 +200,6 @@ IssueQue::IssueQue(const IssueQueParams& params)
panic("%s: outports > 8 is not supported\n", iqname);
}

opNum.resize(enums::Num_OpClass, 0);
portBusy.resize(outports, 0);

intRdRfTPI.resize(outports);
Expand All @@ -210,9 +209,10 @@ IssueQue::IssueQue(const IssueQueParams& params)
readyQs.resize(outports, nullptr);

readyQclassify.resize(Num_OpClasses, nullptr);
instNumClassify.resize(enums::Num_OpClass, nullptr);
opPipelined.resize(Num_OpClasses, false);

std::unordered_map<std::bitset<Num_OpClasses>, ReadyQue*> readyQmap;
std::unordered_map<std::bitset<Num_OpClasses>, std::pair<ReadyQue*, uint8_t*>> readyQmap;
for (int i = 0; i < outports; i++) {
auto oport = params.oports[i];

Expand Down Expand Up @@ -257,28 +257,33 @@ IssueQue::IssueQue(const IssueQueParams& params)

// safety check for outports
for (int j = i + 1; j < outports; j++) {
if ((oport->mask != params.oports[j]->mask) && (oport->mask & params.oports[j]->mask).any()) {
if ((oport->opbits != params.oports[j]->opbits) && (oport->opbits & params.oports[j]->opbits).any()) {
panic("%s: Found the same opClass in different FU, portid: %d and %d\n", iqname, i, j);
}
}
fuDescs.insert(fuDescs.begin(), oport->fu.begin(), oport->fu.end());
portFuDescs.push_back(oport->opbits);

auto it = readyQmap.find(oport->mask);
ReadyQue* t = nullptr;
auto it = readyQmap.find(oport->opbits);
ReadyQue* readyQ = nullptr;
uint8_t* counter = nullptr;
if (it == readyQmap.end()) {
// create a new ReadyQue
t = new ReadyQue;
readyQmap[oport->mask] = t;
readyQ = new ReadyQue;
counter = new uint8_t(0);
readyQmap[oport->opbits] = std::make_pair(readyQ, counter);
} else {
// use the existing one
t = it->second;
readyQ = it->second.first;
counter = it->second.second;
}
readyQs[i] = t;
readyQs[i] = readyQ;

bool storePipeAcc = false, loadPipeAcc = false;
for (auto fu : oport->fu) {
for (auto op : fu->opDescList) {
readyQclassify[op->opClass] = t;
readyQclassify[op->opClass] = readyQ;
instNumClassify[op->opClass] = counter;
opPipelined[op->opClass] = op->pipelined;

if (op->opClass >= MemReadOp && op->opClass <= VectorWholeRegisterLoadOp) {
Expand Down Expand Up @@ -556,8 +561,9 @@ IssueQue::selectInst()
continue;
}

if (!(portBusy[pi] &
(scheduler->getCorrectedOpLat(inst) > 63 ? -1 : (1llu << scheduler->getCorrectedOpLat(inst))))) {
int lat = scheduler->getCorrectedOpLat(inst);
uint64_t busy_bit = (lat > 63 ? -1 : (1llu << lat));
if (!(portBusy[pi] & busy_bit)) {
DPRINTF(Schedule, "[sn %ld] was selected\n", inst->seqNum);

// get regfile write port
Expand Down Expand Up @@ -680,7 +686,7 @@ void
IssueQue::insert(const DynInstPtr& inst)
{
assert(instNum < iqsize);
opNum[inst->opClass()]++;
(*instNumClassify[inst->opClass()])++;
instNum++;
instNumInsert++;

Expand Down Expand Up @@ -828,8 +834,8 @@ bool
Scheduler::disp_policy::operator()(IssueQue* a, IssueQue* b) const
{
// instNum smaller first: the queue with fewer insts counted for disp_op sorts first
int p0 = a->opNum[disp_op];
int p1 = b->opNum[disp_op];
int p0 = *a->instNumClassify[disp_op];
int p1 = *b->instNumClassify[disp_op];
return p0 < p1;
}

Expand Down Expand Up @@ -891,9 +897,42 @@ Scheduler::Scheduler(const SchedulerParams& params)
for (int i = 0; i < intRegfileBanks; i++) {
rdRfPortOccupancy[i].resize(maxRdTypePortId, {nullptr, 0});
}

wrRfPortOccupancy.resize(maxWrTypePortId, {nullptr, 0, 0});


// dispatch distance counter allocate
dispOpdist.resize(Num_OpClasses, nullptr);
totalDispCounter.reserve(Num_OpClasses);
std::vector<std::vector<OpClass>> reuse_table;
for (int i = 0; i < Num_OpClasses; i++) {
bool counter_reuse = false;
int reuse_op = 0;
for (int j = 0; j < Num_OpClasses; j++) {
if (dispTable[i] == dispTable[j]) {
counter_reuse = true;
reuse_op = j; // op "i" can reuse the "j" counter
break;
}
}

if (counter_reuse && dispOpdist[reuse_op]) {
dispOpdist[i] = dispOpdist[reuse_op];
} else {
totalDispCounter.push_back(0);
dispOpdist[i] = &totalDispCounter.back();
reuse_table.push_back(std::vector<OpClass>());
}
reuse_table.back().push_back((OpClass)i);
}

for (auto it : reuse_table) {
std::cout << "Dispatch Grouping: ";
for (auto op : it) {
std::cout << enums::OpClassStrings[op] << " ";
}
std::cout << std::endl;
}

// Set TX dynamic read port optimization for all IssueQues
setMainRdpOpt(params.enableMainRdpOpt);

Expand Down Expand Up @@ -1030,8 +1069,7 @@ Scheduler::lookahead(std::deque<DynInstPtr>& insts)
if (old_disp) {
// do nothing
} else {
uint8_t disp_op_num[Num_OpClasses];
std::memset(disp_op_num, 0, Num_OpClasses);
std::fill(totalDispCounter.begin(), totalDispCounter.end(), 0);
int i = 0;
for (auto& it : insts) {
auto& iqs = dispTable[it->opClass()];
Expand All @@ -1041,8 +1079,8 @@ Scheduler::lookahead(std::deque<DynInstPtr>& insts)
std::sort(iqs.begin(), iqs.end(), disp_policy(StoreDataOp));
}

dispSeqVec[i] = disp_op_num[it->opClass()] % dispTable[it->opClass()].size();
disp_op_num[it->opClass()]++;
dispSeqVec[i] = (*dispOpdist[it->opClass()]) % iqs.size();
(*dispOpdist[it->opClass()])++;
i++;
}
}
Expand Down
13 changes: 9 additions & 4 deletions src/cpu/o3/issue_queue.hh
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class IssuePort : public SimObject
public:
std::vector<int> rp; // [typeid, portid]
std::vector<FUDesc*> fu;
std::bitset<Num_OpClasses> mask;
std::bitset<Num_OpClasses> opbits;
IssuePort(const IssuePortParams& params);
};

Expand Down Expand Up @@ -112,6 +112,7 @@ class IssueQue : public SimObject
const int replayQsize = 32;
const int scheduleToExecDelay;
const std::string iqname;
std::vector<std::bitset<Num_OpClasses>> portFuDescs;
std::vector<FUDesc*> fuDescs;
std::vector<bool> opPipelined;
int IQID = -1;
Expand Down Expand Up @@ -139,7 +140,8 @@ class IssueQue : public SimObject

std::list<DynInstPtr> instList;
uint64_t instNumInsert = 0;
std::vector<uint8_t> opNum;

std::vector<uint8_t*> instNumClassify;
uint64_t instNum = 0;

// issueport : regfileport : priority
Expand Down Expand Up @@ -279,15 +281,18 @@ class Scheduler : public SimObject
disp_policy(OpClass op) : disp_op(op) {}
bool operator()(IssueQue* a, IssueQue* b) const;
};
using DispPolicy = std::vector<IssueQue*>;
using IQGroup = std::vector<IssueQue*>;

std::vector<int> opExecTimeTable;
std::vector<bool> opPipelined;
std::vector<DispPolicy> dispTable;
std::vector<IQGroup> dispTable;
std::vector<IssueQue*> issueQues;
std::vector<std::vector<IssueQue*>> wakeMatrix;
uint32_t combinedFus;

std::vector<uint8_t> totalDispCounter;
std::vector<uint8_t*> dispOpdist;

std::vector<DynInstPtr> instsToFu;

std::vector<bool> earlyScoreboard;
Expand Down