Skip to content

Commit 4f70d27

Browse files
authored
cpu-o3: fix dispatch to align (#674)
Change-Id: If0a65e4ca0c6f6fead995e88977ca79e8cb7f97c
1 parent e5e5261 commit 4f70d27

File tree

2 files changed

+70
-27
lines changed

2 files changed

+70
-27
lines changed

src/cpu/o3/issue_queue.cc

Lines changed: 61 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030
#define POPINST(x) \
3131
do { \
3232
assert(instNum != 0); \
33-
assert(opNum[x->opClass()] != 0); \
34-
opNum[x->opClass()]--; \
33+
assert((*instNumClassify[x->opClass()]) != 0); \
34+
(*instNumClassify[x->opClass()])--; \
3535
instNum--; \
3636
selector->deallocate(x); \
3737
} while (0)
@@ -68,7 +68,7 @@ IssuePort::IssuePort(const IssuePortParams& params) : SimObject(params), rp(para
6868
{
6969
for (auto it0 : params.fu) {
7070
for (auto it1 : it0->opDescList) {
71-
mask.set(it1->opClass);
71+
opbits.set(it1->opClass);
7272
}
7373
}
7474
}
@@ -200,7 +200,6 @@ IssueQue::IssueQue(const IssueQueParams& params)
200200
panic("%s: outports > 8 is not supported\n", iqname);
201201
}
202202

203-
opNum.resize(enums::Num_OpClass, 0);
204203
portBusy.resize(outports, 0);
205204

206205
intRdRfTPI.resize(outports);
@@ -210,9 +209,10 @@ IssueQue::IssueQue(const IssueQueParams& params)
210209
readyQs.resize(outports, nullptr);
211210

212211
readyQclassify.resize(Num_OpClasses, nullptr);
212+
instNumClassify.resize(enums::Num_OpClass, nullptr);
213213
opPipelined.resize(Num_OpClasses, false);
214214

215-
std::unordered_map<std::bitset<Num_OpClasses>, ReadyQue*> readyQmap;
215+
std::unordered_map<std::bitset<Num_OpClasses>, std::pair<ReadyQue*, uint8_t*>> readyQmap;
216216
for (int i = 0; i < outports; i++) {
217217
auto oport = params.oports[i];
218218

@@ -257,28 +257,33 @@ IssueQue::IssueQue(const IssueQueParams& params)
257257

258258
// safety check for outports
259259
for (int j = i + 1; j < outports; j++) {
260-
if ((oport->mask != params.oports[j]->mask) && (oport->mask & params.oports[j]->mask).any()) {
260+
if ((oport->opbits != params.oports[j]->opbits) && (oport->opbits & params.oports[j]->opbits).any()) {
261261
panic("%s: Found the same opClass in different FU, portid: %d and %d\n", iqname, i, j);
262262
}
263263
}
264264
fuDescs.insert(fuDescs.begin(), oport->fu.begin(), oport->fu.end());
265+
portFuDescs.push_back(oport->opbits);
265266

266-
auto it = readyQmap.find(oport->mask);
267-
ReadyQue* t = nullptr;
267+
auto it = readyQmap.find(oport->opbits);
268+
ReadyQue* readyQ = nullptr;
269+
uint8_t* counter = nullptr;
268270
if (it == readyQmap.end()) {
269271
// create a new ReadyQue
270-
t = new ReadyQue;
271-
readyQmap[oport->mask] = t;
272+
readyQ = new ReadyQue;
273+
counter = new uint8_t(0);
274+
readyQmap[oport->opbits] = std::make_pair(readyQ, counter);
272275
} else {
273276
// use the existing one
274-
t = it->second;
277+
readyQ = it->second.first;
278+
counter = it->second.second;
275279
}
276-
readyQs[i] = t;
280+
readyQs[i] = readyQ;
277281

278282
bool storePipeAcc = false, loadPipeAcc = false;
279283
for (auto fu : oport->fu) {
280284
for (auto op : fu->opDescList) {
281-
readyQclassify[op->opClass] = t;
285+
readyQclassify[op->opClass] = readyQ;
286+
instNumClassify[op->opClass] = counter;
282287
opPipelined[op->opClass] = op->pipelined;
283288

284289
if (op->opClass >= MemReadOp && op->opClass <= VectorWholeRegisterLoadOp) {
@@ -556,8 +561,9 @@ IssueQue::selectInst()
556561
continue;
557562
}
558563

559-
if (!(portBusy[pi] &
560-
(scheduler->getCorrectedOpLat(inst) > 63 ? -1 : (1llu << scheduler->getCorrectedOpLat(inst))))) {
564+
int lat = scheduler->getCorrectedOpLat(inst);
565+
uint64_t busy_bit = (lat > 63 ? -1 : (1llu << lat));
566+
if (!(portBusy[pi] & busy_bit)) {
561567
DPRINTF(Schedule, "[sn %ld] was selected\n", inst->seqNum);
562568

563569
// get regfile write port
@@ -680,7 +686,7 @@ void
680686
IssueQue::insert(const DynInstPtr& inst)
681687
{
682688
assert(instNum < iqsize);
683-
opNum[inst->opClass()]++;
689+
(*instNumClassify[inst->opClass()])++;
684690
instNum++;
685691
instNumInsert++;
686692

@@ -828,8 +834,8 @@ bool
828834
Scheduler::disp_policy::operator()(IssueQue* a, IssueQue* b) const
829835
{
830836
// instNum smaller first
831-
int p0 = a->opNum[disp_op];
832-
int p1 = b->opNum[disp_op];
837+
int p0 = *a->instNumClassify[disp_op];
838+
int p1 = *b->instNumClassify[disp_op];
833839
return p0 < p1;
834840
}
835841

@@ -891,9 +897,42 @@ Scheduler::Scheduler(const SchedulerParams& params)
891897
for (int i = 0; i < intRegfileBanks; i++) {
892898
rdRfPortOccupancy[i].resize(maxRdTypePortId, {nullptr, 0});
893899
}
894-
895900
wrRfPortOccupancy.resize(maxWrTypePortId, {nullptr, 0, 0});
896901

902+
903+
// allocate dispatch distance counters
904+
dispOpdist.resize(Num_OpClasses, nullptr);
905+
totalDispCounter.reserve(Num_OpClasses);
906+
std::vector<std::vector<OpClass>> reuse_table;
907+
for (int i = 0; i < Num_OpClasses; i++) {
908+
bool counter_reuse = false;
909+
int reuse_op = 0;
910+
for (int j = 0; j < Num_OpClasses; j++) {
911+
if (dispTable[i] == dispTable[j]) {
912+
counter_reuse = true;
913+
reuse_op = j; // op "i" can reuse the "j" counter
914+
break;
915+
}
916+
}
917+
918+
if (counter_reuse && dispOpdist[reuse_op]) {
919+
dispOpdist[i] = dispOpdist[reuse_op];
920+
} else {
921+
totalDispCounter.push_back(0);
922+
dispOpdist[i] = &totalDispCounter.back();
923+
reuse_table.push_back(std::vector<OpClass>());
924+
}
925+
reuse_table.back().push_back((OpClass)i);
926+
}
927+
928+
for (auto it : reuse_table) {
929+
std::cout << "Dispatch Grouping: ";
930+
for (auto op : it) {
931+
std::cout << enums::OpClassStrings[op] << " ";
932+
}
933+
std::cout << std::endl;
934+
}
935+
897936
// Set TX dynamic read port optimization for all IssueQues
898937
setMainRdpOpt(params.enableMainRdpOpt);
899938

@@ -1030,8 +1069,7 @@ Scheduler::lookahead(std::deque<DynInstPtr>& insts)
10301069
if (old_disp) {
10311070
// do nothing
10321071
} else {
1033-
uint8_t disp_op_num[Num_OpClasses];
1034-
std::memset(disp_op_num, 0, Num_OpClasses);
1072+
std::fill(totalDispCounter.begin(), totalDispCounter.end(), 0);
10351073
int i = 0;
10361074
for (auto& it : insts) {
10371075
auto& iqs = dispTable[it->opClass()];
@@ -1041,8 +1079,8 @@ Scheduler::lookahead(std::deque<DynInstPtr>& insts)
10411079
std::sort(iqs.begin(), iqs.end(), disp_policy(StoreDataOp));
10421080
}
10431081

1044-
dispSeqVec[i] = disp_op_num[it->opClass()] % dispTable[it->opClass()].size();
1045-
disp_op_num[it->opClass()]++;
1082+
dispSeqVec[i] = (*dispOpdist[it->opClass()]) % iqs.size();
1083+
(*dispOpdist[it->opClass()])++;
10461084
i++;
10471085
}
10481086
}

src/cpu/o3/issue_queue.hh

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class IssuePort : public SimObject
6767
public:
6868
std::vector<int> rp; // [typeid, portid]
6969
std::vector<FUDesc*> fu;
70-
std::bitset<Num_OpClasses> mask;
70+
std::bitset<Num_OpClasses> opbits;
7171
IssuePort(const IssuePortParams& params);
7272
};
7373

@@ -112,6 +112,7 @@ class IssueQue : public SimObject
112112
const int replayQsize = 32;
113113
const int scheduleToExecDelay;
114114
const std::string iqname;
115+
std::vector<std::bitset<Num_OpClasses>> portFuDescs;
115116
std::vector<FUDesc*> fuDescs;
116117
std::vector<bool> opPipelined;
117118
int IQID = -1;
@@ -139,7 +140,8 @@ class IssueQue : public SimObject
139140

140141
std::list<DynInstPtr> instList;
141142
uint64_t instNumInsert = 0;
142-
std::vector<uint8_t> opNum;
143+
144+
std::vector<uint8_t*> instNumClassify;
143145
uint64_t instNum = 0;
144146

145147
// issueport : regfileport : priority
@@ -279,15 +281,18 @@ class Scheduler : public SimObject
279281
disp_policy(OpClass op) : disp_op(op) {}
280282
bool operator()(IssueQue* a, IssueQue* b) const;
281283
};
282-
using DispPolicy = std::vector<IssueQue*>;
284+
using IQGroup = std::vector<IssueQue*>;
283285

284286
std::vector<int> opExecTimeTable;
285287
std::vector<bool> opPipelined;
286-
std::vector<DispPolicy> dispTable;
288+
std::vector<IQGroup> dispTable;
287289
std::vector<IssueQue*> issueQues;
288290
std::vector<std::vector<IssueQue*>> wakeMatrix;
289291
uint32_t combinedFus;
290292

293+
std::vector<uint8_t> totalDispCounter;
294+
std::vector<uint8_t*> dispOpdist;
295+
291296
std::vector<DynInstPtr> instsToFu;
292297

293298
std::vector<bool> earlyScoreboard;

0 commit comments

Comments
 (0)