3030#define POPINST (x ) \
3131 do { \
3232 assert (instNum != 0 ); \
33- assert (opNum [x->opClass ()] != 0 ); \
34- opNum [x->opClass ()]--; \
33+ assert ((*instNumClassify [x->opClass ()]) != 0 ); \
34+ (*instNumClassify [x->opClass ()]) --; \
3535 instNum--; \
3636 selector->deallocate (x); \
3737 } while (0 )
@@ -68,7 +68,7 @@ IssuePort::IssuePort(const IssuePortParams& params) : SimObject(params), rp(para
6868{
6969 for (auto it0 : params.fu ) {
7070 for (auto it1 : it0->opDescList ) {
71- mask .set (it1->opClass );
71+ opbits .set (it1->opClass );
7272 }
7373 }
7474}
@@ -200,7 +200,6 @@ IssueQue::IssueQue(const IssueQueParams& params)
200200 panic (" %s: outports > 8 is not supported\n " , iqname);
201201 }
202202
203- opNum.resize (enums::Num_OpClass, 0 );
204203 portBusy.resize (outports, 0 );
205204
206205 intRdRfTPI.resize (outports);
@@ -210,9 +209,10 @@ IssueQue::IssueQue(const IssueQueParams& params)
210209 readyQs.resize (outports, nullptr );
211210
212211 readyQclassify.resize (Num_OpClasses, nullptr );
212+ instNumClassify.resize (enums::Num_OpClass, nullptr );
213213 opPipelined.resize (Num_OpClasses, false );
214214
215- std::unordered_map<std::bitset<Num_OpClasses>, ReadyQue*> readyQmap;
215+ std::unordered_map<std::bitset<Num_OpClasses>, std::pair< ReadyQue*, uint8_t *> > readyQmap;
216216 for (int i = 0 ; i < outports; i++) {
217217 auto oport = params.oports [i];
218218
@@ -257,28 +257,33 @@ IssueQue::IssueQue(const IssueQueParams& params)
257257
258258 // safety check for outports
259259 for (int j = i + 1 ; j < outports; j++) {
260- if ((oport->mask != params.oports [j]->mask ) && (oport->mask & params.oports [j]->mask ).any ()) {
260+ if ((oport->opbits != params.oports [j]->opbits ) && (oport->opbits & params.oports [j]->opbits ).any ()) {
261261 panic (" %s: Found the same opClass in different FU, portid: %d and %d\n " , iqname, i, j);
262262 }
263263 }
264264 fuDescs.insert (fuDescs.begin (), oport->fu .begin (), oport->fu .end ());
265+ portFuDescs.push_back (oport->opbits );
265266
266- auto it = readyQmap.find (oport->mask );
267- ReadyQue* t = nullptr ;
267+ auto it = readyQmap.find (oport->opbits );
268+ ReadyQue* readyQ = nullptr ;
269+ uint8_t * counter = nullptr ;
268270 if (it == readyQmap.end ()) {
269271 // create a new ReadyQue
270- t = new ReadyQue;
271- readyQmap[oport->mask ] = t;
272+ readyQ = new ReadyQue;
273+ counter = new uint8_t (0 );
274+ readyQmap[oport->opbits ] = std::make_pair (readyQ, counter);
272275 } else {
273276 // use the existing one
274- t = it->second ;
277+ readyQ = it->second .first ;
278+ counter = it->second .second ;
275279 }
276- readyQs[i] = t ;
280+ readyQs[i] = readyQ ;
277281
278282 bool storePipeAcc = false , loadPipeAcc = false ;
279283 for (auto fu : oport->fu ) {
280284 for (auto op : fu->opDescList ) {
281- readyQclassify[op->opClass ] = t;
285+ readyQclassify[op->opClass ] = readyQ;
286+ instNumClassify[op->opClass ] = counter;
282287 opPipelined[op->opClass ] = op->pipelined ;
283288
284289 if (op->opClass >= MemReadOp && op->opClass <= VectorWholeRegisterLoadOp) {
@@ -556,8 +561,9 @@ IssueQue::selectInst()
556561 continue ;
557562 }
558563
559- if (!(portBusy[pi] &
560- (scheduler->getCorrectedOpLat (inst) > 63 ? -1 : (1llu << scheduler->getCorrectedOpLat (inst))))) {
564+ int lat = scheduler->getCorrectedOpLat (inst);
565+ uint64_t busy_bit = (lat > 63 ? -1 : (1llu << lat));
566+ if (!(portBusy[pi] & busy_bit)) {
561567 DPRINTF (Schedule, " [sn %ld] was selected\n " , inst->seqNum );
562568
563569 // get regfile write port
680686IssueQue::insert (const DynInstPtr& inst)
681687{
682688 assert (instNum < iqsize);
683- opNum [inst->opClass ()]++;
689+ (*instNumClassify [inst->opClass ()]) ++;
684690 instNum++;
685691 instNumInsert++;
686692
828834Scheduler::disp_policy::operator ()(IssueQue* a, IssueQue* b) const
829835{
830836 // initNum smaller first
831- int p0 = a->opNum [disp_op];
832- int p1 = b->opNum [disp_op];
837+ int p0 = * a->instNumClassify [disp_op];
838+ int p1 = * b->instNumClassify [disp_op];
833839 return p0 < p1;
834840}
835841
@@ -891,9 +897,42 @@ Scheduler::Scheduler(const SchedulerParams& params)
891897 for (int i = 0 ; i < intRegfileBanks; i++) {
892898 rdRfPortOccupancy[i].resize (maxRdTypePortId, {nullptr , 0 });
893899 }
894-
895900 wrRfPortOccupancy.resize (maxWrTypePortId, {nullptr , 0 , 0 });
896901
902+
903+ // dispatch distance counter allocate
904+ dispOpdist.resize (Num_OpClasses, nullptr );
905+ totalDispCounter.reserve (Num_OpClasses);
906+ std::vector<std::vector<OpClass>> reuse_table;
907+ for (int i = 0 ; i < Num_OpClasses; i++) {
908+ bool counter_reuse = false ;
909+ int reuse_op = 0 ;
910+ for (int j = 0 ; j < Num_OpClasses; j++) {
911+ if (dispTable[i] == dispTable[j]) {
912+ counter_reuse = true ;
913+ reuse_op = j; // op "i" can reuse the "j" counter
914+ break ;
915+ }
916+ }
917+
918+ if (counter_reuse && dispOpdist[reuse_op]) {
919+ dispOpdist[i] = dispOpdist[reuse_op];
920+ } else {
921+ totalDispCounter.push_back (0 );
922+ dispOpdist[i] = &totalDispCounter.back ();
923+ reuse_table.push_back (std::vector<OpClass>());
924+ }
925+ reuse_table.back ().push_back ((OpClass)i);
926+ }
927+
928+ for (auto it : reuse_table) {
929+ std::cout << " Dispatch Grouping: " ;
930+ for (auto op : it) {
931+ std::cout << enums::OpClassStrings[op] << " " ;
932+ }
933+ std::cout << std::endl;
934+ }
935+
897936 // Set TX dynamic read port optimization for all IssueQues
898937 setMainRdpOpt (params.enableMainRdpOpt );
899938
@@ -1030,8 +1069,7 @@ Scheduler::lookahead(std::deque<DynInstPtr>& insts)
10301069 if (old_disp) {
10311070 // donothing
10321071 } else {
1033- uint8_t disp_op_num[Num_OpClasses];
1034- std::memset (disp_op_num, 0 , Num_OpClasses);
1072+ std::fill (totalDispCounter.begin (), totalDispCounter.end (), 0 );
10351073 int i = 0 ;
10361074 for (auto & it : insts) {
10371075 auto & iqs = dispTable[it->opClass ()];
@@ -1041,8 +1079,8 @@ Scheduler::lookahead(std::deque<DynInstPtr>& insts)
10411079 std::sort (iqs.begin (), iqs.end (), disp_policy (StoreDataOp));
10421080 }
10431081
1044- dispSeqVec[i] = disp_op_num [it->opClass ()] % dispTable[it-> opClass ()] .size ();
1045- disp_op_num [it->opClass ()]++;
1082+ dispSeqVec[i] = (*dispOpdist [it->opClass ()]) % iqs .size ();
1083+ (*dispOpdist [it->opClass ()]) ++;
10461084 i++;
10471085 }
10481086 }
0 commit comments