Skip to content

Commit e0a2739

Browse files
authored
mem: Fix a RAR/RAWQueue bug. (#722)
The current handling of loadCompletedIdx/storeCompletedIdx ignores certain situations, so some instructions located before the completed index may not actually have been committed yet. Change-Id: I7f79ad2ea0bc305f9d389e4372b45a453bf59278
1 parent d5e0972 commit e0a2739

File tree

1 file changed

+75
-44
lines changed

1 file changed

+75
-44
lines changed

src/cpu/o3/lsq_unit.cc

Lines changed: 75 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -584,8 +584,10 @@ LSQUnit::LSQUnit(uint32_t lqEntries, uint32_t sqEntries, uint32_t sbufferEntries
584584
lsqID(-1),
585585
storeQueue(sqEntries),
586586
loadQueue(lqEntries),
587-
loadCompletedIdx(loadQueue.head()),
588-
storeCompletedIdx(storeQueue.head()),
587+
// Use head-1 as a sentinel: no completed entry yet; advance must verify head.
588+
loadCompletedIdx(loadQueue.head() - 1),
589+
// Use head-1 as a sentinel: no completed entry yet; advance must verify head.
590+
storeCompletedIdx(storeQueue.head() - 1),
589591
loadPipe(ldPipeStages - 1, 0),
590592
storePipe(stPipeStages - 1, 0),
591593
storesToWB(0),
@@ -705,9 +707,10 @@ LSQUnit::resetState()
705707

706708
storeWBIt = storeQueue.begin();
707709

708-
// Reset completed iterators
709-
loadCompletedIdx = loadQueue.head();
710-
storeCompletedIdx = storeQueue.head();
710+
// Reset completed indices to head-1 sentinel: no completed entry yet.
711+
// This forces updateCompletedIdx to verify head before advancing.
712+
loadCompletedIdx = loadQueue.head() - 1;
713+
storeCompletedIdx = storeQueue.head() - 1;
711714

712715
retryPkt = NULL;
713716
memDepViolator = NULL;
@@ -2355,16 +2358,37 @@ LSQUnit::squash(const InstSeqNum &squashed_num)
23552358
++stats.squashedLoads;
23562359
}
23572360

2358-
auto loadCompletedIt = loadQueue.getIterator(loadCompletedIdx);
2359-
if (loadCompletedIt->valid() && loadCompletedIt->instruction() &&
2360-
loadCompletedIt->instruction()->seqNum > squashed_num) {
2361-
for (auto it = loadQueue.end(); it != loadQueue.begin(); it--) {
2362-
if (it->instruction()->seqNum < squashed_num) {
2363-
loadCompletedIdx = it.idx();
2364-
break;
2361+
// Squash only removes entries with seqNum > squashed_num. completedIdx
2362+
// is rewound only when it points past squashed_num or is invalid.
2363+
auto rewindCompletedIdx = [&](auto &queue, size_t &completed_idx) {
2364+
if (queue.empty()) {
2365+
completed_idx = queue.head() - 1;
2366+
return;
2367+
}
2368+
2369+
if (!queue.isValidIdx(completed_idx)) {
2370+
completed_idx = queue.head() - 1;
2371+
return;
2372+
}
2373+
2374+
auto cur_it = queue.getIterator(completed_idx);
2375+
if (cur_it->valid() && cur_it->instruction() &&
2376+
cur_it->instruction()->seqNum <= squashed_num) {
2377+
return;
2378+
}
2379+
2380+
for (auto it = queue.end(); it != queue.begin();) {
2381+
--it;
2382+
if (it->valid() && it->instruction() &&
2383+
it->instruction()->seqNum <= squashed_num) {
2384+
completed_idx = it.idx();
2385+
return;
23652386
}
23662387
}
2367-
}
2388+
completed_idx = queue.head() - 1;
2389+
};
2390+
2391+
rewindCompletedIdx(loadQueue, loadCompletedIdx);
23682392

23692393
for (auto it = inflightLoads.begin(); it != inflightLoads.end();) {
23702394
if ((*it)->instruction()->isSquashed()) {
@@ -2446,16 +2470,8 @@ LSQUnit::squash(const InstSeqNum &squashed_num)
24462470
++stats.squashedStores;
24472471
}
24482472

2449-
auto storeCompletedIt = storeQueue.getIterator(storeCompletedIdx);
2450-
if (storeCompletedIt->valid() && storeCompletedIt->instruction() &&
2451-
storeCompletedIt->instruction()->seqNum > squashed_num) {
2452-
for (auto it = storeQueue.end(); it != storeQueue.begin(); it--) {
2453-
if (it->instruction()->seqNum < squashed_num) {
2454-
storeCompletedIdx = it.idx();
2455-
break;
2456-
}
2457-
}
2458-
}
2473+
// Store side follows the same rewind rule.
2474+
rewindCompletedIdx(storeQueue, storeCompletedIdx);
24592475

24602476
auto RARIt = RARQueue.begin();
24612477
while (RARIt != RARQueue.end()) {
@@ -2870,33 +2886,48 @@ LSQUnit::checkStaleTranslations() const
28702886
void
28712887
LSQUnit::updateCompletedIdx()
28722888
{
2873-
// Ensure completed indices are within valid range
2874-
if (loadCompletedIdx < loadQueue.head() - 1 || loadCompletedIdx > loadQueue.tail())
2875-
loadCompletedIdx = loadQueue.head();
2876-
if (storeCompletedIdx < storeQueue.head() - 1 || storeCompletedIdx > storeQueue.tail())
2877-
storeCompletedIdx = storeQueue.head();
2889+
// Keep completed indices within [head-1, tail]; head-1 means no completed entry.
2890+
if (loadQueue.empty()) {
2891+
loadCompletedIdx = loadQueue.head() - 1;
2892+
} else if (loadCompletedIdx < loadQueue.head() - 1 ||
2893+
loadCompletedIdx > loadQueue.tail()) {
2894+
loadCompletedIdx = loadQueue.head() - 1;
2895+
}
2896+
if (storeQueue.empty()) {
2897+
storeCompletedIdx = storeQueue.head() - 1;
2898+
} else if (storeCompletedIdx < storeQueue.head() - 1 ||
2899+
storeCompletedIdx > storeQueue.tail()) {
2900+
storeCompletedIdx = storeQueue.head() - 1;
2901+
}
28782902

28792903
// Advance load completed index (controls RAR queue dequeue rate)
2880-
for (unsigned i = 0; i < loadCompletionWidth; i++) {
2881-
const int currentIdx = loadCompletedIdx;
2882-
auto loadIt = loadQueue.getIterator(loadCompletedIdx + 1);
2883-
if (loadIt->valid() && loadIt->instruction() && loadIt->instruction()->isExecuted()) {
2884-
loadCompletedIdx++;
2885-
DPRINTF(LSQUnit, "loadCompletedIdx [%d]->[%d]\n", currentIdx, loadCompletedIdx);
2886-
} else {
2887-
break;
2904+
if (!loadQueue.empty()) {
2905+
for (unsigned i = 0; i < loadCompletionWidth; i++) {
2906+
const int currentIdx = loadCompletedIdx;
2907+
auto loadIt = loadQueue.getIterator(loadCompletedIdx + 1);
2908+
if (loadIt->valid() && loadIt->instruction() &&
2909+
loadIt->instruction()->isExecuted()) {
2910+
loadCompletedIdx++;
2911+
DPRINTF(LSQUnit, "loadCompletedIdx [%d]->[%d]\n", currentIdx,
2912+
loadCompletedIdx);
2913+
} else {
2914+
break;
2915+
}
28882916
}
28892917
}
28902918

28912919
// Advance store completed index (controls RAW queue dequeue rate)
2892-
for (unsigned i = 0; i < storeCompletionWidth; i++) {
2893-
const int currentIdx = storeCompletedIdx;
2894-
auto storeIt = storeQueue.getIterator(storeCompletedIdx + 1);
2895-
if (storeIt->addrReady() || storeIt->canWB()) {
2896-
storeCompletedIdx++;
2897-
DPRINTF(LSQUnit, "storeCompletedIdx [%d]->[%d]\n", currentIdx, storeCompletedIdx);
2898-
} else {
2899-
break;
2920+
if (!storeQueue.empty()) {
2921+
for (unsigned i = 0; i < storeCompletionWidth; i++) {
2922+
const int currentIdx = storeCompletedIdx;
2923+
auto storeIt = storeQueue.getIterator(storeCompletedIdx + 1);
2924+
if (storeIt->addrReady() || storeIt->canWB()) {
2925+
storeCompletedIdx++;
2926+
DPRINTF(LSQUnit, "storeCompletedIdx [%d]->[%d]\n", currentIdx,
2927+
storeCompletedIdx);
2928+
} else {
2929+
break;
2930+
}
29002931
}
29012932
}
29022933

0 commit comments

Comments
 (0)