Skip to content

Commit bf722ed

Browse files
committed
mem: add strict mechanism in mdp
Change-Id: Ic8ecdc370e8ff3eb7d99065c7b389b062a133f5e
1 parent e46fe3b commit bf722ed

File tree

4 files changed

+51
-4
lines changed

4 files changed

+51
-4
lines changed

src/cpu/o3/BaseO3CPU.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ def support_take_over(cls):
197197
LFSTEntrySize = Param.Unsigned(4,"The number of store table inst in every entry of LFST can contain")
198198
SSITSize = Param.Unsigned(1024, "Store set ID table size")
199199
enable_storeSet_train = Param.Bool(True, "Training store set predictor")
200+
enable_storeSet_strict_wait = Param.Bool(True, "Enable StoreSet strict wait for loads")
200201

201202
BankConflictCheck = Param.Bool(True, "open Bank conflict check")
202203
sbufferBankWriteAccurately = Param.Bool(False, "Sbuffer write to memory with bank conflict check")

src/cpu/o3/mem_dep_unit.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ MemDepUnit::MemDepUnit(const BaseO3CPUParams &params)
6363
stats(nullptr)
6464
{
6565
DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n");
66+
depPred.setStrictWaitEnabled(params.enable_storeSet_strict_wait);
6667
}
6768

6869
MemDepUnit::~MemDepUnit()
@@ -99,6 +100,7 @@ MemDepUnit::init(const BaseO3CPUParams &params, ThreadID tid, CPU *cpu)
99100

100101
depPred.init(params.store_set_clear_period, params.store_set_clear_thres, params.SSITSize,
101102
params.LFSTSize, params.LFSTEntrySize);
103+
depPred.setStrictWaitEnabled(params.enable_storeSet_strict_wait);
102104

103105
std::string stats_group_name = csprintf("MemDepUnit__%i", tid);
104106
cpu->addStatGroup(stats_group_name.c_str(), &stats);

src/cpu/o3/store_set.cc

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,12 @@ StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size,int _st
5454
SSIT.resize(SSITSize);
5555

5656
validSSIT.resize(SSITSize);
57+
SSITStrict.resize(SSITSize);
5758

58-
for (int i = 0; i < SSITSize; ++i)
59+
for (int i = 0; i < SSITSize; ++i) {
5960
validSSIT[i] = false;
61+
SSITStrict[i] = false;
62+
}
6063

6164
if (!isPowerOf2(LFSTSize)) {
6265
fatal("Invalid LFST size!\n");
@@ -68,6 +71,7 @@ StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size,int _st
6871
validLFSTLarge.resize(LFSTSize);
6972
//validLFST.resize(LFSTSize);
7073
VictimEntryID.resize(LFSTSize);
74+
pendingStores.clear();
7175

7276
for (int i = 0; i < LFSTSize; ++i) {
7377
// validLFST[i] = false;
@@ -120,6 +124,7 @@ StoreSet::init(uint64_t clear_period, int clear_period_thres, int _SSIT_size, in
120124
LFSTLargePC.resize(LFSTSize);
121125
validLFSTLarge.resize(LFSTSize);
122126
VictimEntryID.resize(LFSTSize);
127+
pendingStores.clear();
123128

124129

125130
// LFST.resize(LFSTSize);
@@ -170,10 +175,12 @@ StoreSet::violation(Addr store_PC, Addr load_PC)
170175
validSSIT[load_index] = true;
171176

172177
SSIT[load_index] = ld_new_set;
178+
SSITStrict[load_index] = false;
173179

174180
validSSIT[store_index] = true;
175181

176182
SSIT[store_index] = sd_new_set;
183+
SSITStrict[store_index] = false;
177184

178185
assert(ld_new_set < LFSTSize);
179186
assert(sd_new_set < LFSTSize);
@@ -187,6 +194,7 @@ StoreSet::violation(Addr store_PC, Addr load_PC)
187194
validSSIT[store_index] = true;
188195

189196
SSIT[store_index] = sd_new_set;
197+
SSITStrict[store_index] = false;
190198

191199
assert(sd_new_set < LFSTSize);
192200

@@ -200,6 +208,7 @@ StoreSet::violation(Addr store_PC, Addr load_PC)
200208
validSSIT[load_index] = true;
201209

202210
SSIT[load_index] = ld_new_set;
211+
SSITStrict[load_index] = false;
203212

204213
DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for "
205214
"load %#x, store %#x\n",
@@ -213,16 +222,15 @@ StoreSet::violation(Addr store_PC, Addr load_PC)
213222
// The store set with the lower number wins
214223
if (store_SSID > load_SSID) {
215224
SSIT[store_index] = load_SSID;
225+
SSITStrict[store_index] = false;
216226

217227
DPRINTF(StoreSet, "StoreSet: Load had smaller store set: %i; "
218228
"for load %#x, store %#x\n",
219229
load_SSID, load_PC, store_PC);
220230
} else {
221231
SSIT[load_index] = store_SSID;
222232

223-
if (store_SSID == load_SSID) {
224-
SSITStrict[load_index] = true;
225-
}
233+
SSITStrict[load_index] = (store_SSID == load_SSID);
226234

227235
DPRINTF(StoreSet, "StoreSet: Store had smaller store set: %i; "
228236
"for load %#x, store %#x\n",
@@ -264,6 +272,7 @@ StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid, Cyc
264272
// checkClear();
265273
int victim_inst;
266274
checkClear(curCycle);
275+
pendingStores.insert(store_seq_num);
267276
assert(index < SSITSize);
268277

269278
if (!validSSIT[index]) {
@@ -327,6 +336,16 @@ StoreSet::checkInst(Addr PC)
327336

328337
assert(inst_SSID < LFSTSize);
329338

339+
if (enableStrictWait && checkInstStrict(PC)) {
340+
vec.insert(vec.end(),
341+
pendingStores.begin(),
342+
pendingStores.end());
343+
DPRINTF(StoreSet,
344+
"Strict inst %#x with index=%i, ssid=%i, had %lu outstanding stores\n",
345+
PC, index, inst_SSID, vec.size());
346+
return vec;
347+
}
348+
330349
// if (!validLFST[inst_SSID]) {
331350

332351
// DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no "
@@ -358,6 +377,8 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
358377
return;
359378
}
360379

380+
pendingStores.erase(issued_seq_num);
381+
361382
int index = calcIndexSSIT(issued_PC);
362383

363384
int store_SSID;
@@ -398,6 +419,14 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
398419
void
399420
StoreSet::squash(InstSeqNum squashed_num, ThreadID tid)
400421
{
422+
for (auto it = pendingStores.begin(); it != pendingStores.end();) {
423+
if (*it > squashed_num) {
424+
it = pendingStores.erase(it);
425+
} else {
426+
++it;
427+
}
428+
}
429+
401430
for (int i=0;i<LFSTSize;++i) {
402431
for (int j=0; j<LFSTEntrySize; ++j) {
403432
if (validLFSTLarge[i][j] && LFSTLarge[i][j] > squashed_num) {
@@ -418,13 +447,15 @@ StoreSet::clear()
418447
{
419448
for (int i = 0; i < SSITSize; ++i) {
420449
validSSIT[i] = false;
450+
SSITStrict[i] = false;
421451
}
422452

423453
for (int i = 0; i < LFSTSize; ++i) {
424454
for (int j=0;j<LFSTEntrySize;++j) {
425455
validLFSTLarge[i][j] = false;
426456
}
427457
}
458+
pendingStores.clear();
428459

429460
}
430461

src/cpu/o3/store_set.hh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <cmath>
3333
#include <list>
3434
#include <map>
35+
#include <unordered_set>
3536
#include <utility>
3637
#include <vector>
3738

@@ -103,6 +104,8 @@ class StoreSet
103104
*/
104105
std::vector<InstSeqNum> checkInst(Addr PC);
105106

107+
/**
 * Sets the enableStrictWait flag.
 * @param enable Value to store in enableStrictWait.
 */
void
setStrictWaitEnabled(bool enable)
{
    enableStrictWait = enable;
}
108+
106109
/** Records this PC/sequence number as issued. */
107110
void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
108111

@@ -146,6 +149,16 @@ class StoreSet
146149
/** Bit vector to tell if the LFST has a valid entry. */
147150
std::vector<std::vector<bool>> validLFSTLarge;
148151

152+
/**
153+
* Sequence numbers of stores inserted but not yet issued, squashed or cleared.
154+
*
155+
* For strict loads, we conservatively wait on all outstanding stores
156+
* tracked here.
157+
*/
158+
std::unordered_set<InstSeqNum> pendingStores;
159+
160+
bool enableStrictWait = true;
161+
149162
/** Map of stores that have been inserted into the store set, but
150163
* not yet issued or squashed.
151164
*/

0 commit comments

Comments
 (0)