diff --git a/src/cpu/pred/btb/btb_mgsc.cc b/src/cpu/pred/btb/btb_mgsc.cc index a7d32137a1..694bb0b3c1 100755 --- a/src/cpu/pred/btb/btb_mgsc.cc +++ b/src/cpu/pred/btb/btb_mgsc.cc @@ -1,7 +1,19 @@ #include "cpu/pred/btb/btb_mgsc.hh" -#ifndef UNIT_TEST +#include "base/intmath.hh" + +#ifdef UNIT_TEST +#include "cpu/pred/btb/test/test_dprintf.hh" + +// Define debug flags for unit testing +namespace gem5 { +namespace debug { + bool MGSC = true; +} +} +#else #include "cpu/o3/dyn_inst.hh" +#include "debug/MGSC.hh" #endif @@ -13,8 +25,6 @@ #include #include -#include "debug/MGSC.hh" - namespace gem5 { @@ -24,6 +34,140 @@ namespace branch_prediction namespace btb_pred { +#ifdef UNIT_TEST +namespace test +{ +#endif + +void +BTBMGSC::initStorage() +{ + auto pow2 = [](unsigned width) -> uint64_t { + assert(width < 63); + return 1ULL << width; + }; + auto allocPredTable = [&](std::vector>> &table, unsigned numTables, + unsigned idxWidth) -> uint64_t { + table.resize(numTables); + auto tableSize = pow2(idxWidth); + assert(tableSize > numCtrsPerLine); + for (unsigned int i = 0; i < numTables; ++i) { + table[i].resize(tableSize / numCtrsPerLine, std::vector(numCtrsPerLine, 0)); + } + return tableSize; + }; + + assert(isPowerOf2(numCtrsPerLine)); + numCtrsPerLineBits = log2i(numCtrsPerLine); + + auto bwTableSize = allocPredTable(bwTable, bwTableNum, bwTableIdxWidth); + for (unsigned int i = 0; i < bwTableNum; ++i) { + indexBwFoldedHist.push_back(GlobalBwFoldedHist(bwHistLen[i], bwTableIdxWidth - numCtrsPerLineBits, 16)); + } + bwIndex.resize(bwTableNum); + + auto lTableSize = allocPredTable(lTable, lTableNum, lTableIdxWidth); + indexLFoldedHist.resize(numEntriesFirstLocalHistories); + for (unsigned int i = 0; i < lTableNum; ++i) { + for (unsigned int k = 0; k < numEntriesFirstLocalHistories; ++k) { + indexLFoldedHist[k].push_back(LocalFoldedHist(lHistLen[i], lTableIdxWidth - numCtrsPerLineBits, 16)); + } + } + lIndex.resize(lTableNum); + + auto iTableSize = allocPredTable(iTable, 
iTableNum, iTableIdxWidth); + for (unsigned int i = 0; i < iTableNum; ++i) { + assert(iHistLen[i] >= 0); + assert(static_cast(iHistLen[i]) < 63); + assert(pow2(static_cast(iHistLen[i])) <= iTableSize); + indexIFoldedHist.push_back(ImliFoldedHist(iHistLen[i], iTableIdxWidth - numCtrsPerLineBits, 16)); + } + iIndex.resize(iTableNum); + + auto gTableSize = allocPredTable(gTable, gTableNum, gTableIdxWidth); + for (unsigned int i = 0; i < gTableNum; ++i) { + assert(gTable.size() >= gTableNum); + indexGFoldedHist.push_back(GlobalFoldedHist(gHistLen[i], gTableIdxWidth - numCtrsPerLineBits, 16)); + } + gIndex.resize(gTableNum); + + auto pTableSize = allocPredTable(pTable, pTableNum, pTableIdxWidth); + for (unsigned int i = 0; i < pTableNum; ++i) { + assert(pTable.size() >= pTableNum); + indexPFoldedHist.push_back(PathFoldedHist(pHistLen[i], pTableIdxWidth - numCtrsPerLineBits, 2)); + } + pIndex.resize(pTableNum); + + allocPredTable(biasTable, biasTableNum, biasTableIdxWidth); + biasIndex.resize(biasTableNum); + + auto weightTableSize = pow2(weightTableIdxWidth); + bwWeightTable.resize(weightTableSize); + lWeightTable.resize(weightTableSize); + iWeightTable.resize(weightTableSize); + gWeightTable.resize(weightTableSize); + pWeightTable.resize(weightTableSize); + biasWeightTable.resize(weightTableSize); + + pUpdateThreshold.resize(pow2(thresholdTablelogSize)); +} + +#ifdef UNIT_TEST +BTBMGSC::BTBMGSC() + : TimedBaseBTBPredictor(), + bwTableNum(1), + // Use a slightly larger idx width so foldedLen is not too small (helps pattern-learning tests). + bwTableIdxWidth(6), + bwHistLen({4}), + numEntriesFirstLocalHistories(4), + lTableNum(1), + // Use a slightly larger idx width so foldedLen is not too small (helps pattern-learning tests). + lTableIdxWidth(6), + lHistLen({4}), + iTableNum(1), + iTableIdxWidth(5), + // `ImliFoldedHist` requires foldedLen >= histLen. 
With `numCtrsPerLine=8` and `iTableIdxWidth=5`, + // foldedLen is small (5 - log2(8) = 2), so histLen can be at most 2 for unit tests. + // Use exactly 2 (not 1) so we can build loop-trip-count tests on IMLI. + iHistLen({2}), + gTableNum(1), + // Use a slightly larger idx width so foldedLen is not too small (helps pattern-learning tests). + gTableIdxWidth(6), + gHistLen({4}), + pTableNum(1), + // Use a slightly larger idx width so foldedLen is not too small (helps pattern-learning tests). + pTableIdxWidth(6), + pHistLen({4}), + biasTableNum(1), + biasTableIdxWidth(5), + scCountersWidth(6), + thresholdTablelogSize(4), + updateThresholdWidth(12), + pUpdateThresholdWidth(8), + extraWeightsWidth(6), + weightTableIdxWidth(4), + // Keep consistent with `src/cpu/pred/BranchPredictor.py` default (8 counters per SRAM line). + // This models "read a whole SRAM line, then pick a lane" behavior in `posHash()`. + numCtrsPerLine(8), + forceUseSC(false), + enableBwTable(true), + enableLTable(true), + enableITable(true), + enableGTable(true), + enablePTable(true), + enableBiasTable(true), + enablePCThreshold(false), + mgscStats() +{ + // Test-only small config: keep tables tiny and deterministic for fast unit tests. + // MGSC uses multiple histories (GHR/PHR/BWHR/LHR). Keep `needMoreHistories` enabled in unit tests so we can + // build training-loop style tests that exercise each table.
+ needMoreHistories = true; + + initStorage(); + updateThreshold = 35 * 8; +} +#else // Constructor: Initialize MGSC predictor with given parameters BTBMGSC::BTBMGSC(const Params &p) : TimedBaseBTBPredictor(p), @@ -64,82 +208,13 @@ BTBMGSC::BTBMGSC(const Params &p) { DPRINTF(MGSC, "BTBMGSC constructor\n"); this->needMoreHistories = p.needMoreHistories; - - assert(isPowerOf2(numCtrsPerLine)); - numCtrsPerLineBits = log2i(numCtrsPerLine); - - bwTable.resize(bwTableNum); - auto bwTableSize = std::pow(2, bwTableIdxWidth); - assert(bwTableSize > numCtrsPerLine); - for (unsigned int i = 0; i < bwTableNum; ++i) { - bwTable[i].resize(bwTableSize / numCtrsPerLine, std::vector(numCtrsPerLine, 0)); - indexBwFoldedHist.push_back(GlobalBwFoldedHist(bwHistLen[i], bwTableIdxWidth - numCtrsPerLineBits, 16)); - } - bwIndex.resize(bwTableNum); - - lTable.resize(lTableNum); - indexLFoldedHist.resize(numEntriesFirstLocalHistories); - auto lTableSize = std::pow(2, lTableIdxWidth); - assert(lTableSize > numCtrsPerLine); - for (unsigned int i = 0; i < lTableNum; ++i) { - lTable[i].resize(lTableSize / numCtrsPerLine, std::vector(numCtrsPerLine, 0)); - for (unsigned int k = 0; k < numEntriesFirstLocalHistories; ++k) { - indexLFoldedHist[k].push_back(LocalFoldedHist(lHistLen[i], lTableIdxWidth - numCtrsPerLineBits, 16)); - } - } - lIndex.resize(lTableNum); - - iTable.resize(iTableNum); - auto iTableSize = std::pow(2, iTableIdxWidth); - assert(iTableSize > numCtrsPerLine); - for (unsigned int i = 0; i < iTableNum; ++i) { - assert(std::pow(2, iHistLen[i]) <= iTableSize); - iTable[i].resize(iTableSize / numCtrsPerLine, std::vector(numCtrsPerLine, 0)); - indexIFoldedHist.push_back(ImliFoldedHist(iHistLen[i], iTableIdxWidth - numCtrsPerLineBits, 16)); - } - iIndex.resize(iTableNum); - - gTable.resize(gTableNum); - auto gTableSize = std::pow(2, gTableIdxWidth); - assert(gTableSize > numCtrsPerLine); - for (unsigned int i = 0; i < gTableNum; ++i) { - assert(gTable.size() >= gTableNum); - 
gTable[i].resize(gTableSize / numCtrsPerLine, std::vector(numCtrsPerLine, 0)); - indexGFoldedHist.push_back(GlobalFoldedHist(gHistLen[i], gTableIdxWidth - numCtrsPerLineBits, 16)); - } - gIndex.resize(gTableNum); - - pTable.resize(pTableNum); - auto pTableSize = std::pow(2, pTableIdxWidth); - assert(pTableSize > numCtrsPerLine); - for (unsigned int i = 0; i < pTableNum; ++i) { - assert(pTable.size() >= pTableNum); - pTable[i].resize(pTableSize / numCtrsPerLine, std::vector(numCtrsPerLine, 0)); - indexPFoldedHist.push_back(PathFoldedHist(pHistLen[i], pTableIdxWidth - numCtrsPerLineBits, 2)); - } - pIndex.resize(pTableNum); - - biasTable.resize(biasTableNum); - auto biasTableSize = std::pow(2, biasTableIdxWidth); - assert(biasTableSize > numCtrsPerLine); - for (unsigned int i = 0; i < biasTableNum; ++i) { - biasTable[i].resize(biasTableSize / numCtrsPerLine, std::vector(numCtrsPerLine, 0)); - } - biasIndex.resize(biasTableNum); - - bwWeightTable.resize(std::pow(2, weightTableIdxWidth)); - lWeightTable.resize(std::pow(2, weightTableIdxWidth)); - iWeightTable.resize(std::pow(2, weightTableIdxWidth)); - gWeightTable.resize(std::pow(2, weightTableIdxWidth)); - pWeightTable.resize(std::pow(2, weightTableIdxWidth)); - biasWeightTable.resize(std::pow(2, weightTableIdxWidth)); - pUpdateThreshold.resize(std::pow(2, thresholdTablelogSize)); - + initStorage(); updateThreshold = 35 * 8; hasDB = true; dbName = std::string("mgsc"); } +#endif BTBMGSC::~BTBMGSC() {} // Set up tracing for debugging @@ -284,6 +359,9 @@ BTBMGSC::generateSinglePrediction(const BTBEntry &btb_entry, const Addr &startPC lIndex[i] = getHistIndex(startPC, lTableIdxWidth - numCtrsPerLineBits, indexLFoldedHist[getPcIndex(startPC, log2(numEntriesFirstLocalHistories))][i].get()); } + // std::string buf; + // boost::to_string(indexLFoldedHist[getPcIndex(startPC, log2(numEntriesFirstLocalHistories))][0].getAsBitset(), buf); + // DPRINTF(MGSC, "startPC: %#lx, local index: %d, local_folded_hist: %s\n", startPC, 
lIndex[0], buf.c_str()); for (unsigned int i = 0; i < iTableNum; ++i) { iIndex[i] = getHistIndex(startPC, iTableIdxWidth - numCtrsPerLineBits, indexIFoldedHist[i].get()); @@ -355,6 +433,10 @@ BTBMGSC::generateSinglePrediction(const BTBEntry &btb_entry, const Addr &startPC // Final prediction, total_sum >= 0 means taken if use_sc_pred bool taken = use_sc_pred ? (total_sum >= 0) : tage_info.tage_pred_taken; + // DPRINTF(MGSC, "global tag_index: %d, global_percsum: %d, total_sum: %d\n", gIndex[0], g_percsum, total_sum); + // DPRINTF(MGSC, "local tag_index: %d, local_percsum: %d, total_sum: %d\n", lIndex[0], l_percsum, total_sum); + // DPRINTF(MGSC, "path tag_index: %d, path_percsum: %d, total_sum: %d\n", pIndex[0], p_percsum, total_sum); + // Calculate weight scale differences bool bw_weight_scale_diff = calculateWeightScaleDiff(total_sum, bw_scaled_percsum, bw_percsum); bool l_weight_scale_diff = calculateWeightScaleDiff(total_sum, l_scaled_percsum, l_percsum); @@ -601,7 +683,7 @@ BTBMGSC::recordPredictionStats(const MgscPrediction &pred, bool actual_taken, bo } // Record raw percsum correctness and weight criticality for each table - auto recordPercsum = [&](int percsum, statistics::Scalar &correct, statistics::Scalar &wrong) { + auto recordPercsum = [&](int percsum, auto &correct, auto &wrong) { if ((percsum >= 0) == actual_taken) { correct++; } else { @@ -1027,21 +1109,23 @@ BTBMGSC::specUpdateBwHist(const boost::dynamic_bitset<> &history, FullBTBPredict * @brief Updates IMLI branch history for speculative execution * * This function updates the branch history for speculative execution - * based on the provided history and prediction information. + * based on the prediction information. * * It first retrieves the history information from the prediction metadata * and then calls the doUpdateHist function to update the folded histories. + * Note: IMLI only uses counter, not history bits. 
* - * @param history The current imli branch history * @param pred The prediction metadata containing history information */ void -BTBMGSC::specUpdateIHist(const boost::dynamic_bitset<> &history, FullBTBPrediction &pred) +BTBMGSC::specUpdateIHist(FullBTBPrediction &pred) { int shamt; bool cond_taken; std::tie(shamt, cond_taken) = pred.getBwHistInfo(); - doUpdateHist(history, shamt, cond_taken, indexIFoldedHist); + // IMLI uses counter only, pass empty bitset (not used by ImliFoldedHist::update) + boost::dynamic_bitset<> dummy; + doUpdateHist(dummy, shamt, cond_taken, indexIFoldedHist); } /** @@ -1151,14 +1235,14 @@ BTBMGSC::recoverBwHist(const boost::dynamic_bitset<> &history, const FetchStream * 1. Restores the folded histories from the saved metadata * 2. Updates the histories with the correct branch outcome * 3. Ensures predictor state is consistent after recovery + * Note: IMLI only uses counter, not history bits. * - * @param history The branch history to recover to * @param entry The fetch stream entry containing recovery information * @param shamt Number of bits to shift in history update * @param cond_taken The actual branch outcome */ void -BTBMGSC::recoverIHist(const boost::dynamic_bitset<> &history, const FetchStream &entry, int shamt, bool cond_taken) +BTBMGSC::recoverIHist(const FetchStream &entry, int shamt, bool cond_taken) { if (!isEnabled()) { return; // No recover when disabled @@ -1167,7 +1251,9 @@ BTBMGSC::recoverIHist(const boost::dynamic_bitset<> &history, const FetchStream for (int i = 0; i < iTableNum; i++) { indexIFoldedHist[i].recover(predMeta->indexIFoldedHist[i]); } - doUpdateHist(history, shamt, cond_taken, indexIFoldedHist); + // IMLI uses counter only, pass empty bitset (not used by ImliFoldedHist::update) + boost::dynamic_bitset<> dummy; + doUpdateHist(dummy, shamt, cond_taken, indexIFoldedHist); } /** @@ -1196,10 +1282,11 @@ BTBMGSC::recoverLHist(const std::vector> &history, const 
indexLFoldedHist[k][i].recover(predMeta->indexLFoldedHist[k][i]); } } - doUpdateHist(history[getPcIndex(entry.startPC, log2(numEntriesFirstLocalHistories))], shamt, cond_taken, - indexLFoldedHist[getPcIndex(entry.startPC, log2(numEntriesFirstLocalHistories))]); -} + doUpdateHist(history[getPcIndex(entry.startPC, log2(numEntriesFirstLocalHistories))], shamt, cond_taken, + indexLFoldedHist[getPcIndex(entry.startPC, log2(numEntriesFirstLocalHistories))]); + } +#ifndef UNIT_TEST // Constructor for TAGE statistics BTBMGSC::MgscStats::MgscStats(statistics::Group *parent) : statistics::Group(parent), @@ -1258,7 +1345,9 @@ BTBMGSC::MgscStats::MgscStats(statistics::Group *parent) ADD_STAT(scLowBypass, statistics::units::Count::get(), "tage low conf, sc not used") { } +#endif +#ifndef UNIT_TEST void BTBMGSC::commitBranch(const FetchStream &stream, const DynInstPtr &inst) { @@ -1309,6 +1398,7 @@ BTBMGSC::commitBranch(const FetchStream &stream, const DynInstPtr &inst) } } +#endif void BTBMGSC::checkFoldedHist(const boost::dynamic_bitset<> &Ghistory, const boost::dynamic_bitset<> &PHistory, @@ -1337,6 +1427,10 @@ BTBMGSC::checkFoldedHist(const boost::dynamic_bitset<> &Ghistory, const boost::d } } +#ifdef UNIT_TEST +} // namespace test +#endif + } // namespace btb_pred } // namespace branch_prediction diff --git a/src/cpu/pred/btb/btb_mgsc.hh b/src/cpu/pred/btb/btb_mgsc.hh index f0e834e735..a1fdae5c01 100755 --- a/src/cpu/pred/btb/btb_mgsc.hh +++ b/src/cpu/pred/btb/btb_mgsc.hh @@ -4,7 +4,9 @@ #include #include #include +#include #include +#include #include #include @@ -15,7 +17,10 @@ #include "cpu/pred/btb/folded_hist.hh" #include "cpu/pred/btb/stream_struct.hh" #include "cpu/pred/btb/timed_base_pred.hh" + +#ifndef UNIT_TEST #include "params/BTBMGSC.hh" +#endif namespace gem5 { @@ -26,11 +31,20 @@ namespace branch_prediction namespace btb_pred { +// Conditional namespace wrapper for testing +#ifdef UNIT_TEST +namespace test { +#endif + class BTBMGSC : public 
TimedBaseBTBPredictor { public: +#ifdef UNIT_TEST + BTBMGSC(); +#else typedef BTBMGSCParams Params; - + BTBMGSC(const Params &p); +#endif // Contains the complete prediction result struct MgscPrediction { @@ -133,7 +147,6 @@ class BTBMGSC : public TimedBaseBTBPredictor }; public: - BTBMGSC(const Params &p); ~BTBMGSC(); void tickStart() override; @@ -149,7 +162,7 @@ class BTBMGSC : public TimedBaseBTBPredictor void specUpdateHist(const boost::dynamic_bitset<> &history, FullBTBPrediction &pred) override; void specUpdatePHist(const boost::dynamic_bitset<> &history, FullBTBPrediction &pred) override; void specUpdateBwHist(const boost::dynamic_bitset<> &history, FullBTBPrediction &pred) override; - void specUpdateIHist(const boost::dynamic_bitset<> &history, FullBTBPrediction &pred) override; + void specUpdateIHist(FullBTBPrediction &pred) override; void specUpdateLHist(const std::vector> &history, FullBTBPrediction &pred) override; // Recover all folded history after a misprediction, then update all folded history according to history and @@ -160,7 +173,7 @@ class BTBMGSC : public TimedBaseBTBPredictor bool cond_taken) override; void recoverBwHist(const boost::dynamic_bitset<> &history, const FetchStream &entry, int shamt, bool cond_taken) override; - void recoverIHist(const boost::dynamic_bitset<> &history, const FetchStream &entry, int shamt, + void recoverIHist(const FetchStream &entry, int shamt, bool cond_taken) override; void recoverLHist(const std::vector> &history, const FetchStream &entry, int shamt, bool cond_taken) override; @@ -168,7 +181,9 @@ class BTBMGSC : public TimedBaseBTBPredictor // Update predictor state based on actual branch outcomes void update(const FetchStream &entry) override; +#ifndef UNIT_TEST void commitBranch(const FetchStream &stream, const DynInstPtr &inst) override; +#endif void setTrace() override; @@ -400,77 +415,149 @@ class BTBMGSC : public TimedBaseBTBPredictor std::vector pIndex; std::vector biasIndex; - // Statistics for MGSC 
predictor +#ifdef UNIT_TEST + typedef uint64_t Scalar; +#else + typedef statistics::Scalar Scalar; +#endif + + // Statistics for MGSC predictor +#ifdef UNIT_TEST + struct MgscStats + { +#else struct MgscStats : public statistics::Group { - statistics::Scalar scCorrectTageWrong; - statistics::Scalar scWrongTageCorrect; - statistics::Scalar scCorrectTageCorrect; - statistics::Scalar scWrongTageWrong; - statistics::Scalar scUsed; - statistics::Scalar scNotUsed; - - statistics::Scalar predHit; - statistics::Scalar predMiss; - statistics::Scalar scPredCorrect; - statistics::Scalar scPredWrong; - statistics::Scalar scPredMissTaken; - statistics::Scalar scPredMissNotTaken; - statistics::Scalar scPredCorrectTageWrong; - statistics::Scalar scPredWrongTageCorrect; +#endif + Scalar scCorrectTageWrong{}; + Scalar scWrongTageCorrect{}; + Scalar scCorrectTageCorrect{}; + Scalar scWrongTageWrong{}; + Scalar scUsed{}; + Scalar scNotUsed{}; + + Scalar predHit{}; + Scalar predMiss{}; + Scalar scPredCorrect{}; + Scalar scPredWrong{}; + Scalar scPredMissTaken{}; + Scalar scPredMissNotTaken{}; + Scalar scPredCorrectTageWrong{}; + Scalar scPredWrongTageCorrect{}; // Weight scale sensitivity (how often this table is decisive) - statistics::Scalar bwWeightScaleDiff; - statistics::Scalar lWeightScaleDiff; - statistics::Scalar iWeightScaleDiff; - statistics::Scalar gWeightScaleDiff; - statistics::Scalar pWeightScaleDiff; - statistics::Scalar biasWeightScaleDiff; + Scalar bwWeightScaleDiff{}; + Scalar lWeightScaleDiff{}; + Scalar iWeightScaleDiff{}; + Scalar gWeightScaleDiff{}; + Scalar pWeightScaleDiff{}; + Scalar biasWeightScaleDiff{}; // Raw percsum correctness per table - statistics::Scalar bwPercsumCorrect; - statistics::Scalar bwPercsumWrong; - statistics::Scalar lPercsumCorrect; - statistics::Scalar lPercsumWrong; - statistics::Scalar iPercsumCorrect; - statistics::Scalar iPercsumWrong; - statistics::Scalar gPercsumCorrect; - statistics::Scalar gPercsumWrong; - statistics::Scalar
pPercsumCorrect; - statistics::Scalar pPercsumWrong; - statistics::Scalar biasPercsumCorrect; - statistics::Scalar biasPercsumWrong; + Scalar bwPercsumCorrect{}; + Scalar bwPercsumWrong{}; + Scalar lPercsumCorrect{}; + Scalar lPercsumWrong{}; + Scalar iPercsumCorrect{}; + Scalar iPercsumWrong{}; + Scalar gPercsumCorrect{}; + Scalar gPercsumWrong{}; + Scalar pPercsumCorrect{}; + Scalar pPercsumWrong{}; + Scalar biasPercsumCorrect{}; + Scalar biasPercsumWrong{}; // Threshold updates - statistics::Scalar pcThresholdInc; - statistics::Scalar pcThresholdDec; - statistics::Scalar globalThresholdInc; - statistics::Scalar globalThresholdDec; + Scalar pcThresholdInc{}; + Scalar pcThresholdDec{}; + Scalar globalThresholdInc{}; + Scalar globalThresholdDec{}; // Use/non-use of SC under different TAGE confidences - statistics::Scalar scHighUseCorrect; - statistics::Scalar scHighUseWrong; - statistics::Scalar scMidUseCorrect; - statistics::Scalar scMidUseWrong; - statistics::Scalar scLowUseCorrect; - statistics::Scalar scLowUseWrong; - statistics::Scalar scHighBypass; - statistics::Scalar scMidBypass; - statistics::Scalar scLowBypass; - + Scalar scHighUseCorrect{}; + Scalar scHighUseWrong{}; + Scalar scMidUseCorrect{}; + Scalar scMidUseWrong{}; + Scalar scLowUseCorrect{}; + Scalar scLowUseWrong{}; + Scalar scHighBypass{}; + Scalar scMidBypass{}; + Scalar scLowBypass{}; + +#ifndef UNIT_TEST MgscStats(statistics::Group *parent); +#endif }; MgscStats mgscStats; +#ifndef UNIT_TEST TraceManager *mgscMissTrace; +#endif public: // Recover folded history after misprediction void recoverFoldedHist(const boost::dynamic_bitset<> &history); unsigned getNumEntriesFirstLocalHistories() { return numEntriesFirstLocalHistories; }; +#ifdef UNIT_TEST + struct TestAccess + { + static unsigned numCtrsPerLine(const BTBMGSC &mgsc) { return mgsc.numCtrsPerLine; } + static unsigned numCtrsPerLineBits(const BTBMGSC &mgsc) { return mgsc.numCtrsPerLineBits; } + + static unsigned bwTableIdxWidth(const 
BTBMGSC &mgsc) { return mgsc.bwTableIdxWidth; } + static unsigned lTableIdxWidth(const BTBMGSC &mgsc) { return mgsc.lTableIdxWidth; } + static unsigned iTableIdxWidth(const BTBMGSC &mgsc) { return mgsc.iTableIdxWidth; } + static unsigned gTableIdxWidth(const BTBMGSC &mgsc) { return mgsc.gTableIdxWidth; } + static unsigned pTableIdxWidth(const BTBMGSC &mgsc) { return mgsc.pTableIdxWidth; } + static unsigned biasTableIdxWidth(const BTBMGSC &mgsc) { return mgsc.biasTableIdxWidth; } + + static bool &forceUseSC(BTBMGSC &mgsc) { return mgsc.forceUseSC; } + static bool &enableBwTable(BTBMGSC &mgsc) { return mgsc.enableBwTable; } + static bool &enableLTable(BTBMGSC &mgsc) { return mgsc.enableLTable; } + static bool &enableITable(BTBMGSC &mgsc) { return mgsc.enableITable; } + static bool &enableGTable(BTBMGSC &mgsc) { return mgsc.enableGTable; } + static bool &enablePTable(BTBMGSC &mgsc) { return mgsc.enablePTable; } + static bool &enableBiasTable(BTBMGSC &mgsc) { return mgsc.enableBiasTable; } + static bool &enablePCThreshold(BTBMGSC &mgsc) { return mgsc.enablePCThreshold; } + + static auto &bwTable(BTBMGSC &mgsc) { return mgsc.bwTable; } + static auto &lTable(BTBMGSC &mgsc) { return mgsc.lTable; } + static auto &iTable(BTBMGSC &mgsc) { return mgsc.iTable; } + static auto &gTable(BTBMGSC &mgsc) { return mgsc.gTable; } + static auto &pTable(BTBMGSC &mgsc) { return mgsc.pTable; } + static auto &biasTable(BTBMGSC &mgsc) { return mgsc.biasTable; } + + static auto &updateThreshold(BTBMGSC &mgsc) { return mgsc.updateThreshold; } + static auto &pUpdateThreshold(BTBMGSC &mgsc) { return mgsc.pUpdateThreshold; } + + static Addr getHistIndex(BTBMGSC &mgsc, Addr pc, unsigned tableIndexBits, uint64_t foldedHist) + { + return mgsc.getHistIndex(pc, tableIndexBits, foldedHist); + } + + static Addr getBiasIndex(BTBMGSC &mgsc, Addr pc, unsigned tableIndexBits, bool lowbit0, bool lowbit1) + { + return mgsc.getBiasIndex(pc, tableIndexBits, lowbit0, lowbit1); + } + + static std::tuple 
posHash(BTBMGSC &mgsc, Addr pc, unsigned tableIdx) + { + return mgsc.posHash(pc, tableIdx); + } + + static const std::unordered_map &preds(const BTBMGSC &mgsc) + { + assert(mgsc.meta); + return mgsc.meta->preds; + } + }; +#endif + private: + void initStorage(); + // Metadata for MGSC predictions typedef struct MgscMeta { @@ -506,10 +593,15 @@ class BTBMGSC : public TimedBaseBTBPredictor std::shared_ptr meta; }; + +// Close conditional namespace wrapper for testing +#ifdef UNIT_TEST +} // namespace test +#endif } } } -#endif // __CPU_PRED_BTB_TAGE_HH__ +#endif // __CPU_PRED_BTB_MGSC_HH__ diff --git a/src/cpu/pred/btb/decoupled_bpred.cc b/src/cpu/pred/btb/decoupled_bpred.cc index bef8cb0862..4a3fd2bbae 100644 --- a/src/cpu/pred/btb/decoupled_bpred.cc +++ b/src/cpu/pred/btb/decoupled_bpred.cc @@ -94,7 +94,6 @@ DecoupledBPUWithBTB::DecoupledBPUWithBTB(const DecoupledBPUWithBTBParams &p) s0History.resize(historyBits, 0); s0PHistory.resize(historyBits, 0); s0BwHistory.resize(historyBits, 0); - s0IHistory.resize(historyBits, 0); s0LHistory.resize(mgsc->getNumEntriesFirstLocalHistories()); for (unsigned int i = 0; i < mgsc->getNumEntriesFirstLocalHistories(); ++i) { s0LHistory[i].resize(historyBits, 0); @@ -1051,7 +1050,6 @@ DecoupledBPUWithBTB::createFetchStreamEntry() entry.history = s0History; entry.phistory = s0PHistory; entry.bwhistory = s0BwHistory; - entry.ihistory = s0IHistory; entry.lhistory = s0LHistory; entry.predTick = finalPred.predTick; entry.predSource = finalPred.predSource; @@ -1217,7 +1215,7 @@ DecoupledBPUWithBTB::updateHistoryForPrediction(FetchStream &entry) if(components[i]->needMoreHistories){ components[i]->specUpdatePHist(s0PHistory, finalPred); components[i]->specUpdateBwHist(s0BwHistory, finalPred); - components[i]->specUpdateIHist(s0IHistory, finalPred); + components[i]->specUpdateIHist(finalPred); components[i]->specUpdateLHist(s0LHistory, finalPred); } } @@ -1248,9 +1246,6 @@ DecoupledBPUWithBTB::updateHistoryForPrediction(FetchStream &entry) 
// Update path history pHistShiftIn(2, p_taken, s0PHistory, p_pc, p_target); - // Update imli history - histShiftIn(bw_shamt, bw_taken, s0IHistory); //s0IHistory is not used - // Update local history histShiftIn(shamt, taken, s0LHistory[mgsc->getPcIndex(finalPred.bbStart, log2(mgsc->getNumEntriesFirstLocalHistories()))]); @@ -1296,7 +1291,6 @@ DecoupledBPUWithBTB::recoverHistoryForSquash( s0History = stream.history; s0PHistory = stream.phistory; s0BwHistory = stream.bwhistory; - s0IHistory = stream.ihistory; s0LHistory = stream.lhistory; // Get actual history shift information @@ -1317,7 +1311,7 @@ DecoupledBPUWithBTB::recoverHistoryForSquash( if(components[i]->needMoreHistories){ components[i]->recoverPHist(s0PHistory, stream, real_shamt, real_taken); components[i]->recoverBwHist(s0BwHistory, stream, real_bw_shamt, real_bw_taken); - components[i]->recoverIHist(s0IHistory, stream, real_bw_shamt, real_bw_taken); //s0IHistory is not used + components[i]->recoverIHist(stream, real_bw_shamt, real_bw_taken); components[i]->recoverLHist(s0LHistory, stream, real_shamt, real_taken); } } @@ -1331,9 +1325,6 @@ DecoupledBPUWithBTB::recoverHistoryForSquash( // Update global backward history with actual outcome histShiftIn(real_bw_shamt, real_bw_taken, s0BwHistory); - // Update imli history with actual outcome - histShiftIn(real_bw_shamt, real_bw_taken, s0IHistory); //s0IHistory is not used - // Update local history with actual outcome histShiftIn(real_shamt, real_taken, s0LHistory[mgsc->getPcIndex(stream.startPC, log2(mgsc->getNumEntriesFirstLocalHistories()))]); diff --git a/src/cpu/pred/btb/decoupled_bpred.hh b/src/cpu/pred/btb/decoupled_bpred.hh index a6f4180244..4e85a6e18d 100644 --- a/src/cpu/pred/btb/decoupled_bpred.hh +++ b/src/cpu/pred/btb/decoupled_bpred.hh @@ -156,7 +156,6 @@ class DecoupledBPUWithBTB : public BPredUnit boost::dynamic_bitset<> s0History; ///< global History bits boost::dynamic_bitset<> s0PHistory; ///< path History bits boost::dynamic_bitset<> 
s0BwHistory; ///< global backward History bits - boost::dynamic_bitset<> s0IHistory; ///< IMLI History bits std::vector> s0LHistory; ///< local History bits FullBTBPrediction finalPred; ///< Final prediction diff --git a/src/cpu/pred/btb/stream_struct.hh b/src/cpu/pred/btb/stream_struct.hh index 720b04e75e..00f12a088e 100644 --- a/src/cpu/pred/btb/stream_struct.hh +++ b/src/cpu/pred/btb/stream_struct.hh @@ -339,7 +339,6 @@ struct FetchStream boost::dynamic_bitset<> history; // record GHR/s0History boost::dynamic_bitset<> phistory; // record PATH/s0History boost::dynamic_bitset<> bwhistory; // record BWHR/s0History - boost::dynamic_bitset<> ihistory; // record IHR/s0History std::vector> lhistory; // record LHR/s0History std::queue previousPCs; // previous PCs, used by ahead BTB @@ -370,7 +369,6 @@ struct FetchStream history(), phistory(), bwhistory(), - ihistory(), lhistory(), fetchInstNum(0), commitInstNum(0), diff --git a/src/cpu/pred/btb/test/SConscript b/src/cpu/pred/btb/test/SConscript index 220d4d57eb..aa2dcbb763 100644 --- a/src/cpu/pred/btb/test/SConscript +++ b/src/cpu/pred/btb/test/SConscript @@ -28,6 +28,13 @@ GTest('tage.test', '../timed_base_pred.cc', ) +GTest('mgsc.test', + '../btb_mgsc.cc', + 'btb_mgsc.test.cc', + '../folded_hist.cc', + '../timed_base_pred.cc', +) + GTest('folded_hist.test', 'folded_hist.test.cc', '../folded_hist.cc', @@ -69,6 +76,7 @@ GTest('decoupled_bpred.test', env.Append(UNITTESTS=['uras.test', 'btb.test', 'tage.test', + 'mgsc.test', 'folded_hist.test', 'jump_ahead.test', 'fetch_target_queue.test', diff --git a/src/cpu/pred/btb/test/btb_mgsc.test.cc b/src/cpu/pred/btb/test/btb_mgsc.test.cc new file mode 100644 index 0000000000..67679c4ff1 --- /dev/null +++ b/src/cpu/pred/btb/test/btb_mgsc.test.cc @@ -0,0 +1,852 @@ +#include + +#include +#include + +#include + +#include "cpu/pred/btb/btb_mgsc.hh" + +namespace gem5 +{ +namespace branch_prediction +{ +namespace btb_pred +{ +namespace test +{ + +namespace +{ + +BTBEntry 
+makeCondBTBEntry(Addr pc) +{ + BTBEntry entry; + entry.pc = pc; + entry.target = pc + 4; + entry.isCond = true; + entry.valid = true; + entry.alwaysTaken = false; + entry.size = 4; + return entry; +} + +std::pair +lineLaneForHistIndex(BTBMGSC &mgsc, Addr start_pc, Addr branch_pc, unsigned table_idx_width) +{ + const unsigned num_bits = BTBMGSC::TestAccess::numCtrsPerLineBits(mgsc); + const unsigned idx_bits = table_idx_width - num_bits; + Addr line_idx = BTBMGSC::TestAccess::getHistIndex(mgsc, start_pc, idx_bits, /*foldedHist=*/0); + auto [idx1, idx2] = BTBMGSC::TestAccess::posHash(mgsc, branch_pc, line_idx); + return {idx1, idx2}; +} + +std::pair +lineLaneForBiasIndex(BTBMGSC &mgsc, Addr start_pc, Addr branch_pc, unsigned bias_idx_width, + const TageInfoForMGSC &tage_info) +{ + const unsigned num_bits = BTBMGSC::TestAccess::numCtrsPerLineBits(mgsc); + const unsigned idx_bits = bias_idx_width - num_bits; + Addr line_idx = BTBMGSC::TestAccess::getBiasIndex( + mgsc, start_pc, idx_bits, tage_info.tage_main_taken, tage_info.tage_pred_conf_low); + auto [idx1, idx2] = BTBMGSC::TestAccess::posHash(mgsc, branch_pc, line_idx); + return {idx1, idx2}; +} + +void +setAllTableCountersForPc(BTBMGSC &mgsc, Addr start_pc, Addr branch_pc, const TageInfoForMGSC &tage_info, + int16_t bw_ctr, int16_t l_ctr, int16_t i_ctr, int16_t g_ctr, int16_t p_ctr, int16_t bias_ctr) +{ + auto &bw_table = BTBMGSC::TestAccess::bwTable(mgsc); + auto &l_table = BTBMGSC::TestAccess::lTable(mgsc); + auto &i_table = BTBMGSC::TestAccess::iTable(mgsc); + auto &g_table = BTBMGSC::TestAccess::gTable(mgsc); + auto &p_table = BTBMGSC::TestAccess::pTable(mgsc); + auto &bias_table = BTBMGSC::TestAccess::biasTable(mgsc); + + const auto [bw_i1, bw_i2] = + lineLaneForHistIndex(mgsc, start_pc, branch_pc, BTBMGSC::TestAccess::bwTableIdxWidth(mgsc)); + const auto [l_i1, l_i2] = + lineLaneForHistIndex(mgsc, start_pc, branch_pc, BTBMGSC::TestAccess::lTableIdxWidth(mgsc)); + const auto [i_i1, i_i2] = + 
lineLaneForHistIndex(mgsc, start_pc, branch_pc, BTBMGSC::TestAccess::iTableIdxWidth(mgsc)); + const auto [g_i1, g_i2] = + lineLaneForHistIndex(mgsc, start_pc, branch_pc, BTBMGSC::TestAccess::gTableIdxWidth(mgsc)); + const auto [p_i1, p_i2] = + lineLaneForHistIndex(mgsc, start_pc, branch_pc, BTBMGSC::TestAccess::pTableIdxWidth(mgsc)); + const auto [bias_i1, bias_i2] = + lineLaneForBiasIndex(mgsc, start_pc, branch_pc, BTBMGSC::TestAccess::biasTableIdxWidth(mgsc), tage_info); + + bw_table[0][bw_i1][bw_i2] = bw_ctr; + l_table[0][l_i1][l_i2] = l_ctr; + i_table[0][i_i1][i_i2] = i_ctr; + g_table[0][g_i1][g_i2] = g_ctr; + p_table[0][p_i1][p_i2] = p_ctr; + bias_table[0][bias_i1][bias_i2] = bias_ctr; +} + +std::pair +findCondTaken(const CondTakens &condTakens, Addr pc) +{ + auto it = CondTakens_find(condTakens, pc); + if (it == condTakens.end()) { + return {false, false}; + } + return {true, it->second}; +} + +void +histShiftIn(int shamt, bool taken, boost::dynamic_bitset<> &history) +{ + if (shamt == 0) { + return; + } + history <<= shamt; + history[0] = taken; +} + +void +pHistShiftIn(int shamt, bool taken, boost::dynamic_bitset<> &history, Addr pc, Addr target) +{ + if (shamt == 0) { + return; + } + if (taken) { + uint64_t hash = pathHash(pc, target); + history <<= shamt; + for (std::size_t i = 0; i < pathHashLength && i < history.size(); i++) { + history[i] = (hash & 1) ^ history[i]; + hash >>= 1; + } + } +} + +struct MgscHarness +{ + BTBMGSC mgsc; + boost::dynamic_bitset<> ghr; + boost::dynamic_bitset<> phr; + boost::dynamic_bitset<> bwhr; + std::vector> lhr; + std::vector stage_preds; + + explicit MgscHarness(std::size_t hist_len = 64) + : mgsc(), + ghr(hist_len, 0), + phr(hist_len, 0), + bwhr(hist_len, 0), + lhr(mgsc.getNumEntriesFirstLocalHistories(), boost::dynamic_bitset<>(hist_len, 0)), + stage_preds(2) + {} + + void + setOnlyGTable() + { + BTBMGSC::TestAccess::enableBwTable(mgsc) = false; + BTBMGSC::TestAccess::enableLTable(mgsc) = false; + 
BTBMGSC::TestAccess::enableITable(mgsc) = false; + BTBMGSC::TestAccess::enableGTable(mgsc) = true; + BTBMGSC::TestAccess::enablePTable(mgsc) = false; + BTBMGSC::TestAccess::enableBiasTable(mgsc) = false; + BTBMGSC::TestAccess::enablePCThreshold(mgsc) = false; + BTBMGSC::TestAccess::forceUseSC(mgsc) = true; + } + + void + setOnlyBwTable() + { + BTBMGSC::TestAccess::enableBwTable(mgsc) = true; + BTBMGSC::TestAccess::enableLTable(mgsc) = false; + BTBMGSC::TestAccess::enableITable(mgsc) = false; + BTBMGSC::TestAccess::enableGTable(mgsc) = false; + BTBMGSC::TestAccess::enablePTable(mgsc) = false; + BTBMGSC::TestAccess::enableBiasTable(mgsc) = false; + BTBMGSC::TestAccess::enablePCThreshold(mgsc) = false; + BTBMGSC::TestAccess::forceUseSC(mgsc) = true; + } + + void + setOnlyITable() + { + BTBMGSC::TestAccess::enableBwTable(mgsc) = false; + BTBMGSC::TestAccess::enableLTable(mgsc) = false; + BTBMGSC::TestAccess::enableITable(mgsc) = true; + BTBMGSC::TestAccess::enableGTable(mgsc) = false; + BTBMGSC::TestAccess::enablePTable(mgsc) = false; + BTBMGSC::TestAccess::enableBiasTable(mgsc) = false; + BTBMGSC::TestAccess::enablePCThreshold(mgsc) = false; + BTBMGSC::TestAccess::forceUseSC(mgsc) = true; + } + + void + setOnlyBiasTable() + { + BTBMGSC::TestAccess::enableBwTable(mgsc) = false; + BTBMGSC::TestAccess::enableLTable(mgsc) = false; + BTBMGSC::TestAccess::enableITable(mgsc) = false; + BTBMGSC::TestAccess::enableGTable(mgsc) = false; + BTBMGSC::TestAccess::enablePTable(mgsc) = false; + BTBMGSC::TestAccess::enableBiasTable(mgsc) = true; + BTBMGSC::TestAccess::enablePCThreshold(mgsc) = false; + BTBMGSC::TestAccess::forceUseSC(mgsc) = true; + } + + void + setOnlyLTable() + { + BTBMGSC::TestAccess::enableBwTable(mgsc) = false; + BTBMGSC::TestAccess::enableLTable(mgsc) = true; + BTBMGSC::TestAccess::enableITable(mgsc) = false; + BTBMGSC::TestAccess::enableGTable(mgsc) = false; + BTBMGSC::TestAccess::enablePTable(mgsc) = false; + BTBMGSC::TestAccess::enableBiasTable(mgsc) = false; 
+ BTBMGSC::TestAccess::enablePCThreshold(mgsc) = false; + BTBMGSC::TestAccess::forceUseSC(mgsc) = true; + } + + void + setOnlyPTable() + { + BTBMGSC::TestAccess::enableBwTable(mgsc) = false; + BTBMGSC::TestAccess::enableLTable(mgsc) = false; + BTBMGSC::TestAccess::enableITable(mgsc) = false; + BTBMGSC::TestAccess::enableGTable(mgsc) = false; + BTBMGSC::TestAccess::enablePTable(mgsc) = true; + BTBMGSC::TestAccess::enableBiasTable(mgsc) = false; + BTBMGSC::TestAccess::enablePCThreshold(mgsc) = false; + BTBMGSC::TestAccess::forceUseSC(mgsc) = true; + } + + struct StepResult + { + bool predicted_taken{false}; + BTBMGSC::MgscPrediction mgsc_pred{}; + }; + + StepResult + step(Addr start_pc, const BTBEntry &entry, const TageInfoForMGSC &tage_info, bool actual_taken) + { + for (auto &pred : stage_preds) { + pred.bbStart = start_pc; + pred.btbEntries = {entry}; + pred.tageInfoForMgscs.clear(); + pred.tageInfoForMgscs[entry.pc] = tage_info; + } + + // Prediction + boost::dynamic_bitset<> ghr_before = ghr; + boost::dynamic_bitset<> phr_before = phr; + boost::dynamic_bitset<> bwhr_before = bwhr; + auto lhr_before = lhr; + + mgsc.putPCHistory(start_pc, ghr, stage_preds); + auto meta = mgsc.getPredictionMeta(); + + auto [found, pred_taken] = findCondTaken(stage_preds[1].condTakens, entry.pc); + EXPECT_TRUE(found); + + // Snapshot per-branch MGSC prediction info (indexes/percsums/thresholds). + StepResult result; + result.predicted_taken = pred_taken; + { + const auto &preds = BTBMGSC::TestAccess::preds(mgsc); + auto it = preds.find(entry.pc); + EXPECT_NE(it, preds.end()); + if (it != preds.end()) { + result.mgsc_pred = it->second; + } + } + + // Speculative folded-history update (use pre-update histories, like DecoupledBPUWithBTB does). 
+ mgsc.specUpdateHist(ghr_before, stage_preds[1]); + mgsc.specUpdatePHist(phr_before, stage_preds[1]); + mgsc.specUpdateBwHist(bwhr_before, stage_preds[1]); + mgsc.specUpdateIHist(stage_preds[1]); + mgsc.specUpdateLHist(lhr_before, stage_preds[1]); + + // Speculative external history update using predicted outcome. + int shamt; + bool cond_taken; + std::tie(shamt, cond_taken) = stage_preds[1].getHistInfo(); + histShiftIn(shamt, cond_taken, ghr); + + int bw_shamt; + bool bw_taken; + std::tie(bw_shamt, bw_taken) = stage_preds[1].getBwHistInfo(); + histShiftIn(bw_shamt, bw_taken, bwhr); + + auto [p_pc, p_target, p_taken] = stage_preds[1].getPHistInfo(); + pHistShiftIn(2, p_taken, phr, p_pc, p_target); + + unsigned lhr_idx = + mgsc.getPcIndex(stage_preds[1].bbStart, log2(mgsc.getNumEntriesFirstLocalHistories())); + histShiftIn(shamt, cond_taken, lhr[lhr_idx]); + + // std::string buf; + // boost::to_string(lhr[lhr_idx], buf); + // std::cout << "lhr_idx: " << lhr_idx << ", lhr: " << buf.c_str() << std::endl; + + + // If mispredicted, recover folded histories and external histories with actual outcome. + if (pred_taken != actual_taken) { + ghr = ghr_before; + phr = phr_before; + bwhr = bwhr_before; + lhr = lhr_before; + + FetchStream recover_stream; + recover_stream.startPC = start_pc; + recover_stream.predMetas[mgsc.getComponentIdx()] = meta; + recover_stream.resolved = true; + recover_stream.exeBranchInfo = entry; + recover_stream.exeTaken = actual_taken; + + mgsc.recoverHist(ghr, recover_stream, shamt, actual_taken); + mgsc.recoverPHist(phr, recover_stream, 2, actual_taken); + + bool actual_bw_taken = actual_taken && (entry.target < entry.pc); + mgsc.recoverBwHist(bwhr, recover_stream, bw_shamt, actual_bw_taken); + mgsc.recoverIHist(recover_stream, bw_shamt, actual_bw_taken); + mgsc.recoverLHist(lhr, recover_stream, shamt, actual_taken); + + // Apply correct external history update. 
+ histShiftIn(shamt, actual_taken, ghr); + histShiftIn(bw_shamt, actual_bw_taken, bwhr); + pHistShiftIn(2, actual_taken, phr, entry.pc, entry.target); + histShiftIn(shamt, actual_taken, lhr[lhr_idx]); + } + + // Training update using prediction meta + FetchStream update_stream; + update_stream.startPC = start_pc; + update_stream.updateBTBEntries = {entry}; + update_stream.updateIsOldEntry = true; + update_stream.resolved = true; + update_stream.exeBranchInfo = entry; + update_stream.exeTaken = actual_taken; + update_stream.predMetas[mgsc.getComponentIdx()] = meta; + mgsc.update(update_stream); + + return result; + } +}; + +} // namespace + +TEST(BTBMGSCTest, CanConstructAndCreateMetaOnEmptyInput) +{ + BTBMGSC mgsc; + + Addr start_pc = 0x1000; + boost::dynamic_bitset<> history(64, 0); + std::vector stage_preds(2); + for (auto &pred : stage_preds) { + pred.bbStart = start_pc; + } + + mgsc.putPCHistory(start_pc, history, stage_preds); + + auto meta = mgsc.getPredictionMeta(); + EXPECT_NE(meta, nullptr); + EXPECT_TRUE(stage_preds[0].condTakens.empty()); + EXPECT_TRUE(stage_preds[1].condTakens.empty()); +} + +TEST(BTBMGSCTest, GateHighConfUsesSCWhenStrong) +{ + BTBMGSC mgsc; + Addr start_pc = 0x1000; + Addr branch_pc = 0x1000; + auto entry = makeCondBTBEntry(branch_pc); + + boost::dynamic_bitset<> history(64, 0); + std::vector stage_preds(2); + for (auto &pred : stage_preds) { + pred.bbStart = start_pc; + pred.btbEntries = {entry}; + pred.tageInfoForMgscs[branch_pc] = TageInfoForMGSC( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/false, + /*tage_pred_conf_high=*/true, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + } + + const auto &tage_info = stage_preds[0].tageInfoForMgscs[branch_pc]; + // total_sum = 9 + 9 + 1 + 1 - 1 - 1 = 18, total_thres = 35 => high-conf uses SC when abs(sum) > thres/2 (=17) + setAllTableCountersForPc(mgsc, start_pc, branch_pc, tage_info, + /*bw=*/4, /*l=*/4, /*i=*/0, /*g=*/0, /*p=*/-1, 
/*bias=*/-1); + + mgsc.putPCHistory(start_pc, history, stage_preds); + + auto [found, taken] = findCondTaken(stage_preds[1].condTakens, branch_pc); + ASSERT_TRUE(found); + EXPECT_TRUE(taken); // overridden by SC + + const auto &preds = BTBMGSC::TestAccess::preds(mgsc); + auto it = preds.find(branch_pc); + ASSERT_NE(it, preds.end()); + EXPECT_TRUE(it->second.use_mgsc); +} + +TEST(BTBMGSCTest, GateHighConfBypassWhenWeak) +{ + BTBMGSC mgsc; + Addr start_pc = 0x1000; + Addr branch_pc = 0x1000; + auto entry = makeCondBTBEntry(branch_pc); + + boost::dynamic_bitset<> history(64, 0); + std::vector stage_preds(2); + for (auto &pred : stage_preds) { + pred.bbStart = start_pc; + pred.btbEntries = {entry}; + pred.tageInfoForMgscs[branch_pc] = TageInfoForMGSC( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/false, + /*tage_pred_conf_high=*/true, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + } + + const auto &tage_info = stage_preds[0].tageInfoForMgscs[branch_pc]; + // total_sum = 9 + 9 + 1 + 1 - 1 - 3 = 16, total_thres = 35 => high-conf bypass when abs(sum) <= 17 + setAllTableCountersForPc(mgsc, start_pc, branch_pc, tage_info, + /*bw=*/4, /*l=*/4, /*i=*/0, /*g=*/0, /*p=*/-1, /*bias=*/-2); + + mgsc.putPCHistory(start_pc, history, stage_preds); + + auto [found, taken] = findCondTaken(stage_preds[1].condTakens, branch_pc); + ASSERT_TRUE(found); + EXPECT_FALSE(taken); // fall back to tage_pred_taken + + const auto &preds = BTBMGSC::TestAccess::preds(mgsc); + auto it = preds.find(branch_pc); + ASSERT_NE(it, preds.end()); + EXPECT_FALSE(it->second.use_mgsc); +} + +TEST(BTBMGSCTest, ForceUseSCOverridesTage) +{ + BTBMGSC mgsc; + BTBMGSC::TestAccess::forceUseSC(mgsc) = true; + + Addr start_pc = 0x1000; + Addr branch_pc = 0x1000; + auto entry = makeCondBTBEntry(branch_pc); + + boost::dynamic_bitset<> history(64, 0); + std::vector stage_preds(2); + for (auto &pred : stage_preds) { + pred.bbStart = start_pc; + pred.btbEntries = 
{entry}; + pred.tageInfoForMgscs[branch_pc] = TageInfoForMGSC( + /*tage_pred_taken=*/true, + /*tage_main_taken=*/true, + /*tage_pred_conf_high=*/true, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + } + + const auto &tage_info = stage_preds[0].tageInfoForMgscs[branch_pc]; + // total_sum = 1 + 1 + 1 + 1 - 3 - 3 = -2 => forceUseSC makes final pred not-taken. + setAllTableCountersForPc(mgsc, start_pc, branch_pc, tage_info, + /*bw=*/0, /*l=*/0, /*i=*/0, /*g=*/0, /*p=*/-2, /*bias=*/-2); + + mgsc.putPCHistory(start_pc, history, stage_preds); + + auto [found, taken] = findCondTaken(stage_preds[1].condTakens, branch_pc); + ASSERT_TRUE(found); + EXPECT_FALSE(taken); + + const auto &preds = BTBMGSC::TestAccess::preds(mgsc); + auto it = preds.find(branch_pc); + ASSERT_NE(it, preds.end()); + EXPECT_TRUE(it->second.use_mgsc); +} + +TEST(BTBMGSCTest, UpdateOnlyOnWrongOrLowMargin) +{ + BTBMGSC mgsc; + Addr start_pc = 0x1000; + Addr branch_pc = 0x1000; + auto entry = makeCondBTBEntry(branch_pc); + + boost::dynamic_bitset<> history(64, 0); + std::vector stage_preds(2); + for (auto &pred : stage_preds) { + pred.bbStart = start_pc; + pred.btbEntries = {entry}; + pred.tageInfoForMgscs[branch_pc] = TageInfoForMGSC( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/false, + /*tage_pred_conf_high=*/true, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + } + + const auto &tage_info = stage_preds[0].tageInfoForMgscs[branch_pc]; + // Make a very confident SC prediction (large positive sum). 
+ setAllTableCountersForPc(mgsc, start_pc, branch_pc, tage_info, + /*bw=*/31, /*l=*/31, /*i=*/0, /*g=*/0, /*p=*/0, /*bias=*/0); + + // Prediction + mgsc.putPCHistory(start_pc, history, stage_preds); + auto meta = mgsc.getPredictionMeta(); + + const auto [bw_i1, bw_i2] = + lineLaneForHistIndex(mgsc, start_pc, branch_pc, BTBMGSC::TestAccess::bwTableIdxWidth(mgsc)); + auto &bw_table = BTBMGSC::TestAccess::bwTable(mgsc); + int16_t before = bw_table[0][bw_i1][bw_i2]; + + // Update with correct outcome (taken): should NOT train because abs(sum) >= total_thres and prediction correct. + { + FetchStream stream; + stream.startPC = start_pc; + stream.updateBTBEntries = {entry}; + stream.updateIsOldEntry = true; + stream.resolved = true; + stream.exeBranchInfo = entry; + stream.exeTaken = true; + stream.predMetas[mgsc.getComponentIdx()] = meta; + mgsc.update(stream); + EXPECT_EQ(bw_table[0][bw_i1][bw_i2], before); + } + + // Update with wrong outcome (not taken): should train (decrement signed counter). + { + FetchStream stream; + stream.startPC = start_pc; + stream.updateBTBEntries = {entry}; + stream.updateIsOldEntry = true; + stream.resolved = true; + stream.exeBranchInfo = entry; + stream.exeTaken = false; + stream.predMetas[mgsc.getComponentIdx()] = meta; + mgsc.update(stream); + EXPECT_EQ(bw_table[0][bw_i1][bw_i2], static_cast(before - 1)); + } +} + +TEST(BTBMGSCTest, GTableLearnsAlternatingPattern) +{ + MgscHarness h; + h.setOnlyGTable(); + + const Addr start_pc = 0x1000; + const Addr branch_pc = 0x1000; + auto entry = makeCondBTBEntry(branch_pc); + + // TAGE info is required by MGSC, but forceUseSC makes final decision depend on SC only. + const TageInfoForMGSC tage_info( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/false, + /*tage_pred_conf_high=*/true, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + + // Alternating pattern: T, N, T, N... 
This is a classic case where pure bias can't do better than 50%, + // while a global-history indexed table can learn distinct counters for different GHR contexts. + const int iters = 200; + const int warmup = 100; + int correct_after_warmup = 0; + int total_after_warmup = 0; + std::set seen_g_indices; + + for (int i = 0; i < iters; ++i) { + bool actual_taken = (i % 2) == 0; + auto step = h.step(start_pc, entry, tage_info, actual_taken); + + if (!step.mgsc_pred.gIndex.empty()) { + seen_g_indices.insert(step.mgsc_pred.gIndex[0]); + } + + if (i >= warmup) { + total_after_warmup++; + if (step.predicted_taken == actual_taken) { + correct_after_warmup++; + } + } + } + + // Ensure the global table actually observes more than one context (history affects index). + EXPECT_GE(seen_g_indices.size(), 2u); + + // After enough training, accuracy should be noticeably better than a constant predictor (~50%). + double acc = static_cast(correct_after_warmup) / static_cast(total_after_warmup); + EXPECT_GE(acc, 0.80) << "Accuracy too low for alternating pattern: " << acc; +} + +TEST(BTBMGSCTest, BwTableLearnsAlternatingPatternOnBackwardBranches) +{ + MgscHarness h; + h.setOnlyBwTable(); + + const Addr start_pc = 0x1000; + const Addr branch_pc = 0x1000; + auto entry = makeCondBTBEntry(branch_pc); + entry.target = branch_pc - 4; // backward branch so bw_taken == taken + + const TageInfoForMGSC tage_info( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/false, + /*tage_pred_conf_high=*/true, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + + const int iters = 200; + const int warmup = 100; + int correct_after_warmup = 0; + int total_after_warmup = 0; + std::set seen_bw_indices; + + for (int i = 0; i < iters; ++i) { + bool actual_taken = (i % 2) == 0; + auto step = h.step(start_pc, entry, tage_info, actual_taken); + + if (!step.mgsc_pred.bwIndex.empty()) { + seen_bw_indices.insert(step.mgsc_pred.bwIndex[0]); + } + + if (i >= warmup) { + 
total_after_warmup++; + if (step.predicted_taken == actual_taken) { + correct_after_warmup++; + } + } + } + + EXPECT_GE(seen_bw_indices.size(), 2u); + double acc = static_cast(correct_after_warmup) / static_cast(total_after_warmup); + EXPECT_GE(acc, 0.80) << "Accuracy too low for alternating backward branches: " << acc; +} + +TEST(BTBMGSCTest, ITableLearnsFixedTripCountLoop) +{ + MgscHarness h; + h.setOnlyITable(); + + const Addr start_pc = 0x1000; + const Addr branch_pc = 0x1000; + auto entry = makeCondBTBEntry(branch_pc); + entry.target = branch_pc - 4; // backward loop branch + + const TageInfoForMGSC tage_info( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/false, + /*tage_pred_conf_high=*/true, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + + // Loop pattern: T, T, T, N, repeat. IMLI counter (consecutive backward-taken count) should separate phases. + const int iters = 400; + const int warmup = 200; + int correct_after_warmup = 0; + int total_after_warmup = 0; + std::set seen_i_indices; + + for (int i = 0; i < iters; ++i) { + bool actual_taken = (i % 4) != 3; + auto step = h.step(start_pc, entry, tage_info, actual_taken); + + if (!step.mgsc_pred.iIndex.empty()) { + seen_i_indices.insert(step.mgsc_pred.iIndex[0]); + } + + if (i >= warmup) { + total_after_warmup++; + if (step.predicted_taken == actual_taken) { + correct_after_warmup++; + } + } + } + + EXPECT_GE(seen_i_indices.size(), 3u); + double acc = static_cast(correct_after_warmup) / static_cast(total_after_warmup); + EXPECT_GE(acc, 0.85) << "Accuracy too low for fixed-trip loop: " << acc; +} + +TEST(BTBMGSCTest, BiasTableLearnsTwoTageContexts) +{ + MgscHarness h; + h.setOnlyBiasTable(); + + const Addr start_pc = 0x1000; + const Addr branch_pc = 0x1000; + auto entry = makeCondBTBEntry(branch_pc); + + // Two contexts keyed by (tage_main_taken, tage_pred_conf_low). Bias table should learn separate counters. 
+ const TageInfoForMGSC ctx_a( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/false, + /*tage_pred_conf_high=*/false, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + const TageInfoForMGSC ctx_b( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/true, + /*tage_pred_conf_high=*/false, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/true, + /*tage_pred_alt_diff=*/false); + + const int iters = 200; + const int warmup = 100; + int correct_after_warmup = 0; + int total_after_warmup = 0; + std::set seen_bias_indices; + + for (int i = 0; i < iters; ++i) { + bool use_a = (i % 2) == 0; + const auto &tage_info = use_a ? ctx_a : ctx_b; + bool actual_taken = use_a; // ctx_a => taken, ctx_b => not taken + + auto step = h.step(start_pc, entry, tage_info, actual_taken); + if (!step.mgsc_pred.biasIndex.empty()) { + seen_bias_indices.insert(step.mgsc_pred.biasIndex[0]); + } + + if (i >= warmup) { + total_after_warmup++; + if (step.predicted_taken == actual_taken) { + correct_after_warmup++; + } + } + } + + EXPECT_GE(seen_bias_indices.size(), 2u); + double acc = static_cast(correct_after_warmup) / static_cast(total_after_warmup); + EXPECT_GE(acc, 0.90) << "Accuracy too low for two-context bias learning: " << acc; +} + +TEST(BTBMGSCTest, LTableLearnsTwoIndependentLocalHistories) +{ + MgscHarness h; + h.setOnlyLTable(); + + // Two different fetch-block starts map to different local-history slots. 
+ const Addr start_pc_a = 0x1000; + const Addr branch_pc_a = 0x1000; + auto entry_a = makeCondBTBEntry(branch_pc_a); + + const Addr start_pc_b = 0x1020; // +32B (blockSize) => different `getPcIndex()` with 4 local-history entries + const Addr branch_pc_b = 0x1020; + auto entry_b = makeCondBTBEntry(branch_pc_b); + + const unsigned lhr_bits = log2(h.mgsc.getNumEntriesFirstLocalHistories()); + EXPECT_NE(h.mgsc.getPcIndex(start_pc_a, lhr_bits), h.mgsc.getPcIndex(start_pc_b, lhr_bits)); + + const TageInfoForMGSC tage_info( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/false, + /*tage_pred_conf_high=*/true, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + + // Interleave two branches with opposite-phase alternating patterns. LTable should learn each one using + // its own local history, despite the interleaving. + const int iters = 300; + const int warmup = 150; + int correct_after_warmup = 0; + int total_after_warmup = 0; + std::set seen_l_indices_a; + std::set seen_l_indices_b; + + for (int i = 0; i < iters; ++i) { + bool actual_taken_a = (i % 2) == 0; + auto step_a = h.step(start_pc_a, entry_a, tage_info, actual_taken_a); + if (!step_a.mgsc_pred.lIndex.empty()) { + seen_l_indices_a.insert(step_a.mgsc_pred.lIndex[0]); + } + + bool actual_taken_b = (i % 2) != 0; + auto step_b = h.step(start_pc_b, entry_b, tage_info, actual_taken_b); + if (!step_b.mgsc_pred.lIndex.empty()) { + seen_l_indices_b.insert(step_b.mgsc_pred.lIndex[0]); + } + + if (i >= warmup) { + total_after_warmup += 2; + correct_after_warmup += (step_a.predicted_taken == actual_taken_a); + correct_after_warmup += (step_b.predicted_taken == actual_taken_b); + } + } + + EXPECT_GE(seen_l_indices_a.size(), 2u); + EXPECT_GE(seen_l_indices_b.size(), 2u); + double acc = static_cast(correct_after_warmup) / static_cast(total_after_warmup); + EXPECT_GE(acc, 0.80) << "Accuracy too low for LTable local-history learning: " << acc; +} + +TEST(BTBMGSCTest, 
PTableLearnsOutcomeFromPreviousTakenBranchTarget) +{ + MgscHarness h; + h.setOnlyPTable(); + + // A: always taken, but its target alternates => different path-hash contexts. + const Addr start_pc_a = 0x1000; + const Addr branch_pc_a = 0x1000; + auto entry_a = makeCondBTBEntry(branch_pc_a); + const Addr target0 = 0x2000; + const Addr target1 = 0x3000; + + // B: outcome depends on the path context created by A's target. + const Addr start_pc_b = 0x1020; + const Addr branch_pc_b = 0x1020; + auto entry_b = makeCondBTBEntry(branch_pc_b); + + const TageInfoForMGSC tage_info( + /*tage_pred_taken=*/false, + /*tage_main_taken=*/false, + /*tage_pred_conf_high=*/true, + /*tage_pred_conf_mid=*/false, + /*tage_pred_conf_low=*/false, + /*tage_pred_alt_diff=*/false); + + const int iters = 400; + const int warmup = 200; + int correct_after_warmup = 0; + int total_after_warmup = 0; + std::set seen_p_indices_b; + + for (int i = 0; i < iters; ++i) { + bool use_target0 = (i % 2) == 0; + entry_a.target = use_target0 ? target0 : target1; + + // Step A: always taken so path history is updated with its (pc,target) hash. + (void)h.step(start_pc_a, entry_a, tage_info, /*actual_taken=*/true); + + // Step B: taken iff the previous taken branch (A) used target0. 
+ bool actual_taken_b = use_target0; + auto step_b = h.step(start_pc_b, entry_b, tage_info, actual_taken_b); + if (!step_b.mgsc_pred.pIndex.empty()) { + seen_p_indices_b.insert(step_b.mgsc_pred.pIndex[0]); + } + + if (i >= warmup) { + total_after_warmup++; + if (step_b.predicted_taken == actual_taken_b) { + correct_after_warmup++; + } + } + } + + EXPECT_GE(seen_p_indices_b.size(), 2u); + double acc = static_cast(correct_after_warmup) / static_cast(total_after_warmup); + EXPECT_GE(acc, 0.80) << "Accuracy too low for PTable path-target learning: " << acc; +} + +} // namespace test +} // namespace btb_pred +} // namespace branch_prediction +} // namespace gem5 diff --git a/src/cpu/pred/btb/timed_base_pred.cc b/src/cpu/pred/btb/timed_base_pred.cc index 59948d3dd2..96b4514165 100644 --- a/src/cpu/pred/btb/timed_base_pred.cc +++ b/src/cpu/pred/btb/timed_base_pred.cc @@ -12,7 +12,13 @@ namespace btb_pred #ifdef UNIT_TEST namespace test { -TimedBaseBTBPredictor::TimedBaseBTBPredictor() : blockSize(32), predictWidth(64), numDelay(0), resolvedUpdate(false) {} +TimedBaseBTBPredictor::TimedBaseBTBPredictor() + : blockSize(32), + predictWidth(64), + numDelay(0), + resolvedUpdate(false), + enabled(true) +{} } // namespace test #else TimedBaseBTBPredictor::TimedBaseBTBPredictor(const Params &p) diff --git a/src/cpu/pred/btb/timed_base_pred.hh b/src/cpu/pred/btb/timed_base_pred.hh index ea533d3447..880d160f9e 100644 --- a/src/cpu/pred/btb/timed_base_pred.hh +++ b/src/cpu/pred/btb/timed_base_pred.hh @@ -66,12 +66,12 @@ class TimedBaseBTBPredictor: public SimObject virtual void specUpdateHist(const boost::dynamic_bitset<> &history, FullBTBPrediction &pred) {} virtual void specUpdatePHist(const boost::dynamic_bitset<> &history, FullBTBPrediction &pred) {} virtual void specUpdateBwHist(const boost::dynamic_bitset<> &history, FullBTBPrediction &pred) {} - virtual void specUpdateIHist(const boost::dynamic_bitset<> &history, FullBTBPrediction &pred) {} + virtual void 
specUpdateIHist(FullBTBPrediction &pred) {} virtual void specUpdateLHist(const std::vector> &history, FullBTBPrediction &pred) {} virtual void recoverHist(const boost::dynamic_bitset<> &history, const FetchStream &entry, int shamt, bool cond_taken) {} virtual void recoverPHist(const boost::dynamic_bitset<> &history, const FetchStream &entry, int shamt, bool cond_taken) {} virtual void recoverBwHist(const boost::dynamic_bitset<> &history, const FetchStream &entry, int shamt, bool cond_taken) {} - virtual void recoverIHist(const boost::dynamic_bitset<> &history, const FetchStream &entry, int shamt, bool cond_taken) {} + virtual void recoverIHist(const FetchStream &entry, int shamt, bool cond_taken) {} virtual void recoverLHist(const std::vector> &history, const FetchStream &entry, int shamt, bool cond_taken) {} virtual void update(const FetchStream &entry) {} virtual unsigned getDelay() {return numDelay;}