-
Notifications
You must be signed in to change notification settings - Fork 72
Expand file tree
/
Copy pathidealkmhv3.py
More file actions
146 lines (120 loc) · 4.71 KB
/
idealkmhv3.py
File metadata and controls
146 lines (120 loc) · 4.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import argparse
import sys
import m5
from m5.defines import buildEnv
from m5.objects import *
from m5.util import addToPath, fatal, warn
from m5.util.fdthelper import *
addToPath('../')
from ruby import Ruby
from common.LSQBankConflict import set_lsq_bank_conflict_cache_params
from common.FSConfig import *
from common.SysPaths import *
from common.Benchmarks import *
from common import Simulation
from common.Caches import *
from common.xiangshan import *
def _apply_ideal_core_params(core):
    """Set the fetch-through-LSQ pipeline parameters on a single CPU."""
    # fetch
    core.mmu.itb.size = 96
    core.fetchWidth = 32
    # for resolved update, should train branch after squash
    core.iewToFetchDelay = 2
    core.commitToFetchDelay = 2
    core.fetchQueueSize = 64

    # decode
    core.decodeWidth = 8
    core.enable_loadFusion = False
    core.enableConstantFolding = False

    # rename
    core.renameWidth = 8
    core.numPhysIntRegs = 224
    core.numPhysFloatRegs = 256

    # dispatch
    core.enableDispatchStage = True
    core.numDQEntries = [8, 8, 8]
    core.dispWidth = [8, 8, 8]

    # scheduler (a fresh scheduler object is created for every CPU)
    core.scheduler = KMHV3Scheduler()

    # rob
    core.commitWidth = 12
    core.squashWidth = 12
    core.phyregReleaseWidth = 8
    core.RobCompressPolicy = 'kmhv3'
    core.numROBEntries = 160
    core.CROB_instPerGroup = 2  # 1 if not using ROB compression

    # lsu
    core.StoreWbStage = 4
    core.EnableLdMissReplay = True
    core.EnablePipeNukeCheck = True
    core.BankConflictCheck = True
    core.sbufferBankWriteAccurately = True
    core.DcacheSetDivNum = 2

    # lsq
    core.LQEntries = 128
    core.SQEntries = 64
    core.RARQEntries = 96
    core.RAWQEntries = 56
    core.LoadCompletionWidth = 8
    core.StoreCompletionWidth = 4
    core.RARDequeuePerCycle = 4
    core.RAWDequeuePerCycle = 4
    core.SbufferEntries = 24
    core.SbufferEvictThreshold = 16
    core.store_prefetch_train = False


def _apply_ideal_l1_params(core, system):
    """Set the per-core L1 cache parameters, then the LSQ bank-conflict ones."""
    core.icache.size = '64kB'
    core.dcache.size = '64kB'
    core.dcache.tag_load_read_ports = 100
    core.dcache.mshrs = 16
    core.dcache.simulate_dcache_refill = True
    set_lsq_bank_conflict_cache_params(core, system)


def _apply_ideal_l2_params(args, system, idx):
    """Set the L2 cache and L1-to-L2 bus parameters belonging to CPU ``idx``."""
    if args.classic_l2:
        system.l2_caches[idx].slice_num = 0  # 4 -> 0, no slice
    else:
        wrapper = system.l2_wrappers[idx]
        wrapper.data_sram_banks = 2
        wrapper.dir_sram_banks = 2
        wrapper.pipe_dir_write_stage = 4
        wrapper.dir_read_bypass = True
        for slice_idx in range(args.l2_slices):
            # Configure XSDRRIP replacement policy (DRRIP mode).
            # Each slice: 2MB/4 = 512KB, 8-way, 64B line -> 1024 sets.
            # NOTE(review): num_sets stays 1024 whatever args.l2_slices is;
            # confirm this is intended when the slice count is not 4.
            wrapper.slices[slice_idx].inner_cache.replacement_policy = (
                XSDRRIPRP(mode=2, num_sets=1024)
            )

    l1_to_l2_bus = system.tol2bus_list[idx]
    l1_to_l2_bus.forward_latency = 0  # 3->0
    l1_to_l2_bus.response_latency = 0  # 3->0
    l1_to_l2_bus.hint_wakeup_ahead_cycles = 0  # 2->0
    # Enable dual-port for DCache -> L2 communication
    # ReqLayer[0]: ICache+DCache+ITB+DTB -> L2, allow 2 requests per cycle
    # RespLayer[1]: L2 -> DCache, allow 2 responses per cycle
    l1_to_l2_bus.layer_bandwidth_configs = [
        LayerBandwidthConfig(
            direction="req", port_index=0, max_per_cycle=2),
        LayerBandwidthConfig(
            direction="resp", port_index=1, max_per_cycle=2),
    ]


def setKmhV3IdealParams(args, system):
    """Overwrite CPU, cache and bus parameters with the ideal KMH-V3 values.

    Every assignment replaces the value the system currently holds, so the
    settings made here win over whatever was configured earlier.

    NOTE(review): the nesting of this function was reconstructed from a
    copy that had lost its indentation. Two placements should be confirmed
    against the upstream file: the LSQ bank-conflict call is kept inside
    the ``args.caches`` guard, and the ``tol2bus_list`` settings are applied
    for both the classic and the wrapper L2.

    Args:
        args: Parsed options. Reads ``bp_type``, ``caches``, ``l2cache``,
            ``num_cpus``, ``classic_l2``, ``l2_slices`` and ``l3cache``.
        system: The system to modify. ``system.cpu`` is always iterated;
            ``l2_caches`` / ``l2_wrappers`` / ``tol2bus_list`` and ``l3`` are
            only touched when the matching option is set.
    """
    for core in system.cpu:
        _apply_ideal_core_params(core)

        # branch predictor
        if args.bp_type == 'DecoupledBPUWithBTB':
            core.branchPred.ftq_size = 64
            core.branchPred.fsq_size = 64
            # core.branchPred.microtage.enabled = False

        # l1 cache per core
        if args.caches:
            _apply_ideal_l1_params(core, system)

    # l2 caches
    if args.l2cache:
        for idx in range(args.num_cpus):
            _apply_ideal_l2_params(args, system, idx)

    # l3 cache
    if args.l3cache:
        system.l3.mshrs = 128
# Script entry point: gem5 runs config scripts with __name__ set to
# '__m5_main__'. Builds the XiangShan full-system model, applies the ideal
# KMH-V3 overrides and starts the simulation.
if __name__ == '__m5_main__':
    FutureClass = None

    args = xiangshan_system_init()

    # Checked explicitly instead of with `assert`, because assert statements
    # are removed when Python runs with optimisation enabled.
    if args.external_memory_system:
        fatal("The ideal KMH-V3 configuration does not support "
              "external_memory_system")

    # Force the branch predictor type and the L2 size for this configuration.
    # Both are assigned unconditionally, so any value already present in
    # args is overwritten.
    args.bp_type = 'DecoupledBPUWithBTB'
    args.l2_size = '2MB'

    # Match the memories with the CPUs, based on the options for the test
    # system (the returned class is not used further in this block).
    TestMemClass = Simulation.setMemClass(args)

    test_sys = build_xiangshan_system(args)

    # Set ideal parameters here with the highest priority, over command-line
    # arguments
    setKmhV3IdealParams(args, test_sys)

    root = Root(full_system=True, system=test_sys)

    Simulation.run_vanilla(args, root, test_sys, FutureClass)