Skip to content

Commit 4163564

Browse files
authored
Fixed o3 memory access interface; Added a merge-sort workload;
2 parents 6b78c3a + 16fd348 commit 4163564

File tree

10 files changed

+622
-70
lines changed

10 files changed

+622
-70
lines changed

.github/workflows/test.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ jobs:
1212
test:
1313
name: Test
1414
runs-on: self-hosted
15+
env:
16+
CARGO_HOME: ${{ github.workspace }}/.cargo-home
1517
steps:
1618
- uses: actions/checkout@v4
1719
with:
@@ -22,6 +24,16 @@ jobs:
2224
toolchain: stable
2325
components: rustfmt, clippy
2426
override: true
27+
28+
- name: Cache Cargo Dependencies
29+
uses: actions/cache@v4
30+
with:
31+
path: |
32+
.cargo-home/registry
33+
.cargo-home/git
34+
key: ${{ runner.os }}-cargo-${{ hashFiles('tools/rust-sim-runtime/Cargo.toml', 'tools/rust-sim-runtime/Cargo.lock') }}
35+
restore-keys: |
36+
${{ runner.os }}-cargo-
2537
2638
- name: Apply Patches
2739
run: |
@@ -34,12 +46,18 @@ jobs:
3446
echo "verilator-hash=$(git rev-parse HEAD:3rd-party/verilator)" >> $GITHUB_OUTPUT
3547
echo "ramulator2-hash=$(git rev-parse HEAD:3rd-party/ramulator2)" >> $GITHUB_OUTPUT
3648
49+
- name: Prefetch Rust Dependencies
50+
run: |
51+
. setup.sh && cargo fetch --manifest-path tools/rust-sim-runtime/Cargo.toml
52+
3753
- name: Build All Components
3854
run: |
3955
echo "Building all components"
4056
. setup.sh && make build-all
4157
4258
- name: Run All Tests
59+
env:
60+
ASSASSYN_CARGO_NET_OFFLINE: "1"
4361
run: |
4462
. setup.sh && make test-all
4563

examples/minor-cpu/src/main.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def bypass(bypass_reg, bypass_data, idx, value):
242242
with Condition(rd != Bits(5)(0)):
243243
log("own x{:02} |", rd)
244244

245-
return rd, ex_valid ,exec_br_jump,dcache
245+
return rd, ex_valid, exec_br_jump, dcache
246246

247247
class Decoder(Module):
248248

@@ -319,7 +319,7 @@ def build(self,
319319
with Condition(~fetch_valid[0]):
320320
fetch_valid[0] = Bits(1)(1)
321321

322-
should_fetch = (~ on_branch) & (~ br_sm[0] ) & fetch_valid[0]
322+
should_fetch = (~on_branch) & (~br_sm[0]) & fetch_valid[0]
323323

324324

325325
jump_flag = br_jump[0] & br_no_jump[0]
@@ -329,7 +329,7 @@ def build(self,
329329
to_fetch = Bits(32)(0)
330330
#to_fetch = should_fetch.select(pc_addr, to_fetch)
331331
to_fetch = (jump_flag).select(ex_bypass[0].bitcast(Bits(32)), pc_addr)
332-
real_fetch = (should_fetch )& (new_cnt < Int(8)(3))
332+
real_fetch = should_fetch & (new_cnt < Int(8)(3))
333333
log("on_br: {} | br_sm: {} | br_jump: {} | fetch: {} | ex_bypass: 0x{:05x} | ongoing: {} | jump_flag: {}",
334334
on_branch, br_sm[0], br_jump[0], should_fetch, ex_bypass[0], ongoing[0],jump_flag)
335335
icache.build(Bits(1)(0), real_fetch, to_fetch[2:2+depth_log-1].bitcast(Int(depth_log)), Bits(32)(0))
@@ -424,7 +424,6 @@ def build_cpu(depth_log):
424424

425425
wb_bypass_reg = RegArray(bits5, 1)
426426
wb_bypass_data = RegArray(bits32, 1)
427-
428427
exec_br_dest = RegArray(Bits(32), 1)
429428
exec_br_jumped = RegArray(Bits(1), 1)
430429
mem_br_no_jump = RegArray(Bits(1), 1)
@@ -443,7 +442,7 @@ def build_cpu(depth_log):
443442
executor = Execution()
444443
offset_reg = user.build(init_cache.dout)
445444

446-
exec_rd, ex_valid, exec_br_jump,dcache = executor.build(
445+
exec_rd, ex_valid, exec_br_jump, dcache = executor.build(
447446
pc = pc_reg,
448447
exec_bypass_reg = exec_bypass_reg,
449448
exec_bypass_data = exec_bypass_data,
@@ -648,4 +647,4 @@ def init_workspace(base_path, case):
648647
for wl in args:
649648
init_workspace(wl_path, wl)
650649
run_cpu(sys, simulator_binary, verilog_path, wl)
651-
print("Done running user-specified workload(s)!")
650+
print("Done running user-specified workload(s)!")
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{ "offset": 0x100b4, "data_offset": 0x11ec }
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
// Generated from DISC_CPU/src/msort.cpp and DISC_CPU/src/msort_dataset.h.
2+
3+
#include <algorithm>
4+
#include <cstddef>
5+
#include <limits>
6+
7+
#ifndef PREALLOCATE
8+
#define PREALLOCATE 0
9+
#endif
10+
11+
using type = unsigned int;
12+
13+
// Backing storage for the program stack: 256 unsigned ints, 16-byte aligned.
// It is emitted under the assembler name "stack_space" so the startup code
// below can refer to it by symbol, and marked `used` so it is kept even though
// no C++ code references it.
unsigned int stack_space[256] __asm__("stack_space") __attribute__((aligned(16), used));
14+
15+
// Program entry point, placed in section .text.startup.
// The function is `naked`, so its body consists solely of the assembly below:
// it sets up gp and sp, calls main, executes ebreak and then spins forever.
extern "C" __attribute__((naked, section(".text.startup"))) void _start(void) {
16+
__asm__ volatile(
17+
// Load the global pointer. `__global_pointer$` is not defined in this file;
// presumably it is provided by the linker script -- TODO confirm.
"la gp, __global_pointer$\n"
18+
// Point sp at the start of stack_space ...
"la sp, stack_space\n"
19+
// ... then advance it by 1024 bytes. NOTE(review): 1024 matches
// sizeof(stack_space) only when unsigned int is 4 bytes wide; keep this
// immediate in sync with the array length above.
"addi sp, sp, 1024\n"
20+
// Call main with the return address in ra.
"jal ra, main\n"
21+
// Executed once main returns. Presumably this is how the simulator detects
// the end of the workload -- TODO confirm.
"ebreak\n"
22+
// Infinite loop in case execution continues past the ebreak.
"1: j 1b\n"
23+
);
24+
}
25+
26+
// Marks one end of the measured region by emitting a global assembler label
// at this point in the generated code: `stat_start` when `enable` is non-zero,
// `stat_end` otherwise. The asm statements contain only the label definition.
// Presumably external tooling looks these symbols up to delimit the statistics
// window -- TODO confirm against the simulator.
// NOTE(review): an assembler label can be defined only once per object file,
// so each variant should end up emitted exactly once (main calls this once
// with 1 and once with 0).
static inline void setStats(int enable) {
26+
if (enable) {
27+
__asm__ volatile(".global stat_start\nstat_start:");
28+
} else {
29+
__asm__ volatile(".global stat_end\nstat_end:");
30+
}
31+
}
33+
34+
// Placeholder for array printing: this build produces no output, so every
// argument is deliberately discarded. The signature is kept so call sites
// compile unchanged.
template <typename T>
static inline void printArray(const char name[], int n, const T arr[]) {
    static_cast<void>(name);
    static_cast<void>(n);
    static_cast<void>(arr);
}
40+
41+
// Compares the first n elements of `test` against `expected`.
//
// Returns 0 when all n elements match; otherwise returns the 1-based position
// of the first element that differs. Elements are examined two at a time, and
// a trailing element (odd n) is checked separately at the end.
template <typename T>
static inline int verify(int n, const volatile T* test, const T* expected) {
    // Largest even count not exceeding n; this part is covered pairwise.
    const int paired = n - (n % 2);

    int idx = 0;
    while (idx < paired) {
        // Both elements under test are read before either reference value,
        // and before any comparison is made.
        T got_first = test[idx];
        T got_second = test[idx + 1];
        T want_first = expected[idx];
        T want_second = expected[idx + 1];

        if (got_first != want_first) {
            return idx + 1;
        }
        if (got_second != want_second) {
            return idx + 2;
        }
        idx += 2;
    }

    // Odd n leaves one final element that the pairwise loop did not visit.
    const bool has_tail = (n & 1) != 0;
    if (has_tail && test[n - 1] != expected[n - 1]) {
        return n;
    }
    return 0;
}
60+
61+
// Number of elements in each of the two dataset arrays below.
static constexpr int DATA_SIZE = 100;
62+
63+
// Unsorted workload input. main() sorts this array in place, which is why it
// is not declared const.
static type input_data[DATA_SIZE] = {
64+
179, 968, 116, 259, 844, 769, 182, 1002, 1011, 856, 392, 36, 383, 959, 527, 275, 512,
65+
874, 851, 592, 238, 608, 930, 457, 0, 234, 563, 168, 844, 513, 886, 730, 767, 159, 743,
66+
657, 970, 139, 518, 686, 272, 222, 940, 569, 492, 393, 304, 70, 766, 148, 363, 478, 236,
67+
841, 480, 258, 321, 262, 110, 192, 602, 351, 855, 125, 105, 136, 996, 687, 27, 26, 527,
68+
531, 576, 826, 567, 469, 391, 537, 388, 759, 325, 819, 744, 668, 69, 1011, 344, 264, 132,
69+
439, 565, 703, 719, 643, 556, 601, 596, 27, 26, 783,
70+
};
71+
72+
// Reference result that main() compares the sorted input_data against; the
// values are listed in non-decreasing order. Not const because the PREALLOCATE
// path in main() passes this array to sort().
static type verify_data[DATA_SIZE] = {
73+
0, 26, 26, 27, 27, 36, 69, 70, 105, 110, 116, 125, 132, 136, 139, 148, 159, 168, 179, 182,
74+
192, 222, 234, 236, 238, 258, 259, 262, 264, 272, 275, 304, 321, 325, 344, 351, 363, 383,
75+
388, 391, 392, 393, 439, 457, 469, 478, 480, 492, 512, 513, 518, 527, 527, 531, 537, 556,
76+
563, 565, 567, 569, 576, 592, 596, 601, 602, 608, 643, 657, 668, 686, 687, 703, 719, 730,
77+
743, 744, 759, 766, 767, 769, 783, 819, 826, 841, 844, 844, 851, 855, 856, 874, 886, 930,
78+
940, 959, 968, 970, 996, 1002, 1011, 1011,
79+
};
80+
81+
// Sorts arr_in[0, n) into non-decreasing order using a bottom-up merge sort.
//
// Runs of width 1, 2, 4, ... are merged pass by pass, alternating between the
// two buffers; if the final pass lands in the scratch buffer the result is
// copied back, so the sorted data always ends up in arr_in.
//
//   n          - number of elements in arr_in and in scratch_in
//   arr_in     - data to sort; holds the sorted result on return
//   scratch_in - work buffer of at least n elements; its contents are
//                overwritten
//
// Exhausted runs are detected with the run cursors, not with a maximum-value
// sentinel. A sentinel cannot be told apart from a real element equal to the
// largest value of `type`: with the left run exhausted and such an element
// next in the right run, the left (empty) side would win the `<=` comparison,
// the right cursor would never advance, and the merge would never terminate
// while writing past the end of the buffer.
static void sort(std::size_t n, type arr_in[], type scratch_in[]) {
    type* dst = arr_in;
    type* src = scratch_in;

    for (std::size_t width = 1; width < n; width <<= 1) {
        // The output of the previous pass becomes the input of this one.
        std::swap(dst, src);

        for (std::size_t base = 0; base < n; base += (width << 1)) {
            // Left run is [base, mid), right run is [mid, end); both are
            // clamped to n, so the right run may be short or empty.
            const std::size_t mid = std::min(base + width, n);
            const std::size_t end = std::min(base + (width << 1), n);

            std::size_t l = base;
            std::size_t r = mid;

            // Exactly (end - base) elements are produced, one per iteration.
            for (std::size_t k = base; k < end; ++k) {
                // Take from the left run while it has elements and either the
                // right run is exhausted or the left head is not greater.
                // Using `<=` keeps equal elements in their original order.
                const bool take_left =
                    (l < mid) && (r >= end || src[l] <= src[r]);
                if (take_left) {
                    dst[k] = src[l];
                    ++l;
                } else {
                    dst[k] = src[r];
                    ++r;
                }
            }
        }
    }

    // After an odd number of passes the result sits in the scratch buffer.
    if (dst != arr_in) {
        for (std::size_t i = 0; i < n; ++i) {
            arr_in[i] = dst[i];
        }
    }
}
115+
116+
// Workload driver: sorts input_data in place and checks it against
// verify_data.
//
// Returns 0 when the sorted data matches verify_data; otherwise returns the
// 1-based position of the first mismatching element as reported by verify().
// When PREALLOCATE is non-zero, a preliminary pass runs first (see below).
extern "C" int main() {
117+
// Work buffer for sort(); static storage keeps it off the stack.
static type scratch[DATA_SIZE];
118+
119+
printArray("input", DATA_SIZE, input_data);
120+
printArray("verify", DATA_SIZE, verify_data);
121+
122+
#if PREALLOCATE
123+
// Preliminary pass, executed before the measured region: it runs sort() over
// verify_data and then verify() over input_data compared with itself.
// Presumably this is a warm-up that touches the code and data ahead of the
// measurement -- TODO confirm against the original benchmark.
sort(DATA_SIZE, verify_data, scratch);
124+
// Both array arguments are the same buffer here.
if (verify(DATA_SIZE, input_data, input_data)) {
125+
return 1;
126+
}
127+
#endif
128+
129+
// The two setStats() calls bracket the sort, so only the sort call lies
// between the stat_start and stat_end labels.
setStats(1);
130+
sort(DATA_SIZE, input_data, scratch);
131+
setStats(0);
132+
133+
printArray("test", DATA_SIZE, input_data);
134+
return verify(DATA_SIZE, input_data, verify_data);
135+
}

0 commit comments

Comments
 (0)