You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
for(intunsigned pe =0; pe <PE_END-PE_BEG; pe++) begin
126
-
dd[D[pe +PE_REM]+:3] = ww[pe];
127
-
aa[D[pe +PE_REM]+3] = ww[pe][3];
174
+
automatic intunsigned ofs =OFFSETS[pe +PE_REM];
175
+
dd[ofs+:3] = ww[pe];
176
+
assert(!NARROW_WEIGHTS|| rst ||!en || zero || (ww[pe] !=-8)) elsebegin
177
+
$warning("%m: Weight of -8 violates NARROW_WEIGHTS commitment.");
178
+
end
179
+
180
+
// The sign of each weight is generally put on the subtracted A port.
181
+
// However, when coinciding with the actual sign bit position of the
182
+
// multiplier input path, it goes onto the D input instead. This prevents
183
+
// sign extensions that may happen when a DSP primitive is auto-promoted
184
+
// to a newer generation.
185
+
if(ofs+3==A_WIDTH-1) dd[ofs+3] = ww[pe][3];
186
+
else aa[ofs+3] = ww[pe][3];
128
187
end
129
188
end
130
189
end:blkVectorize
@@ -135,14 +194,15 @@ module mvu_4sx4u #(
135
194
// rst can be only applied to AD and zero only to B
136
195
// with the same effect as zeroing both.
137
196
if(BEHAVIORAL) begin:genBehav
197
+
138
198
// Stage #1: Input Refine
139
199
logicsigned [17:0] B1=0;
140
200
always_ff@(posedge clk) begin
141
201
if(zero) B1<=0;
142
202
elseif(en) B1<= bb;
143
203
end
144
204
145
-
logicsigned [26:0] AD1=0;
205
+
logicsigned [A_WIDTH-1:0] AD1=0;
146
206
always_ff@(posedge clk) begin
147
207
if(rst) AD1<=0;
148
208
elseif(en) AD1<= dd - aa;
@@ -429,14 +489,14 @@ module mvu_4sx4u #(
429
489
X1<= xx;
430
490
X2<=X1;
431
491
foreach(X3[i]) begin
432
-
X3[i] <=X2[i] + (L[3]?2'h0: pp[D[i]+:2]);
492
+
X3[i] <=X2[i] + (L[3]?2'h0: pp[OFFSETS[i]+:2]);
433
493
end
434
494
end
435
495
end
436
496
437
497
// Derive actual cross-lane overflows
438
498
for(genvar i =0; i <3; i++) begin
439
-
assign h3[s][i] = pp[D[i+1]+:2] -X3[i+1];
499
+
assign h3[s][i] = pp[OFFSETS[i+1]+:2] -X3[i+1];
440
500
end
441
501
assign p3[s] = pp;
442
502
@@ -445,48 +505,59 @@ module mvu_4sx4u #(
445
505
// Stage #4: Cross-SIMD Reduction
446
506
447
507
// Count leaves reachable from each node
448
-
localparam leave_load_tLEAVE_LOAD=SIMD>1?init_leave_loads() :'{default:1}; // SIMD=1 requires no adder tree, so zero-ing out, otherwise init_leave_loads ends up in infinite loop
508
+
localparam leave_load_tLEAVE_LOAD=SIMD>1?init_leave_loads() :'{default:1}; // SIMD=1 requires no adder tree, so zero-ing out, otherwise init_leave_loads ends up in infinite loop
0 commit comments