-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathVectorFloatFMA.scala
More file actions
1942 lines (1844 loc) · 131 KB
/
VectorFloatFMA.scala
File metadata and controls
1942 lines (1844 loc) · 131 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
package yunsuan.vector
import chisel3._
import chisel3.util._
import scala.collection.mutable.ListBuffer
import yunsuan.VfmaOpCode
import yunsuan.util._
class VectorFloatFMA() extends Module{
// Parameters of the widest supported format (fp64): 11 exponent bits and a
// 53-bit significand (52 stored fraction bits plus the leading bit that is
// prepended when the significands are built further down).
val exponentWidth : Int = 11
val significandWidth : Int = 53
// 11 + 53 = 64: width of every packed operand/result port of this module.
val floatWidth = exponentWidth + significandWidth
// Module ports. The 64-bit data ports carry one fp64, two fp32 or four fp16
// lanes depending on fp_format.
val io = IO(new Bundle() {
// Enables the first stage of pipeline registers (used as RegEnable enable).
val fire = Input (Bool())
val fp_a, fp_b, fp_c = Input (UInt(floatWidth.W)) // fp_a->VS2,fp_b->VS1,fp_c->VD
// Selects which half of widen_a / widen_b feeds the widening multiply.
val uop_idx = Input (Bool())
val widen_a = Input (UInt(floatWidth.W)) // widen_a -> Cat(vs2(95,64),vs2(31,0)) or Cat(vs2(127,96),vs2(63,32))
val widen_b = Input (UInt(floatWidth.W)) // widen_b -> Cat(vs1(95,64),vs1(31,0)) or Cat(vs1(127,96),vs1(63,32))
val round_mode = Input (UInt(3.W))
val fp_format = Input (UInt(2.W)) // result format b01->fp16,b10->fp32,b11->fp64
// Compared against the VfmaOpCode constants to decode the operation.
val op_code = Input (UInt(4.W))
// Scalar operand; its low bits replace every lane of fp_b when is_frs1 is set.
val frs1 = Input (UInt(64.W))
// NOTE(review): is_vec is not consumed in the portion of the file reviewed here.
val is_vec = Input (Bool())
val is_frs1 = Input (Bool())
// When set (together with an fp32/fp64 result format) the multiplicands are
// taken from widen_a / widen_b at half the result width.
val res_widening = Input (Bool())
val fp_result = Output(UInt(floatWidth.W))
// presumably 4 lanes x 5 exception flags — TODO confirm against the flag logic
val fflags = Output(UInt(20.W))
// NOTE(review): the three canonical-NaN hints are not consumed in the portion
// of the file reviewed here; presumably they force the operand to be treated
// as a canonical NaN — confirm.
val fp_aIsFpCanonicalNAN = Input(Bool())
val fp_bIsFpCanonicalNAN = Input(Bool())
val fp_cIsFpCanonicalNAN = Input(Bool())
})
// Elaboration-time switch: printfEn only emits debug printfs when this is true.
val printfen: Boolean = false
/** Logical left shifter built as a chain of 2:1 muxes, one stage per bit of
  * the shift amount, applied from the most significant shift bit downwards.
  *
  * Every stage is held in a wire as wide as `srcValue`, so bits shifted past
  * the top are discarded.
  *
  * @param srcValue   value to shift
  * @param shiftValue shift amount; its width fixes the number of mux stages
  * @return `srcValue` shifted left by `shiftValue`, truncated to `srcValue`'s width
  */
def shiftLeftWithMux(srcValue: UInt, shiftValue: UInt): UInt = {
  val stageCount = shiftValue.getWidth
  // stages(k) is the value after the k most significant shift bits were applied.
  val stages = Wire(Vec(stageCount + 1, UInt(srcValue.getWidth.W)))
  stages(0) := srcValue
  (0 until stageCount).foreach { stage =>
    val bitIndex = stageCount - 1 - stage
    val shifted = stages(stage) << (1 << bitIndex)
    stages(stage + 1) := Mux(shiftValue(bitIndex), shifted, stages(stage))
  }
  stages(stageCount)
}
/** Logical right shifter built as a chain of 2:1 muxes that also collects a
  * sticky bit: the OR of every bit shifted out at the low end.
  *
  * Stages are applied from the least significant shift bit upwards; stage `i`
  * shifts by `2^i` when `shiftValue(i)` is set and ORs the bits it drops into
  * the running sticky flag.
  *
  * The dropped-bit slice is clamped to the width of `srcValue`, so the helper
  * also elaborates when the shift-amount width permits a stage shift that is
  * greater than or equal to the source width (in that case the stage drops the
  * whole word). For narrower stage shifts the generated logic is unchanged.
  *
  * @param srcValue   value to shift
  * @param shiftValue shift amount; its width fixes the number of mux stages
  * @return `Cat(shifted value, sticky)`: `srcValue.getWidth + 1` bits, sticky in bit 0
  */
def shiftRightWithMuxSticky(srcValue: UInt, shiftValue: UInt): UInt = {
  val srcWidth = srcValue.getWidth
  val vecLength = shiftValue.getWidth + 1
  val res_vec = Wire(Vec(vecLength, UInt(srcWidth.W)))
  val sticky_vec = Wire(Vec(vecLength, UInt(1.W)))
  res_vec(0) := srcValue
  sticky_vec(0) := 0.U
  for (i <- 0 until shiftValue.getWidth) {
    val stageShift = 1 << i
    // A stage can never drop more bits than the word holds.
    val droppedWidth = if (stageShift < srcWidth) stageShift else srcWidth
    val droppedAny = if (droppedWidth > 0) res_vec(i)(droppedWidth - 1, 0).orR else false.B
    res_vec(i + 1) := Mux(shiftValue(i), res_vec(i) >> stageShift, res_vec(i))
    sticky_vec(i + 1) := Mux(shiftValue(i), sticky_vec(i) | droppedAny, sticky_vec(i))
  }
  Cat(res_vec(vecLength - 1), sticky_vec(vecLength - 1))
}
/** Emits a simulation `printf` for `pable` only when the elaboration-time
  * flag `printfen` is true; otherwise no hardware is generated.
  *
  * @param pable message to print
  */
def printfEn(pable: Printable): Unit =
  if (printfen) {
    printf(pable)
  }
// Pipeline enables: `fire` enables the stage-0 registers, fire_reg0 / fire_reg1
// enable the following stages.
// NOTE(review): GatedValidRegNext is defined outside this file (yunsuan.util);
// presumably it returns its argument delayed by one cycle — confirm.
val fire = io.fire
val fire_reg0 = GatedValidRegNext(fire)
val fire_reg1 = GatedValidRegNext(fire_reg0)
// One-hot style decode of op_code against the VfmaOpCode constants.
val is_vfmul = io.op_code === VfmaOpCode.vfmul
val is_vfmacc = io.op_code === VfmaOpCode.vfmacc
val is_vfnmacc = io.op_code === VfmaOpCode.vfnmacc
val is_vfmsac = io.op_code === VfmaOpCode.vfmsac
val is_vfnmsac = io.op_code === VfmaOpCode.vfnmsac
val is_vfmadd = io.op_code === VfmaOpCode.vfmadd
val is_vfnmadd = io.op_code === VfmaOpCode.vfnmadd
val is_vfmsub = io.op_code === VfmaOpCode.vfmsub
val is_vfnmsub = io.op_code === VfmaOpCode.vfnmsub
// Result-format flags (fp_format: b11 -> fp64, b10 -> fp32) and copies of them
// carried along the pipeline, each captured with the enable of its stage.
val is_fp64 = io.fp_format === 3.U(2.W)
val is_fp64_reg0 = RegEnable(is_fp64, fire)
val is_fp64_reg1 = RegEnable(is_fp64_reg0, fire_reg0)
val is_fp64_reg2 = RegEnable(is_fp64_reg1, fire_reg1)
val is_fp32 = io.fp_format === 2.U(2.W)
val is_fp32_reg0 = RegEnable(is_fp32, fire)
val is_fp32_reg1 = RegEnable(is_fp32_reg0, fire_reg0)
val is_fp32_reg2 = RegEnable(is_fp32_reg1, fire_reg1)
/** Conditionally flips the most significant (sign) bit of `src`.
  *
  * @param src packed floating-point value whose top bit is the sign
  * @param sel when true the sign bit is inverted, otherwise `src` passes through
  * @return `src` with its top bit inverted iff `sel`; all other bits unchanged
  */
def sign_inv(src: UInt, sel: Bool): UInt = {
  val signBit = src.head(1)
  val magnitudeBits = src.tail(1)
  val newSign = Mux(sel, ~signBit, signBit)
  Cat(newSign, magnitudeBits)
}
// Operand preparation. Internally the datapath computes (a * b) + c.
// fp_a_is_sign_inv: operations whose product term is negated (sign of `a` flipped).
// fp_c_is_sign_inv: operations whose addend is negated (sign of `c` flipped).
// swap_fp_a_fp_c:   the *madd/*msub forms multiply by io.fp_c (VD) and add io.fp_a (VS2),
//                   so the two ports exchange roles.
val fp_a_is_sign_inv = is_vfnmacc || is_vfnmsac || is_vfnmadd || is_vfnmsub
val fp_c_is_sign_inv = is_vfnmacc || is_vfmsac || is_vfnmadd || is_vfmsub
val swap_fp_a_fp_c = is_vfmadd || is_vfnmadd || is_vfmsub || is_vfnmsub
// fp64 view: one 64-bit lane. `b` comes from the scalar frs1 when is_frs1 is set.
// For vfmul the addend is forced to all-zero bits.
val fp_a_f64 = sign_inv(Mux(swap_fp_a_fp_c,io.fp_c(63,0),io.fp_a(63,0)),fp_a_is_sign_inv)
val fp_b_f64 = Mux(io.is_frs1,io.frs1,io.fp_b(63,0))
val fp_c_f64 = Mux(is_vfmul,0.U(64.W),sign_inv(Mux(swap_fp_a_fp_c,io.fp_a(63,0),io.fp_c(63,0)),fp_c_is_sign_inv))
// fp32 view: two 32-bit lanes; the scalar frs1(31,0) is broadcast to both lanes.
val fp_a_f32_0 = sign_inv(Mux(swap_fp_a_fp_c,io.fp_c(31,0 ),io.fp_a(31,0 )),fp_a_is_sign_inv)
val fp_a_f32_1 = sign_inv(Mux(swap_fp_a_fp_c,io.fp_c(63,32),io.fp_a(63,32)),fp_a_is_sign_inv)
val fp_b_f32_0 = Mux(io.is_frs1,io.frs1(31,0),io.fp_b(31,0 ))
val fp_b_f32_1 = Mux(io.is_frs1,io.frs1(31,0),io.fp_b(63,32))
val fp_c_f32_0 = Mux(is_vfmul,0.U(32.W),sign_inv(Mux(swap_fp_a_fp_c,io.fp_a(31,0 ),io.fp_c(31,0 )),fp_c_is_sign_inv))
val fp_c_f32_1 = Mux(is_vfmul,0.U(32.W),sign_inv(Mux(swap_fp_a_fp_c,io.fp_a(63,32),io.fp_c(63,32)),fp_c_is_sign_inv))
// fp16 view: four 16-bit lanes; the scalar frs1(15,0) is broadcast to all lanes.
val fp_a_f16_0 = sign_inv(Mux(swap_fp_a_fp_c,io.fp_c(15,0 ),io.fp_a(15,0 )),fp_a_is_sign_inv)
val fp_a_f16_1 = sign_inv(Mux(swap_fp_a_fp_c,io.fp_c(31,16),io.fp_a(31,16)),fp_a_is_sign_inv)
val fp_a_f16_2 = sign_inv(Mux(swap_fp_a_fp_c,io.fp_c(47,32),io.fp_a(47,32)),fp_a_is_sign_inv)
val fp_a_f16_3 = sign_inv(Mux(swap_fp_a_fp_c,io.fp_c(63,48),io.fp_a(63,48)),fp_a_is_sign_inv)
val fp_b_f16_0 = Mux(io.is_frs1,io.frs1(15,0),io.fp_b(15,0 ))
val fp_b_f16_1 = Mux(io.is_frs1,io.frs1(15,0),io.fp_b(31,16))
val fp_b_f16_2 = Mux(io.is_frs1,io.frs1(15,0),io.fp_b(47,32))
val fp_b_f16_3 = Mux(io.is_frs1,io.frs1(15,0),io.fp_b(63,48))
val fp_c_f16_0 = Mux(is_vfmul,0.U(16.W),sign_inv(Mux(swap_fp_a_fp_c,io.fp_a(15,0 ),io.fp_c(15,0 )),fp_c_is_sign_inv))
val fp_c_f16_1 = Mux(is_vfmul,0.U(16.W),sign_inv(Mux(swap_fp_a_fp_c,io.fp_a(31,16),io.fp_c(31,16)),fp_c_is_sign_inv))
val fp_c_f16_2 = Mux(is_vfmul,0.U(16.W),sign_inv(Mux(swap_fp_a_fp_c,io.fp_a(47,32),io.fp_c(47,32)),fp_c_is_sign_inv))
val fp_c_f16_3 = Mux(is_vfmul,0.U(16.W),sign_inv(Mux(swap_fp_a_fp_c,io.fp_a(63,48),io.fp_c(63,48)),fp_c_is_sign_inv))
// Product sign per fp16 lane: XOR of the two multiplicand sign bits.
val sign_a_b_f16_0 = (fp_a_f16_0.head(1) ^ fp_b_f16_0.head(1)).asBool
val sign_a_b_f16_1 = (fp_a_f16_1.head(1) ^ fp_b_f16_1.head(1)).asBool
val sign_a_b_f16_2 = (fp_a_f16_2.head(1) ^ fp_b_f16_2.head(1)).asBool
val sign_a_b_f16_3 = (fp_a_f16_3.head(1) ^ fp_b_f16_3.head(1)).asBool
// Widening multiplicands: narrow sources picked from widen_a / widen_b.
// uop_idx selects the upper (true) or lower (false) half of each widen port.
// Only the *nmacc / *nmsac opcodes flip the product sign here.
val widen_a_is_sign_inv = is_vfnmacc || is_vfnmsac
val widen_a_f16_0 = sign_inv(Mux(io.uop_idx,io.widen_a(47,32),io.widen_a(15,0)),widen_a_is_sign_inv)
val widen_b_f16_0 = Mux(io.is_frs1,io.frs1(15,0),Mux(io.uop_idx,io.widen_b(47,32),io.widen_b(15,0)))
val widen_a_f16_1 = sign_inv(Mux(io.uop_idx,io.widen_a(63,48),io.widen_a(31,16)),widen_a_is_sign_inv)
val widen_b_f16_1 = Mux(io.is_frs1,io.frs1(15,0),Mux(io.uop_idx,io.widen_b(63,48),io.widen_b(31,16)))
val widen_a_f32_0 = sign_inv(Mux(io.uop_idx,io.widen_a(63,32),io.widen_a(31,0)),widen_a_is_sign_inv)
val widen_b_f32_0 = Mux(io.is_frs1,io.frs1(31,0),Mux(io.uop_idx,io.widen_b(63,32),io.widen_b(31,0)))
val widen_sign_a_b_f16_0 = (widen_a_f16_0.head(1) ^ widen_b_f16_0.head(1)).asBool
val widen_sign_a_b_f16_1 = (widen_a_f16_1.head(1) ^ widen_b_f16_1.head(1)).asBool
val widen_sign_a_b_f32_0 = (widen_a_f32_0.head(1) ^ widen_b_f32_0.head(1)).asBool
// Product sign for the fp32 / fp64 lanes: taken from the narrow widening
// sources when a widening result of that format is requested.
val sign_a_b_f32_0 = Mux(io.res_widening & is_fp32,widen_sign_a_b_f16_0,(fp_a_f32_0.head(1) ^ fp_b_f32_0.head(1)).asBool)
val sign_a_b_f32_1 = Mux(io.res_widening & is_fp32,widen_sign_a_b_f16_1,(fp_a_f32_1.head(1) ^ fp_b_f32_1.head(1)).asBool)
val sign_a_b_f64 = Mux(io.res_widening & is_fp64,widen_sign_a_b_f32_0,(fp_a_f64(63) ^ fp_b_f64(63)).asBool)
// Addend sign per lane (after the optional sign inversion above).
val sign_c_f64 = fp_c_f64(63).asBool
val sign_c_f32_0 = fp_c_f32_0.head(1).asBool
val sign_c_f32_1 = fp_c_f32_1.head(1).asBool
val sign_c_f16_0 = fp_c_f16_0.head(1).asBool
val sign_c_f16_1 = fp_c_f16_1.head(1).asBool
val sign_c_f16_2 = fp_c_f16_2.head(1).asBool
val sign_c_f16_3 = fp_c_f16_3.head(1).asBool
// Effective-subtraction flag per lane: set when the product sign and the
// addend sign differ, i.e. the magnitudes have to be subtracted.
// The fp64 flag is additionally carried down the pipeline, each copy captured
// with the enable of its stage.
val is_sub_f64 = sign_a_b_f64 ^ sign_c_f64
val is_sub_f64_reg0 = RegEnable(is_sub_f64, fire)
val is_sub_f64_reg1 = RegEnable(is_sub_f64_reg0, fire_reg0)
val is_sub_f64_reg2 = RegEnable(is_sub_f64_reg1, fire_reg1)
val is_sub_f32_0 = sign_a_b_f32_0 ^ sign_c_f32_0
val is_sub_f32_1 = sign_a_b_f32_1 ^ sign_c_f32_1
val is_sub_f16_0 = sign_a_b_f16_0 ^ sign_c_f16_0
val is_sub_f16_1 = sign_a_b_f16_1 ^ sign_c_f16_1
val is_sub_f16_2 = sign_a_b_f16_2 ^ sign_c_f16_2
val is_sub_f16_3 = sign_a_b_f16_3 ^ sign_c_f16_3
// Raw biased exponent fields of a, b and c for every format / lane:
// fp64 -> bits (62,52), fp32 -> bits (30,23), fp16 -> bits (14,10).
val Ea_f64 = fp_a_f64.tail(1).head(exponentWidth)
val Eb_f64 = fp_b_f64.tail(1).head(exponentWidth)
val Ec_f64 = fp_c_f64.tail(1).head(exponentWidth)
val Ea_f32_0 = fp_a_f32_0(30,23)
val Eb_f32_0 = fp_b_f32_0(30,23)
val Ec_f32_0 = fp_c_f32_0(30,23)
val Ea_f32_1 = fp_a_f32_1(30,23)
val Eb_f32_1 = fp_b_f32_1(30,23)
val Ec_f32_1 = fp_c_f32_1(30,23)
val Ea_f16_0 = fp_a_f16_0(14,10)
val Eb_f16_0 = fp_b_f16_0(14,10)
val Ec_f16_0 = fp_c_f16_0(14,10)
val Ea_f16_1 = fp_a_f16_1(14,10)
val Eb_f16_1 = fp_b_f16_1(14,10)
val Ec_f16_1 = fp_c_f16_1(14,10)
val Ea_f16_2 = fp_a_f16_2(14,10)
val Eb_f16_2 = fp_b_f16_2(14,10)
val Ec_f16_2 = fp_c_f16_2(14,10)
val Ea_f16_3 = fp_a_f16_3(14,10)
val Eb_f16_3 = fp_b_f16_3(14,10)
val Ec_f16_3 = fp_c_f16_3(14,10)
// Exponent fields of the narrow widening multiplicands.
val widen_Ea_f16_0 = widen_a_f16_0(14,10)
val widen_Eb_f16_0 = widen_b_f16_0(14,10)
val widen_Ea_f16_1 = widen_a_f16_1(14,10)
val widen_Eb_f16_1 = widen_b_f16_1(14,10)
val widen_Ea_f32_0 = widen_a_f32_0(30,23)
val widen_Eb_f32_0 = widen_b_f32_0(30,23)
// "Exponent field is non-zero" flags; these become the leading significand bit
// below (an all-zero exponent field means the value has no implicit leading 1).
val Ea_f64_is_not_zero = Ea_f64.orR
val Eb_f64_is_not_zero = Eb_f64.orR
val Ec_f64_is_not_zero = Ec_f64.orR
val Ea_f32_0_is_not_zero = Ea_f32_0.orR
val Eb_f32_0_is_not_zero = Eb_f32_0.orR
val Ec_f32_0_is_not_zero = Ec_f32_0.orR
val Ea_f32_1_is_not_zero = Ea_f32_1.orR
val Eb_f32_1_is_not_zero = Eb_f32_1.orR
val Ec_f32_1_is_not_zero = Ec_f32_1.orR
val Ea_f16_0_is_not_zero = Ea_f16_0.orR
val Eb_f16_0_is_not_zero = Eb_f16_0.orR
val Ec_f16_0_is_not_zero = Ec_f16_0.orR
val Ea_f16_1_is_not_zero = Ea_f16_1.orR
val Eb_f16_1_is_not_zero = Eb_f16_1.orR
val Ec_f16_1_is_not_zero = Ec_f16_1.orR
val Ea_f16_2_is_not_zero = Ea_f16_2.orR
val Eb_f16_2_is_not_zero = Eb_f16_2.orR
val Ec_f16_2_is_not_zero = Ec_f16_2.orR
val Ea_f16_3_is_not_zero = Ea_f16_3.orR
val Eb_f16_3_is_not_zero = Eb_f16_3.orR
val Ec_f16_3_is_not_zero = Ec_f16_3.orR
// fp16 significands: leading bit followed by the 10 stored fraction bits (11 bits).
val fp_a_significand_f16_0 = Cat(Ea_f16_0_is_not_zero,fp_a_f16_0(9,0))
val fp_b_significand_f16_0 = Cat(Eb_f16_0_is_not_zero,fp_b_f16_0(9,0))
val fp_c_significand_f16_0 = Cat(Ec_f16_0_is_not_zero,fp_c_f16_0(9,0))
val fp_a_significand_f16_1 = Cat(Ea_f16_1_is_not_zero,fp_a_f16_1(9,0))
val fp_b_significand_f16_1 = Cat(Eb_f16_1_is_not_zero,fp_b_f16_1(9,0))
val fp_c_significand_f16_1 = Cat(Ec_f16_1_is_not_zero,fp_c_f16_1(9,0))
val fp_a_significand_f16_2 = Cat(Ea_f16_2_is_not_zero,fp_a_f16_2(9,0))
val fp_b_significand_f16_2 = Cat(Eb_f16_2_is_not_zero,fp_b_f16_2(9,0))
val fp_c_significand_f16_2 = Cat(Ec_f16_2_is_not_zero,fp_c_f16_2(9,0))
val fp_a_significand_f16_3 = Cat(Ea_f16_3_is_not_zero,fp_a_f16_3(9,0))
val fp_b_significand_f16_3 = Cat(Eb_f16_3_is_not_zero,fp_b_f16_3(9,0))
val fp_c_significand_f16_3 = Cat(Ec_f16_3_is_not_zero,fp_c_f16_3(9,0))
// Significands of the narrow widening multiplicands (11 bits for fp16, 24 for fp32).
val widen_a_significand_f16_0 = Cat(widen_a_f16_0(14,10).orR,widen_a_f16_0(9,0))
val widen_b_significand_f16_0 = Cat(widen_b_f16_0(14,10).orR,widen_b_f16_0(9,0))
val widen_a_significand_f16_1 = Cat(widen_a_f16_1(14,10).orR,widen_a_f16_1(9,0))
val widen_b_significand_f16_1 = Cat(widen_b_f16_1(14,10).orR,widen_b_f16_1(9,0))
val widen_a_significand_f32_0 = Cat(widen_a_f32_0(30,23).orR,widen_a_f32_0(22,0))
val widen_b_significand_f32_0 = Cat(widen_b_f32_0(30,23).orR,widen_b_f32_0(22,0))
// fp32 significands (24 bits). For a widening fp32 result the 11-bit fp16
// significand is left-aligned by appending 13 zero bits.
val fp_a_significand_f32_0 = Mux(io.res_widening & is_fp32,Cat(widen_a_significand_f16_0,0.U(13.W)),Cat(Ea_f32_0_is_not_zero,fp_a_f32_0(22,0)))
val fp_b_significand_f32_0 = Mux(io.res_widening & is_fp32,Cat(widen_b_significand_f16_0,0.U(13.W)),Cat(Eb_f32_0_is_not_zero,fp_b_f32_0(22,0)))
val fp_c_significand_f32_0 = Cat(Ec_f32_0_is_not_zero,fp_c_f32_0(22,0))
val fp_a_significand_f32_1 = Mux(io.res_widening & is_fp32,Cat(widen_a_significand_f16_1,0.U(13.W)),Cat(Ea_f32_1_is_not_zero,fp_a_f32_1(22,0)))
val fp_b_significand_f32_1 = Mux(io.res_widening & is_fp32,Cat(widen_b_significand_f16_1,0.U(13.W)),Cat(Eb_f32_1_is_not_zero,fp_b_f32_1(22,0)))
val fp_c_significand_f32_1 = Cat(Ec_f32_1_is_not_zero,fp_c_f32_1(22,0))
// fp64 significands (53 bits). For a widening fp64 result the 24-bit fp32
// significand is left-aligned by appending 29 zero bits.
val fp_a_significand_f64 = Mux(io.res_widening & is_fp64,Cat(widen_a_significand_f32_0,0.U(29.W)),Cat(Ea_f64_is_not_zero,fp_a_f64.tail(exponentWidth+1)))
val fp_b_significand_f64 = Mux(io.res_widening & is_fp64,Cat(widen_b_significand_f32_0,0.U(29.W)),Cat(Eb_f64_is_not_zero,fp_b_f64.tail(exponentWidth+1)))
val fp_c_significand_f64 = Cat(Ec_f64_is_not_zero,fp_c_f64.tail(exponentWidth+1))
// Addend alignment constants per format (p = significand width: 53 / 24 / 11):
//   rshiftBasic = p + 3   offset added to the product exponent, so the shift
//                         amount is 0 when the addend exponent exceeds the
//                         product exponent by exactly rshiftBasic
//   rshiftMax   = 3p + 4  width of the aligned addend word; larger shift
//                         amounts are treated as "addend too small" below
val rshiftBasicF64 = significandWidth + 3
val rshiftMaxF64 = 3*significandWidth + 4
val rshiftBasicF32 = 24 + 3
val rshiftMaxF32 = 3*24 + 4
val rshiftBasicF16 = 11 + 3
val rshiftMaxF16 = 3*11 + 4
// Exponent fix-up: the LSB is forced to 1 when the exponent field is all zero,
// so an exponent field of 0 is handled as exponent 1 (no implicit leading bit,
// same scale as the smallest normal exponent).
val Ea_fix_f64 = Cat(Ea_f64.head(exponentWidth-1),!Ea_f64_is_not_zero | Ea_f64(0))
val Eb_fix_f64 = Cat(Eb_f64.head(exponentWidth-1),!Eb_f64_is_not_zero | Eb_f64(0))
val Ec_fix_f64 = Cat(Ec_f64.head(exponentWidth-1),!Ec_f64_is_not_zero | Ec_f64(0))
val Ea_fix_f32_0 = Cat(Ea_f32_0.head(8-1),!Ea_f32_0_is_not_zero | Ea_f32_0(0))
val Eb_fix_f32_0 = Cat(Eb_f32_0.head(8-1),!Eb_f32_0_is_not_zero | Eb_f32_0(0))
val Ec_fix_f32_0 = Cat(Ec_f32_0.head(8-1),!Ec_f32_0_is_not_zero | Ec_f32_0(0))
val Ea_fix_f32_1 = Cat(Ea_f32_1.head(8-1),!Ea_f32_1_is_not_zero | Ea_f32_1(0))
val Eb_fix_f32_1 = Cat(Eb_f32_1.head(8-1),!Eb_f32_1_is_not_zero | Eb_f32_1(0))
val Ec_fix_f32_1 = Cat(Ec_f32_1.head(8-1),!Ec_f32_1_is_not_zero | Ec_f32_1(0))
val Ea_fix_f16_0 = Cat(Ea_f16_0.head(5-1),!Ea_f16_0_is_not_zero | Ea_f16_0(0))
val Eb_fix_f16_0 = Cat(Eb_f16_0.head(5-1),!Eb_f16_0_is_not_zero | Eb_f16_0(0))
val Ec_fix_f16_0 = Cat(Ec_f16_0.head(5-1),!Ec_f16_0_is_not_zero | Ec_f16_0(0))
val Ea_fix_f16_1 = Cat(Ea_f16_1.head(5-1),!Ea_f16_1_is_not_zero | Ea_f16_1(0))
val Eb_fix_f16_1 = Cat(Eb_f16_1.head(5-1),!Eb_f16_1_is_not_zero | Eb_f16_1(0))
val Ec_fix_f16_1 = Cat(Ec_f16_1.head(5-1),!Ec_f16_1_is_not_zero | Ec_f16_1(0))
val Ea_fix_f16_2 = Cat(Ea_f16_2.head(5-1),!Ea_f16_2_is_not_zero | Ea_f16_2(0))
val Eb_fix_f16_2 = Cat(Eb_f16_2.head(5-1),!Eb_f16_2_is_not_zero | Eb_f16_2(0))
val Ec_fix_f16_2 = Cat(Ec_f16_2.head(5-1),!Ec_f16_2_is_not_zero | Ec_f16_2(0))
val Ea_fix_f16_3 = Cat(Ea_f16_3.head(5-1),!Ea_f16_3_is_not_zero | Ea_f16_3(0))
val Eb_fix_f16_3 = Cat(Eb_f16_3.head(5-1),!Eb_f16_3_is_not_zero | Eb_f16_3(0))
val Ec_fix_f16_3 = Cat(Ec_f16_3.head(5-1),!Ec_f16_3_is_not_zero | Ec_f16_3(0))
// Same fix-up for the narrow widening multiplicands.
val widen_Ea_fix_f16_0 = Cat(widen_Ea_f16_0.head(4), (!widen_Ea_f16_0.orR) | widen_Ea_f16_0(0))
val widen_Eb_fix_f16_0 = Cat(widen_Eb_f16_0.head(4), (!widen_Eb_f16_0.orR) | widen_Eb_f16_0(0))
val widen_Ea_fix_f16_1 = Cat(widen_Ea_f16_1.head(4), (!widen_Ea_f16_1.orR) | widen_Ea_f16_1(0))
val widen_Eb_fix_f16_1 = Cat(widen_Eb_f16_1.head(4), (!widen_Eb_f16_1.orR) | widen_Eb_f16_1(0))
val widen_Ea_fix_f32_0 = Cat(widen_Ea_f32_0.head(7), (!widen_Ea_f32_0.orR) | widen_Ea_f32_0(0))
val widen_Eb_fix_f32_0 = Cat(widen_Eb_f32_0.head(7), (!widen_Eb_f32_0.orR) | widen_Eb_f32_0(0))
// Exponent biases: 1023 (fp64), 127 (fp32), 15 (fp16).
val biasF64 = (1 << (exponentWidth-1)) - 1
val biasF32 = (1 << (8-1)) - 1
val biasF16 = (1 << (5-1)) - 1
// Re-bias a narrow exponent into the wider format without an adder:
// Cat(msb, Fill(3, ~msb), remaining low bits) equals the narrow exponent plus
// the bias difference (896 for fp32 -> fp64, 112 for fp16 -> fp32).
// Without widening the regular fixed exponent of the result format is used.
val Ea_fix_f64_widening = Mux(io.res_widening & is_fp64, Cat(widen_Ea_fix_f32_0.head(1),Fill(3,(~widen_Ea_fix_f32_0.head(1)).asUInt),widen_Ea_fix_f32_0(6,0)), Ea_fix_f64)
val Eb_fix_f64_widening = Mux(io.res_widening & is_fp64, Cat(widen_Eb_fix_f32_0.head(1),Fill(3,(~widen_Eb_fix_f32_0.head(1)).asUInt),widen_Eb_fix_f32_0(6,0)), Eb_fix_f64)
val Ea_fix_f32_widening_0 = Mux(io.res_widening & is_fp32, Cat(widen_Ea_fix_f16_0.head(1),Fill(3,(~widen_Ea_fix_f16_0.head(1)).asUInt),widen_Ea_fix_f16_0(3,0)), Ea_fix_f32_0)
val Eb_fix_f32_widening_0 = Mux(io.res_widening & is_fp32, Cat(widen_Eb_fix_f16_0.head(1),Fill(3,(~widen_Eb_fix_f16_0.head(1)).asUInt),widen_Eb_fix_f16_0(3,0)), Eb_fix_f32_0)
val Ea_fix_f32_widening_1 = Mux(io.res_widening & is_fp32, Cat(widen_Ea_fix_f16_1.head(1),Fill(3,(~widen_Ea_fix_f16_1.head(1)).asUInt),widen_Ea_fix_f16_1(3,0)), Ea_fix_f32_1)
val Eb_fix_f32_widening_1 = Mux(io.res_widening & is_fp32, Cat(widen_Eb_fix_f16_1.head(1),Fill(3,(~widen_Eb_fix_f16_1.head(1)).asUInt),widen_Eb_fix_f16_1(3,0)), Eb_fix_f32_1)
// Signed product exponent plus alignment offset: Ea + Eb - bias + rshiftBasic.
// The leading Cat(0.U, ...) keeps the unsigned sum non-negative when it is
// reinterpreted as SInt.
val Eab_f64 = Cat(0.U,Ea_fix_f64_widening +& Eb_fix_f64_widening).asSInt - biasF64.S + rshiftBasicF64.S
val Eab_f32_0 = Cat(0.U,Ea_fix_f32_widening_0 +& Eb_fix_f32_widening_0).asSInt - biasF32.S + rshiftBasicF32.S
val Eab_f32_1 = Cat(0.U,Ea_fix_f32_widening_1 +& Eb_fix_f32_widening_1).asSInt - biasF32.S + rshiftBasicF32.S
val Eab_f16_0 = Cat(0.U,Ea_fix_f16_0 +& Eb_fix_f16_0).asSInt - biasF16.S + rshiftBasicF16.S
val Eab_f16_1 = Cat(0.U,Ea_fix_f16_1 +& Eb_fix_f16_1).asSInt - biasF16.S + rshiftBasicF16.S
val Eab_f16_2 = Cat(0.U,Ea_fix_f16_2 +& Eb_fix_f16_2).asSInt - biasF16.S + rshiftBasicF16.S
val Eab_f16_3 = Cat(0.U,Ea_fix_f16_3 +& Eb_fix_f16_3).asSInt - biasF16.S + rshiftBasicF16.S
// Signed right-shift amount for the addend: Eab - Ec.
val rshift_value_f64 = Eab_f64 - Cat(0.U,Ec_fix_f64).asSInt
val rshift_value_f32_0 = Eab_f32_0 - Cat(0.U,Ec_fix_f32_0).asSInt
val rshift_value_f32_1 = Eab_f32_1 - Cat(0.U,Ec_fix_f32_1).asSInt
val rshift_value_f16_0 = Eab_f16_0 - Cat(0.U,Ec_fix_f16_0).asSInt
val rshift_value_f16_1 = Eab_f16_1 - Cat(0.U,Ec_fix_f16_1).asSInt
val rshift_value_f16_2 = Eab_f16_2 - Cat(0.U,Ec_fix_f16_2).asSInt
val rshift_value_f16_3 = Eab_f16_3 - Cat(0.U,Ec_fix_f16_3).asSInt
// Low bits of the shift amount, just wide enough to encode rshiftMax; only
// meaningful when the shift is classified as "medium" further down.
val rshift_value_cut_f64 = rshift_value_f64(rshiftMaxF64.U.getWidth-1,0)
val rshift_value_cut_f32_0 = rshift_value_f32_0(rshiftMaxF32.U.getWidth-1,0)
val rshift_value_cut_f32_1 = rshift_value_f32_1(rshiftMaxF32.U.getWidth-1,0)
val rshift_value_cut_f16_0 = rshift_value_f16_0(rshiftMaxF16.U.getWidth-1,0)
val rshift_value_cut_f16_1 = rshift_value_f16_1(rshiftMaxF16.U.getWidth-1,0)
val rshift_value_cut_f16_2 = rshift_value_f16_2(rshiftMaxF16.U.getWidth-1,0)
val rshift_value_cut_f16_3 = rshift_value_f16_3(rshiftMaxF16.U.getWidth-1,0)
// Addend significand placed in the top bits of an rshiftMax-wide word
// (zero-padded below) before it is shifted right.
val fp_c_significand_cat0_f64 = Cat(fp_c_significand_f64,0.U((rshiftMaxF64-significandWidth).W))
val fp_c_significand_cat0_f32_0 = Cat(fp_c_significand_f32_0,0.U((rshiftMaxF32-24).W))
val fp_c_significand_cat0_f32_1 = Cat(fp_c_significand_f32_1,0.U((rshiftMaxF32-24).W))
val fp_c_significand_cat0_f16_0 = Cat(fp_c_significand_f16_0,0.U((rshiftMaxF16-11).W))
val fp_c_significand_cat0_f16_1 = Cat(fp_c_significand_f16_1,0.U((rshiftMaxF16-11).W))
val fp_c_significand_cat0_f16_2 = Cat(fp_c_significand_f16_2,0.U((rshiftMaxF16-11).W))
val fp_c_significand_cat0_f16_3 = Cat(fp_c_significand_f16_3,0.U((rshiftMaxF16-11).W))
// Alignment shifters. Each result is Cat(shifted word, sticky), i.e. one bit
// wider than its input with the sticky flag in bit 0.
val rshift_result_with_grs_f64 = shiftRightWithMuxSticky(fp_c_significand_cat0_f64,rshift_value_cut_f64)
// Shifter muxed between the fp64 operand and the fp32 lane-1 operand
// (the latter zero-extended into the fp64-sized word and shift amount).
// NOTE(review): in the code visible here only the fp32 lane-1 view is taken
// from this shifter, while fp64 uses the separate shifter above — confirm
// whether the duplicated fp64 shifter is intended.
val rshift_result_with_grs_f64_f32_1 = shiftRightWithMuxSticky(
Mux(is_fp64,fp_c_significand_cat0_f64,fp_c_significand_cat0_f32_1.asTypeOf(fp_c_significand_cat0_f64)),
Mux(is_fp64,rshift_value_cut_f64,rshift_value_cut_f32_1.asTypeOf(rshift_value_cut_f64))
)
val rshift_result_with_grs_f32_0 = shiftRightWithMuxSticky(fp_c_significand_cat0_f32_0,rshift_value_cut_f32_0)
// fp32 lane 1 reuses the shared shifter: its low bits, re-typed to the fp32 result width.
val rshift_result_with_grs_f32_1 = rshift_result_with_grs_f64_f32_1.asTypeOf(rshift_result_with_grs_f32_0)
val rshift_result_with_grs_f16_0 = shiftRightWithMuxSticky(fp_c_significand_cat0_f16_0,rshift_value_cut_f16_0)
val rshift_result_with_grs_f16_1 = shiftRightWithMuxSticky(fp_c_significand_cat0_f16_1,rshift_value_cut_f16_1)
val rshift_result_with_grs_f16_2 = shiftRightWithMuxSticky(fp_c_significand_cat0_f16_2,rshift_value_cut_f16_2)
val rshift_result_with_grs_f16_3 = shiftRightWithMuxSticky(fp_c_significand_cat0_f16_3,rshift_value_cut_f16_3)
// Shift-amount classification per lane:
//   too_big   : shift <= 0          -> addend is used unshifted
//   too_small : shift >  rshiftMax  -> addend is shifted out completely
//   medium    : everything else     -> the shifter output is used
val Ec_is_too_big_f64 = rshift_value_f64 <= 0.S
val Ec_is_too_big_f32_0 = rshift_value_f32_0 <= 0.S
val Ec_is_too_big_f32_1 = rshift_value_f32_1 <= 0.S
val Ec_is_too_big_f16_0 = rshift_value_f16_0 <= 0.S
val Ec_is_too_big_f16_1 = rshift_value_f16_1 <= 0.S
val Ec_is_too_big_f16_2 = rshift_value_f16_2 <= 0.S
val Ec_is_too_big_f16_3 = rshift_value_f16_3 <= 0.S
val Ec_is_too_small_f64 = rshift_value_f64.asSInt > rshiftMaxF64.S
val Ec_is_too_small_f32_0 = rshift_value_f32_0.asSInt > rshiftMaxF32.S
val Ec_is_too_small_f32_1 = rshift_value_f32_1.asSInt > rshiftMaxF32.S
val Ec_is_too_small_f16_0 = rshift_value_f16_0.asSInt > rshiftMaxF16.S
val Ec_is_too_small_f16_1 = rshift_value_f16_1.asSInt > rshiftMaxF16.S
val Ec_is_too_small_f16_2 = rshift_value_f16_2.asSInt > rshiftMaxF16.S
val Ec_is_too_small_f16_3 = rshift_value_f16_3.asSInt > rshiftMaxF16.S
val Ec_is_medium_f64 = !Ec_is_too_big_f64 & !Ec_is_too_small_f64
val Ec_is_medium_f32_0 = !Ec_is_too_big_f32_0 & !Ec_is_too_small_f32_0
val Ec_is_medium_f32_1 = !Ec_is_too_big_f32_1 & !Ec_is_too_small_f32_1
val Ec_is_medium_f16_0 = !Ec_is_too_big_f16_0 & !Ec_is_too_small_f16_0
val Ec_is_medium_f16_1 = !Ec_is_too_big_f16_1 & !Ec_is_too_small_f16_1
val Ec_is_medium_f16_2 = !Ec_is_too_big_f16_2 & !Ec_is_too_small_f16_2
val Ec_is_medium_f16_3 = !Ec_is_too_big_f16_3 & !Ec_is_too_small_f16_3
// Guard / round / sticky bits of the aligned addend, taken from bits 2 / 1 / 0
// of the shifter outputs. The formats share one 4-bit vector per flag instead
// of keeping seven separate lanes (12 register bits instead of 21, i.e. 9 saved).
// Bit layout of each vector: [3] = fp16 lane 3, [2] = fp16 lane 2,
// [1] = fp32 lane 1 or fp16 lane 1, [0] = fp64, fp32 lane 0 or fp16 lane 0.
// Guard and round are 0 unless the shift is "medium".
val rshift_guard_reg_d = Cat(
Mux(Ec_is_medium_f16_3, rshift_result_with_grs_f16_3(2), 0.U),
Mux(Ec_is_medium_f16_2, rshift_result_with_grs_f16_2(2), 0.U),
Mux(is_fp32, Mux(Ec_is_medium_f32_1, rshift_result_with_grs_f32_1(2), 0.U),
Mux(Ec_is_medium_f16_1, rshift_result_with_grs_f16_1(2), 0.U)),
Mux(is_fp64, Mux(Ec_is_medium_f64, rshift_result_with_grs_f64(2), 0.U),
Mux(is_fp32, Mux(Ec_is_medium_f32_0, rshift_result_with_grs_f32_0(2), 0.U),
Mux(Ec_is_medium_f16_0, rshift_result_with_grs_f16_0(2), 0.U)))
)
val rshift_round_reg_d = Cat(
Mux(Ec_is_medium_f16_3, rshift_result_with_grs_f16_3(1), 0.U),
Mux(Ec_is_medium_f16_2, rshift_result_with_grs_f16_2(1), 0.U),
Mux(is_fp32, Mux(Ec_is_medium_f32_1, rshift_result_with_grs_f32_1(1), 0.U),
Mux(Ec_is_medium_f16_1, rshift_result_with_grs_f16_1(1), 0.U)),
Mux(is_fp64, Mux(Ec_is_medium_f64, rshift_result_with_grs_f64(1), 0.U),
Mux(is_fp32, Mux(Ec_is_medium_f32_0, rshift_result_with_grs_f32_0(1), 0.U),
Mux(Ec_is_medium_f16_0, rshift_result_with_grs_f16_0(1), 0.U)))
)
// Sticky: shifter sticky bit for a medium shift; 0 when the addend is used
// unshifted (too big); OR of the whole addend significand when the addend is
// shifted out completely (too small).
val rshift_sticky_reg_d = Cat(
Mux(Ec_is_medium_f16_3, rshift_result_with_grs_f16_3(0), Mux(Ec_is_too_big_f16_3, 0.U, fp_c_significand_f16_3.orR)),
Mux(Ec_is_medium_f16_2, rshift_result_with_grs_f16_2(0), Mux(Ec_is_too_big_f16_2, 0.U, fp_c_significand_f16_2.orR)),
Mux(is_fp32, Mux(Ec_is_medium_f32_1, rshift_result_with_grs_f32_1(0), Mux(Ec_is_too_big_f32_1, 0.U, fp_c_significand_f32_1.orR)),
Mux(Ec_is_medium_f16_1, rshift_result_with_grs_f16_1(0), Mux(Ec_is_too_big_f16_1, 0.U, fp_c_significand_f16_1.orR))),
Mux(is_fp64, Mux(Ec_is_medium_f64, rshift_result_with_grs_f64(0), Mux(Ec_is_too_big_f64, 0.U, fp_c_significand_f64.orR)),
Mux(is_fp32, Mux(Ec_is_medium_f32_0, rshift_result_with_grs_f32_0(0), Mux(Ec_is_too_big_f32_0, 0.U, fp_c_significand_f32_0.orR)),
Mux(Ec_is_medium_f16_0, rshift_result_with_grs_f16_0(0), Mux(Ec_is_too_big_f16_0, 0.U, fp_c_significand_f16_0.orR))))
)
// Stage-0 pipeline registers for the three packed flag vectors.
val rshift_guard_reg = RegEnable(rshift_guard_reg_d, fire)
val rshift_round_reg = RegEnable(rshift_round_reg_d, fire)
val rshift_sticky_reg = RegEnable(rshift_sticky_reg_d, fire)
// Per-format aliases of the registered bits; which alias is meaningful depends
// on the result format that was selected when the register was loaded.
val rshift_guard_f64 = rshift_guard_reg(0)
val rshift_guard_f32_0 = rshift_guard_reg(0)
val rshift_guard_f32_1 = rshift_guard_reg(1)
val rshift_guard_f16_0 = rshift_guard_reg(0)
val rshift_guard_f16_1 = rshift_guard_reg(1)
val rshift_guard_f16_2 = rshift_guard_reg(2)
val rshift_guard_f16_3 = rshift_guard_reg(3)
val rshift_round_f64 = rshift_round_reg(0)
val rshift_round_f32_0 = rshift_round_reg(0)
val rshift_round_f32_1 = rshift_round_reg(1)
val rshift_round_f16_0 = rshift_round_reg(0)
val rshift_round_f16_1 = rshift_round_reg(1)
val rshift_round_f16_2 = rshift_round_reg(2)
val rshift_round_f16_3 = rshift_round_reg(3)
val rshift_sticky_f64 = rshift_sticky_reg(0)
val rshift_sticky_f32_0 = rshift_sticky_reg(0)
val rshift_sticky_f32_1 = rshift_sticky_reg(1)
val rshift_sticky_f16_0 = rshift_sticky_reg(0)
val rshift_sticky_f16_1 = rshift_sticky_reg(1)
val rshift_sticky_f16_2 = rshift_sticky_reg(2)
val rshift_sticky_f16_3 = rshift_sticky_reg(3)
val rshift_result_temp_f64 = rshift_result_with_grs_f64.head(rshiftMaxF64-2)
val rshift_result_temp_f32_0 = rshift_result_with_grs_f32_0.head(rshiftMaxF32-2)
val rshift_result_temp_f32_1 = rshift_result_with_grs_f32_1.head(rshiftMaxF32-2)
val rshift_result_temp_f16_0 = rshift_result_with_grs_f16_0.head(rshiftMaxF16-2)
val rshift_result_temp_f16_1 = rshift_result_with_grs_f16_1.head(rshiftMaxF16-2)
val rshift_result_temp_f16_2 = rshift_result_with_grs_f16_2.head(rshiftMaxF16-2)
val rshift_result_temp_f16_3 = rshift_result_with_grs_f16_3.head(rshiftMaxF16-2)
val rshift_result_f64 = Mux(Ec_is_medium_f64,
rshift_result_temp_f64,
Mux(Ec_is_too_big_f64, fp_c_significand_cat0_f64.head(rshiftMaxF64-2), 0.U((rshiftMaxF64-2).W))
)
val rshift_result_f32_0 = Mux(Ec_is_medium_f32_0,
rshift_result_temp_f32_0,
Mux(Ec_is_too_big_f32_0, fp_c_significand_cat0_f32_0.head(rshiftMaxF32-2), 0.U((rshiftMaxF32-2).W))
)
val rshift_result_f32_1 = Mux(Ec_is_medium_f32_1,
rshift_result_temp_f32_1,
Mux(Ec_is_too_big_f32_1, fp_c_significand_cat0_f32_1.head(rshiftMaxF32-2), 0.U((rshiftMaxF32-2).W))
)
val rshift_result_f16_0 = Mux(Ec_is_medium_f16_0,
rshift_result_temp_f16_0,
Mux(Ec_is_too_big_f16_0, fp_c_significand_cat0_f16_0.head(rshiftMaxF16-2), 0.U((rshiftMaxF16-2).W))
)
val rshift_result_f16_1 = Mux(Ec_is_medium_f16_1,
rshift_result_temp_f16_1,
Mux(Ec_is_too_big_f16_1, fp_c_significand_cat0_f16_1.head(rshiftMaxF16-2), 0.U((rshiftMaxF16-2).W))
)
val rshift_result_f16_2 = Mux(Ec_is_medium_f16_2,
rshift_result_temp_f16_2,
Mux(Ec_is_too_big_f16_2, fp_c_significand_cat0_f16_2.head(rshiftMaxF16-2), 0.U((rshiftMaxF16-2).W))
)
val rshift_result_f16_3 = Mux(Ec_is_medium_f16_3,
rshift_result_temp_f16_3,
Mux(Ec_is_too_big_f16_3, fp_c_significand_cat0_f16_3.head(rshiftMaxF16-2), 0.U((rshiftMaxF16-2).W))
)
// save 294 bit reg
// One register is shared by all formats. Each lane is stored as
// { is_sub, is_sub ? ~rshift_result : rshift_result }, and the lanes of the active format
// are concatenated with the highest lane in the most significant position.
val fp_c_rshiftValue_inv_reg_d = {
  // Prefix the aligned addend with its subtract flag; invert the addend bits when subtracting.
  def withSubFlag(isSub: Bool, aligned: UInt): UInt =
    Mux(isSub, Cat(1.U, ~aligned), Cat(0.U, aligned))

  val packedF64 = withSubFlag(is_sub_f64.asBool, rshift_result_f64)
  val packedF32 = Cat(
    withSubFlag(is_sub_f32_1.asBool, rshift_result_f32_1),
    withSubFlag(is_sub_f32_0.asBool, rshift_result_f32_0)
  )
  val packedF16 = Cat(
    withSubFlag(is_sub_f16_3.asBool, rshift_result_f16_3),
    withSubFlag(is_sub_f16_2.asBool, rshift_result_f16_2),
    withSubFlag(is_sub_f16_1.asBool, rshift_result_f16_1),
    withSubFlag(is_sub_f16_0.asBool, rshift_result_f16_0)
  )
  Mux(is_fp64, packedF64, Mux(is_fp32, packedF32, packedF16))
}
val fp_c_rshiftValue_inv_reg = RegEnable(fp_c_rshiftValue_inv_reg_d, fire)
// Per-format views of the shared register: the f64 view is the whole register,
// f32 lanes are 75 bits each, f16 lanes are 36 bits each.
val fp_c_rshiftValue_inv_f64_reg0 = fp_c_rshiftValue_inv_reg
val fp_c_rshiftValue_inv_f32_0_reg0 = fp_c_rshiftValue_inv_reg(74, 0)
val fp_c_rshiftValue_inv_f32_1_reg0 = fp_c_rshiftValue_inv_reg(149, 75)
val fp_c_rshiftValue_inv_f16_0_reg0 = fp_c_rshiftValue_inv_reg(35, 0)
val fp_c_rshiftValue_inv_f16_1_reg0 = fp_c_rshiftValue_inv_reg(71, 36)
val fp_c_rshiftValue_inv_f16_2_reg0 = fp_c_rshiftValue_inv_reg(107, 72)
val fp_c_rshiftValue_inv_f16_3_reg0 = fp_c_rshiftValue_inv_reg(143, 108)
// Operand A for the Booth encoder. In f64 mode it is the f64 significand. In f32 mode the
// two f32 significands are packed with 5 zero bits between them. In f16 mode the four f16
// significands are packed as two pairs (2 zero bits inside a pair, 5 zero bits between pairs).
val booth_in_a = {
  val twoF32 = Cat(fp_a_significand_f32_1, 0.U(5.W), fp_a_significand_f32_0)
  val upperF16Pair = Cat(fp_a_significand_f16_3, 0.U(2.W), fp_a_significand_f16_2)
  val lowerF16Pair = Cat(fp_a_significand_f16_1, 0.U(2.W), fp_a_significand_f16_0)
  val fourF16 = Cat(upperF16Pair, 0.U(5.W), lowerF16Pair)
  Mux(is_fp64, fp_a_significand_f64, Mux(is_fp32, twoF32, fourF16))
}
// Operand B for the Booth encoder, packed exactly like booth_in_a.
val booth_in_b = {
  val twoF32 = Cat(fp_b_significand_f32_1, 0.U(5.W), fp_b_significand_f32_0)
  val upperF16Pair = Cat(fp_b_significand_f16_3, 0.U(2.W), fp_b_significand_f16_2)
  val lowerF16Pair = Cat(fp_b_significand_f16_1, 0.U(2.W), fp_b_significand_f16_0)
  val fourF16 = Cat(upperF16Pair, 0.U(5.W), lowerF16Pair)
  Mux(is_fp64, fp_b_significand_f64, Mux(is_fp32, twoF32, fourF16))
}
// Booth encoder shared by all formats: it receives the packed operands and the format
// flags, and produces the partial products (io.out_pp).
val U_BoothEncoder = Module(new BoothEncoderF64F32F16(width = significandWidth, is_addend_expand_1bit = true))
U_BoothEncoder.io.in_a := booth_in_a
U_BoothEncoder.io.in_b := booth_in_b
U_BoothEncoder.io.is_fp64 := is_fp64
U_BoothEncoder.io.is_fp32 := is_fp32
// Compressor that reduces the partial products to a sum/carry pair (out_sum / out_car).
// It is sized from the encoder output (number of partial products and their bit width).
// NOTE(review): the meaning of pipeLevel = 5 and of the `fire` input is defined inside
// CSA_Nto2With3to2MainPipeline - confirm there.
val U_CSAnto2 = Module(new CSA_Nto2With3to2MainPipeline(U_BoothEncoder.io.out_pp.length,U_BoothEncoder.io.out_pp.head.getWidth,pipeLevel = 5))
U_CSAnto2.io.fire := fire
U_CSAnto2.io.in := U_BoothEncoder.io.out_pp
// First input of the final 3:2 compressor: the sum vector of the partial-product tree.
val CSA3to2_in_a = U_CSAnto2.io.out_sum
// Second input: the carry vector of the partial-product tree, with one "+1" correction bit
// spliced in at the least significant position of every lane. The correction bit is
// is_sub & !guard & !round & !sticky for that lane.
// Rationale (from the adder_* definitions): when subtracting, the two's-complement "+1" is
// applied to the inverted {guard, round, sticky} bits; it only carries into the main adder
// when all three of those bits are zero.
// Carry-vector slices per format, as written below:
//   f64: out_car.head(106), then the correction bit.
//   f32: lane 1 = out_car.head(48) + correction bit; out_car(57,49) is passed through;
//        lane 0 = out_car(48,1) + correction bit.
//   f16: same scheme with 22-bit lane slices; out_car(25,23), out_car(25+58,23+58) and
//        out_car(57,49) are passed through between lanes.
// The f32/f16 subtract flags are registered here with `fire` (the f64 one is already
// available as is_sub_f64_reg0).
val CSA3to2_in_b = Mux(
is_fp64_reg0,
Cat(U_CSAnto2.io.out_car.head(106), is_sub_f64_reg0 & !rshift_guard_f64 & !rshift_round_f64 & !rshift_sticky_f64),
Mux(
is_fp32_reg0,
Cat(U_CSAnto2.io.out_car.head(48), RegEnable(is_sub_f32_1, fire) & !rshift_guard_f32_1 & !rshift_round_f32_1 & !rshift_sticky_f32_1,
U_CSAnto2.io.out_car(57,49),
U_CSAnto2.io.out_car(48,1), RegEnable(is_sub_f32_0, fire) & !rshift_guard_f32_0 & !rshift_round_f32_0 & !rshift_sticky_f32_0
),
Cat(Cat(U_CSAnto2.io.out_car.head(22), RegEnable(is_sub_f16_3, fire) & !rshift_guard_f16_3 & !rshift_round_f16_3 & !rshift_sticky_f16_3,
U_CSAnto2.io.out_car(25+58,23+58),
U_CSAnto2.io.out_car(22+58,1+58), RegEnable(is_sub_f16_2, fire) & !rshift_guard_f16_2 & !rshift_round_f16_2 & !rshift_sticky_f16_2),
U_CSAnto2.io.out_car(57,49),
Cat(U_CSAnto2.io.out_car(48,27), RegEnable(is_sub_f16_1, fire) & !rshift_guard_f16_1 & !rshift_round_f16_1 & !rshift_sticky_f16_1,
U_CSAnto2.io.out_car(25,23),
U_CSAnto2.io.out_car(22,1), RegEnable(is_sub_f16_0, fire) & !rshift_guard_f16_0 & !rshift_round_f16_0 & !rshift_sticky_f16_0)
)
)
)
// Third input of the final 3:2 compressor: the low part of the aligned (possibly inverted)
// addend, i.e. the low 2*significand bits of each lane of fp_c_rshiftValue_inv_*_reg0.
// Lanes are separated by zero padding so they line up with the product lanes:
//   f64: one leading zero bit + 2*significandWidth bits.
//   f32: one leading zero bit + 48 bits (lane 1) + 10 zero bits + 48 bits (lane 0).
//   f16: per pair, 22 bits + 4 zero bits + 22 bits; the pairs are separated by 10 zero
//        bits, and the upper pair has one leading zero bit.
val CSA3to2_in_c = Mux(
is_fp64_reg0,
Cat(0.U,fp_c_rshiftValue_inv_f64_reg0(2*significandWidth-1,0)),
Mux(
is_fp32_reg0,
Cat(0.U,fp_c_rshiftValue_inv_f32_1_reg0(2*24-1,0),0.U(10.W),fp_c_rshiftValue_inv_f32_0_reg0(2*24-1,0)),
Cat(
Cat(0.U,fp_c_rshiftValue_inv_f16_3_reg0(2*11-1,0),0.U(4.W),fp_c_rshiftValue_inv_f16_2_reg0(2*11-1,0)),
0.U(10.W),
Cat(fp_c_rshiftValue_inv_f16_1_reg0(2*11-1,0),0.U(4.W),fp_c_rshiftValue_inv_f16_0_reg0(2*11-1,0))
)
)
)
// Final 3:2 compressor: combines product sum, product carry (with correction bits) and the
// low part of the aligned addend. Its width equals the width of the N-to-2 sum output.
val U_CSA3to2 = Module(new CSA3to2(width = U_CSAnto2.io.out_sum.getWidth))
U_CSA3to2.io.in_a := CSA3to2_in_a
U_CSA3to2.io.in_b := CSA3to2_in_b
U_CSA3to2.io.in_c := CSA3to2_in_c
// A single adder resolves the sum/carry pair for every format; the narrower formats read
// their lanes out of the shared result.
val adder_lowbit_f64 = U_CSA3to2.io.out_sum + U_CSA3to2.io.out_car
// f32 lanes: 49 bits each, taken from bits [48:0] and [106:58] of the shared result.
val adder_lowbit_f32_0 = adder_lowbit_f64(48,0)
val adder_lowbit_f32_1 = adder_lowbit_f64(106,58)
// f16 lanes: 23 bits each, two per f32 lane (bits [22:0] and [48:26] of that lane).
val adder_lowbit_f16_0 = adder_lowbit_f32_0(22,0)
val adder_lowbit_f16_1 = adder_lowbit_f32_0(48,26)
val adder_lowbit_f16_2 = adder_lowbit_f32_1(22,0)
val adder_lowbit_f16_3 = adder_lowbit_f32_1(48,26)
// High part of the aligned addend, unchanged ("add0"): the top (significand + 3) bits of
// each lane. Selected in adder_* when the low-part addition produces no carry.
val fp_c_rshift_result_high_inv_add0_f64 = fp_c_rshiftValue_inv_f64_reg0.head(significandWidth+3)
val fp_c_rshift_result_high_inv_add0_f32_0 = fp_c_rshiftValue_inv_f32_0_reg0.head(24+3)
val fp_c_rshift_result_high_inv_add0_f32_1 = fp_c_rshiftValue_inv_f32_1_reg0.head(24+3)
val fp_c_rshift_result_high_inv_add0_f16_0 = fp_c_rshiftValue_inv_f16_0_reg0.head(11+3)
val fp_c_rshift_result_high_inv_add0_f16_1 = fp_c_rshiftValue_inv_f16_1_reg0.head(11+3)
val fp_c_rshift_result_high_inv_add0_f16_2 = fp_c_rshiftValue_inv_f16_2_reg0.head(11+3)
val fp_c_rshift_result_high_inv_add0_f16_3 = fp_c_rshiftValue_inv_f16_3_reg0.head(11+3)
// High part plus one ("add1"), computed for all lanes with one shared adder.
// The first operand packs the high parts of the active format with zero gap bits between
// lanes (f32: 5 gap bits, f16: 1 gap bit), so a lane's carry-out lands in a gap instead of
// the next lane. The second operand is a constant with a 1 at the least significant bit of
// every active lane: bit 0 always, bit 32 in f32 mode, bits 15/30/45 in f16 mode.
val fp_c_rshift_result_high_inv_add1 = Mux(is_fp64_reg0,
Cat(0.U(3.W),fp_c_rshiftValue_inv_f64_reg0.head(significandWidth+3)),
Mux(
is_fp32_reg0,
Cat(fp_c_rshiftValue_inv_f32_1_reg0.head(24+3),0.U(5.W),fp_c_rshiftValue_inv_f32_0_reg0.head(24+3)),
Cat(
fp_c_rshiftValue_inv_f16_3_reg0.head(11+3),0.U,fp_c_rshiftValue_inv_f16_2_reg0.head(11+3),
0.U,fp_c_rshiftValue_inv_f16_1_reg0.head(11+3),0.U,fp_c_rshiftValue_inv_f16_0_reg0.head(11+3)
)
)
) + Cat(!is_fp32_reg0 & !is_fp64_reg0, 0.U(12.W), is_fp32_reg0, 0.U, !is_fp32_reg0 & !is_fp64_reg0, 0.U(14.W), !is_fp32_reg0 & !is_fp64_reg0, 0.U(14.W), 1.U)
// Per-lane views of the incremented value (gap bits are skipped).
val fp_c_rshift_result_high_inv_add1_f64 = fp_c_rshift_result_high_inv_add1(55,0)
val fp_c_rshift_result_high_inv_add1_f32_0 = fp_c_rshift_result_high_inv_add1(26,0)
val fp_c_rshift_result_high_inv_add1_f32_1 = fp_c_rshift_result_high_inv_add1(58,32)
val fp_c_rshift_result_high_inv_add1_f16_0 = fp_c_rshift_result_high_inv_add1(13,0)
val fp_c_rshift_result_high_inv_add1_f16_1 = fp_c_rshift_result_high_inv_add1(28,15)
val fp_c_rshift_result_high_inv_add1_f16_2 = fp_c_rshift_result_high_inv_add1(43,30)
val fp_c_rshift_result_high_inv_add1_f16_3 = fp_c_rshift_result_high_inv_add1(58,45)
// Direct product of the packed Booth operands.
// NOTE(review): no use of this value is visible in this part of the file; it looks like a
// reference/debug signal - confirm before relying on it or removing it.
val fra_mul = booth_in_a * booth_in_b
// Full-width signed sum per lane, assembled from three pieces (most significant first):
//   1. high part of the addend: the "+1" version when the low-part addition carried out
//      (top bit of adder_lowbit_*), otherwise the unchanged version;
//   2. the low-part sum without its top (carry) bit;
//   3. two extra low bits: {guard, round} when adding, or the top two bits of
//      (~{guard, round, sticky} + 1) when subtracting.
val adder_f64 = {
  val carryOut = adder_lowbit_f64.head(1).asBool
  val highPart = Mux(carryOut, fp_c_rshift_result_high_inv_add1_f64, fp_c_rshift_result_high_inv_add0_f64)
  val negatedGrs = (~Cat(rshift_guard_f64, rshift_round_f64, rshift_sticky_f64)).asUInt + 1.U
  val guardRound = Mux(is_sub_f64_reg0, negatedGrs.head(2), Cat(rshift_guard_f64, rshift_round_f64))
  Cat(highPart, adder_lowbit_f64.tail(1), guardRound)
}
val adder_f32_0 = {
  val carryOut = adder_lowbit_f32_0.head(1).asBool
  val highPart = Mux(carryOut, fp_c_rshift_result_high_inv_add1_f32_0, fp_c_rshift_result_high_inv_add0_f32_0)
  val isSubReg0 = RegEnable(is_sub_f32_0, fire)
  val negatedGrs = (~Cat(rshift_guard_f32_0, rshift_round_f32_0, rshift_sticky_f32_0)).asUInt + 1.U
  val guardRound = Mux(isSubReg0, negatedGrs.head(2), Cat(rshift_guard_f32_0, rshift_round_f32_0))
  Cat(highPart, adder_lowbit_f32_0.tail(1), guardRound)
}
val adder_f32_1 = {
  val carryOut = adder_lowbit_f32_1.head(1).asBool
  val highPart = Mux(carryOut, fp_c_rshift_result_high_inv_add1_f32_1, fp_c_rshift_result_high_inv_add0_f32_1)
  val isSubReg0 = RegEnable(is_sub_f32_1, fire)
  val negatedGrs = (~Cat(rshift_guard_f32_1, rshift_round_f32_1, rshift_sticky_f32_1)).asUInt + 1.U
  val guardRound = Mux(isSubReg0, negatedGrs.head(2), Cat(rshift_guard_f32_1, rshift_round_f32_1))
  Cat(highPart, adder_lowbit_f32_1.tail(1), guardRound)
}
val adder_f16_0 = {
  val carryOut = adder_lowbit_f16_0.head(1).asBool
  val highPart = Mux(carryOut, fp_c_rshift_result_high_inv_add1_f16_0, fp_c_rshift_result_high_inv_add0_f16_0)
  val isSubReg0 = RegEnable(is_sub_f16_0, fire)
  val negatedGrs = (~Cat(rshift_guard_f16_0, rshift_round_f16_0, rshift_sticky_f16_0)).asUInt + 1.U
  val guardRound = Mux(isSubReg0, negatedGrs.head(2), Cat(rshift_guard_f16_0, rshift_round_f16_0))
  Cat(highPart, adder_lowbit_f16_0.tail(1), guardRound)
}
val adder_f16_1 = {
  val carryOut = adder_lowbit_f16_1.head(1).asBool
  val highPart = Mux(carryOut, fp_c_rshift_result_high_inv_add1_f16_1, fp_c_rshift_result_high_inv_add0_f16_1)
  val isSubReg0 = RegEnable(is_sub_f16_1, fire)
  val negatedGrs = (~Cat(rshift_guard_f16_1, rshift_round_f16_1, rshift_sticky_f16_1)).asUInt + 1.U
  val guardRound = Mux(isSubReg0, negatedGrs.head(2), Cat(rshift_guard_f16_1, rshift_round_f16_1))
  Cat(highPart, adder_lowbit_f16_1.tail(1), guardRound)
}
val adder_f16_2 = {
  val carryOut = adder_lowbit_f16_2.head(1).asBool
  val highPart = Mux(carryOut, fp_c_rshift_result_high_inv_add1_f16_2, fp_c_rshift_result_high_inv_add0_f16_2)
  val isSubReg0 = RegEnable(is_sub_f16_2, fire)
  val negatedGrs = (~Cat(rshift_guard_f16_2, rshift_round_f16_2, rshift_sticky_f16_2)).asUInt + 1.U
  val guardRound = Mux(isSubReg0, negatedGrs.head(2), Cat(rshift_guard_f16_2, rshift_round_f16_2))
  Cat(highPart, adder_lowbit_f16_2.tail(1), guardRound)
}
val adder_f16_3 = {
  val carryOut = adder_lowbit_f16_3.head(1).asBool
  val highPart = Mux(carryOut, fp_c_rshift_result_high_inv_add1_f16_3, fp_c_rshift_result_high_inv_add0_f16_3)
  val isSubReg0 = RegEnable(is_sub_f16_3, fire)
  val negatedGrs = (~Cat(rshift_guard_f16_3, rshift_round_f16_3, rshift_sticky_f16_3)).asUInt + 1.U
  val guardRound = Mux(isSubReg0, negatedGrs.head(2), Cat(rshift_guard_f16_3, rshift_round_f16_3))
  Cat(highPart, adder_lowbit_f16_3.tail(1), guardRound)
}
// Sign of each lane's sum: the most significant bit of adder_*.
val adder_is_negative_f64 = adder_f64.head(1).asBool
val adder_is_negative_f32_0 = adder_f32_0.head(1).asBool
val adder_is_negative_f32_1 = adder_f32_1.head(1).asBool
val adder_is_negative_f16_0 = adder_f16_0.head(1).asBool
val adder_is_negative_f16_1 = adder_f16_1.head(1).asBool
val adder_is_negative_f16_2 = adder_f16_2.head(1).asBool
val adder_is_negative_f16_3 = adder_f16_3.head(1).asBool
// save 3*2 = 6 bit reg
// The sign flags of all formats share one 4-bit vector that is registered twice:
//   bit 3: f16 lane 3; bit 2: f16 lane 2;
//   bit 1: f32 lane 1 (f32 mode) or f16 lane 1;
//   bit 0: f64 (f64 mode), f32 lane 0 (f32 mode) or f16 lane 0.
val adder_is_negative_reg_d = Cat(
adder_is_negative_f16_3,
adder_is_negative_f16_2,
Mux(is_fp32_reg0, adder_is_negative_f32_1, adder_is_negative_f16_1),
Mux(is_fp64_reg0, adder_is_negative_f64, Mux(is_fp32_reg0, adder_is_negative_f32_0, adder_is_negative_f16_0))
)
val adder_is_negative_reg1 = RegEnable(adder_is_negative_reg_d, fire_reg0)
val adder_is_negative_reg2 = RegEnable(adder_is_negative_reg1, fire_reg1)
// Per-lane views of the second-stage register (same bit assignment as above).
val adder_is_negative_f64_reg2 = adder_is_negative_reg2(0)
val adder_is_negative_f32_0_reg2 = adder_is_negative_reg2(0)
val adder_is_negative_f32_1_reg2 = adder_is_negative_reg2(1)
val adder_is_negative_f16_0_reg2 = adder_is_negative_reg2(0)
val adder_is_negative_f16_1_reg2 = adder_is_negative_reg2(1)
val adder_is_negative_f16_2_reg2 = adder_is_negative_reg2(2)
val adder_is_negative_f16_3_reg2 = adder_is_negative_reg2(3)
// Sum without its sign bit, bitwise inverted when the sum is negative (one's-complement
// magnitude; no "+1" is applied here).
val adder_inv_f64 = {
  val body = adder_f64.tail(1)
  Mux(adder_is_negative_f64, (~body).asUInt, body)
}
val adder_inv_f32_0 = {
  val body = adder_f32_0.tail(1)
  Mux(adder_is_negative_f32_0, (~body).asUInt, body)
}
val adder_inv_f32_1 = {
  val body = adder_f32_1.tail(1)
  Mux(adder_is_negative_f32_1, (~body).asUInt, body)
}
val adder_inv_f16_0 = {
  val body = adder_f16_0.tail(1)
  Mux(adder_is_negative_f16_0, (~body).asUInt, body)
}
val adder_inv_f16_1 = {
  val body = adder_f16_1.tail(1)
  Mux(adder_is_negative_f16_1, (~body).asUInt, body)
}
val adder_inv_f16_2 = {
  val body = adder_f16_2.tail(1)
  Mux(adder_is_negative_f16_2, (~body).asUInt, body)
}
val adder_inv_f16_3 = {
  val body = adder_f16_3.tail(1)
  Mux(adder_is_negative_f16_3, (~body).asUInt, body)
}
// Per lane: true when the signed right-shift amount is strictly positive.
// NOTE(review): by its name this means the product exponent (Eab) exceeds the addend
// exponent (Ec) - confirm against the definition of rshift_value_*.
val Eab_is_greater_f64 = rshift_value_f64 > 0.S
val Eab_is_greater_f32_0 = rshift_value_f32_0 > 0.S
val Eab_is_greater_f32_1 = rshift_value_f32_1 > 0.S
val Eab_is_greater_f16_0 = rshift_value_f16_0 > 0.S
val Eab_is_greater_f16_1 = rshift_value_f16_1 > 0.S
val Eab_is_greater_f16_2 = rshift_value_f16_2 > 0.S
val Eab_is_greater_f16_3 = rshift_value_f16_3 > 0.S
// Logical complement of the flag above, so it also covers a shift amount of exactly zero.
val Ec_is_greater_f64 = !Eab_is_greater_f64
val Ec_is_greater_f32_0 = !Eab_is_greater_f32_0
val Ec_is_greater_f32_1 = !Eab_is_greater_f32_1
val Ec_is_greater_f16_0 = !Eab_is_greater_f16_0
val Ec_is_greater_f16_1 = !Eab_is_greater_f16_1
val Ec_is_greater_f16_2 = !Eab_is_greater_f16_2
val Ec_is_greater_f16_3 = !Eab_is_greater_f16_3
// save 30*3 = 90 bit reg
// Exponent selected per lane: the low bits of Eab when Eab_is_greater_* is set, otherwise
// Ec_fix with one zero bit prepended. All formats share one value that is carried through
// three pipeline registers (enabled by fire, fire_reg0, fire_reg1).
val E_greater_f64_reg_d = {
  val productExp = Eab_f64(exponentWidth, 0).asUInt
  val addendExp = Cat(0.U(1.W), Ec_fix_f64)
  Mux(Eab_is_greater_f64, productExp, addendExp)
}
val E_greater_f32_reg_d = {
  val lane1 = Mux(Eab_is_greater_f32_1, Eab_f32_1(8, 0).asUInt, Cat(0.U(1.W), Ec_fix_f32_1))
  val lane0 = Mux(Eab_is_greater_f32_0, Eab_f32_0(8, 0).asUInt, Cat(0.U(1.W), Ec_fix_f32_0))
  Cat(lane1, lane0)
}
val E_greater_f16_reg_d = {
  val lane3 = Mux(Eab_is_greater_f16_3, Eab_f16_3(5, 0).asUInt, Cat(0.U(1.W), Ec_fix_f16_3))
  val lane2 = Mux(Eab_is_greater_f16_2, Eab_f16_2(5, 0).asUInt, Cat(0.U(1.W), Ec_fix_f16_2))
  val lane1 = Mux(Eab_is_greater_f16_1, Eab_f16_1(5, 0).asUInt, Cat(0.U(1.W), Ec_fix_f16_1))
  val lane0 = Mux(Eab_is_greater_f16_0, Eab_f16_0(5, 0).asUInt, Cat(0.U(1.W), Ec_fix_f16_0))
  Cat(lane3, lane2, lane1, lane0)
}
val E_greater_reg_d = {
  val narrowFormats = Mux(is_fp32, E_greater_f32_reg_d, E_greater_f16_reg_d)
  Mux(is_fp64, E_greater_f64_reg_d, narrowFormats)
}
val E_greater_reg2 = {
  val atReg0 = RegEnable(E_greater_reg_d, fire)
  val atReg1 = RegEnable(atReg0, fire_reg0)
  RegEnable(atReg1, fire_reg1)
}
// Per-lane views: 12 bits for f64, 9 bits per f32 lane, 6 bits per f16 lane.
val E_greater_f64_reg2 = E_greater_reg2(11, 0)
val E_greater_f32_0_reg2 = E_greater_reg2(8, 0)
val E_greater_f32_1_reg2 = E_greater_reg2(17, 9)
val E_greater_f16_0_reg2 = E_greater_reg2(5, 0)
val E_greater_f16_1_reg2 = E_greater_reg2(11, 6)
val E_greater_f16_2_reg2 = E_greater_reg2(17, 12)
val E_greater_f16_3_reg2 = E_greater_reg2(23, 18)
// save 30 bit reg
// Per lane: the selected exponent minus one (Eab - 1 when Eab_is_greater_*, otherwise
// Ec_fix - 1 with a zero bit prepended). It feeds lshift_value_mask_* below, where it
// limits how far the sum may be shifted left. All formats share one register.
val lshift_value_max_f64_reg_d = Mux(Eab_is_greater_f64, Eab_f64(exponentWidth,0).asUInt - 1.U, Cat(0.U,Ec_fix_f64 - 1.U))
val lshift_value_max_f32_reg_d = Cat(Mux(Eab_is_greater_f32_1, Eab_f32_1(8,0).asUInt - 1.U, Cat(0.U,Ec_fix_f32_1 - 1.U)),
Mux(Eab_is_greater_f32_0, Eab_f32_0(8,0).asUInt - 1.U, Cat(0.U,Ec_fix_f32_0 - 1.U)))
val lshift_value_max_f16_reg_d = Cat(Mux(Eab_is_greater_f16_3, Eab_f16_3(5,0).asUInt - 1.U, Cat(0.U,Ec_fix_f16_3 - 1.U)),
Mux(Eab_is_greater_f16_2, Eab_f16_2(5,0).asUInt - 1.U, Cat(0.U,Ec_fix_f16_2 - 1.U)),
Mux(Eab_is_greater_f16_1, Eab_f16_1(5,0).asUInt - 1.U, Cat(0.U,Ec_fix_f16_1 - 1.U)),
Mux(Eab_is_greater_f16_0, Eab_f16_0(5,0).asUInt - 1.U, Cat(0.U,Ec_fix_f16_0 - 1.U)))
val lshift_value_max_reg_d = Mux(is_fp64, lshift_value_max_f64_reg_d, Mux(is_fp32, lshift_value_max_f32_reg_d, lshift_value_max_f16_reg_d))
val lshift_value_max_reg0 = RegEnable(lshift_value_max_reg_d, fire)
// Per-lane views: 12 bits for f64, 9 bits per f32 lane, 6 bits per f16 lane.
val lshift_value_max_f64_reg0 = lshift_value_max_reg0(11,0)
val lshift_value_max_f32_0_reg0 = lshift_value_max_reg0(8,0)
val lshift_value_max_f32_1_reg0 = lshift_value_max_reg0(17,9)
val lshift_value_max_f16_0_reg0 = lshift_value_max_reg0(5,0)
val lshift_value_max_f16_1_reg0 = lshift_value_max_reg0(11,6)
val lshift_value_max_f16_2_reg0 = lshift_value_max_reg0(17,12)
val lshift_value_max_f16_3_reg0 = lshift_value_max_reg0(23,18)
// Elaboration-time constants: the number of bits needed to represent the width of
// adder_inv_* as an unsigned literal. Used below to split lshift_value_max_* into the bits
// that can address a position inside adder_inv_* and the excess upper bits.
val LZDWidth_f64 = adder_inv_f64.getWidth.U.getWidth
val LZDWidth_f32_0 = adder_inv_f32_0.getWidth.U.getWidth
val LZDWidth_f32_1 = adder_inv_f32_1.getWidth.U.getWidth
val LZDWidth_f16_0 = adder_inv_f16_0.getWidth.U.getWidth
val LZDWidth_f16_1 = adder_inv_f16_1.getWidth.U.getWidth
val LZDWidth_f16_2 = adder_inv_f16_2.getWidth.U.getWidth
val LZDWidth_f16_3 = adder_inv_f16_3.getWidth.U.getWidth
// Mask that limits the left shift of each lane. It is an all-ones vector as wide as
// adder_inv_*, shifted right by the low LZDWidth_* bits of lshift_value_max_*; if any of
// the remaining upper bits of lshift_value_max_* is set, the mask is all zeros instead.
val lshift_value_mask_f64 = {
  val maskWidth = adder_inv_f64.getWidth
  val maxShift = lshift_value_max_f64_reg0
  val excessBits = maxShift.getWidth - LZDWidth_f64
  val shiftTooLarge = maxShift.head(excessBits).orR
  val onesShifted = Fill(maskWidth, 1.U) >> maxShift.tail(excessBits)
  Mux(shiftTooLarge, 0.U(maskWidth.W), onesShifted).asUInt
}
val lshift_value_mask_f32_0 = {
  val maskWidth = adder_inv_f32_0.getWidth
  val maxShift = lshift_value_max_f32_0_reg0
  val excessBits = maxShift.getWidth - LZDWidth_f32_0
  val shiftTooLarge = maxShift.head(excessBits).orR
  val onesShifted = Fill(maskWidth, 1.U) >> maxShift.tail(excessBits)
  Mux(shiftTooLarge, 0.U(maskWidth.W), onesShifted).asUInt
}
val lshift_value_mask_f32_1 = {
  val maskWidth = adder_inv_f32_1.getWidth
  val maxShift = lshift_value_max_f32_1_reg0
  val excessBits = maxShift.getWidth - LZDWidth_f32_1
  val shiftTooLarge = maxShift.head(excessBits).orR
  val onesShifted = Fill(maskWidth, 1.U) >> maxShift.tail(excessBits)
  Mux(shiftTooLarge, 0.U(maskWidth.W), onesShifted).asUInt
}
val lshift_value_mask_f16_0 = {
  val maskWidth = adder_inv_f16_0.getWidth
  val maxShift = lshift_value_max_f16_0_reg0
  val excessBits = maxShift.getWidth - LZDWidth_f16_0
  val shiftTooLarge = maxShift.head(excessBits).orR
  val onesShifted = Fill(maskWidth, 1.U) >> maxShift.tail(excessBits)
  Mux(shiftTooLarge, 0.U(maskWidth.W), onesShifted).asUInt
}
val lshift_value_mask_f16_1 = {
  val maskWidth = adder_inv_f16_1.getWidth
  val maxShift = lshift_value_max_f16_1_reg0
  val excessBits = maxShift.getWidth - LZDWidth_f16_1
  val shiftTooLarge = maxShift.head(excessBits).orR
  val onesShifted = Fill(maskWidth, 1.U) >> maxShift.tail(excessBits)
  Mux(shiftTooLarge, 0.U(maskWidth.W), onesShifted).asUInt
}
val lshift_value_mask_f16_2 = {
  val maskWidth = adder_inv_f16_2.getWidth
  val maxShift = lshift_value_max_f16_2_reg0
  val excessBits = maxShift.getWidth - LZDWidth_f16_2
  val shiftTooLarge = maxShift.head(excessBits).orR
  val onesShifted = Fill(maskWidth, 1.U) >> maxShift.tail(excessBits)
  Mux(shiftTooLarge, 0.U(maskWidth.W), onesShifted).asUInt
}
val lshift_value_mask_f16_3 = {
  val maskWidth = adder_inv_f16_3.getWidth
  val maxShift = lshift_value_max_f16_3_reg0
  val excessBits = maxShift.getWidth - LZDWidth_f16_3
  val shiftTooLarge = maxShift.head(excessBits).orR
  val onesShifted = Fill(maskWidth, 1.U) >> maxShift.tail(excessBits)
  Mux(shiftTooLarge, 0.U(maskWidth.W), onesShifted).asUInt
}
// save 306bit
// Bit-reversed copy of each lane's sum, packed into one shared register. After the
// register, LZD is applied to the reversed value.
// NOTE(review): assuming LZD counts leading zeros, this yields the number of trailing
// zeros of adder_* - confirm against the LZD helper.
val tzd_adder_f64_reg_d = Reverse(adder_f64.asUInt)
val tzd_adder_f32_reg_d = Cat(Reverse(adder_f32_1.asUInt), Reverse(adder_f32_0.asUInt))
val tzd_adder_f16_reg_d = Cat(Reverse(adder_f16_3.asUInt), Reverse(adder_f16_2.asUInt),
Reverse(adder_f16_1.asUInt), Reverse(adder_f16_0.asUInt))
val tzd_adder_reg_d = Mux(is_fp64_reg0, tzd_adder_f64_reg_d, Mux(is_fp32_reg0, tzd_adder_f32_reg_d, tzd_adder_f16_reg_d))
val tzd_adder_reg1 = RegEnable(tzd_adder_reg_d, fire_reg0)
// Lane slices: 164 bits for f64, 77 bits per f32 lane, 38 bits per f16 lane.
val tzd_adder_f64_reg1 = LZD(tzd_adder_reg1(163,0).asTypeOf(adder_f64))
val tzd_adder_f32_0_reg1 = LZD(tzd_adder_reg1(76,0).asTypeOf(adder_f32_0))
val tzd_adder_f32_1_reg1 = LZD(tzd_adder_reg1(153,77).asTypeOf(adder_f32_1))
val tzd_adder_f16_0_reg1 = LZD(tzd_adder_reg1(37,0).asTypeOf(adder_f16_0))
val tzd_adder_f16_1_reg1 = LZD(tzd_adder_reg1(75,38).asTypeOf(adder_f16_1))
val tzd_adder_f16_2_reg1 = LZD(tzd_adder_reg1(113,76).asTypeOf(adder_f16_2))
val tzd_adder_f16_3_reg1 = LZD(tzd_adder_reg1(151,114).asTypeOf(adder_f16_3))
// save 300bit
// Magnitude of each lane OR-ed with its shift-limit mask, packed into one shared register.
// Because of the OR, the LZD result can never point past the first set bit of the mask,
// which caps the normalization shift.
// NOTE(review): assumes LZD returns the leading-zero count - confirm against the helper.
val lzd_adder_inv_mask_f64_reg_d = adder_inv_f64 | lshift_value_mask_f64
val lzd_adder_inv_mask_f32_reg_d = Cat(adder_inv_f32_1 | lshift_value_mask_f32_1, adder_inv_f32_0 | lshift_value_mask_f32_0)
val lzd_adder_inv_mask_f16_reg_d = Cat(adder_inv_f16_3 | lshift_value_mask_f16_3, adder_inv_f16_2 | lshift_value_mask_f16_2,
adder_inv_f16_1 | lshift_value_mask_f16_1, adder_inv_f16_0 | lshift_value_mask_f16_0)
val lzd_adder_inv_mask_reg_d = Mux(is_fp64_reg0, lzd_adder_inv_mask_f64_reg_d, Mux(is_fp32_reg0, lzd_adder_inv_mask_f32_reg_d, lzd_adder_inv_mask_f16_reg_d))
val lzd_adder_inv_mask_reg1 = RegEnable(lzd_adder_inv_mask_reg_d, fire_reg0)
// Lane slices: 163 bits for f64, 76 bits per f32 lane, 37 bits per f16 lane.
val lzd_adder_inv_mask_f64_reg1 = LZD(lzd_adder_inv_mask_reg1(162,0).asTypeOf(adder_inv_f64))
val lzd_adder_inv_mask_f32_0_reg1 = LZD(lzd_adder_inv_mask_reg1(75,0).asTypeOf(adder_inv_f32_0))
val lzd_adder_inv_mask_f32_1_reg1 = LZD(lzd_adder_inv_mask_reg1(151,76).asTypeOf(adder_inv_f32_1))
val lzd_adder_inv_mask_f16_0_reg1 = LZD(lzd_adder_inv_mask_reg1(36,0).asTypeOf(adder_inv_f16_0))
val lzd_adder_inv_mask_f16_1_reg1 = LZD(lzd_adder_inv_mask_reg1(73,37).asTypeOf(adder_inv_f16_1))
val lzd_adder_inv_mask_f16_2_reg1 = LZD(lzd_adder_inv_mask_reg1(110,74).asTypeOf(adder_inv_f16_2))
val lzd_adder_inv_mask_f16_3_reg1 = LZD(lzd_adder_inv_mask_reg1(147,111).asTypeOf(adder_inv_f16_3))
// save 1389 bit reg
// Per lane: true when OR-ing the magnitude into the mask does not change the mask, i.e.
// the magnitude has no set bit outside the mask. One flag per lane, packed with lane 0 in
// bit 0, then registered with fire_reg0.
val lshift_mask_valid_f64_reg_d = (adder_inv_f64 | lshift_value_mask_f64) === lshift_value_mask_f64
val lshift_mask_valid_f32_reg_d = Cat((adder_inv_f32_1 | lshift_value_mask_f32_1) === lshift_value_mask_f32_1,
(adder_inv_f32_0 | lshift_value_mask_f32_0) === lshift_value_mask_f32_0)
val lshift_mask_valid_f16_reg_d = Cat((adder_inv_f16_3 | lshift_value_mask_f16_3) === lshift_value_mask_f16_3,
(adder_inv_f16_2 | lshift_value_mask_f16_2) === lshift_value_mask_f16_2,
(adder_inv_f16_1 | lshift_value_mask_f16_1) === lshift_value_mask_f16_1,
(adder_inv_f16_0 | lshift_value_mask_f16_0) === lshift_value_mask_f16_0)
val lshift_mask_valid_reg_d = Mux(is_fp64_reg0, lshift_mask_valid_f64_reg_d, Mux(is_fp32_reg0, lshift_mask_valid_f32_reg_d, lshift_mask_valid_f16_reg_d))
val lshift_mask_valid_reg = RegEnable(lshift_mask_valid_reg_d, fire_reg0)
// Per-lane views of the registered flags.
val lshift_mask_valid_f64_reg1 = lshift_mask_valid_reg(0)
val lshift_mask_valid_f32_0_reg1 = lshift_mask_valid_reg(0)
val lshift_mask_valid_f32_1_reg1 = lshift_mask_valid_reg(1)
val lshift_mask_valid_f16_0_reg1 = lshift_mask_valid_reg(0)
val lshift_mask_valid_f16_1_reg1 = lshift_mask_valid_reg(1)
val lshift_mask_valid_f16_2_reg1 = lshift_mask_valid_reg(2)
val lshift_mask_valid_f16_3_reg1 = lshift_mask_valid_reg(3)
// Normalization shift amount per lane: the LZD result of the masked magnitude.
val lshift_value_f64_reg1 = lzd_adder_inv_mask_f64_reg1
val lshift_value_f32_0_reg1 = lzd_adder_inv_mask_f32_0_reg1
val lshift_value_f32_1_reg1 = lzd_adder_inv_mask_f32_1_reg1
val lshift_value_f16_0_reg1 = lzd_adder_inv_mask_f16_0_reg1
val lshift_value_f16_1_reg1 = lzd_adder_inv_mask_f16_1_reg1
val lshift_value_f16_2_reg1 = lzd_adder_inv_mask_f16_2_reg1
val lshift_value_f16_3_reg1 = lzd_adder_inv_mask_f16_3_reg1
//save 304 bits reg
// The (signed, not yet inverted) sums of the active format are packed into one shared
// register enabled by fire_reg0, with the highest lane in the most significant position.
val adder_f64_reg_d = adder_f64
val adder_f32_reg_d = Cat(adder_f32_1, adder_f32_0)
val adder_f16_reg_d = Cat(adder_f16_3, adder_f16_2, adder_f16_1, adder_f16_0)
val adder_reg_d = Mux(is_fp64_reg0, adder_f64_reg_d, Mux(is_fp32_reg0, adder_f32_reg_d, adder_f16_reg_d))
val adder_reg1 = RegEnable(adder_reg_d, fire_reg0)
// Lane slices: 164 bits for f64, 77 bits per f32 lane, 38 bits per f16 lane.
val adder_f64_reg1 = adder_reg1(163,0)
val adder_f32_0_reg1 = adder_reg1(76,0)
val adder_f32_1_reg1 = adder_reg1(153,77)
val adder_f16_0_reg1 = adder_reg1(37,0)
val adder_f16_1_reg1 = adder_reg1(75,38)
val adder_f16_2_reg1 = adder_reg1(113,76)
val adder_f16_3_reg1 = adder_reg1(151,114)
// Normalization left shift of each lane's sum by its shift amount.
// The f64 shifter is shared with f32 lane 1: when the operation is not f64, it receives
// the f32 lane-1 sum and shift amount reinterpreted to the f64 types via asTypeOf, and
// lshift_adder_f32_1 reinterprets the result back to the f32 type.
// NOTE(review): the exact shifting behavior is defined by shiftLeftWithMux - confirm there.
val lshift_adder_f64 = shiftLeftWithMux(
Mux(is_fp64_reg1,adder_f64_reg1,adder_f32_1_reg1.asTypeOf(adder_f64_reg1)),
Mux(is_fp64_reg1,lshift_value_f64_reg1,lshift_value_f32_1_reg1.asTypeOf(lshift_value_f64_reg1))
)
val lshift_adder_f32_0 = shiftLeftWithMux(adder_f32_0_reg1, lshift_value_f32_0_reg1)
val lshift_adder_f32_1 = lshift_adder_f64.asTypeOf(lshift_adder_f32_0)
val lshift_adder_f16_0 = shiftLeftWithMux(adder_f16_0_reg1, lshift_value_f16_0_reg1)
val lshift_adder_f16_1 = shiftLeftWithMux(adder_f16_1_reg1, lshift_value_f16_1_reg1)
val lshift_adder_f16_2 = shiftLeftWithMux(adder_f16_2_reg1, lshift_value_f16_2_reg1)
val lshift_adder_f16_3 = shiftLeftWithMux(adder_f16_3_reg1, lshift_value_f16_3_reg1)
// After normalization, only the top (significand + 4) bits of a lane are bitwise inverted
// when that lane's sum was negative (sign flag from adder_is_negative_reg1); the remaining
// low bits are passed through unchanged.
val lshift_adder_inv_f64 = {
  val topBits = significandWidth + 4
  val upper = lshift_adder_f64.head(topBits)
  Cat(Mux(adder_is_negative_reg1(0), ~upper, upper), lshift_adder_f64.tail(topBits))
}
val lshift_adder_inv_f32_0 = {
  val topBits = 24 + 4
  val upper = lshift_adder_f32_0.head(topBits)
  Cat(Mux(adder_is_negative_reg1(0), ~upper, upper), lshift_adder_f32_0.tail(topBits))
}
val lshift_adder_inv_f32_1 = {
  val topBits = 24 + 4
  val upper = lshift_adder_f32_1.head(topBits)
  Cat(Mux(adder_is_negative_reg1(1), ~upper, upper), lshift_adder_f32_1.tail(topBits))
}
val lshift_adder_inv_f16_0 = {
  val topBits = 11 + 4
  val upper = lshift_adder_f16_0.head(topBits)
  Cat(Mux(adder_is_negative_reg1(0), ~upper, upper), lshift_adder_f16_0.tail(topBits))
}
val lshift_adder_inv_f16_1 = {
  val topBits = 11 + 4
  val upper = lshift_adder_f16_1.head(topBits)
  Cat(Mux(adder_is_negative_reg1(1), ~upper, upper), lshift_adder_f16_1.tail(topBits))
}
val lshift_adder_inv_f16_2 = {
  val topBits = 11 + 4
  val upper = lshift_adder_f16_2.head(topBits)
  Cat(Mux(adder_is_negative_reg1(2), ~upper, upper), lshift_adder_f16_2.tail(topBits))
}
val lshift_adder_inv_f16_3 = {
  val topBits = 11 + 4
  val upper = lshift_adder_f16_3.head(topBits)
  Cat(Mux(adder_is_negative_reg1(3), ~upper, upper), lshift_adder_f16_3.tail(topBits))
}
// Per lane: true when the count derived from the reversed sum (tzd_*) plus the count
// derived from the masked magnitude (lzd_*) equals the full width of adder_inv_*.
// NOTE(review): presumably this flags the case where the one's-complement magnitude needs
// a one-position correction after normalization (the missing "+1" of a negative sum would
// ripple through all low bits) - confirm with the designer; it selects the alternative
// alignment in lshift_adder_inv_fix_*.
val is_fix_f64 = (tzd_adder_f64_reg1 + lzd_adder_inv_mask_f64_reg1) === adder_inv_f64.getWidth.U
val is_fix_f32_0 = (tzd_adder_f32_0_reg1 + lzd_adder_inv_mask_f32_0_reg1) === adder_inv_f32_0.getWidth.U
val is_fix_f32_1 = (tzd_adder_f32_1_reg1 + lzd_adder_inv_mask_f32_1_reg1) === adder_inv_f32_1.getWidth.U
val is_fix_f16_0 = (tzd_adder_f16_0_reg1 + lzd_adder_inv_mask_f16_0_reg1) === adder_inv_f16_0.getWidth.U
val is_fix_f16_1 = (tzd_adder_f16_1_reg1 + lzd_adder_inv_mask_f16_1_reg1) === adder_inv_f16_1.getWidth.U
val is_fix_f16_2 = (tzd_adder_f16_2_reg1 + lzd_adder_inv_mask_f16_2_reg1) === adder_inv_f16_2.getWidth.U
val is_fix_f16_3 = (tzd_adder_f16_3_reg1 + lzd_adder_inv_mask_f16_3_reg1) === adder_inv_f16_3.getWidth.U
// Choose between two alignments of the normalized value: when is_fix_* is set, keep the
// top adder_inv_*.getWidth bits (head); otherwise drop the most significant bit (tail(1)).
// NOTE(review): the two options differ by one bit position only if lshift_adder_inv_* is
// exactly one bit wider than adder_inv_* - confirm the width returned by shiftLeftWithMux.
val lshift_adder_inv_fix_f64 = Mux(is_fix_f64, lshift_adder_inv_f64.head(adder_inv_f64.getWidth), lshift_adder_inv_f64.tail(1))
val lshift_adder_inv_fix_f32_0 = Mux(is_fix_f32_0, lshift_adder_inv_f32_0.head(adder_inv_f32_0.getWidth), lshift_adder_inv_f32_0.tail(1))
val lshift_adder_inv_fix_f32_1 = Mux(is_fix_f32_1, lshift_adder_inv_f32_1.head(adder_inv_f32_1.getWidth), lshift_adder_inv_f32_1.tail(1))
val lshift_adder_inv_fix_f16_0 = Mux(is_fix_f16_0, lshift_adder_inv_f16_0.head(adder_inv_f16_0.getWidth), lshift_adder_inv_f16_0.tail(1))
val lshift_adder_inv_fix_f16_1 = Mux(is_fix_f16_1, lshift_adder_inv_f16_1.head(adder_inv_f16_1.getWidth), lshift_adder_inv_f16_1.tail(1))
val lshift_adder_inv_fix_f16_2 = Mux(is_fix_f16_2, lshift_adder_inv_f16_2.head(adder_inv_f16_2.getWidth), lshift_adder_inv_f16_2.tail(1))
val lshift_adder_inv_fix_f16_3 = Mux(is_fix_f16_3, lshift_adder_inv_f16_3.head(adder_inv_f16_3.getWidth), lshift_adder_inv_f16_3.tail(1))
// save 86bit
// Unrounded fraction per lane: skip the most significant bit of the normalized value and
// keep the next (significand - 1) bits. The lanes of the active format are packed into one
// shared register enabled by fire_reg1.
val fraction_result_no_round_f64_reg_d = lshift_adder_inv_fix_f64.tail(1).head(significandWidth-1)
val fraction_result_no_round_f32_reg_d = Cat(lshift_adder_inv_fix_f32_1.tail(1).head(24-1), lshift_adder_inv_fix_f32_0.tail(1).head(24-1))
val fraction_result_no_round_f16_reg_d = Cat(lshift_adder_inv_fix_f16_3.tail(1).head(11-1), lshift_adder_inv_fix_f16_2.tail(1).head(11-1),
lshift_adder_inv_fix_f16_1.tail(1).head(11-1), lshift_adder_inv_fix_f16_0.tail(1).head(11-1))
val fraction_result_no_round_reg_d = Mux(is_fp64_reg1, fraction_result_no_round_f64_reg_d,
Mux(is_fp32_reg1, fraction_result_no_round_f32_reg_d, fraction_result_no_round_f16_reg_d))
val fraction_result_no_round_reg = RegEnable(fraction_result_no_round_reg_d, fire_reg1)
// Lane slices: 52 bits for f64, 23 bits per f32 lane, 10 bits per f16 lane.
val fraction_result_no_round_f64_reg2 = fraction_result_no_round_reg(51,0)
val fraction_result_no_round_f32_0_reg2 = fraction_result_no_round_reg(22,0)
val fraction_result_no_round_f32_1_reg2 = fraction_result_no_round_reg(45,23)
val fraction_result_no_round_f16_0_reg2 = fraction_result_no_round_reg(9,0)
val fraction_result_no_round_f16_1_reg2 = fraction_result_no_round_reg(19,10)
val fraction_result_no_round_f16_2_reg2 = fraction_result_no_round_reg(29,20)
val fraction_result_no_round_f16_3_reg2 = fraction_result_no_round_reg(39,30)
// Fraction incremented by one (the rounded-up candidate). `+&` is the width-expanding add,
// so the result is one bit wider than the fraction and keeps the carry-out.
val fraction_result_round_f64 = fraction_result_no_round_f64_reg2 +& 1.U
val fraction_result_round_f32_0 = fraction_result_no_round_f32_0_reg2 +& 1.U
val fraction_result_round_f32_1 = fraction_result_no_round_f32_1_reg2 +& 1.U
val fraction_result_round_f16_0 = fraction_result_no_round_f16_0_reg2 +& 1.U
val fraction_result_round_f16_1 = fraction_result_no_round_f16_1_reg2 +& 1.U
val fraction_result_round_f16_2 = fraction_result_no_round_f16_2_reg2 +& 1.U
val fraction_result_round_f16_3 = fraction_result_no_round_f16_3_reg2 +& 1.U
// todo
// Provisional result sign per lane: the addend sign when the sum is negative, otherwise
// the product sign. Both input signs are first registered with `fire`; the selected sign
// is then carried through two more pipeline registers (fire_reg0, fire_reg1).
val sign_result_temp_f64_reg2 = {
  val addendSign = RegEnable(sign_c_f64, fire)
  val productSign = RegEnable(sign_a_b_f64, fire)
  val chosenSign = Mux(adder_is_negative_f64, addendSign, productSign)
  RegEnable(RegEnable(chosenSign, fire_reg0), fire_reg1)
}
val sign_result_temp_f32_0_reg2 = {
  val addendSign = RegEnable(sign_c_f32_0, fire)
  val productSign = RegEnable(sign_a_b_f32_0, fire)
  val chosenSign = Mux(adder_is_negative_f32_0, addendSign, productSign)
  RegEnable(RegEnable(chosenSign, fire_reg0), fire_reg1)
}
val sign_result_temp_f32_1_reg2 = {
  val addendSign = RegEnable(sign_c_f32_1, fire)
  val productSign = RegEnable(sign_a_b_f32_1, fire)
  val chosenSign = Mux(adder_is_negative_f32_1, addendSign, productSign)
  RegEnable(RegEnable(chosenSign, fire_reg0), fire_reg1)
}
val sign_result_temp_f16_0_reg2 = {
  val addendSign = RegEnable(sign_c_f16_0, fire)
  val productSign = RegEnable(sign_a_b_f16_0, fire)
  val chosenSign = Mux(adder_is_negative_f16_0, addendSign, productSign)
  RegEnable(RegEnable(chosenSign, fire_reg0), fire_reg1)
}
val sign_result_temp_f16_1_reg2 = {
  val addendSign = RegEnable(sign_c_f16_1, fire)
  val productSign = RegEnable(sign_a_b_f16_1, fire)
  val chosenSign = Mux(adder_is_negative_f16_1, addendSign, productSign)
  RegEnable(RegEnable(chosenSign, fire_reg0), fire_reg1)
}
val sign_result_temp_f16_2_reg2 = {
  val addendSign = RegEnable(sign_c_f16_2, fire)
  val productSign = RegEnable(sign_a_b_f16_2, fire)
  val chosenSign = Mux(adder_is_negative_f16_2, addendSign, productSign)
  RegEnable(RegEnable(chosenSign, fire_reg0), fire_reg1)
}
val sign_result_temp_f16_3_reg2 = {
  val addendSign = RegEnable(sign_c_f16_3, fire)
  val productSign = RegEnable(sign_a_b_f16_3, fire)
  val chosenSign = Mux(adder_is_negative_f16_3, addendSign, productSign)
  RegEnable(RegEnable(chosenSign, fire_reg0), fire_reg1)
}
// One-hot decode of the rounding-mode input. The encodings used here (000 RNE, 001 RTZ,
// 010 RDN, 011 RUP, 100 RMM) are the same as the RISC-V `frm` field values.
val RNE = io.round_mode === "b000".U
val RTZ = io.round_mode === "b001".U
val RDN = io.round_mode === "b010".U
val RUP = io.round_mode === "b011".U
val RMM = io.round_mode === "b100".U
// Each decoded flag is carried through three pipeline registers
// (enabled by fire, fire_reg0 and fire_reg1 respectively).
val RNE_reg2 = {
  val atReg0 = RegEnable(RNE, fire)
  val atReg1 = RegEnable(atReg0, fire_reg0)
  RegEnable(atReg1, fire_reg1)
}
val RTZ_reg2 = {
  val atReg0 = RegEnable(RTZ, fire)
  val atReg1 = RegEnable(atReg0, fire_reg0)
  RegEnable(atReg1, fire_reg1)
}
val RDN_reg2 = {
  val atReg0 = RegEnable(RDN, fire)
  val atReg1 = RegEnable(atReg0, fire_reg0)
  RegEnable(atReg1, fire_reg1)
}
val RUP_reg2 = {
  val atReg0 = RegEnable(RUP, fire)
  val atReg1 = RegEnable(atReg0, fire_reg0)
  RegEnable(atReg1, fire_reg1)
}
val RMM_reg2 = {
  val atReg0 = RegEnable(RMM, fire)
  val atReg1 = RegEnable(atReg0, fire_reg0)
  RegEnable(atReg1, fire_reg1)
}
// todo
// Sticky bit per lane, registered with fire_reg1. It is the OR of
//   (a) the sticky bit from the addend right shift, delayed by one stage (fire_reg0), and
//   (b) a comparison: (lzd of masked magnitude + tzd of the sum) is below
//       adder_inv width - significand width - 2.
// NOTE(review): (b) presumably means that, after normalization, at least one set bit lies
// below the round position - confirm with the LZD helper semantics.
val sticky_f64_reg2 = RegEnable(RegEnable(rshift_sticky_f64 , fire_reg0) | (lzd_adder_inv_mask_f64_reg1 + tzd_adder_f64_reg1 < (adder_inv_f64.getWidth-significandWidth-2).U), fire_reg1)
val sticky_f32_0_reg2 = RegEnable(RegEnable(rshift_sticky_f32_0, fire_reg0) | (lzd_adder_inv_mask_f32_0_reg1 + tzd_adder_f32_0_reg1 < (adder_inv_f32_0.getWidth-24-2).U), fire_reg1)
val sticky_f32_1_reg2 = RegEnable(RegEnable(rshift_sticky_f32_1, fire_reg0) | (lzd_adder_inv_mask_f32_1_reg1 + tzd_adder_f32_1_reg1 < (adder_inv_f32_1.getWidth-24-2).U), fire_reg1)
val sticky_f16_0_reg2 = RegEnable(RegEnable(rshift_sticky_f16_0, fire_reg0) | (lzd_adder_inv_mask_f16_0_reg1 + tzd_adder_f16_0_reg1 < (adder_inv_f16_0.getWidth-11-2).U), fire_reg1)
val sticky_f16_1_reg2 = RegEnable(RegEnable(rshift_sticky_f16_1, fire_reg0) | (lzd_adder_inv_mask_f16_1_reg1 + tzd_adder_f16_1_reg1 < (adder_inv_f16_1.getWidth-11-2).U), fire_reg1)
val sticky_f16_2_reg2 = RegEnable(RegEnable(rshift_sticky_f16_2, fire_reg0) | (lzd_adder_inv_mask_f16_2_reg1 + tzd_adder_f16_2_reg1 < (adder_inv_f16_2.getWidth-11-2).U), fire_reg1)
val sticky_f16_3_reg2 = RegEnable(RegEnable(rshift_sticky_f16_3, fire_reg0) | (lzd_adder_inv_mask_f16_3_reg1 + tzd_adder_f16_3_reg1 < (adder_inv_f16_3.getWidth-11-2).U), fire_reg1)
// Same construction with the threshold lowered by one (width - significand - 3), i.e. the
// sticky bit for a boundary one position lower.
// NOTE(review): the "_uf" suffix suggests it is used for underflow detection - confirm at
// the point of use.
val sticky_uf_f64_reg2 = RegEnable(RegEnable(rshift_sticky_f64 , fire_reg0) | (lzd_adder_inv_mask_f64_reg1 + tzd_adder_f64_reg1 < (adder_inv_f64.getWidth-significandWidth-3).U), fire_reg1)
val sticky_uf_f32_0_reg2 = RegEnable(RegEnable(rshift_sticky_f32_0, fire_reg0) | (lzd_adder_inv_mask_f32_0_reg1 + tzd_adder_f32_0_reg1 < (adder_inv_f32_0.getWidth-24-3).U), fire_reg1)
val sticky_uf_f32_1_reg2 = RegEnable(RegEnable(rshift_sticky_f32_1, fire_reg0) | (lzd_adder_inv_mask_f32_1_reg1 + tzd_adder_f32_1_reg1 < (adder_inv_f32_1.getWidth-24-3).U), fire_reg1)
val sticky_uf_f16_0_reg2 = RegEnable(RegEnable(rshift_sticky_f16_0, fire_reg0) | (lzd_adder_inv_mask_f16_0_reg1 + tzd_adder_f16_0_reg1 < (adder_inv_f16_0.getWidth-11-3).U), fire_reg1)
val sticky_uf_f16_1_reg2 = RegEnable(RegEnable(rshift_sticky_f16_1, fire_reg0) | (lzd_adder_inv_mask_f16_1_reg1 + tzd_adder_f16_1_reg1 < (adder_inv_f16_1.getWidth-11-3).U), fire_reg1)
val sticky_uf_f16_2_reg2 = RegEnable(RegEnable(rshift_sticky_f16_2, fire_reg0) | (lzd_adder_inv_mask_f16_2_reg1 + tzd_adder_f16_2_reg1 < (adder_inv_f16_2.getWidth-11-3).U), fire_reg1)
val sticky_uf_f16_3_reg2 = RegEnable(RegEnable(rshift_sticky_f16_3, fire_reg0) | (lzd_adder_inv_mask_f16_3_reg1 + tzd_adder_f16_3_reg1 < (adder_inv_f16_3.getWidth-11-3).U), fire_reg1)
// todo (marker kept from the original; the outstanding work is not described in the source)

/** Selects, as a 1-bit UInt, the bit of `src` located `skipped` positions below its MSB. */
def lshiftBitBelowMsb(src: UInt, skipped: Int): UInt = src.tail(skipped).head(1)

// Stage-2 registers (enabled on fire_reg1) capturing single bits of the lshift_adder_inv_fix_* values.
// "guard" is the bit `precision` places below the MSB and "round" is the bit one place further down,
// where precision is significandWidth for f64, 24 for f32 and 11 for f16.
// NOTE(review): `round_lshift_f32_1_Reg2` has a capital 'R', unlike its siblings; the name is kept
// unchanged because the round_f32_1 / guard_f32_1 definitions refer to it.
val round_lshift_f64_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f64, significandWidth + 1), fire_reg1)
val round_lshift_f32_0_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f32_0, 24 + 1), fire_reg1)
val round_lshift_f32_1_Reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f32_1, 24 + 1), fire_reg1)
val round_lshift_f16_0_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f16_0, 11 + 1), fire_reg1)
val round_lshift_f16_1_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f16_1, 11 + 1), fire_reg1)
val round_lshift_f16_2_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f16_2, 11 + 1), fire_reg1)
val round_lshift_f16_3_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f16_3, 11 + 1), fire_reg1)

val guard_lshift_f64_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f64, significandWidth), fire_reg1)
val guard_lshift_f32_0_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f32_0, 24), fire_reg1)
val guard_lshift_f32_1_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f32_1, 24), fire_reg1)
val guard_lshift_f16_0_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f16_0, 11), fire_reg1)
val guard_lshift_f16_1_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f16_1, 11), fire_reg1)
val guard_lshift_f16_2_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f16_2, 11), fire_reg1)
val guard_lshift_f16_3_reg2 =
  RegEnable(lshiftBitBelowMsb(lshift_adder_inv_fix_f16_3, 11), fire_reg1)
// Final round / guard bits for the normal-result case.
// When adder_is_negative_*_reg2 is clear the registered bits pass through unchanged.
// When it is set:
//   round = round_lshift ^ !sticky
//   guard = guard_lshift ^ (!sticky & round_lshift)
// i.e. a one-bit increment entering at the sticky position and rippling upward -- presumably the
// "+1" of a two's-complement negation of the adder result; confirm against the adder stage.
// Written as "bit ^ (negative & flip-condition)", which equals the conditional form above.
val round_f64   = round_lshift_f64_reg2   ^ (adder_is_negative_f64_reg2   & !sticky_f64_reg2)
val round_f32_0 = round_lshift_f32_0_reg2 ^ (adder_is_negative_f32_0_reg2 & !sticky_f32_0_reg2)
val round_f32_1 = round_lshift_f32_1_Reg2 ^ (adder_is_negative_f32_1_reg2 & !sticky_f32_1_reg2)
val round_f16_0 = round_lshift_f16_0_reg2 ^ (adder_is_negative_f16_0_reg2 & !sticky_f16_0_reg2)
val round_f16_1 = round_lshift_f16_1_reg2 ^ (adder_is_negative_f16_1_reg2 & !sticky_f16_1_reg2)
val round_f16_2 = round_lshift_f16_2_reg2 ^ (adder_is_negative_f16_2_reg2 & !sticky_f16_2_reg2)
val round_f16_3 = round_lshift_f16_3_reg2 ^ (adder_is_negative_f16_3_reg2 & !sticky_f16_3_reg2)

val guard_f64 =
  guard_lshift_f64_reg2 ^ (adder_is_negative_f64_reg2 & !sticky_f64_reg2 & round_lshift_f64_reg2)
val guard_f32_0 =
  guard_lshift_f32_0_reg2 ^ (adder_is_negative_f32_0_reg2 & !sticky_f32_0_reg2 & round_lshift_f32_0_reg2)
val guard_f32_1 =
  guard_lshift_f32_1_reg2 ^ (adder_is_negative_f32_1_reg2 & !sticky_f32_1_reg2 & round_lshift_f32_1_Reg2)
val guard_f16_0 =
  guard_lshift_f16_0_reg2 ^ (adder_is_negative_f16_0_reg2 & !sticky_f16_0_reg2 & round_lshift_f16_0_reg2)
val guard_f16_1 =
  guard_lshift_f16_1_reg2 ^ (adder_is_negative_f16_1_reg2 & !sticky_f16_1_reg2 & round_lshift_f16_1_reg2)
val guard_f16_2 =
  guard_lshift_f16_2_reg2 ^ (adder_is_negative_f16_2_reg2 & !sticky_f16_2_reg2 & round_lshift_f16_2_reg2)
val guard_f16_3 =
  guard_lshift_f16_3_reg2 ^ (adder_is_negative_f16_3_reg2 & !sticky_f16_3_reg2 & round_lshift_f16_3_reg2)

// In the "uf" variant every rounding position sits one bit lower, so the normal round bit
// is reused as the uf guard bit.
val guard_uf_f64   = round_f64
val guard_uf_f32_0 = round_f32_0
val guard_uf_f32_1 = round_f32_1
val guard_uf_f16_0 = round_f16_0
val guard_uf_f16_1 = round_f16_1
val guard_uf_f16_2 = round_f16_2
val guard_uf_f16_3 = round_f16_3
// todo (marker kept from the original; the outstanding work is not described in the source)

// Stage-2 registers (enabled on fire_reg1) for the "uf" round bit: the bit of lshift_adder_inv_fix_*
// that lies (precision + 2) places below the MSB. It is extracted by keeping the top (precision + 3)
// bits and then discarding the top (precision + 2) of those, which leaves exactly that one bit.
val round_lshift_uf_f64_reg2 =
  RegEnable(lshift_adder_inv_fix_f64.head(significandWidth + 3).tail(significandWidth + 2), fire_reg1)
val round_lshift_uf_f32_0_reg2 =
  RegEnable(lshift_adder_inv_fix_f32_0.head(24 + 3).tail(24 + 2), fire_reg1)
val round_lshift_uf_f32_1_reg2 =
  RegEnable(lshift_adder_inv_fix_f32_1.head(24 + 3).tail(24 + 2), fire_reg1)
val round_lshift_uf_f16_0_reg2 =
  RegEnable(lshift_adder_inv_fix_f16_0.head(11 + 3).tail(11 + 2), fire_reg1)
val round_lshift_uf_f16_1_reg2 =
  RegEnable(lshift_adder_inv_fix_f16_1.head(11 + 3).tail(11 + 2), fire_reg1)
val round_lshift_uf_f16_2_reg2 =
  RegEnable(lshift_adder_inv_fix_f16_2.head(11 + 3).tail(11 + 2), fire_reg1)
val round_lshift_uf_f16_3_reg2 =
  RegEnable(lshift_adder_inv_fix_f16_3.head(11 + 3).tail(11 + 2), fire_reg1)

// uf round bit: the registered bit, inverted when the adder result is flagged negative and the
// uf sticky flag is clear; otherwise passed through unchanged.
val round_uf_f64   = round_lshift_uf_f64_reg2   ^ (adder_is_negative_f64_reg2   & !sticky_uf_f64_reg2)
val round_uf_f32_0 = round_lshift_uf_f32_0_reg2 ^ (adder_is_negative_f32_0_reg2 & !sticky_uf_f32_0_reg2)
val round_uf_f32_1 = round_lshift_uf_f32_1_reg2 ^ (adder_is_negative_f32_1_reg2 & !sticky_uf_f32_1_reg2)
val round_uf_f16_0 = round_lshift_uf_f16_0_reg2 ^ (adder_is_negative_f16_0_reg2 & !sticky_uf_f16_0_reg2)
val round_uf_f16_1 = round_lshift_uf_f16_1_reg2 ^ (adder_is_negative_f16_1_reg2 & !sticky_uf_f16_1_reg2)
val round_uf_f16_2 = round_lshift_uf_f16_2_reg2 ^ (adder_is_negative_f16_2_reg2 & !sticky_uf_f16_2_reg2)
val round_uf_f16_3 = round_lshift_uf_f16_3_reg2 ^ (adder_is_negative_f16_3_reg2 & !sticky_uf_f16_3_reg2)
/** Rounding increment for the normal-result case; one instance per format/lane.
  *
  * The result is the OR of one term per rounding-mode select (RNE_reg2, RDN_reg2, RUP_reg2, RMM_reg2)
  * plus a term that fires when the adder result is flagged negative and guard, round and sticky are
  * all zero.
  *
  * @param lsbBit    bit 0 of the unrounded fraction (tie-break input of the RNE term)
  * @param guardBit  guard bit
  * @param roundBit  round bit
  * @param stickyBit sticky flag
  * @param signBit   sign of the provisional result (gates the RDN / RUP terms)
  * @param negBit    adder-result-is-negative flag
  * @return 1-bit increment request
  */
def roundIncrementNormal(lsbBit: UInt, guardBit: UInt, roundBit: UInt, stickyBit: UInt,
                         signBit: UInt, negBit: UInt): UInt = {
  RNE_reg2 & (guardBit & (lsbBit | roundBit | stickyBit)) |
    RDN_reg2 & signBit & (guardBit | roundBit | stickyBit) |
    RUP_reg2 & !signBit & (guardBit | roundBit | stickyBit) |
    RMM_reg2 & guardBit |
    negBit & !guardBit & !roundBit & !stickyBit
}

val round_add1_f64 = Wire(UInt(1.W))
round_add1_f64 := roundIncrementNormal(
  fraction_result_no_round_f64_reg2(0), guard_f64, round_f64, sticky_f64_reg2,
  sign_result_temp_f64_reg2, adder_is_negative_f64_reg2)

val round_add1_f32_0 = Wire(UInt(1.W))
round_add1_f32_0 := roundIncrementNormal(
  fraction_result_no_round_f32_0_reg2(0), guard_f32_0, round_f32_0, sticky_f32_0_reg2,
  sign_result_temp_f32_0_reg2, adder_is_negative_f32_0_reg2)

val round_add1_f32_1 = Wire(UInt(1.W))
round_add1_f32_1 := roundIncrementNormal(
  fraction_result_no_round_f32_1_reg2(0), guard_f32_1, round_f32_1, sticky_f32_1_reg2,
  sign_result_temp_f32_1_reg2, adder_is_negative_f32_1_reg2)

val round_add1_f16_0 = Wire(UInt(1.W))
round_add1_f16_0 := roundIncrementNormal(
  fraction_result_no_round_f16_0_reg2(0), guard_f16_0, round_f16_0, sticky_f16_0_reg2,
  sign_result_temp_f16_0_reg2, adder_is_negative_f16_0_reg2)

val round_add1_f16_1 = Wire(UInt(1.W))
round_add1_f16_1 := roundIncrementNormal(
  fraction_result_no_round_f16_1_reg2(0), guard_f16_1, round_f16_1, sticky_f16_1_reg2,
  sign_result_temp_f16_1_reg2, adder_is_negative_f16_1_reg2)

val round_add1_f16_2 = Wire(UInt(1.W))
round_add1_f16_2 := roundIncrementNormal(
  fraction_result_no_round_f16_2_reg2(0), guard_f16_2, round_f16_2, sticky_f16_2_reg2,
  sign_result_temp_f16_2_reg2, adder_is_negative_f16_2_reg2)

val round_add1_f16_3 = Wire(UInt(1.W))
round_add1_f16_3 := roundIncrementNormal(
  fraction_result_no_round_f16_3_reg2(0), guard_f16_3, round_f16_3, sticky_f16_3_reg2,
  sign_result_temp_f16_3_reg2, adder_is_negative_f16_3_reg2)
/** Rounding increment for the "uf" variant, where every rounding position is one bit lower.
  *
  * Same four rounding-mode terms as the normal case, but the tie-break input of the RNE term is the
  * normal-case guard bit (it plays the role of the LSB here), and there is no extra
  * "negative and nothing below" term.
  *
  * @param lsbBit    normal-case guard bit, used as the LSB for the RNE tie-break
  * @param guardBit  uf guard bit
  * @param roundBit  uf round bit
  * @param stickyBit uf sticky flag
  * @param signBit   sign of the provisional result (gates the RDN / RUP terms)
  * @return 1-bit increment request
  */
def roundIncrementUf(lsbBit: UInt, guardBit: UInt, roundBit: UInt, stickyBit: UInt,
                     signBit: UInt): UInt = {
  RNE_reg2 & (guardBit & (lsbBit | roundBit | stickyBit)) |
    RDN_reg2 & signBit & (guardBit | roundBit | stickyBit) |
    RUP_reg2 & !signBit & (guardBit | roundBit | stickyBit) |
    RMM_reg2 & guardBit
}

val round_add1_uf_f64 = roundIncrementUf(
  guard_f64, guard_uf_f64, round_uf_f64, sticky_uf_f64_reg2, sign_result_temp_f64_reg2)
val round_add1_uf_f32_0 = roundIncrementUf(
  guard_f32_0, guard_uf_f32_0, round_uf_f32_0, sticky_uf_f32_0_reg2, sign_result_temp_f32_0_reg2)
val round_add1_uf_f32_1 = roundIncrementUf(
  guard_f32_1, guard_uf_f32_1, round_uf_f32_1, sticky_uf_f32_1_reg2, sign_result_temp_f32_1_reg2)
val round_add1_uf_f16_0 = roundIncrementUf(
  guard_f16_0, guard_uf_f16_0, round_uf_f16_0, sticky_uf_f16_0_reg2, sign_result_temp_f16_0_reg2)
val round_add1_uf_f16_1 = roundIncrementUf(
  guard_f16_1, guard_uf_f16_1, round_uf_f16_1, sticky_uf_f16_1_reg2, sign_result_temp_f16_1_reg2)
val round_add1_uf_f16_2 = roundIncrementUf(
  guard_f16_2, guard_uf_f16_2, round_uf_f16_2, sticky_uf_f16_2_reg2, sign_result_temp_f16_2_reg2)
val round_add1_uf_f16_3 = roundIncrementUf(
  guard_f16_3, guard_uf_f16_3, round_uf_f16_3, sticky_uf_f16_3_reg2, sign_result_temp_f16_3_reg2)
// Rounding carries out of the fraction exactly when the unrounded fraction is all ones and the
// rounding increment is requested. Appending the 1-bit increment to the fraction and AND-reducing
// the concatenation tests both conditions at once.
val exponent_add_1_f64   = Cat(fraction_result_no_round_f64_reg2,   round_add1_f64).andR
val exponent_add_1_f32_0 = Cat(fraction_result_no_round_f32_0_reg2, round_add1_f32_0).andR
val exponent_add_1_f32_1 = Cat(fraction_result_no_round_f32_1_reg2, round_add1_f32_1).andR
val exponent_add_1_f16_0 = Cat(fraction_result_no_round_f16_0_reg2, round_add1_f16_0).andR
val exponent_add_1_f16_1 = Cat(fraction_result_no_round_f16_1_reg2, round_add1_f16_1).andR
val exponent_add_1_f16_2 = Cat(fraction_result_no_round_f16_2_reg2, round_add1_f16_2).andR
val exponent_add_1_f16_3 = Cat(fraction_result_no_round_f16_3_reg2, round_add1_f16_3).andR
// Register sharing across formats (the original note says this saves 47 register bits).
// The per-lane values of each format are packed with lane 0 in the least-significant position;
// is_fp64_reg1 / is_fp32_reg1 pick which packing is stored (f16 when neither is set), so one
// stage-2 register, enabled on fire_reg1, serves all three formats.
val is_fix_f64_reg_d = is_fix_f64
val is_fix_f32_reg_d = is_fix_f32_1 ## is_fix_f32_0
val is_fix_f16_reg_d = is_fix_f16_3 ## is_fix_f16_2 ## is_fix_f16_1 ## is_fix_f16_0
val is_fix_reg_d = Mux(
  is_fp64_reg1,
  is_fix_f64_reg_d,
  Mux(is_fp32_reg1, is_fix_f32_reg_d, is_fix_f16_reg_d)
)
val is_fix_reg2 = RegEnable(is_fix_reg_d, fire_reg1)

val lshift_value_f64_reg_d = lshift_value_f64_reg1
val lshift_value_f32_reg_d = lshift_value_f32_1_reg1 ## lshift_value_f32_0_reg1
val lshift_value_f16_reg_d =
  lshift_value_f16_3_reg1 ## lshift_value_f16_2_reg1 ## lshift_value_f16_1_reg1 ## lshift_value_f16_0_reg1
val lshift_value_reg_d = Mux(
  is_fp64_reg1,
  lshift_value_f64_reg_d,
  Mux(is_fp32_reg1, lshift_value_f32_reg_d, lshift_value_f16_reg_d)
)
val lshift_value_reg2 = RegEnable(lshift_value_reg_d, fire_reg1)
// Result exponent per format/lane: the larger exponent minus the left-shift amount, plus one when
// either rounding carried out of the fraction (exponent_add_1_*) or the lane's is_fix bit is set.
// The shift amount and is_fix bit are sliced out of the shared stage-2 registers; the slices used
// are (7,0) for f64, (6,0) / (13,7) for f32 lanes 0 / 1, and (5,0) for f16 lane 0.
val exponent_result_add_value_f64 = {
  val shiftedExp = E_greater_f64_reg2 - lshift_value_reg2(7,0)
  val bumpByOne  = exponent_add_1_f64 | is_fix_reg2(0)
  Mux(bumpByOne, shiftedExp + 1.U, shiftedExp)
}
val exponent_result_add_value_f32_0 = {
  val shiftedExp = E_greater_f32_0_reg2 - lshift_value_reg2(6,0)
  val bumpByOne  = exponent_add_1_f32_0 | is_fix_reg2(0)
  Mux(bumpByOne, shiftedExp + 1.U, shiftedExp)
}
val exponent_result_add_value_f32_1 = {
  val shiftedExp = E_greater_f32_1_reg2 - lshift_value_reg2(13,7)
  val bumpByOne  = exponent_add_1_f32_1 | is_fix_reg2(1)
  Mux(bumpByOne, shiftedExp + 1.U, shiftedExp)
}
val exponent_result_add_value_f16_0 = {
  val shiftedExp = E_greater_f16_0_reg2 - lshift_value_reg2(5,0)
  val bumpByOne  = exponent_add_1_f16_0 | is_fix_reg2(0)
  Mux(bumpByOne, shiftedExp + 1.U, shiftedExp)
}