Skip to content

Commit 093fbc4

Browse files
Use interleaving to prevent stack spills
1 parent 3b2ade5 commit 093fbc4

1 file changed

Lines changed: 15 additions & 77 deletions

File tree

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

Lines changed: 15 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -628,57 +628,6 @@ public void TransposeInto(ref Block8x8F d)
628628
[MethodImpl(InliningOptions.ShortMethod)]
629629
public void TransposeIntoAvx(ref Block8x8F d)
630630
{
631-
#if avxvariant1
632-
Vector256<float> r0 = Unsafe.As<Vector4, Vector256<float>>(ref this.V0L);
633-
Vector256<float> r1 = Unsafe.As<Vector4, Vector256<float>>(ref this.V1L);
634-
Vector256<float> r2 = Unsafe.As<Vector4, Vector256<float>>(ref this.V2L);
635-
Vector256<float> r3 = Unsafe.As<Vector4, Vector256<float>>(ref this.V3L);
636-
Vector256<float> r4 = Unsafe.As<Vector4, Vector256<float>>(ref this.V4L);
637-
Vector256<float> r5 = Unsafe.As<Vector4, Vector256<float>>(ref this.V5L);
638-
Vector256<float> r6 = Unsafe.As<Vector4, Vector256<float>>(ref this.V6L);
639-
Vector256<float> r7 = Unsafe.As<Vector4, Vector256<float>>(ref this.V7L);
640-
641-
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
642-
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
643-
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
644-
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
645-
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
646-
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
647-
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
648-
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
649-
650-
// Controls generated via _MM_SHUFFLE
651-
const byte Control1_0_1_0 = 0b1000100;
652-
const byte Control3_2_3_2 = 0b11101110;
653-
r0 = Avx.Shuffle(t0, t2, Control1_0_1_0);
654-
r1 = Avx.Shuffle(t0, t2, Control3_2_3_2);
655-
r2 = Avx.Shuffle(t1, t3, Control1_0_1_0);
656-
r3 = Avx.Shuffle(t1, t3, Control3_2_3_2);
657-
r4 = Avx.Shuffle(t4, t6, Control1_0_1_0);
658-
r5 = Avx.Shuffle(t4, t6, Control3_2_3_2);
659-
r6 = Avx.Shuffle(t5, t7, Control1_0_1_0);
660-
r7 = Avx.Shuffle(t5, t7, Control3_2_3_2);
661-
662-
const byte Control0x20 = 0b100000;
663-
const byte Control0x31 = 0b110001;
664-
t0 = Avx.Permute2x128(r0, r4, Control0x20);
665-
t1 = Avx.Permute2x128(r1, r5, Control0x20);
666-
t2 = Avx.Permute2x128(r2, r6, Control0x20);
667-
t3 = Avx.Permute2x128(r3, r7, Control0x20);
668-
t4 = Avx.Permute2x128(r0, r4, Control0x31);
669-
t5 = Avx.Permute2x128(r1, r5, Control0x31);
670-
t6 = Avx.Permute2x128(r2, r6, Control0x31);
671-
t7 = Avx.Permute2x128(r3, r7, Control0x31);
672-
673-
Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = t0;
674-
Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = t1;
675-
Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = t2;
676-
Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = t3;
677-
Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = t4;
678-
Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = t5;
679-
Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = t6;
680-
Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = t7;
681-
#else
682631
Vector256<float> r0 = Avx.InsertVector128(
683632
Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256(),
684633
Unsafe.As<Vector4, Vector128<float>>(ref this.V4L),
@@ -720,39 +669,28 @@ public void TransposeIntoAvx(ref Block8x8F d)
720669
1);
721670

722671
Vector256<float> t0 = Avx.UnpackLow(r0, r1);
723-
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
724672
Vector256<float> t2 = Avx.UnpackLow(r2, r3);
725-
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
673+
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
674+
Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = Avx.Blend(t0, v, 0xCC);
675+
Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = Avx.Blend(t2, v, 0x33);
676+
726677
Vector256<float> t4 = Avx.UnpackLow(r4, r5);
727-
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
728678
Vector256<float> t6 = Avx.UnpackLow(r6, r7);
729-
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
730-
731-
Vector256<float> v = Avx.Shuffle(t0, t2, 0x4E);
732-
r0 = Avx.Blend(t0, v, 0xCC);
733-
r1 = Avx.Blend(t2, v, 0x33);
679+
v = Avx.Shuffle(t4, t6, 0x4E);
680+
Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = Avx.Blend(t4, v, 0xCC);
681+
Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = Avx.Blend(t6, v, 0x33);
734682

683+
Vector256<float> t1 = Avx.UnpackHigh(r0, r1);
684+
Vector256<float> t3 = Avx.UnpackHigh(r2, r3);
735685
v = Avx.Shuffle(t1, t3, 0x4E);
736-
r2 = Avx.Blend(t1, v, 0xCC);
737-
r3 = Avx.Blend(t3, v, 0x33);
738-
739-
v = Avx.Shuffle(t4, t6, 0x4E);
740-
r4 = Avx.Blend(t4, v, 0xCC);
741-
r5 = Avx.Blend(t6, v, 0x33);
686+
Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = Avx.Blend(t1, v, 0xCC);
687+
Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = Avx.Blend(t3, v, 0x33);
742688

689+
Vector256<float> t5 = Avx.UnpackHigh(r4, r5);
690+
Vector256<float> t7 = Avx.UnpackHigh(r6, r7);
743691
v = Avx.Shuffle(t5, t7, 0x4E);
744-
r6 = Avx.Blend(t5, v, 0xCC);
745-
r7 = Avx.Blend(t7, v, 0x33);
746-
747-
Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = r0;
748-
Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = r1;
749-
Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = r2;
750-
Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = r3;
751-
Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = r4;
752-
Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = r5;
753-
Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = r6;
754-
Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = r7;
755-
#endif
692+
Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = Avx.Blend(t5, v, 0xCC);
693+
Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = Avx.Blend(t7, v, 0x33);
756694
}
757695
#endif
758696
}

0 commit comments

Comments
 (0)