From 3ee5a388022178397161ff50ffa97819c2d4af34 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 20:45:39 +0000 Subject: [PATCH] Fix shuffle --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 51c81be06..2ea7f2c9b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -358,7 +358,6 @@ namespace SixLabors.ImageSharp { ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); Vector128<byte> vmask = Unsafe.As<byte, Vector128<byte>>(ref vmaskBase); - Vector128<byte> vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); Vector128<byte> vmasko = Unsafe.As<byte, Vector128<byte>>(ref vmaskoBase); Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); @@ -398,8 +397,12 @@ namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(v3, vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); - v2 = Ssse3.AlignRight(v3, v2, 12); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, i); @@ -493,8 +496,12 @@ namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); - v2 = Ssse3.AlignRight(v3, v2, 12); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, j);