diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index e86ef0f2ad..1405b390b6 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -141,11 +141,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless nint i; for (i = 0; i <= numPixels - 4; i += 4) { + const byte mmShuffle_2200 = 0b_10_10_00_00; + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector128 input = Unsafe.As>(ref pos).AsByte(); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, 0xA0); // MmShuffle(2, 2, 0, 0) - Vector128 c = Sse2.ShuffleHigh(b, 0xA0); // MmShuffle(2, 2, 0, 0) 0g0g + Vector128 b = Sse2.ShuffleLow(a, mmShuffle_2200); + Vector128 c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g Vector128 output = Sse2.Add(input.AsByte(), c.AsByte()); Unsafe.As>(ref pos) = output.AsUInt32(); } @@ -223,11 +225,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless nint i; for (i = 0; i <= numPixels - 4; i += 4) { + const byte mmShuffle_2200 = 0b_10_10_00_00; + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector128 input = Unsafe.As>(ref pos).AsByte(); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, 0xA0); // MmShuffle(2, 2, 0, 0) - Vector128 c = Sse2.ShuffleHigh(b, 0xA0); // MmShuffle(2, 2, 0, 0) 0g0g + Vector128 b = Sse2.ShuffleLow(a, mmShuffle_2200); + Vector128 c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte()); Unsafe.As>(ref pos) = output.AsUInt32(); } @@ -382,11 +386,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless nint idx; for (idx = 0; idx <= numPixels - 8; idx += 8) { + const byte mmShuffle_2200 = 0b_10_10_00_00; + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector256 input = Unsafe.As>(ref pos); Vector256 a = Avx2.And(input.AsByte(), transformColorAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), 0xA0); // MmShuffle(2, 2, 0, 0) - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), 0xA0); // MmShuffle(2, 2, 0, 0) + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector256 e = Avx2.ShiftLeftLogical(input.AsInt16(), 8); Vector256 f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); @@ -411,11 +417,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless nint idx; for (idx = 0; idx <= numPixels - 4; idx += 4) { + const byte mmShuffle_2200 = 0b_10_10_00_00; + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector128 input = Unsafe.As>(ref pos); Vector128 a = Sse2.And(input.AsByte(), transformColorAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), 0xA0); // MmShuffle(2, 2, 0, 0) - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), 0xA0); // MmShuffle(2, 2, 0, 0) + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); @@ -472,11 +480,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless nint idx; for (idx = 0; idx <= pixelData.Length - 8; idx += 8) { + const byte mmShuffle_2200 = 0b_10_10_00_00; + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector256 input = Unsafe.As>(ref pos); Vector256 a = Avx2.And(input.AsByte(), transformColorInverseAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), 0xA0); // MmShuffle(2, 2, 0, 0) - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), 0xA0); // MmShuffle(2, 2, 0, 0) + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector256 e = Avx2.Add(input.AsByte(), d.AsByte()); Vector256 f = Avx2.ShiftLeftLogical(e.AsInt16(), 8); @@ -502,11 +512,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless nint idx; for (idx = 0; idx <= pixelData.Length - 4; idx += 4) { + const byte mmShuffle_2200 = 0b_10_10_00_00; + ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector128 input = Unsafe.As>(ref pos); Vector128 a = Sse2.And(input.AsByte(), transformColorInverseAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), 0xA0); // MmShuffle(2, 2, 0, 0) - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), 0xA0); // MmShuffle(2, 2, 0, 0) + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.Add(input.AsByte(), d.AsByte()); Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs index 4e6e8fd8a2..b10b3c600c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs @@ -535,8 +535,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { // *in01 = 00 01 10 11 02 03 12 13 // *in23 = 20 21 30 31 22 23 32 33 - Vector128 shuf01_p = Sse2.ShuffleHigh(row01, 0xB1); // MmShuffle(2, 3, 0, 1) - Vector128 shuf32_p = Sse2.ShuffleHigh(row23, 0xB1); // MmShuffle(2, 3, 0, 1) + const byte mmShuffle_2301 = 0b_10_11_00_01; + Vector128 shuf01_p = Sse2.ShuffleHigh(row01, mmShuffle_2301); + Vector128 shuf32_p = Sse2.ShuffleHigh(row23, mmShuffle_2301); // 00 01 10 11 03 02 13 12 // 20 21 30 31 23 22 33 32 @@ -568,7 +569,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 shi = Sse2.UnpackHigh(s03, s12); // 2 3 2 3 2 3 Vector128 v23 = Sse2.UnpackHigh(slo.AsInt32(), shi.AsInt32()); out01 = Sse2.UnpackLow(slo.AsInt32(), shi.AsInt32()); - out32 = Sse2.Shuffle(v23, 0x4E); // MmShuffle(1, 0, 3, 2) + + const byte mmShuffle_1032 = 0b_01_00_11_10; + out32 = Sse2.Shuffle(v23, mmShuffle_1032); } public static void FTransformPass2SSE2(Vector128 v01, Vector128 v32, Span output)