diff --git a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs index b596643622..818119f331 100644 --- a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs @@ -87,6 +87,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) { + Vector256 zero = Vector256.Zero; Vector256 sumAccumulator = Vector256.Zero; Vector256 allBitsSet = Avx2.CompareEqual(sumAccumulator, sumAccumulator).AsByte(); @@ -102,19 +103,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters Unsafe.As>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type x += Vector256.Count; - Vector256 absRes = Avx2.Abs(res.AsSByte()).AsSByte(); - Vector256 loRes16 = Avx2.UnpackLow(absRes, Vector256.Zero).AsInt16(); - Vector256 hiRes16 = Avx2.UnpackHigh(absRes, Vector256.Zero).AsInt16(); - - Vector256 loRes32 = Avx2.UnpackLow(loRes16, Vector256.Zero).AsInt32(); - Vector256 hiRes32 = Avx2.UnpackHigh(loRes16, Vector256.Zero).AsInt32(); - sumAccumulator = Avx2.Add(sumAccumulator, loRes32); - sumAccumulator = Avx2.Add(sumAccumulator, hiRes32); - - loRes32 = Avx2.UnpackLow(hiRes16, Vector256.Zero).AsInt32(); - hiRes32 = Avx2.UnpackHigh(hiRes16, Vector256.Zero).AsInt32(); - sumAccumulator = Avx2.Add(sumAccumulator, loRes32); - sumAccumulator = Avx2.Add(sumAccumulator, hiRes32); + sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32()); } for (int i = 0; i < Vector256.Count; i++) @@ -124,6 +113,8 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } else if (Sse2.IsSupported) { + Vector128 zero8 = Vector128.Zero; + Vector128 zero16 = Vector128.Zero; Vector128 sumAccumulator = Vector128.Zero; Vector128 allBitsSet = Sse2.CompareEqual(sumAccumulator, sumAccumulator).AsByte(); @@ -146,21 +137,21 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } else { - Vector128 mask = Sse2.CompareGreaterThan(res.AsSByte(), Vector128.Zero); + Vector128 mask = Sse2.CompareGreaterThan(res.AsSByte(), zero8); mask = Sse2.Xor(mask, allBitsSet.AsSByte()); absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask); } - Vector128 loRes16 = Sse2.UnpackLow(absRes, Vector128.Zero).AsInt16(); - Vector128 hiRes16 = Sse2.UnpackHigh(absRes, Vector128.Zero).AsInt16(); + Vector128 loRes16 = Sse2.UnpackLow(absRes, zero8).AsInt16(); + Vector128 hiRes16 = Sse2.UnpackHigh(absRes, zero8).AsInt16(); - Vector128 loRes32 = Sse2.UnpackLow(loRes16, Vector128.Zero).AsInt32(); - Vector128 hiRes32 = Sse2.UnpackHigh(loRes16, Vector128.Zero).AsInt32(); + Vector128 loRes32 = Sse2.UnpackLow(loRes16, zero16).AsInt32(); + Vector128 hiRes32 = Sse2.UnpackHigh(loRes16, zero16).AsInt32(); sumAccumulator = Sse2.Add(sumAccumulator, loRes32); sumAccumulator = Sse2.Add(sumAccumulator, hiRes32); - loRes32 = Sse2.UnpackLow(hiRes16, Vector128.Zero).AsInt32(); - hiRes32 = Sse2.UnpackHigh(hiRes16, Vector128.Zero).AsInt32(); + loRes32 = Sse2.UnpackLow(hiRes16, zero16).AsInt32(); + hiRes32 = Sse2.UnpackHigh(hiRes16, zero16).AsInt32(); sumAccumulator = Sse2.Add(sumAccumulator, loRes32); sumAccumulator = Sse2.Add(sumAccumulator, hiRes32); } diff --git a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs index 7fa8a6b745..f48010dba6 100644 --- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs @@ -91,6 +91,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) { + Vector256 zero = Vector256.Zero; Vector256 sumAccumulator = Vector256.Zero; for (int xLeft = x - bytesPerPixel; x + Vector256.Count <= scanline.Length; xLeft += Vector256.Count) @@ -104,7 +105,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters Unsafe.As>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type x += Vector256.Count; - sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), Vector256.Zero).AsInt32()); + sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32()); } for (int i = 0; i < Vector256.Count; i++)