From 64e082615a4ad3e1ca2a8b591b793e52e6e6b8f8 Mon Sep 17 00:00:00 2001 From: TechPizza Date: Tue, 18 May 2021 09:50:26 +0200 Subject: [PATCH] Optimized AverageFilter --- .../Formats/Png/Filters/AverageFilter.cs | 45 ++++--------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs index 57416a737b..b596643622 100644 --- a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs @@ -88,6 +88,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters if (Avx2.IsSupported) { Vector256 sumAccumulator = Vector256.Zero; + Vector256 allBitsSet = Avx2.CompareEqual(sumAccumulator, sumAccumulator).AsByte(); for (int xLeft = x - bytesPerPixel; x + Vector256.Count <= scanline.Length; xLeft += Vector256.Count) { @@ -95,7 +96,9 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters Vector256 left = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, xLeft)); Vector256 above = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, x)); - Vector256 res = Avx2.Subtract(scan, Average(left, above)); + Vector256 avg = Avx2.Xor(Avx2.Average(Avx2.Xor(left, allBitsSet), Avx2.Xor(above, allBitsSet)), allBitsSet); + Vector256 res = Avx2.Subtract(scan, avg); + Unsafe.As>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type x += Vector256.Count; @@ -121,8 +124,8 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } else if (Sse2.IsSupported) { - var allBitsSet = Vector128.Create((sbyte)-1); Vector128 sumAccumulator = Vector128.Zero; + Vector128 allBitsSet = Sse2.CompareEqual(sumAccumulator, sumAccumulator).AsByte(); for (int xLeft = x - bytesPerPixel; x + Vector128.Count <= scanline.Length; xLeft += Vector128.Count) { @@ -130,7 +133,9 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters Vector128 left = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, xLeft)); Vector128 above = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, x)); - Vector128 res = Sse2.Subtract(scan, Average(left, above)); + Vector128 avg = Sse2.Xor(Sse2.Average(Sse2.Xor(left, allBitsSet), Sse2.Xor(above, allBitsSet)), allBitsSet); + Vector128 res = Sse2.Subtract(scan, avg); + Unsafe.As>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type x += Vector128.Count; @@ -142,7 +147,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters else { Vector128 mask = Sse2.CompareGreaterThan(res.AsSByte(), Vector128.Zero); - mask = Sse2.Xor(mask, allBitsSet); + mask = Sse2.Xor(mask, allBitsSet.AsSByte()); absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask); } @@ -189,37 +194,5 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters /// The [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int Average(byte left, byte above) => (left + above) >> 1; - -#if SUPPORTS_RUNTIME_INTRINSICS - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 Average(Vector128 left, Vector128 above) - { - Vector128 loLeft16 = Sse2.UnpackLow(left, Vector128.Zero).AsUInt16(); - Vector128 hiLeft16 = Sse2.UnpackHigh(left, Vector128.Zero).AsUInt16(); - - Vector128 loAbove16 = Sse2.UnpackLow(above, Vector128.Zero).AsUInt16(); - Vector128 hiAbove16 = Sse2.UnpackHigh(above, Vector128.Zero).AsUInt16(); - - Vector128 div1 = Sse2.ShiftRightLogical(Sse2.Add(loLeft16, loAbove16), 1); - Vector128 div2 = Sse2.ShiftRightLogical(Sse2.Add(hiLeft16, hiAbove16), 1); - - return Sse2.PackUnsignedSaturate(div1.AsInt16(), div2.AsInt16()); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 Average(Vector256 left, Vector256 above) - { - Vector256 loLeft16 = Avx2.UnpackLow(left, Vector256.Zero).AsUInt16(); - Vector256 hiLeft16 = Avx2.UnpackHigh(left, Vector256.Zero).AsUInt16(); - - Vector256 loAbove16 = Avx2.UnpackLow(above, Vector256.Zero).AsUInt16(); - Vector256 hiAbove16 = Avx2.UnpackHigh(above, Vector256.Zero).AsUInt16(); - - Vector256 div1 = Avx2.ShiftRightLogical(Avx2.Add(loLeft16, loAbove16), 1); - Vector256 div2 = Avx2.ShiftRightLogical(Avx2.Add(hiLeft16, hiAbove16), 1); - - return Avx2.PackUnsignedSaturate(div1.AsInt16(), div2.AsInt16()); - } -#endif } }