From 5c7f4a9ab37798a512f917f7df8d155dc180254c Mon Sep 17 00:00:00 2001
From: TechPizza
Date: Tue, 18 May 2021 10:52:59 +0200
Subject: [PATCH] Added more specialized Png filter code

Modified tests accordingly
---
 src/ImageSharp/Common/Helpers/Numerics.cs     | 46 +++++++++++++++++
 .../Formats/Png/Filters/AverageFilter.cs      | 10 +---
 .../Formats/Png/Filters/PaethFilter.cs        |  5 +-
 .../Formats/Png/Filters/SubFilter.cs          | 26 +++++++++-
 .../Formats/Png/Filters/UpFilter.cs           | 26 +++++++++-
 .../Formats/Png/PngFilterTests.cs             | 49 +++++++++++++++++--
 6 files changed, 145 insertions(+), 17 deletions(-)

diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
index 0147689117..f9969b27a5 100644
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -749,6 +749,7 @@ namespace SixLabors.ImageSharp
         public static float Lerp(float value1, float value2, float amount)
             => ((value2 - value1) * amount) + value1;
 
+#if SUPPORTS_RUNTIME_INTRINSICS
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
         {
@@ -762,5 +763,50 @@ namespace SixLabors.ImageSharp
             accumulator += intLow;
             accumulator += intHigh;
         }
+
+        /// <summary>
+        /// Reduces elements of the vector into one sum.
+        /// </summary>
+        /// <param name="accumulator">The accumulator to reduce.</param>
+        /// <returns>The sum of all elements.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int ReduceSum(Vector128<int> accumulator)
+        {
+            if (Ssse3.IsSupported)
+            {
+                Vector128<int> hadd = Ssse3.HorizontalAdd(accumulator, accumulator);
+                Vector128<int> swapped = Sse2.Shuffle(hadd, 0x1);
+                Vector128<int> tmp = Sse2.Add(hadd, swapped);
+
+                // Vector128<int>.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
+                return Sse2.ConvertToInt32(tmp);
+            }
+            else
+            {
+                int sum = 0;
+                for (int i = 0; i < Vector128<int>.Count; i++)
+                {
+                    sum += accumulator.GetElement(i);
+                }
+
+                return sum;
+            }
+        }
+
+        /// <summary>
+        /// Reduces even elements of the vector into one sum.
+        /// </summary>
+        /// <param name="accumulator">The accumulator to reduce.</param>
+        /// <returns>The sum of even elements.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int EvenReduceSum(Vector256<int> accumulator)
+        {
+            Vector128<int> vsum = Sse2.Add(accumulator.GetLower(), accumulator.GetUpper()); // add upper lane to lower lane
+            vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10)); // add high to low
+
+            // Vector128<int>.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
+            return Sse2.ConvertToInt32(vsum);
+        }
+#endif
     }
 }
diff --git a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
index 818119f331..0ab1413974 100644
--- a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
@@ -106,10 +106,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                     sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
                 }
 
-                for (int i = 0; i < Vector256<int>.Count; i++)
-                {
-                    sum += sumAccumulator.GetElement(i);
-                }
+                sum += Numerics.EvenReduceSum(sumAccumulator);
             }
             else if (Sse2.IsSupported)
             {
@@ -156,10 +153,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                     sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);
                 }
 
-                for (int i = 0; i < Vector128<int>.Count; i++)
-                {
-                    sum += sumAccumulator.GetElement(i);
-                }
+                sum += Numerics.ReduceSum(sumAccumulator);
             }
 #endif
 
diff --git a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
index f48010dba6..e8e0aa7043 100644
--- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
@@ -108,10 +108,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
                     sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
                 }
 
-                for (int i = 0; i < Vector256<int>.Count; i++)
-                {
-                    sum += sumAccumulator.GetElement(i);
-                }
+                sum += Numerics.EvenReduceSum(sumAccumulator);
             }
             else if (Vector.IsHardwareAccelerated)
             {
diff --git a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
index 31d65995a0..116154836e 100644
--- a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
@@ -6,6 +6,11 @@ using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
 namespace SixLabors.ImageSharp.Formats.Png.Filters
 {
     /// <summary>
@@ -66,7 +71,26 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
             }
 
 #if SUPPORTS_RUNTIME_INTRINSICS
-            if (Vector.IsHardwareAccelerated)
+            if (Avx2.IsSupported)
+            {
+                Vector256<byte> zero = Vector256<byte>.Zero;
+                Vector256<int> sumAccumulator = Vector256<int>.Zero;
+
+                for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
+                {
+                    Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+                    Vector256<byte> prev = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
+
+                    Vector256<byte> res = Avx2.Subtract(scan, prev);
+                    Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+                    x += Vector256<byte>.Count;
+
+                    sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
+                }
+
+                sum += Numerics.EvenReduceSum(sumAccumulator);
+            }
+            else if (Vector.IsHardwareAccelerated)
             {
                 Vector<uint> sumAccumulator = Vector<uint>.Zero;
 
diff --git a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs
index f119c2fbae..e0f35293a4 100644
--- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs
@@ -6,6 +6,11 @@ using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
 namespace SixLabors.ImageSharp.Formats.Png.Filters
 {
     /// <summary>
@@ -61,7 +66,26 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
             int x = 0;
 
 #if SUPPORTS_RUNTIME_INTRINSICS
-            if (Vector.IsHardwareAccelerated)
+            if (Avx2.IsSupported)
+            {
+                Vector256<byte> zero = Vector256<byte>.Zero;
+                Vector256<int> sumAccumulator = Vector256<int>.Zero;
+
+                for (; x + Vector256<byte>.Count <= scanline.Length;)
+                {
+                    Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+                    Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
+
+                    Vector256<byte> res = Avx2.Subtract(scan, above);
+                    Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+                    x += Vector256<byte>.Count;
+
+                    sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
+                }
+
+                sum += Numerics.EvenReduceSum(sumAccumulator);
+            }
+            else if (Vector.IsHardwareAccelerated)
             {
                 Vector<uint> sumAccumulator = Vector<uint>.Zero;
 
diff --git a/tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs
index dae8f25e58..5f7b4f8327 100644
--- a/tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs
@@ -101,7 +101,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
         }
 
         [Fact]
-        public void PaethSimd()
+        public void PaethAvx2()
         {
             static void RunTest()
             {
@@ -114,6 +114,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
                 HwIntrinsics.AllowAll);
         }
 
+        [Fact]
+        public void PaethVector()
+        {
+            static void RunTest()
+            {
+                var data = new TestData(PngFilterMethod.Paeth, Size);
+                data.TestFilter();
+            }
+
+            FeatureTestRunner.RunWithHwIntrinsicsFeature(
+                RunTest,
+                HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
+        }
+
         [Fact]
         public void Up()
         {
@@ -128,8 +142,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
                 HwIntrinsics.DisableSIMD);
         }
 
+
         [Fact]
-        public void UpSimd()
+        public void UpAvx2()
         {
             static void RunTest()
             {
@@ -142,6 +157,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
                 HwIntrinsics.AllowAll);
         }
 
+        [Fact]
+        public void UpVector()
+        {
+            static void RunTest()
+            {
+                var data = new TestData(PngFilterMethod.Up, Size);
+                data.TestFilter();
+            }
+
+            FeatureTestRunner.RunWithHwIntrinsicsFeature(
+                RunTest,
+                HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
+        }
+
         [Fact]
         public void Sub()
         {
@@ -157,7 +186,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
         }
 
         [Fact]
-        public void SubSimd()
+        public void SubAvx2()
         {
             static void RunTest()
             {
@@ -170,6 +199,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
                 HwIntrinsics.AllowAll);
         }
 
+        [Fact]
+        public void SubVector()
+        {
+            static void RunTest()
+            {
+                var data = new TestData(PngFilterMethod.Sub, Size);
+                data.TestFilter();
+            }
+
+            FeatureTestRunner.RunWithHwIntrinsicsFeature(
+                RunTest,
+                HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
+        }
+
         public class TestData
         {
             private readonly PngFilterMethod filter;