From 8d19c2881da8da3a7a88a569b6f7784bbc1c210c Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 10:41:52 +0100 Subject: [PATCH 01/11] Add sse2 version of Vp8Sse4X4 --- .../Formats/Webp/Lossy/LossyUtils.cs | 59 ++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index d5db3dffa..82e221470 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,11 +4,16 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Numerics; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal static class LossyUtils + internal static unsafe class LossyUtils { [MethodImpl(InliningOptions.ShortMethod)] public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16); @@ -17,7 +22,57 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public static int Vp8Sse16X8(Span a, Span b) => GetSse(a, b, 16, 8); [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Sse4X4(Span a, Span b) => GetSse(a, b, 4, 4); + public static int Vp8Sse4X4(Span a, Span b) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { +#pragma warning disable SA1503 // Braces should not be omitted + Span tmp = stackalloc int[4]; + fixed (byte* aPtr = a) + fixed (byte* bPtr = b) + fixed (int* tmpPtr = tmp) + { + // Load values. + Vector128 a0 = Sse2.LoadVector128(aPtr); + Vector128 a1 = Sse2.LoadVector128(aPtr + WebpConstants.Bps); + Vector128 a2 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 2)); + Vector128 a3 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 3)); + Vector128 b0 = Sse2.LoadVector128(bPtr); + Vector128 b1 = Sse2.LoadVector128(bPtr + WebpConstants.Bps); + Vector128 b2 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 2)); + Vector128 b3 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 3)); + + // Combine pair of lines. + Vector128 a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); + Vector128 a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32()); + Vector128 b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32()); + Vector128 b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32()); + + // Convert to 16b. + Vector128 a01s = Sse2.UnpackLow(a01.AsByte(), Vector128.Zero); + Vector128 a23s = Sse2.UnpackLow(a23.AsByte(), Vector128.Zero); + Vector128 b01s = Sse2.UnpackLow(b01.AsByte(), Vector128.Zero); + Vector128 b23s = Sse2.UnpackLow(b23.AsByte(), Vector128.Zero); + + // subtract, square and accumulate. + Vector128 d0 = Sse2.SubtractSaturate(a01s, b01s); + Vector128 d1 = Sse2.SubtractSaturate(a23s, b23s); + Vector128 e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); + Vector128 e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); + Vector128 sum = Sse2.Add(e0, e1); + + Sse2.Store(tmpPtr, sum); + return tmp[3] + tmp[2] + tmp[1] + tmp[0]; + } +#pragma warning restore SA1503 // Braces should not be omitted + } + else +#endif + { + return GetSse(a, b, 4, 4); + } + } [MethodImpl(InliningOptions.ShortMethod)] public static int GetSse(Span a, Span b, int w, int h) From 99a3510e279a38a8c7c733d1c29f63fb3772d49d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 12:53:54 +0100 Subject: [PATCH 02/11] Avoid pinning --- .../Formats/Webp/Lossy/LossyUtils.cs | 72 +++++++++---------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 82e221470..aa35f9673 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -4,6 +4,7 @@ using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Numerics; using System.Runtime.Intrinsics; @@ -27,45 +28,40 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) { -#pragma warning disable SA1503 // Braces should not be omitted Span tmp = stackalloc int[4]; - fixed (byte* aPtr = a) - fixed (byte* bPtr = b) - fixed (int* tmpPtr = tmp) - { - // Load values. - Vector128 a0 = Sse2.LoadVector128(aPtr); - Vector128 a1 = Sse2.LoadVector128(aPtr + WebpConstants.Bps); - Vector128 a2 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 2)); - Vector128 a3 = Sse2.LoadVector128(aPtr + (WebpConstants.Bps * 3)); - Vector128 b0 = Sse2.LoadVector128(bPtr); - Vector128 b1 = Sse2.LoadVector128(bPtr + WebpConstants.Bps); - Vector128 b2 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 2)); - Vector128 b3 = Sse2.LoadVector128(bPtr + (WebpConstants.Bps * 3)); - - // Combine pair of lines. - Vector128 a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); - Vector128 a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32()); - Vector128 b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32()); - Vector128 b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32()); - - // Convert to 16b. - Vector128 a01s = Sse2.UnpackLow(a01.AsByte(), Vector128.Zero); - Vector128 a23s = Sse2.UnpackLow(a23.AsByte(), Vector128.Zero); - Vector128 b01s = Sse2.UnpackLow(b01.AsByte(), Vector128.Zero); - Vector128 b23s = Sse2.UnpackLow(b23.AsByte(), Vector128.Zero); - - // subtract, square and accumulate. - Vector128 d0 = Sse2.SubtractSaturate(a01s, b01s); - Vector128 d1 = Sse2.SubtractSaturate(a23s, b23s); - Vector128 e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); - Vector128 e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); - Vector128 sum = Sse2.Add(e0, e1); - - Sse2.Store(tmpPtr, sum); - return tmp[3] + tmp[2] + tmp[1] + tmp[0]; - } -#pragma warning restore SA1503 // Braces should not be omitted + + // Load values. + Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(a)); + Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps, 8))); + Vector128 a2 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 2, 8))); + Vector128 a3 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 3, 8))); + Vector128 b0 = Unsafe.As>(ref MemoryMarshal.GetReference(b)); + Vector128 b1 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps, 8))); + Vector128 b2 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 2, 8))); + Vector128 b3 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 3, 8))); + + // Combine pair of lines. + Vector128 a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); + Vector128 a23 = Sse2.UnpackLow(a2.AsInt32(), a3.AsInt32()); + Vector128 b01 = Sse2.UnpackLow(b0.AsInt32(), b1.AsInt32()); + Vector128 b23 = Sse2.UnpackLow(b2.AsInt32(), b3.AsInt32()); + + // Convert to 16b. + Vector128 a01s = Sse2.UnpackLow(a01.AsByte(), Vector128.Zero); + Vector128 a23s = Sse2.UnpackLow(a23.AsByte(), Vector128.Zero); + Vector128 b01s = Sse2.UnpackLow(b01.AsByte(), Vector128.Zero); + Vector128 b23s = Sse2.UnpackLow(b23.AsByte(), Vector128.Zero); + + // subtract, square and accumulate. + Vector128 d0 = Sse2.SubtractSaturate(a01s, b01s); + Vector128 d1 = Sse2.SubtractSaturate(a23s, b23s); + Vector128 e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); + Vector128 e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); + Vector128 sum = Sse2.Add(e0, e1); + + ref int outputRef = ref MemoryMarshal.GetReference(tmp); + Unsafe.As>(ref outputRef) = sum; + return tmp[3] + tmp[2] + tmp[1] + tmp[0]; } else #endif From de3140bbc29f4914425564538c849731b531dbeb Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 16:58:48 +0100 Subject: [PATCH 03/11] Use Numerics.ReduceSum(sum) --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index c1af2a453..5b7d4d898 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -27,8 +27,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) { - Span tmp = stackalloc int[4]; - // Load values. Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(a)); Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps, 8))); @@ -58,9 +56,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); Vector128 sum = Sse2.Add(e0, e1); - ref int outputRef = ref MemoryMarshal.GetReference(tmp); - Unsafe.As>(ref outputRef) = sum; - return tmp[3] + tmp[2] + tmp[1] + tmp[0]; + return Numerics.ReduceSum(sum); } else #endif @@ -658,9 +654,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// public static int TTransformSse41(Span inputA, Span inputB, Span w, Span scratch) { - Span sum = scratch.Slice(0, 4); - sum.Clear(); - // Load and combine inputs. Vector128 ina0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA)); Vector128 ina1 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA.Slice(WebpConstants.Bps, 16))); @@ -765,9 +758,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // difference of weighted sums. Vector128 result = Sse2.Subtract(ab0ab2Sum.AsInt32(), b0w0bb2w8Sum.AsInt32()); - ref int outputRef = ref MemoryMarshal.GetReference(sum); - Unsafe.As>(ref outputRef) = result.AsInt32(); - return sum[3] + sum[2] + sum[1] + sum[0]; + return Numerics.ReduceSum(result); } #endif From 80a826f506ae94372b488c099969abd95dc6d16e Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 17:28:30 +0100 Subject: [PATCH 04/11] Remove not needed clear --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 5b7d4d898..febca037b 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -13,7 +13,7 @@ using System.Runtime.Intrinsics.X86; // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal static unsafe class LossyUtils + internal static class LossyUtils { [MethodImpl(InliningOptions.ShortMethod)] public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16); @@ -771,7 +771,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public static void TransformOne(Span src, Span dst, Span scratch) { Span tmp = scratch.Slice(0, 16); - tmp.Clear(); int tmpOffset = 0; for (int srcOffset = 0; srcOffset < 4; srcOffset++) { From 5abd7740e81d8d54bd24db235c3f90e1e5d02803 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 18:05:14 +0100 Subject: [PATCH 05/11] Add Vp8Sse4X4 sse tests --- .../Formats/WebP/LossyUtilsTests.cs | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs index f8b488fde..15b312835 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -10,6 +10,35 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP [Trait("Format", "Webp")] public class LossyUtilsTests { + private static void RunVp8Sse4X4Test() + { + byte[] a = + { + 27, 27, 28, 29, 29, 28, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, + 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 29, 29, 28, + 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 27, 27, 26, + 26, 26, 26, 27, 27, 27, 28, 28, 29, 29, 28, 28, 27, 129, 129, 129, 129, 129, 129, 129, 129, 128, + 128, 128, 128, 128, 128, 128, 128, 28, 27, 27, 26, 26, 27, 27, 28, 27, 28, 28, 29, 29, 28, 28, 27, + 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128 + }; + + byte[] b = + { + 26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, + 204, 204, 204, 204, 204, 204, 204, 204, 204, 26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 26, 26, 26, + 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 204, 204, 204, 204, 204, 204, 204, 204, 204, + 204, 204, 204, 204, 204, 204, 204, 26, 26, 26, 26, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204 + }; + + int expected = 27; + + int actual = LossyUtils.Vp8Sse4X4(a, b); + + Assert.Equal(expected, actual); + } + private static void RunHadamardTransformTest() { byte[] a = @@ -37,10 +66,19 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP Assert.Equal(expected, actual); } + [Fact] + public void Vp8Sse4X4_Works() => RunVp8Sse4X4Test(); + [Fact] public void HadamardTransform_Works() => RunHadamardTransformTest(); #if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void Vp8Sse4X4_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.AllowAll); + + [Fact] + public void Vp8Sse4X4_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.DisableHWIntrinsic); + [Fact] public void HadamardTransform_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.AllowAll); From 5ead84416dfc37e7fa41a36a9d58e15ac85d4232 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 18:56:30 +0100 Subject: [PATCH 06/11] Use Array.Clear to reset the arrays --- .../Formats/Webp/Lossy/Vp8ModeScore.cs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs index 1c92a9d2d..69841b557 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs @@ -97,18 +97,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public void Clear() { - this.YDcLevels.AsSpan().Clear(); - this.YAcLevels.AsSpan().Clear(); - this.UvLevels.AsSpan().Clear(); - this.ModesI4.AsSpan().Clear(); - - for (int i = 0; i < 2; i++) - { - for (int j = 0; j < 3; j++) - { - this.Derr[i, j] = 0; - } - } + Array.Clear(this.YDcLevels, 0, this.YDcLevels.Length); + Array.Clear(this.YAcLevels, 0, this.YAcLevels.Length); + Array.Clear(this.UvLevels, 0, this.UvLevels.Length); + Array.Clear(this.ModesI4, 0, this.ModesI4.Length); + Array.Clear(this.Derr, 0, this.Derr.Length); } public void InitScore() From 7312b1a8389c1824409205a5bbfd4ad14224d9c3 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 19:43:49 +0100 Subject: [PATCH 07/11] Dont use slice --- .../Formats/Webp/Lossy/LossyUtils.cs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index febca037b..19a71c3e5 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -28,14 +28,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy if (Sse2.IsSupported) { // Load values. - Vector128 a0 = Unsafe.As>(ref MemoryMarshal.GetReference(a)); - Vector128 a1 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps, 8))); - Vector128 a2 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 2, 8))); - Vector128 a3 = Unsafe.As>(ref MemoryMarshal.GetReference(a.Slice(WebpConstants.Bps * 3, 8))); - Vector128 b0 = Unsafe.As>(ref MemoryMarshal.GetReference(b)); - Vector128 b1 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps, 8))); - Vector128 b2 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 2, 8))); - Vector128 b3 = Unsafe.As>(ref MemoryMarshal.GetReference(b.Slice(WebpConstants.Bps * 3, 8))); + ref byte aRef = ref MemoryMarshal.GetReference(a); + Vector128 a0 = Unsafe.As>(ref aRef); + Vector128 a1 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps)); + Vector128 a2 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 2)); + Vector128 a3 = Unsafe.As>(ref Unsafe.Add(ref aRef, WebpConstants.Bps * 3)); + ref byte bRef = ref MemoryMarshal.GetReference(b); + Vector128 b0 = Unsafe.As>(ref bRef); + Vector128 b1 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps)); + Vector128 b2 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 2)); + Vector128 b3 = Unsafe.As>(ref Unsafe.Add(ref bRef, WebpConstants.Bps * 3)); // Combine pair of lines. Vector128 a01 = Sse2.UnpackLow(a0.AsInt32(), a1.AsInt32()); From 3dd7c8ea41709173759b02eff4c51268eb2c9f33 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 20:55:23 +0100 Subject: [PATCH 08/11] Remove unnecessary Clear() and scratch buffer --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 4 ++-- src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 19a71c3e5..cb839559f 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -127,7 +127,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy #if SUPPORTS_RUNTIME_INTRINSICS if (Sse41.IsSupported) { - int diffSum = TTransformSse41(a, b, w, scratch); + int diffSum = TTransformSse41(a, b, w); return Math.Abs(diffSum) >> 5; } else @@ -654,7 +654,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// Returns the weighted sum of the absolute value of transformed coefficients. /// w[] contains a row-major 4 by 4 symmetric matrix. /// - public static int TTransformSse41(Span inputA, Span inputB, Span w, Span scratch) + public static int TTransformSse41(Span inputA, Span inputB, Span w) { // Load and combine inputs. Vector128 ina0 = Unsafe.As>(ref MemoryMarshal.GetReference(inputA)); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs index 7192fa2d0..6e724e475 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs @@ -49,7 +49,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.distribution.AsSpan().Clear(); for (j = startBlock; j < endBlock; j++) { - this.output.AsSpan().Clear(); this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output); // Convert coefficients to bin. From 5630b25733e98b004b6a0bfe8996cbac47b6c304 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 21:58:52 +0100 Subject: [PATCH 09/11] Remove more unnecessary Clear's --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 4 ---- src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs | 3 --- 2 files changed, 7 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 97ef27d25..d0baa260c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -340,8 +340,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); Span tmp = it.Scratch2.AsSpan(0, 16); Span scratch = it.Scratch3.AsSpan(0, 16); - tmp.Clear(); - scratch.Clear(); Vp8Encoding.FTransform(src, reference, tmp, scratch); int nz = QuantizeBlock(tmp, levels, ref dqm.Y1); Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch); @@ -357,8 +355,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int n; Span tmp = it.Scratch2.AsSpan(0, 8 * 16); Span scratch = it.Scratch3.AsSpan(0, 16); - tmp.Clear(); - scratch.Clear(); for (n = 0; n < 8; n += 2) { diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs index 0567a0f27..af7e8eaa3 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs @@ -81,7 +81,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { int i; Span tmp = scratch.Slice(0, 16); - tmp.Clear(); for (i = 0; i < 4; i++) { // vertical pass. @@ -124,7 +123,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { int i; Span tmp = scratch.Slice(0, 16); - tmp.Clear(); int srcIdx = 0; int refIdx = 0; @@ -163,7 +161,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public static void FTransformWht(Span input, Span output, Span scratch) { Span tmp = scratch.Slice(0, 16); - tmp.Clear(); int i; int inputIdx = 0; From 7e20c5daaadefdd3c1073088bc74f1adf0d3436b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Wed, 10 Nov 2021 12:10:46 +0100 Subject: [PATCH 10/11] Rename Vp8Sse methods --- src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs | 13 ++++++++----- .../Formats/WebP/LossyUtilsTests.cs | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index d019b5cd5..a10ec6eab 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -19,14 +19,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private static readonly Vector128 Mean16x4Mask = Vector128.Create((short)0x00ff).AsByte(); #endif + // Note: method name in libwebp reference implementation is called VP8SSE16x16. [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Sse16X16(Span a, Span b) => GetSse(a, b, 16, 16); + public static int Vp8_Sse16X16(Span a, Span b) => Vp8_SseNxN(a, b, 16, 16); + // Note: method name in libwebp reference implementation is called VP8SSE16x8. [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Sse16X8(Span a, Span b) => GetSse(a, b, 16, 8); + public static int Vp8_Sse16X8(Span a, Span b) => Vp8_SseNxN(a, b, 16, 8); + // Note: method name in libwebp reference implementation is called VP8SSE4x4. [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Sse4X4(Span a, Span b) + public static int Vp8_Sse4X4(Span a, Span b) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) @@ -67,12 +70,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy else #endif { - return GetSse(a, b, 4, 4); + return Vp8_SseNxN(a, b, 4, 4); } } [MethodImpl(InliningOptions.ShortMethod)] - public static int GetSse(Span a, Span b, int w, int h) + public static int Vp8_SseNxN(Span a, Span b, int w, int h) { int count = 0; int aOffset = 0; diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs index 9d7545c32..d176a5933 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -35,7 +35,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP int expected = 27; - int actual = LossyUtils.Vp8Sse4X4(a, b); + int actual = LossyUtils.Vp8_Sse4X4(a, b); Assert.Equal(expected, actual); } From 1997d595e7d496c031e861b8f094a3ba05f94fd0 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Wed, 10 Nov 2021 12:14:08 +0100 Subject: [PATCH 11/11] Fix build error due to renaming --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index d0baa260c..38ed80590 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -66,7 +66,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy rdCur.Nz = (uint)ReconstructIntra16(it, dqm, rdCur, tmpDst, mode); // Measure RD-score. - rdCur.D = LossyUtils.Vp8Sse16X16(src, tmpDst); + rdCur.D = LossyUtils.Vp8_Sse16X16(src, tmpDst); rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0; rdCur.H = WebpConstants.Vp8FixedCostsI16[mode]; rdCur.R = it.GetCostLuma16(rdCur, proba, res); @@ -160,7 +160,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode); // Compute RD-score. - rdTmp.D = LossyUtils.Vp8Sse4X4(src, tmpDst); + rdTmp.D = LossyUtils.Vp8_Sse4X4(src, tmpDst); rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0; rdTmp.H = modeCosts[mode]; @@ -251,7 +251,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode); // Compute RD-score - rdUv.D = LossyUtils.Vp8Sse16X8(src, tmpDst); + rdUv.D = LossyUtils.Vp8_Sse16X8(src, tmpDst); rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas. rdUv.H = WebpConstants.Vp8FixedCostsUv[mode]; rdUv.R = it.GetCostUv(rdUv, proba, res); @@ -407,7 +407,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]); - long score = (LossyUtils.Vp8Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16); + long score = (LossyUtils.Vp8_Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16); if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit) { @@ -454,7 +454,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy for (mode = 0; mode < WebpConstants.NumBModes; ++mode) { Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); - long score = (LossyUtils.Vp8Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4); + long score = (LossyUtils.Vp8_Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4); if (score < bestI4Score) { bestI4Mode = mode; @@ -503,7 +503,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]); - long score = (LossyUtils.Vp8Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv); + long score = (LossyUtils.Vp8_Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv); if (score < bestUvScore) { bestMode = mode;