From fd07436736d721bedfbafc308d902aa1e7765778 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 12:40:04 +0100 Subject: [PATCH 1/9] Replace Guard with DebugGuard in FastSLog2Slow --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 22c233360..ebebe7954 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -780,7 +780,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static float FastSLog2Slow(uint v) { - Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); + DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); if (v < ApproxLogWithCorrectionMax) { int logCnt = 0; From 2bf16bcb58556d6f3cbee5298472db42af60bd02 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 12:41:43 +0100 Subject: [PATCH 2/9] Reverse access to output array to remove bounds checks --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index ebebe7954..b278b12bc 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1262,11 +1262,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vector128 pb = Sse2.UnpackLow(bc, Vector128.Zero); // |b - c| Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16()); Sse2.Store((ushort*)p, diff); + int paMinusPb = output[3] + output[2] + output[1] + output[0]; + return (paMinusPb <= 0) ? a : b; } - - int paMinusPb = output[0] + output[1] + output[2] + output[3]; - - return (paMinusPb <= 0) ? a : b; } else #endif From a7ed1884e0f9439c03d913f4d4a5f2b36d38071e Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 14:15:13 +0100 Subject: [PATCH 3/9] Add sse2 version of ClampedAddSubtractHalf --- .../Formats/Webp/Lossless/LosslessUtils.cs | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index b278b12bc..0dda5a79a 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1219,12 +1219,32 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) { - uint ave = Average2(c0, c1); - int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24)); - int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff)); - int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff)); - int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff)); - return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); + Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); + Vector128 b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); + Vector128 avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16()); + Vector128 a0 = Sse2.ShiftRightLogical(avg, 1); + Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16()); + Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16()); + Vector128 a2 = Sse2.Subtract(a1, bgta); + Vector128 a3 = Sse2.ShiftRightArithmetic(a2.AsInt16(), 1); + Vector128 a4 = Sse2.Add(a0.AsInt16(), a3).AsInt16(); + Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4); + uint output = Sse2.ConvertToUInt32(a5.AsUInt32()); + return output; + } +#endif + { + uint ave = Average2(c0, c1); + int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24)); + int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff)); + int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff)); + int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff)); + return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; + } } [MethodImpl(InliningOptions.ShortMethod)] From 28053739a9beeed006fd256a0ea8016631660841 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 14:20:33 +0100 Subject: [PATCH 4/9] Add sse2 version of ClampedAddSubtractFull --- .../Formats/Webp/Lossless/LosslessUtils.cs | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 0dda5a79a..7740dc051 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1201,20 +1201,34 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2) { - int a = AddSubtractComponentFull( - (int)(c0 >> 24), - (int)(c1 >> 24), - (int)(c2 >> 24)); - int r = AddSubtractComponentFull( - (int)((c0 >> 16) & 0xff), - (int)((c1 >> 16) & 0xff), - (int)((c2 >> 16) & 0xff)); - int g = AddSubtractComponentFull( - (int)((c0 >> 8) & 0xff), - (int)((c1 >> 8) & 0xff), - (int)((c2 >> 8) & 0xff)); - int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff)); - return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); + Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); + Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); + Vector128 v1 = Sse2.Add(c0Vec, c1Vec); + Vector128 v2 = Sse2.Subtract(v1, c2Vec); + Vector128 b = Sse2.PackUnsignedSaturate(v2.AsInt16(), v2.AsInt16()); + uint output = Sse2.ConvertToUInt32(b.AsUInt32()); + } +#endif + { + int a = AddSubtractComponentFull( + (int)(c0 >> 24), + (int)(c1 >> 24), + (int)(c2 >> 24)); + int r = AddSubtractComponentFull( + (int)((c0 >> 16) & 0xff), + (int)((c1 >> 16) & 0xff), + (int)((c2 >> 16) & 0xff)); + int g = AddSubtractComponentFull( + (int)((c0 >> 8) & 0xff), + (int)((c1 >> 8) & 0xff), + (int)((c2 >> 8) & 0xff)); + int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff)); + return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; + } } private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) From f6dbc7dd8ee95115315805dab2b9b38684e505b2 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 14:40:59 +0100 Subject: [PATCH 5/9] Fix issue in ClampedAddSubtractFull --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 7740dc051..65b39bd2d 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1207,10 +1207,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); - Vector128 v1 = Sse2.Add(c0Vec, c1Vec); - Vector128 v2 = Sse2.Subtract(v1, c2Vec); + Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16()); + Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16()); Vector128 b = Sse2.PackUnsignedSaturate(v2.AsInt16(), v2.AsInt16()); uint output = Sse2.ConvertToUInt32(b.AsUInt32()); + return output; } #endif { From 8fe280e9918e14ca2abb7ffd21ae35c969429447 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 16:04:29 +0100 Subject: [PATCH 6/9] Add predictor 12 and 13 tests --- .../Formats/WebP/LosslessUtilsTests.cs | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs index bf381ebda..c70f332ef 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs @@ -153,9 +153,55 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp } } + private static void RunPredictor12Test() + { + // arrange + uint[] topData = { 4294844413, 4294779388 }; + uint left = 4294844413; + uint expectedResult = 4294779388; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor12(left, top); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + + private static void RunPredictor13Test() + { + // arrange + uint[] topData = { 4278193922, 4278193666 }; + uint left = 4278193410; + uint expectedResult = 4278193154; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor13(left, top); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + [Fact] public void Predictor11_Works() => RunPredictor11Test(); + [Fact] + public void Predictor12_Works() => RunPredictor12Test(); + + [Fact] + public void Predictor13_Works() => RunPredictor13Test(); + [Fact] public void SubtractGreen_Works() => RunSubtractGreenTest(); @@ -175,6 +221,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp [Fact] public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2); + [Fact] + public void Predictor12_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor12_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.DisableSSE2); + + [Fact] + public void Predictor13_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor13_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.DisableSSE2); + [Fact] public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll); From ffdf99bad2d8f4fb9d52a3938f3c64d750f09957 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 16:29:52 +0100 Subject: [PATCH 7/9] Add aggressive inlining --- src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs | 8 ++++++++ src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 1 + 2 files changed, 9 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs index 8596d8555..02bbc38fc 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs @@ -1,6 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Webp.Lossless { /// @@ -41,6 +43,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// Inserts a new color into the cache. /// /// The color to insert. + [MethodImpl(InliningOptions.ShortMethod)] public void Insert(uint bgra) { int key = HashPix(bgra, this.HashShift); @@ -52,6 +55,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// The key to lookup. /// The color for the key. + [MethodImpl(InliningOptions.ShortMethod)] public uint Lookup(int key) => this.Colors[key]; /// @@ -59,6 +63,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// The color to check. /// The index of the color in the cache or -1 if its not present. + [MethodImpl(InliningOptions.ShortMethod)] public int Contains(uint bgra) { int key = HashPix(bgra, this.HashShift); @@ -70,6 +75,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// The color. /// The index for the color. + [MethodImpl(InliningOptions.ShortMethod)] public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift); /// @@ -77,8 +83,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// The key. /// The color to add. + [MethodImpl(InliningOptions.ShortMethod)] public void Set(uint key, uint bgra) => this.Colors[key] = bgra; + [MethodImpl(InliningOptions.ShortMethod)] public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift); } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 65b39bd2d..9baa6c3c3 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -752,6 +752,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// Fast calculation of log2(v) for integer input. /// + [MethodImpl(InliningOptions.ShortMethod)] public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v); /// From fc8d8b81d98201955655595fe682a0c5533eb6ea Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 2 Nov 2021 21:56:19 +0100 Subject: [PATCH 8/9] Remove unnecessary cast AsInt16() --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 9baa6c3c3..8bd3163cc 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1210,7 +1210,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16()); Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16()); - Vector128 b = Sse2.PackUnsignedSaturate(v2.AsInt16(), v2.AsInt16()); + Vector128 b = Sse2.PackUnsignedSaturate(v2, v2); uint output = Sse2.ConvertToUInt32(b.AsUInt32()); return output; } From d6d952e477b0653b2750210ad4cd2d3fc14bbaec Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 4 Nov 2021 23:12:01 +0100 Subject: [PATCH 9/9] Remove another unnecessary cast AsInt16() --- src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 8bd3163cc..ee9ea5123 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -1246,8 +1246,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16()); Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16()); Vector128 a2 = Sse2.Subtract(a1, bgta); - Vector128 a3 = Sse2.ShiftRightArithmetic(a2.AsInt16(), 1); - Vector128 a4 = Sse2.Add(a0.AsInt16(), a3).AsInt16(); + Vector128 a3 = Sse2.ShiftRightArithmetic(a2, 1); + Vector128 a4 = Sse2.Add(a0, a3).AsInt16(); Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4); uint output = Sse2.ConvertToUInt32(a5.AsUInt32()); return output;