From 0676e68bfddd5894613e587ad9cc491f5a487e47 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 25 Dec 2020 14:37:15 +0100 Subject: [PATCH] Add SSE2 version of TransformColor --- .../Formats/WebP/Lossless/LosslessUtils.cs | 98 +++++++++++++++---- .../Formats/WebP/Lossless/PredictorEncoder.cs | 13 ++- .../Codecs/EncodeWebp.cs | 36 +++---- .../Formats/WebP/LosslessUtilsTests.cs | 50 ++++++++++ 4 files changed, 155 insertions(+), 42 deletions(-) diff --git a/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs index 94ba24660..5b4f3bf72 100644 --- a/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs @@ -391,6 +391,49 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless public static void TransformColor(Vp8LMultipliers m, Span data, int numPixels) { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); + Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); + var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + var shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); + int idx; + fixed (uint* src = data) + { + for (idx = 0; idx + 4 <= numPixels; idx += 4) + { + var pos = src + idx; + Vector128 input = Sse2.LoadVector128(pos); + Vector128 a = Sse2.And(input.AsByte(), maskalphagreen); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); + Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); + Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); + Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); + Vector128 g = Sse2.ShiftRightLogical(f.AsInt32(), 16); + Vector128 h = Sse2.Add(g.AsByte(), d.AsByte()); + 
Vector128 i = Sse2.And(h, maskredblue); + Vector128 output = Sse2.Subtract(input.AsByte(), i); + Sse2.Store((byte*)pos, output); + } + + if (idx != numPixels) + { + TransformColorNoneVectorized(m, data.Slice(idx), numPixels - idx); + } + } + } + else +#endif + { + TransformColorNoneVectorized(m, data, numPixels); + } + } + + public static void TransformColorNoneVectorized(Vp8LMultipliers m, Span data, int numPixels) + { for (int i = 0; i < numPixels; i++) { uint argb = data[i]; @@ -1140,6 +1183,33 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless } } + /// + /// Computes sampled size of 'size' when sampling using 'sampling bits'. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static int SubSampleSize(int size, int samplingBits) + { + return (size + (1 << samplingBits) - 1) >> samplingBits; + } + + /// + /// Sum of each component, mod 256. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static uint AddPixels(uint a, uint b) + { + uint alphaAndGreen = (a & 0xff00ff00u) + (b & 0xff00ff00u); + uint redAndBlue = (a & 0x00ff00ffu) + (b & 0x00ff00ffu); + return (alphaAndGreen & 0xff00ff00u) | (redAndBlue & 0x00ff00ffu); + } + + // For sign-extended multiplying constants, pre-shifted by 5: + [MethodImpl(InliningOptions.ShortMethod)] + public static short Cst5b(int x) + { + return (short)(((short)(x << 8)) >> 5); + } + private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2) { int a = AddSubtractComponentFull( @@ -1186,6 +1256,14 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless return a < 256 ? 
a : ~a >> 24; } +#if SUPPORTS_RUNTIME_INTRINSICS + [MethodImpl(InliningOptions.ShortMethod)] + private static Vector128 MkCst16(int hi, int lo) + { + return Vector128.Create((hi << 16) | (lo & 0xffff)); + } +#endif + private static uint Select(uint a, uint b, uint c) { int paMinusPb = @@ -1222,26 +1300,6 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless return Average2(Average2(a0, a1), Average2(a2, a3)); } - /// - /// Computes sampled size of 'size' when sampling using 'sampling bits'. - /// - [MethodImpl(InliningOptions.ShortMethod)] - public static int SubSampleSize(int size, int samplingBits) - { - return (size + (1 << samplingBits) - 1) >> samplingBits; - } - - /// - /// Sum of each component, mod 256. - /// - [MethodImpl(InliningOptions.ShortMethod)] - public static uint AddPixels(uint a, uint b) - { - uint alphaAndGreen = (a & 0xff00ff00u) + (b & 0xff00ff00u); - uint redAndBlue = (a & 0x00ff00ffu) + (b & 0x00ff00ffu); - return (alphaAndGreen & 0xff00ff00u) | (redAndBlue & 0x00ff00ffu); - } - [MethodImpl(InliningOptions.ShortMethod)] private static uint GetArgbIndex(uint idx) { diff --git a/src/ImageSharp/Formats/WebP/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/WebP/Lossless/PredictorEncoder.cs index d2f810949..83c4bda1f 100644 --- a/src/ImageSharp/Formats/WebP/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/WebP/Lossless/PredictorEncoder.cs @@ -720,6 +720,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless } } + [MethodImpl(InliningOptions.ShortMethod)] private static int MaxDiffBetweenPixels(uint p1, uint p2) { int diffA = Math.Abs((int)(p1 >> 24) - (int)(p2 >> 24)); @@ -729,6 +730,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless return GetMax(GetMax(diffA, diffR), GetMax(diffG, diffB)); } + [MethodImpl(InliningOptions.ShortMethod)] private static int MaxDiffAroundPixel(uint current, uint up, uint down, uint left, uint right) { int diffUp = MaxDiffBetweenPixels(current, up); @@ 
-738,6 +740,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless return GetMax(GetMax(diffUp, diffDown), GetMax(diffLeft, diffRight)); } + [MethodImpl(InliningOptions.ShortMethod)] private static void UpdateHisto(int[][] histoArgb, uint argb) { ++histoArgb[0][argb >> 24]; @@ -931,7 +934,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless #if SUPPORTS_RUNTIME_INTRINSICS if (Sse41.IsSupported) { - var multsg = Vector128.Create((short)((greenToRed << 8) >> 5)); + var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed)); var maskgreen = Vector128.Create(0x00ff00); var mask = Vector128.Create((short)0xff); @@ -1002,11 +1005,11 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless { const int span = 8; Span values = stackalloc ushort[span]; - var multsr = Vector128.Create((short)((redToBlue << 8) >> 5)); - var multsg = Vector128.Create((short)((greenToBlue << 8) >> 5)); + var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue)); + var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue)); var maskgreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); var maskgreenblue = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); - Vector128 maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + var maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); var shufflerLow = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255); var shufflerHigh = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); @@ -1084,6 +1087,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless return (float)retVal; } + [MethodImpl(InliningOptions.ShortMethod)] private static float PredictionCostCrossColor(int[] accumulated, int[] counts) { // Favor low entropy, locally and globally. 
@@ -1092,6 +1096,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless return LosslessUtils.CombinedShannonEntropy(counts, accumulated) + PredictionCostSpatial(counts, 3, expValue); } + [MethodImpl(InliningOptions.ShortMethod)] private static float PredictionCostSpatial(int[] counts, int weight0, double expVal) { int significantSymbols = 256 >> 4; diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs index 05b2e5fb5..ae2091957 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs private string TestImageFullPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, this.TestImage); - [Params(TestImages.WebP.Peak)] + [Params(TestImages.Png.Bike)] // The bike image will have all 3 transforms as lossless webp. public string TestImage { get; set; } [GlobalSetup] @@ -74,7 +74,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs }); } - /* Results 14.11.2020 + /* Results 25.12.2020 * Summary * BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.630 (2004/?/20H1) Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores @@ -84,22 +84,22 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs Job-GAIITM : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT Job-HWOBSO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - | Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - |--------------------------- |----------- |-------------- |-------------- |----------:|----------:|----------:|------:|--------:|----------:|---------:|---------:|-----------:| - | 'Magick Webp Lossy' | Job-MYNMXL | .NET 4.7.2 | WebP/Peak.png | 1.744 ms | 0.0399 ms | 0.0022 ms | 0.35 | 0.00 | 1.9531 | - | - | 13.58 KB | - | 'ImageSharp Webp Lossy' | Job-MYNMXL | .NET 
4.7.2 | WebP/Peak.png | 5.195 ms | 0.4241 ms | 0.0232 ms | 1.04 | 0.01 | 398.4375 | 93.7500 | - | 1661.83 KB | - | 'Magick Webp Lossless' | Job-MYNMXL | .NET 4.7.2 | WebP/Peak.png | 4.993 ms | 0.5097 ms | 0.0279 ms | 1.00 | 0.00 | 7.8125 | - | - | 35.7 KB | - | 'ImageSharp Webp Lossless' | Job-MYNMXL | .NET 4.7.2 | WebP/Peak.png | 12.174 ms | 1.2476 ms | 0.0684 ms | 2.44 | 0.02 | 1000.0000 | 984.3750 | 984.3750 | 8197.11 KB | - | | | | | | | | | | | | | | - | 'Magick Webp Lossy' | Job-MPXHSM | .NET Core 2.1 | WebP/Peak.png | 1.747 ms | 0.0581 ms | 0.0032 ms | 0.35 | 0.00 | 1.9531 | - | - | 13.34 KB | - | 'ImageSharp Webp Lossy' | Job-MPXHSM | .NET Core 2.1 | WebP/Peak.png | 3.527 ms | 0.0972 ms | 0.0053 ms | 0.71 | 0.00 | 402.3438 | 97.6563 | - | 1656.92 KB | - | 'Magick Webp Lossless' | Job-MPXHSM | .NET Core 2.1 | WebP/Peak.png | 5.001 ms | 0.4543 ms | 0.0249 ms | 1.00 | 0.00 | 7.8125 | - | - | 35.39 KB | - | 'ImageSharp Webp Lossless' | Job-MPXHSM | .NET Core 2.1 | WebP/Peak.png | 10.704 ms | 0.9844 ms | 0.0540 ms | 2.14 | 0.02 | 1000.0000 | 984.3750 | 984.3750 | 8182.6 KB | - | | | | | | | | | | | | | | - | 'Magick Webp Lossy' | Job-SYDSGM | .NET Core 3.1 | WebP/Peak.png | 1.742 ms | 0.0279 ms | 0.0015 ms | 0.35 | 0.01 | 1.9531 | - | - | 13.31 KB | - | 'ImageSharp Webp Lossy' | Job-SYDSGM | .NET Core 3.1 | WebP/Peak.png | 3.347 ms | 0.0638 ms | 0.0035 ms | 0.68 | 0.01 | 402.3438 | 97.6563 | - | 1656.93 KB | - | 'Magick Webp Lossless' | Job-SYDSGM | .NET Core 3.1 | WebP/Peak.png | 4.954 ms | 1.4131 ms | 0.0775 ms | 1.00 | 0.00 | 7.8125 | - | - | 35.35 KB | - | 'ImageSharp Webp Lossless' | Job-SYDSGM | .NET Core 3.1 | WebP/Peak.png | 10.737 ms | 2.5604 ms | 0.1403 ms | 2.17 | 0.05 | 1000.0000 | 984.3750 | 984.3750 | 8182.49 KB | + | Method | Job | Runtime | TestImage | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + |--------------------------- |----------- |-------------- |------------- 
|----------:|-----------:|----------:|------:|--------:|-----------:|----------:|----------:|-------------:| + | 'Magick Webp Lossy' | Job-NTTOHF | .NET 4.7.2 | Png/Bike.png | 23.89 ms | 3.742 ms | 0.205 ms | 0.14 | 0.00 | - | - | - | 68.19 KB | + | 'ImageSharp Webp Lossy' | Job-NTTOHF | .NET 4.7.2 | Png/Bike.png | 72.27 ms | 20.228 ms | 1.109 ms | 0.43 | 0.01 | 6142.8571 | 142.8571 | - | 26360.05 KB | + | 'Magick Webp Lossless' | Job-NTTOHF | .NET 4.7.2 | Png/Bike.png | 167.75 ms | 41.847 ms | 2.294 ms | 1.00 | 0.00 | - | - | - | 520.28 KB | + | 'ImageSharp Webp Lossless' | Job-NTTOHF | .NET 4.7.2 | Png/Bike.png | 388.12 ms | 84.867 ms | 4.652 ms | 2.31 | 0.03 | 34000.0000 | 5000.0000 | 2000.0000 | 163174.2 KB | + | | | | | | | | | | | | | | + | 'Magick Webp Lossy' | Job-RXOYDK | .NET Core 2.1 | Png/Bike.png | 24.00 ms | 7.621 ms | 0.418 ms | 0.14 | 0.00 | - | - | - | 67.67 KB | + | 'ImageSharp Webp Lossy' | Job-RXOYDK | .NET Core 2.1 | Png/Bike.png | 47.77 ms | 6.498 ms | 0.356 ms | 0.29 | 0.00 | 6272.7273 | 272.7273 | 90.9091 | 26284.65 KB | + | 'Magick Webp Lossless' | Job-RXOYDK | .NET Core 2.1 | Png/Bike.png | 166.07 ms | 25.133 ms | 1.378 ms | 1.00 | 0.00 | - | - | - | 519.06 KB | + | 'ImageSharp Webp Lossless' | Job-RXOYDK | .NET Core 2.1 | Png/Bike.png | 356.60 ms | 249.912 ms | 13.699 ms | 2.15 | 0.10 | 34000.0000 | 5000.0000 | 2000.0000 | 162719.59 KB | + | | | | | | | | | | | | | | + | 'Magick Webp Lossy' | Job-UDPFDM | .NET Core 3.1 | Png/Bike.png | 23.95 ms | 5.531 ms | 0.303 ms | 0.14 | 0.00 | - | - | - | 67.57 KB | + | 'ImageSharp Webp Lossy' | Job-UDPFDM | .NET Core 3.1 | Png/Bike.png | 44.12 ms | 4.250 ms | 0.233 ms | 0.27 | 0.01 | 6250.0000 | 250.0000 | 83.3333 | 26284.72 KB | + | 'Magick Webp Lossless' | Job-UDPFDM | .NET Core 3.1 | Png/Bike.png | 165.94 ms | 66.670 ms | 3.654 ms | 1.00 | 0.00 | - | - | - | 523.05 KB | + | 'ImageSharp Webp Lossless' | Job-UDPFDM | .NET Core 3.1 | Png/Bike.png | 342.97 ms | 92.856 ms | 5.090 ms | 2.07 | 0.05 | 
34000.0000 | 5000.0000 | 2000.0000 | 162725.32 KB | */ } } diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs index f8b0f5a04..afa6be0da 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs @@ -68,6 +68,38 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP Assert.Equal(expectedOutput, pixelData); } + private static void RunTransformColorTest() + { + uint[] pixelData = + { + 5998579, 65790, 130301, 16646653, 196350, 130565, 16712702, 16583164, 16452092, 65790, 782600, + 647446, 16571414, 16448771, 263931, 132601, 16711935, 131072, 511, 16711679, 132350, 329469, + 16647676, 132093, 66303, 16647169, 16515584, 196607, 196096, 16646655, 514, 131326, 16712192, + 327169, 16646655, 16776960, 3, 16712190, 511, 16646401, 16580612, 65535, 196092, 327425, 16319743, + 392450, 196861, 16712192, 16711680, 130564, 16451071 + }; + + var m = new Vp8LMultipliers() + { + GreenToBlue = 240, + GreenToRed = 232, + RedToBlue = 0 + }; + + uint[] expectedOutput = + { + 100279, 65790, 16710907, 16712190, 130813, 65028, 131840, 264449, 133377, 65790, 61697, 15917319, + 14801924, 16317698, 591614, 394748, 16711935, 131072, 65792, 16711679, 328704, 656896, 132607, + 328703, 197120, 66563, 16646657, 196607, 130815, 16711936, 131587, 131326, 66049, 261632, 16711936, + 16776960, 3, 511, 65792, 16711938, 16580612, 65535, 65019, 327425, 16516097, 261377, 196861, 66049, + 16711680, 65027, 16712962 + }; + + LosslessUtils.TransformColor(m, pixelData, pixelData.Length); + + Assert.Equal(expectedOutput, pixelData); + } + [Fact] public void SubtractGreen_Works() { @@ -80,6 +112,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP RunAddGreenToBlueAndRedTest(); } + [Fact] + public void TransformColor_Works() + { + RunTransformColorTest(); + } + #if SUPPORTS_RUNTIME_INTRINSICS [Fact] public void SubtractGreen_WithHardwareIntrinsics_Works() @@ 
-116,6 +154,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP { FeatureTestRunner.RunWithHwIntrinsicsFeature(RunAddGreenToBlueAndRedTest, HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3); } + + [Fact] + public void TransformColor_WithHardwareIntrinsics_Works() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.AllowAll); + } + + [Fact] + public void TransformColor_WithoutSSE2_Works() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTransformColorTest, HwIntrinsics.DisableSSE2); + } #endif } }