diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index b7f94415b..22c233360 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -551,6 +551,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int mask = tileWidth - 1; int tilesPerRow = SubSampleSize(width, transform.Bits); int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow; + Span scratch = stackalloc short[8]; while (y < yEnd) { int predictorModeIdx = predictorModeIdxBase; @@ -608,7 +609,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless PredictorAdd10(input + x, output + x - width, xEnd - x, output + x); break; case 11: - PredictorAdd11(input + x, output + x - width, xEnd - x, output + x); + PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch); break; case 12: PredictorAdd12(input + x, output + x - width, xEnd - x, output + x); @@ -974,11 +975,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } [MethodImpl(InliningOptions.ShortMethod)] - private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output) + private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span scratch) { for (int x = 0; x < numberOfPixels; x++) { - uint pred = Predictor11(output[x - 1], upper + x); + uint pred = Predictor11(output[x - 1], upper + x, scratch); output[x] = AddPixels(input[x], pred); } } @@ -1031,7 +1032,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]); [MethodImpl(InliningOptions.ShortMethod)] - public static uint Predictor11(uint left, uint* top) => Select(top[0], left, top[-1]); + public static uint Predictor11(uint left, uint* top, Span scratch) => Select(top[0], left, top[-1], scratch); [MethodImpl(InliningOptions.ShortMethod)] public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]); @@ -1148,11 +1149,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } [MethodImpl(InliningOptions.ShortMethod)] - public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output) + public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span scratch) { for (int x = 0; x < numPixels; x++) { - uint pred = Predictor11(input[x - 1], upper + x); + uint pred = Predictor11(input[x - 1], upper + x, scratch); output[x] = SubPixels(input[x], pred); } } @@ -1240,14 +1241,43 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static Vector128 MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff)); #endif - private static uint Select(uint a, uint b, uint c) + private static uint Select(uint a, uint b, uint c, Span scratch) { - int paMinusPb = - Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) + - Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) + - Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) + - Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff)); - return paMinusPb <= 0 ? a : b; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Span output = scratch; + fixed (short* p = output) + { + Vector128 a0 = Sse2.ConvertScalarToVector128UInt32(a).AsByte(); + Vector128 b0 = Sse2.ConvertScalarToVector128UInt32(b).AsByte(); + Vector128 c0 = Sse2.ConvertScalarToVector128UInt32(c).AsByte(); + Vector128 ac0 = Sse2.SubtractSaturate(a0, c0); + Vector128 ca0 = Sse2.SubtractSaturate(c0, a0); + Vector128 bc0 = Sse2.SubtractSaturate(b0, c0); + Vector128 cb0 = Sse2.SubtractSaturate(c0, b0); + Vector128 ac = Sse2.Or(ac0, ca0); + Vector128 bc = Sse2.Or(bc0, cb0); + Vector128 pa = Sse2.UnpackLow(ac, Vector128.Zero); // |a - c| + Vector128 pb = Sse2.UnpackLow(bc, Vector128.Zero); // |b - c| + Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16()); + Sse2.Store((ushort*)p, diff); + } + + int paMinusPb = output[0] + output[1] + output[2] + output[3]; + + return (paMinusPb <= 0) ? a : b; + } + else +#endif + { + int paMinusPb = + Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) + + Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) + + Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) + + Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff)); + return paMinusPb <= 0 ? a : b; + } } [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index 671e9a043..2c70faa0d 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -50,6 +50,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int tilesPerRow = LosslessUtils.SubSampleSize(width, bits); int tilesPerCol = LosslessUtils.SubSampleSize(height, bits); int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality); + Span scratch = stackalloc short[8]; // TODO: Can we optimize this? int[][] histo = new int[4][]; @@ -84,7 +85,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless transparentColorMode, usedSubtractGreen, nearLossless, - image); + image, + scratch); image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8)); } @@ -192,7 +194,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless WebpTransparentColorMode transparentColorMode, bool usedSubtractGreen, bool nearLossless, - Span modes) + Span modes, + Span scratch) { const int numPredModes = 14; int startX = tileX << bits; @@ -272,7 +275,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } - GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals); + GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals, scratch); for (int relativeX = 0; relativeX < maxX; ++relativeX) { UpdateHisto(histoArgb, residuals[relativeX]); @@ -333,11 +336,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless WebpTransparentColorMode transparentColorMode, bool usedSubtractGreen, bool nearLossless, - Span output) + Span output, + Span scratch) { if (transparentColorMode == WebpTransparentColorMode.Preserve) { - PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output); + PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output, scratch); } else { @@ -395,7 +399,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x); break; case 11: - predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x); + predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x, scratch); break; case 12: predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x); @@ -583,6 +587,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Span currentMaxDiffs = MemoryMarshal.Cast(currentRow.Slice(width + 1)); Span lowerMaxDiffs = currentMaxDiffs.Slice(width); + Span scratch = stackalloc short[8]; for (int y = 0; y < height; y++) { Span tmp32 = upperRow; @@ -593,7 +598,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if (lowEffort) { - PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width)); + PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width), scratch); } else { @@ -634,7 +639,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless transparentColorMode, usedSubtractGreen, nearLossless, - argb.Slice((y * width) + x)); + argb.Slice((y * width) + x), + scratch); x = xEnd; } @@ -649,7 +655,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int numPixels, Span currentSpan, Span upperSpan, - Span outputSpan) + Span outputSpan, + Span scratch) { #pragma warning disable SA1503 // Braces should not be omitted fixed (uint* current = currentSpan) @@ -718,7 +725,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output); break; case 11: - LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output); + LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output, scratch); break; case 12: LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output); diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs index be7bc27d3..bf381ebda 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs @@ -132,6 +132,30 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp Assert.Equal(expectedOutput, pixelData); } + private static void RunPredictor11Test() + { + // arrange + uint[] topData = { 4278258949, 4278258949 }; + uint left = 4294839812; + short[] scratch = new short[8]; + uint expectedResult = 4294839812; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor11(left, top, scratch); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + + [Fact] + public void Predictor11_Works() => RunPredictor11Test(); + [Fact] public void SubtractGreen_Works() => RunSubtractGreenTest(); @@ -145,6 +169,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp public void TransformColorInverse_Works() => RunTransformColorInverseTest(); #if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void Predictor11_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2); + [Fact] public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);