From a7ed1884e0f9439c03d913f4d4a5f2b36d38071e Mon Sep 17 00:00:00 2001
From: Brian Popow
Date: Tue, 2 Nov 2021 14:15:13 +0100
Subject: [PATCH] Add sse2 version of ClampedAddSubtractHalf

Adds an SSE2 code path to ClampedAddSubtractHalf. The path is compiled
only when SUPPORTS_RUNTIME_INTRINSICS is defined and is taken only when
Sse2.IsSupported is true. The existing scalar implementation is kept
unchanged as the fallback.
---
 .../Formats/Webp/Lossless/LosslessUtils.cs | 32 +++++++++++++++----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
index b278b12bc9..0dda5a79a6 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
@@ -1219,12 +1219,32 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
 
         private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
         {
-            uint ave = Average2(c0, c1);
-            int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
-            int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
-            int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
-            int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
-            return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Sse2.IsSupported)
+            {
+                Vector128<byte> c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128<byte>.Zero);
+                Vector128<byte> c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128<byte>.Zero);
+                Vector128<byte> b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128<byte>.Zero);
+                Vector128<short> avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16());
+                Vector128<short> a0 = Sse2.ShiftRightLogical(avg, 1);
+                Vector128<short> a1 = Sse2.Subtract(a0, b0.AsInt16());
+                Vector128<short> bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16());
+                Vector128<short> a2 = Sse2.Subtract(a1, bgta);
+                Vector128<short> a3 = Sse2.ShiftRightArithmetic(a2.AsInt16(), 1);
+                Vector128<short> a4 = Sse2.Add(a0.AsInt16(), a3).AsInt16();
+                Vector128<byte> a5 = Sse2.PackUnsignedSaturate(a4, a4);
+                uint output = Sse2.ConvertToUInt32(a5.AsUInt32());
+                return output;
+            }
+#endif
+            {
+                uint ave = Average2(c0, c1);
+                int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
+                int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
+                int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
+                int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
+                return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+            }
         }
 
         [MethodImpl(InliningOptions.ShortMethod)]