From d4c9c6ec005e3c767d89a186e25dceb79328c56b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 2 Dec 2021 12:13:17 +0100 Subject: [PATCH] Add SSE2 version of DoFilter6 --- .../Formats/Webp/Lossy/LossyUtils.cs | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 37cea73ca8..e2ae35b3d4 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -24,6 +24,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private static readonly Vector128 Four = Vector128.Create((byte)4).AsSByte(); + private static readonly Vector128 Nine = Vector128.Create((short)0x0900).AsSByte(); + + private static readonly Vector128 SixtyThree = Vector128.Create((byte)63).AsSByte(); + private static readonly Vector128 SixtyFour = Vector128.Create((byte)64).AsSByte(); #endif @@ -1462,6 +1466,50 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy q1 = Sse2.Xor(q1.AsByte(), SignBit); } + // Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2) + private static void DoFilter6Sse2(ref Vector128 p2, ref Vector128 p1, ref Vector128 p0, ref Vector128 q0, ref Vector128 q1, ref Vector128 q2, Vector128 mask, int tresh) + { + // Compute hev mask. + Vector128 notHev = GetNotHev(ref p1, ref p0, ref q0, ref q1, tresh); + + // Convert to signed values. + p1 = Sse2.Xor(p1, SignBit); + p0 = Sse2.Xor(p0, SignBit); + q0 = Sse2.Xor(q0, SignBit); + q1 = Sse2.Xor(q1, SignBit); + q0 = Sse2.Xor(p2, SignBit); + q1 = Sse2.Xor(q2, SignBit); + + Vector128 a = GetBaseDelta(p1.AsSByte(), p0.AsSByte(), q0.AsSByte(), q1.AsSByte()); + + // Do simple filter on pixels with hev. + Vector128 m = Sse2.AndNot(notHev, mask); + Vector128 f = Sse2.And(a.AsByte(), m); + DoSimpleFilterSse2(ref p0, ref q0, f); + + // Do strong filter on pixels with not hev. + m = Sse2.And(notHev, mask); + f = Sse2.And(a.AsByte(), m); + Vector128 flow = Sse2.UnpackLow(Vector128.Zero, f); + Vector128 fhigh = Sse2.UnpackHigh(Vector128.Zero, f); + + Vector128 f9High = Sse2.MultiplyHigh(flow.AsInt16(), Nine.AsInt16()); // Filter (lo) * 9 + Vector128 f9Low = Sse2.MultiplyLow(fhigh.AsInt16(), Nine.AsInt16()); // Filter (hi) * 9 + + Vector128 a2Low = Sse2.Add(f9Low, SixtyThree.AsInt16()); // Filter * 9 + 63 + Vector128 a2High = Sse2.Add(f9High, SixtyThree.AsInt16()); // Filter * 9 + 63 + + Vector128 a1Low = Sse2.Add(a2Low, f9Low); // Filter * 18 + 63 + Vector128 a1High = Sse2.Add(a2High, f9High); // // Filter * 18 + 63 + + Vector128 a0Low = Sse2.Add(a1Low, f9Low); // Filter * 27 + 63 + Vector128 a0High = Sse2.Add(a1High, f9High); // Filter * 27 + 63 + + Update2Pixels(ref p2, ref q2, a2Low, a2High); + Update2Pixels(ref p1, ref q1, a1Low, a1High); + Update2Pixels(ref p0, ref q0, a0Low, a0High); + } + private static void DoSimpleFilterSse2(ref Vector128 p0, ref Vector128 q0, Vector128 fl) { Vector128 v3 = Sse2.AddSaturate(fl.AsSByte(), Three); @@ -1727,6 +1775,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy mask = Sse2.And(threshMask, filterMask); } + // Updates values of 2 pixels at MB edge during complex filtering. + // Update operations: + // q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)] + // Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip). + private static void Update2Pixels(ref Vector128 pi, ref Vector128 qi, Vector128 a0Low, Vector128 a0High) + { + Vector128 a1Low = Sse2.ShiftRightArithmetic(a0Low, 7); + Vector128 a1High = Sse2.ShiftRightArithmetic(a0High, 7); + Vector128 delta = Sse2.PackSignedSaturate(a1Low, a1High); + pi = Sse2.AddSaturate(pi.AsSByte(), delta).AsByte(); + qi = Sse2.SubtractSaturate(qi.AsSByte(), delta).AsByte(); + pi = Sse2.Xor(pi, SixtyFour.AsByte()); + qi = Sse2.Xor(qi, SignBit.AsByte()); + } + [MethodImpl(InliningOptions.ShortMethod)] private static Vector128 LoadUvEdge(ref byte uRef, ref byte vRef, int offset) {