From 06843f2a7d81a00da7be04d615ef26dc0f5389e2 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 24 Feb 2022 23:01:53 +0100 Subject: [PATCH] Add SSE version of paeth filter --- .../Formats/Png/Filters/PaethFilter.cs | 88 ++++++++++++++++++- 1 file changed, 84 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs index 6a89a1122a..0553eb46a9 100644 --- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs @@ -22,9 +22,9 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters internal static class PaethFilter { /// - /// Decodes the scanline + /// Decodes a scanline, which was filtered with the paeth filter. /// - /// The scanline to decode + /// The scanline to decode. /// The previous scanline. /// The bytes per pixel. [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -32,6 +32,86 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters { DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline)); + // Paeth tries to predict pixel d using the pixel to the left of it, a, + // and two pixels from the previous row, b and c: + // prev: c b + // row: a d + // The Paeth function predicts d to be whichever of a, b, or c is nearest to + // p = a + b - c. +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse41.IsSupported && bytesPerPixel is 4) + { + DecodeSse41(scanline, previousScanline); + } + else +#endif + { + DecodeScalar(scanline, previousScanline, bytesPerPixel); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void DecodeSse41(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + Vector128 b = Vector128.Zero; + Vector128 d = Vector128.Zero; + + int rb = scanline.Length; + nint offset = 1; + while (rb >= 4) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + + // It's easiest to do this math (particularly, deal with pc) with 16-bit intermediates. + Vector128 c = b; + Vector128 a = d; + b = Sse2.UnpackLow( + Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref Unsafe.Add(ref prevBaseRef, offset))).AsByte(), + Vector128.Zero); + d = Sse2.UnpackLow( + Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref scanRef)).AsByte(), + Vector128.Zero); + + // (p-a) == (a+b-c - a) == (b-c) + Vector128 pa = Sse2.Subtract(b.AsInt16(), c.AsInt16()); + + // (p-b) == (a+b-c - b) == (a-c) + Vector128 pb = Sse2.Subtract(a.AsInt16(), c.AsInt16()); + + // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) + Vector128 pc = Sse2.Add(pa.AsInt16(), pb.AsInt16()); + + pa = Ssse3.Abs(pa.AsInt16()).AsInt16(); /* |p-a| */ + pb = Ssse3.Abs(pb.AsInt16()).AsInt16(); /* |p-b| */ + pc = Ssse3.Abs(pc.AsInt16()).AsInt16(); /* |p-c| */ + + Vector128 smallest = Sse2.Min(pc, Sse2.Min(pa, pb)); + + // Paeth breaks ties favoring a over b over c. + Vector128 mask = Sse41.BlendVariable(c, b, Sse2.CompareEqual(smallest, pb).AsByte()); + Vector128 nearest = Sse41.BlendVariable(mask, a, Sse2.CompareEqual(smallest, pa).AsByte()); + + // Note `_epi8`: we need addition to wrap modulo 255. + d = Sse2.Add(d, nearest); + + // Store the result. + Unsafe.As(ref scanRef) = Sse2.ConvertToInt32(Sse2.PackUnsignedSaturate(d.AsInt16(), d.AsInt16()).AsInt32()); + + rb -= 4; + offset += 4; + } + } + +#endif + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void DecodeScalar(Span scanline, Span previousScanline, int bytesPerPixel) + { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); @@ -56,13 +136,13 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } /// - /// Encodes the scanline + /// Encodes a scanline and applies the paeth filter. /// /// The scanline to encode /// The previous scanline. /// The filtered scanline result. /// The bytes per pixel. - /// The sum of the total variance of the filtered row + /// The sum of the total variance of the filtered row. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previousScanline, Span result, int bytesPerPixel, out int sum) {