From 8716c51d3044c40eaa46c6406f1f581afd699112 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 24 Feb 2022 11:53:50 +0100 Subject: [PATCH 1/5] Add SSE2 version of up filter --- .../Formats/Png/Filters/UpFilter.cs | 57 +++++++++++++++++-- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs index 7e0286991b..2d2a442a19 100644 --- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs @@ -21,7 +21,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters internal static class UpFilter { /// - /// Decodes the scanline + /// Decodes a scanline, which was filtered with the up filter. /// /// The scanline to decode /// The previous scanline. @@ -30,6 +30,55 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters { DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline)); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + DecodeSse2(scanline, previousScanline); + } + else +#endif + { + DecodeScalar(scanline, previousScanline); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + private static void DecodeSse2(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + // Up(x) + Prior(x) + int rb = scanline.Length; + int offset = 1; + const int bytesPerBatch = 16; + while (rb >= bytesPerBatch) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector128 current = Unsafe.As>(ref scanRef); + Vector128 up = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, offset)); + + Vector128 sum = Sse2.Add(up, current); + Unsafe.As>(ref scanRef) = sum; + + offset += bytesPerBatch; + rb -= bytesPerBatch; + } + + // Handle left over. + for (int i = offset; i < scanline.Length; i++) + { + ref byte scan = ref Unsafe.Add(ref scanBaseRef, offset); + byte above = Unsafe.Add(ref prevBaseRef, offset); + scan = (byte)(scan + above); + offset++; + } + } +#endif + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void DecodeScalar(Span scanline, Span previousScanline) + { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); @@ -43,12 +92,12 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } /// - /// Encodes the scanline + /// Encodes a scanline with the up filter applied. /// - /// The scanline to encode + /// The scanline to encode. /// The previous scanline. /// The filtered scanline result. - /// The sum of the total variance of the filtered row + /// The sum of the total variance of the filtered row. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previousScanline, Span result, out int sum) { From 0c8dbeae8c6854e75ccefa3902df85f01160f2dc Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 24 Feb 2022 13:58:16 +0100 Subject: [PATCH 2/5] Add Avx version of up filter --- .../Formats/Png/Filters/UpFilter.cs | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs index 2d2a442a19..20a828e520 100644 --- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs @@ -31,7 +31,11 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline)); #if SUPPORTS_RUNTIME_INTRINSICS - if (Sse2.IsSupported) + if (Avx2.IsSupported) + { + DecodeAvx2(scanline, previousScanline); + } + else if (Sse2.IsSupported) { DecodeSse2(scanline, previousScanline); } @@ -43,6 +47,38 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } #if SUPPORTS_RUNTIME_INTRINSICS + private static void DecodeAvx2(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + // Up(x) + Prior(x) + int rb = scanline.Length; + int offset = 1; + const int bytesPerBatch = 32; + while (rb >= bytesPerBatch) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector256 current = Unsafe.As>(ref scanRef); + Vector256 up = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, offset)); + + Vector256 sum = Avx2.Add(up, current); + Unsafe.As>(ref scanRef) = sum; + + offset += bytesPerBatch; + rb -= bytesPerBatch; + } + + // Handle left over. + for (int i = offset; i < scanline.Length; i++) + { + ref byte scan = ref Unsafe.Add(ref scanBaseRef, offset); + byte above = Unsafe.Add(ref prevBaseRef, offset); + scan = (byte)(scan + above); + offset++; + } + } + private static void DecodeSse2(Span scanline, Span previousScanline) { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); From 849e866221556474e4790e18ae6898e43de7e2f2 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 24 Feb 2022 16:22:29 +0100 Subject: [PATCH 3/5] Add SSE2 version of sub filter --- .../Formats/Png/Filters/SubFilter.cs | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs index c28b877e41..7985977548 100644 --- a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs @@ -21,12 +21,52 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters internal static class SubFilter { /// - /// Decodes the scanline + /// Decodes a scanline, which was filtered with the sub filter. /// - /// The scanline to decode + /// The scanline to decode. /// The bytes per pixel. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Decode(Span scanline, int bytesPerPixel) + { + // The Sub filter predicts each pixel as the previous pixel. +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported && bytesPerPixel is 4) + { + DecodeSse2(scanline); + } + else +#endif + { + DecodeScalar(scanline, bytesPerPixel); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + private static void DecodeSse2(Span scanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + + Vector128 d = Vector128.Zero; + + int rb = scanline.Length; + int offset = 1; + while (rb >= 4) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector128 a = d; + d = Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref scanRef)).AsByte(); + + d = Sse2.Add(d, a); + + Unsafe.As(ref scanRef) = Sse2.ConvertToInt32(d.AsInt32()); + + rb -= 4; + offset += 4; + } + } +#endif + + private static void DecodeScalar(Span scanline, int bytesPerPixel) { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); @@ -42,12 +82,12 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } /// - /// Encodes the scanline + /// Encodes a scanline with the sup filter applied. /// - /// The scanline to encode + /// The scanline to encode. /// The filtered scanline result. /// The bytes per pixel. - /// The sum of the total variance of the filtered row + /// The sum of the total variance of the filtered row. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Encode(ReadOnlySpan scanline, ReadOnlySpan result, int bytesPerPixel, out int sum) { From 8432b9fa4d76cfd13fad3221e77ab43f67ecc6cb Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 24 Feb 2022 18:29:07 +0100 Subject: [PATCH 4/5] Use nint for offset --- src/ImageSharp/Formats/Png/Filters/SubFilter.cs | 4 ++-- src/ImageSharp/Formats/Png/Filters/UpFilter.cs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs index 7985977548..eaa4dc0344 100644 --- a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs @@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters Vector128 d = Vector128.Zero; int rb = scanline.Length; - int offset = 1; + nint offset = 1; while (rb >= 4) { ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); @@ -71,7 +71,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); // Sub(x) + Raw(x-bpp) - int x = bytesPerPixel + 1; + nint x = bytesPerPixel + 1; Unsafe.Add(ref scanBaseRef, x); for (; x < scanline.Length; ++x) { diff --git a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs index 20a828e520..0d24d9c5d5 100644 --- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs @@ -54,7 +54,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters // Up(x) + Prior(x) int rb = scanline.Length; - int offset = 1; + nint offset = 1; const int bytesPerBatch = 32; while (rb >= bytesPerBatch) { @@ -70,7 +70,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } // Handle left over. - for (int i = offset; i < scanline.Length; i++) + for (nint i = offset; i < scanline.Length; i++) { ref byte scan = ref Unsafe.Add(ref scanBaseRef, offset); byte above = Unsafe.Add(ref prevBaseRef, offset); @@ -86,7 +86,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters // Up(x) + Prior(x) int rb = scanline.Length; - int offset = 1; + nint offset = 1; const int bytesPerBatch = 16; while (rb >= bytesPerBatch) { @@ -102,7 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } // Handle left over. - for (int i = offset; i < scanline.Length; i++) + for (nint i = offset; i < scanline.Length; i++) { ref byte scan = ref Unsafe.Add(ref scanBaseRef, offset); byte above = Unsafe.Add(ref prevBaseRef, offset); From 06843f2a7d81a00da7be04d615ef26dc0f5389e2 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 24 Feb 2022 23:01:53 +0100 Subject: [PATCH 5/5] Add SSE version of paeth filter --- .../Formats/Png/Filters/PaethFilter.cs | 88 ++++++++++++++++++- 1 file changed, 84 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs index 6a89a1122a..0553eb46a9 100644 --- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs @@ -22,9 +22,9 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters internal static class PaethFilter { /// - /// Decodes the scanline + /// Decodes a scanline, which was filtered with the paeth filter. /// - /// The scanline to decode + /// The scanline to decode. /// The previous scanline. /// The bytes per pixel. [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -32,6 +32,86 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters { DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline)); + // Paeth tries to predict pixel d using the pixel to the left of it, a, + // and two pixels from the previous row, b and c: + // prev: c b + // row: a d + // The Paeth function predicts d to be whichever of a, b, or c is nearest to + // p = a + b - c. +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse41.IsSupported && bytesPerPixel is 4) + { + DecodeSse41(scanline, previousScanline); + } + else +#endif + { + DecodeScalar(scanline, previousScanline, bytesPerPixel); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void DecodeSse41(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + Vector128 b = Vector128.Zero; + Vector128 d = Vector128.Zero; + + int rb = scanline.Length; + nint offset = 1; + while (rb >= 4) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + + // It's easiest to do this math (particularly, deal with pc) with 16-bit intermediates. + Vector128 c = b; + Vector128 a = d; + b = Sse2.UnpackLow( + Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref Unsafe.Add(ref prevBaseRef, offset))).AsByte(), + Vector128.Zero); + d = Sse2.UnpackLow( + Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref scanRef)).AsByte(), + Vector128.Zero); + + // (p-a) == (a+b-c - a) == (b-c) + Vector128 pa = Sse2.Subtract(b.AsInt16(), c.AsInt16()); + + // (p-b) == (a+b-c - b) == (a-c) + Vector128 pb = Sse2.Subtract(a.AsInt16(), c.AsInt16()); + + // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) + Vector128 pc = Sse2.Add(pa.AsInt16(), pb.AsInt16()); + + pa = Ssse3.Abs(pa.AsInt16()).AsInt16(); /* |p-a| */ + pb = Ssse3.Abs(pb.AsInt16()).AsInt16(); /* |p-b| */ + pc = Ssse3.Abs(pc.AsInt16()).AsInt16(); /* |p-c| */ + + Vector128 smallest = Sse2.Min(pc, Sse2.Min(pa, pb)); + + // Paeth breaks ties favoring a over b over c. + Vector128 mask = Sse41.BlendVariable(c, b, Sse2.CompareEqual(smallest, pb).AsByte()); + Vector128 nearest = Sse41.BlendVariable(mask, a, Sse2.CompareEqual(smallest, pa).AsByte()); + + // Note `_epi8`: we need addition to wrap modulo 255. + d = Sse2.Add(d, nearest); + + // Store the result. + Unsafe.As(ref scanRef) = Sse2.ConvertToInt32(Sse2.PackUnsignedSaturate(d.AsInt16(), d.AsInt16()).AsInt32()); + + rb -= 4; + offset += 4; + } + } + +#endif + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void DecodeScalar(Span scanline, Span previousScanline, int bytesPerPixel) + { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); @@ -56,13 +136,13 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } /// - /// Encodes the scanline + /// Encodes a scanline and applies the paeth filter. /// /// The scanline to encode /// The previous scanline. /// The filtered scanline result. /// The bytes per pixel. - /// The sum of the total variance of the filtered row + /// The sum of the total variance of the filtered row. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previousScanline, Span result, int bytesPerPixel, out int sum) {