diff --git a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs index 6a89a1122a..0553eb46a9 100644 --- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs @@ -22,9 +22,9 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters internal static class PaethFilter { /// - /// Decodes the scanline + /// Decodes a scanline, which was filtered with the paeth filter. /// - /// The scanline to decode + /// The scanline to decode. /// The previous scanline. /// The bytes per pixel. [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -32,6 +32,86 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters { DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline)); + // Paeth tries to predict pixel d using the pixel to the left of it, a, + // and two pixels from the previous row, b and c: + // prev: c b + // row: a d + // The Paeth function predicts d to be whichever of a, b, or c is nearest to + // p = a + b - c. +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse41.IsSupported && bytesPerPixel is 4) + { + DecodeSse41(scanline, previousScanline); + } + else +#endif + { + DecodeScalar(scanline, previousScanline, bytesPerPixel); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void DecodeSse41(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + Vector128 b = Vector128.Zero; + Vector128 d = Vector128.Zero; + + int rb = scanline.Length; + nint offset = 1; + while (rb >= 4) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + + // It's easiest to do this math (particularly, deal with pc) with 16-bit intermediates. + Vector128 c = b; + Vector128 a = d; + b = Sse2.UnpackLow( + Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref Unsafe.Add(ref prevBaseRef, offset))).AsByte(), + Vector128.Zero); + d = Sse2.UnpackLow( + Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref scanRef)).AsByte(), + Vector128.Zero); + + // (p-a) == (a+b-c - a) == (b-c) + Vector128 pa = Sse2.Subtract(b.AsInt16(), c.AsInt16()); + + // (p-b) == (a+b-c - b) == (a-c) + Vector128 pb = Sse2.Subtract(a.AsInt16(), c.AsInt16()); + + // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) + Vector128 pc = Sse2.Add(pa.AsInt16(), pb.AsInt16()); + + pa = Ssse3.Abs(pa.AsInt16()).AsInt16(); /* |p-a| */ + pb = Ssse3.Abs(pb.AsInt16()).AsInt16(); /* |p-b| */ + pc = Ssse3.Abs(pc.AsInt16()).AsInt16(); /* |p-c| */ + + Vector128 smallest = Sse2.Min(pc, Sse2.Min(pa, pb)); + + // Paeth breaks ties favoring a over b over c. + Vector128 mask = Sse41.BlendVariable(c, b, Sse2.CompareEqual(smallest, pb).AsByte()); + Vector128 nearest = Sse41.BlendVariable(mask, a, Sse2.CompareEqual(smallest, pa).AsByte()); + + // Note `_epi8`: we need addition to wrap modulo 255. + d = Sse2.Add(d, nearest); + + // Store the result. + Unsafe.As(ref scanRef) = Sse2.ConvertToInt32(Sse2.PackUnsignedSaturate(d.AsInt16(), d.AsInt16()).AsInt32()); + + rb -= 4; + offset += 4; + } + } + +#endif + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void DecodeScalar(Span scanline, Span previousScanline, int bytesPerPixel) + { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); @@ -56,13 +136,13 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } /// - /// Encodes the scanline + /// Encodes a scanline and applies the paeth filter. /// /// The scanline to encode /// The previous scanline. /// The filtered scanline result. /// The bytes per pixel. - /// The sum of the total variance of the filtered row + /// The sum of the total variance of the filtered row. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previousScanline, Span result, int bytesPerPixel, out int sum) { diff --git a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs index c28b877e41..eaa4dc0344 100644 --- a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs @@ -21,17 +21,57 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters internal static class SubFilter { /// - /// Decodes the scanline + /// Decodes a scanline, which was filtered with the sub filter. /// - /// The scanline to decode + /// The scanline to decode. /// The bytes per pixel. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Decode(Span scanline, int bytesPerPixel) + { + // The Sub filter predicts each pixel as the previous pixel. +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported && bytesPerPixel is 4) + { + DecodeSse2(scanline); + } + else +#endif + { + DecodeScalar(scanline, bytesPerPixel); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + private static void DecodeSse2(Span scanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + + Vector128 d = Vector128.Zero; + + int rb = scanline.Length; + nint offset = 1; + while (rb >= 4) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector128 a = d; + d = Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref scanRef)).AsByte(); + + d = Sse2.Add(d, a); + + Unsafe.As(ref scanRef) = Sse2.ConvertToInt32(d.AsInt32()); + + rb -= 4; + offset += 4; + } + } +#endif + + private static void DecodeScalar(Span scanline, int bytesPerPixel) { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); // Sub(x) + Raw(x-bpp) - int x = bytesPerPixel + 1; + nint x = bytesPerPixel + 1; Unsafe.Add(ref scanBaseRef, x); for (; x < scanline.Length; ++x) { @@ -42,12 +82,12 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } /// - /// Encodes the scanline + /// Encodes a scanline with the sup filter applied. /// - /// The scanline to encode + /// The scanline to encode. /// The filtered scanline result. /// The bytes per pixel. - /// The sum of the total variance of the filtered row + /// The sum of the total variance of the filtered row. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Encode(ReadOnlySpan scanline, ReadOnlySpan result, int bytesPerPixel, out int sum) { diff --git a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs index 7e0286991b..0d24d9c5d5 100644 --- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs @@ -21,7 +21,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters internal static class UpFilter { /// - /// Decodes the scanline + /// Decodes a scanline, which was filtered with the up filter. /// /// The scanline to decode /// The previous scanline. @@ -30,6 +30,91 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters { DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline)); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + DecodeAvx2(scanline, previousScanline); + } + else if (Sse2.IsSupported) + { + DecodeSse2(scanline, previousScanline); + } + else +#endif + { + DecodeScalar(scanline, previousScanline); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + private static void DecodeAvx2(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + // Up(x) + Prior(x) + int rb = scanline.Length; + nint offset = 1; + const int bytesPerBatch = 32; + while (rb >= bytesPerBatch) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector256 current = Unsafe.As>(ref scanRef); + Vector256 up = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, offset)); + + Vector256 sum = Avx2.Add(up, current); + Unsafe.As>(ref scanRef) = sum; + + offset += bytesPerBatch; + rb -= bytesPerBatch; + } + + // Handle left over. + for (nint i = offset; i < scanline.Length; i++) + { + ref byte scan = ref Unsafe.Add(ref scanBaseRef, offset); + byte above = Unsafe.Add(ref prevBaseRef, offset); + scan = (byte)(scan + above); + offset++; + } + } + + private static void DecodeSse2(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + // Up(x) + Prior(x) + int rb = scanline.Length; + nint offset = 1; + const int bytesPerBatch = 16; + while (rb >= bytesPerBatch) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector128 current = Unsafe.As>(ref scanRef); + Vector128 up = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, offset)); + + Vector128 sum = Sse2.Add(up, current); + Unsafe.As>(ref scanRef) = sum; + + offset += bytesPerBatch; + rb -= bytesPerBatch; + } + + // Handle left over. + for (nint i = offset; i < scanline.Length; i++) + { + ref byte scan = ref Unsafe.Add(ref scanBaseRef, offset); + byte above = Unsafe.Add(ref prevBaseRef, offset); + scan = (byte)(scan + above); + offset++; + } + } +#endif + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void DecodeScalar(Span scanline, Span previousScanline) + { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); @@ -43,12 +128,12 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } /// - /// Encodes the scanline + /// Encodes a scanline with the up filter applied. /// - /// The scanline to encode + /// The scanline to encode. /// The previous scanline. /// The filtered scanline result. - /// The sum of the total variance of the filtered row + /// The sum of the total variance of the filtered row. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previousScanline, Span result, out int sum) {