diff --git a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
index c28b877e41..7985977548 100644
--- a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
@@ -21,12 +21,52 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
internal static class SubFilter
{
///
- /// Decodes the scanline
+ /// Decodes a scanline, which was filtered with the sub filter.
///
- /// The scanline to decode
+ /// The scanline to decode.
/// The bytes per pixel.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Decode(Span scanline, int bytesPerPixel)
+ {
+ // The Sub filter predicts each pixel as the previous pixel. This method only picks the implementation: SSE2 when usable, otherwise the scalar routine.
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported && bytesPerPixel is 4) // DecodeSse2 advances 4 bytes per step, so it is only chosen for 4-byte pixels
+ {
+ DecodeSse2(scanline);
+ }
+ else // pairs with the block after #endif; when this #if section is compiled out, that block runs unconditionally
+#endif
+ {
+ DecodeScalar(scanline, bytesPerPixel);
+ }
+ }
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static void DecodeSse2(Span scanline)
+ {
+ ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
+ // Undoes the Sub filter in place, 4 bytes (one pixel) per step. 'd' holds the previously decoded pixel and starts at zero, so the first pixel is unchanged.
+ Vector128 d = Vector128.Zero;
+ // Decoding starts at index 1 (index 0 is presumably the filter-type byte - confirm against caller), so only Length - 1 bytes remain; this keeps the last 4-byte access inside the span.
+ int rb = scanline.Length - 1;
+ int offset = 1;
+ while (rb >= 4)
+ {
+ ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset);
+ Vector128 a = d;
+ d = Sse2.ConvertScalarToVector128Int32(Unsafe.As(ref scanRef)).AsByte();
+ // Add the previously decoded pixel to the current filtered pixel, byte by byte.
+ d = Sse2.Add(d, a);
+ // Write the low 32 bits (the decoded pixel) back over the filtered bytes.
+ Unsafe.As(ref scanRef) = Sse2.ConvertToInt32(d.AsInt32());
+
+ rb -= 4;
+ offset += 4;
+ }
+ }
+#endif
+
+ private static void DecodeScalar(Span scanline, int bytesPerPixel)
{
ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
@@ -42,12 +82,12 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
}
///
- /// Encodes the scanline
+ /// Encodes a scanline with the sub filter applied.
///
- /// The scanline to encode
+ /// The scanline to encode.
/// The filtered scanline result.
/// The bytes per pixel.
- /// The sum of the total variance of the filtered row
+ /// The sum of the total variance of the filtered row.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Encode(ReadOnlySpan scanline, ReadOnlySpan result, int bytesPerPixel, out int sum)
{