diff --git a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs index 05ecc74a7d..7fa8a6b745 100644 --- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs @@ -6,6 +6,11 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif + namespace SixLabors.ImageSharp.Formats.Png.Filters { /// @@ -84,7 +89,30 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } #if SUPPORTS_RUNTIME_INTRINSICS - if (Vector.IsHardwareAccelerated) + if (Avx2.IsSupported) + { + Vector256 sumAccumulator = Vector256.Zero; + + for (int xLeft = x - bytesPerPixel; x + Vector256.Count <= scanline.Length; xLeft += Vector256.Count) + { + Vector256 scan = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, x)); + Vector256 left = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, xLeft)); + Vector256 above = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, x)); + Vector256 upperLeft = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, xLeft)); + + Vector256 res = Avx2.Subtract(scan, PaethPredictor(left, above, upperLeft)); + Unsafe.As>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type + x += Vector256.Count; + + sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), Vector256.Zero).AsInt32()); + } + + for (int i = 0; i < Vector256.Count; i++) + { + sum += sumAccumulator.GetElement(i); + } + } + else if (Vector.IsHardwareAccelerated) { Vector sumAccumulator = Vector.Zero; @@ -155,6 +183,39 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters return upperLeft; } +#if SUPPORTS_RUNTIME_INTRINSICS + private static Vector256 PaethPredictor(Vector256 left, Vector256 above, Vector256 upleft) + { + Vector256 zero = Vector256.Zero; + + // Here, we refactor pa = abs(p - left) = abs(left + above - upleft - left) + // to pa = abs(above - upleft). Same deal for pb. + // Using saturated subtraction, if the result is negative, the output is zero. + // If we subtract in both directions and `or` the results, only one can be + // non-zero, so we end up with the absolute value. + Vector256 sac = Avx2.SubtractSaturate(above, upleft); + Vector256 sbc = Avx2.SubtractSaturate(left, upleft); + Vector256 pa = Avx2.Or(Avx2.SubtractSaturate(upleft, above), sac); + Vector256 pb = Avx2.Or(Avx2.SubtractSaturate(upleft, left), sbc); + + // pc = abs(left + above - upleft - upleft), or abs(left - upleft + above - upleft). + // We've already calculated left - upleft and above - upleft in `sac` and `sbc`. + // If they are both negative or both positive, the absolute value of their + // sum can't possibly be less than `pa` or `pb`, so we'll never use the value. + // We make a mask that sets the value to 255 if they either both got + // saturated to zero or both didn't. Then we calculate the absolute value + // of their difference using saturated subtract and `or`, same as before, + // keeping the value only where the mask isn't set. + Vector256 pm = Avx2.CompareEqual(Avx2.CompareEqual(sac, zero), Avx2.CompareEqual(sbc, zero)); + Vector256 pc = Avx2.Or(pm, Avx2.Or(Avx2.SubtractSaturate(pb, pa), Avx2.SubtractSaturate(pa, pb))); + + // Finally, blend the values together. We start with `upleft` and overwrite on + // tied values so that the `left`, `above`, `upleft` precedence is preserved. + Vector256 minbc = Avx2.Min(pc, pb); + Vector256 resbc = Avx2.BlendVariable(upleft, above, Avx2.CompareEqual(minbc, pb)); + return Avx2.BlendVariable(resbc, left, Avx2.CompareEqual(Avx2.Min(minbc, pa), pa)); + } + private static Vector PaethPredictor(Vector left, Vector above, Vector upperLeft) { Vector.Widen(left, out Vector a1, out Vector a2); @@ -185,5 +246,6 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters left: above, right: upperLeft)); } +#endif } }