|
|
@ -6,6 +6,11 @@ using System.Numerics; |
|
|
using System.Runtime.CompilerServices; |
|
|
using System.Runtime.CompilerServices; |
|
|
using System.Runtime.InteropServices; |
|
|
using System.Runtime.InteropServices; |
|
|
|
|
|
|
|
|
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
|
|
using System.Runtime.Intrinsics; |
|
|
|
|
|
using System.Runtime.Intrinsics.X86; |
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
namespace SixLabors.ImageSharp.Formats.Png.Filters |
|
|
namespace SixLabors.ImageSharp.Formats.Png.Filters |
|
|
{ |
|
|
{ |
|
|
/// <summary>
|
|
|
/// <summary>
|
|
|
@ -84,7 +89,30 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
if (Vector.IsHardwareAccelerated) |
|
|
if (Avx2.IsSupported) |
|
|
|
|
|
{ |
|
|
|
|
|
Vector256<int> sumAccumulator = Vector256<int>.Zero; |
|
|
|
|
|
|
|
|
|
|
|
for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count) |
|
|
|
|
|
{ |
|
|
|
|
|
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x)); |
|
|
|
|
|
Vector256<byte> left = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft)); |
|
|
|
|
|
Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x)); |
|
|
|
|
|
Vector256<byte> upperLeft = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, xLeft)); |
|
|
|
|
|
|
|
|
|
|
|
Vector256<byte> res = Avx2.Subtract(scan, PaethPredictor(left, above, upperLeft)); |
|
|
|
|
|
Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
|
|
|
|
|
|
x += Vector256<byte>.Count; |
|
|
|
|
|
|
|
|
|
|
|
sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), Vector256<byte>.Zero).AsInt32()); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < Vector256<int>.Count; i++) |
|
|
|
|
|
{ |
|
|
|
|
|
sum += sumAccumulator.GetElement(i); |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
else if (Vector.IsHardwareAccelerated) |
|
|
{ |
|
|
{ |
|
|
Vector<uint> sumAccumulator = Vector<uint>.Zero; |
|
|
Vector<uint> sumAccumulator = Vector<uint>.Zero; |
|
|
|
|
|
|
|
|
@ -155,6 +183,39 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters |
|
|
return upperLeft; |
|
|
return upperLeft; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
|
|
private static Vector256<byte> PaethPredictor(Vector256<byte> left, Vector256<byte> above, Vector256<byte> upleft) |
|
|
|
|
|
{ |
|
|
|
|
|
Vector256<byte> zero = Vector256<byte>.Zero; |
|
|
|
|
|
|
|
|
|
|
|
// Here, we refactor pa = abs(p - left) = abs(left + above - upleft - left)
|
|
|
|
|
|
// to pa = abs(above - upleft). Same deal for pb.
|
|
|
|
|
|
// Using saturated subtraction, if the result is negative, the output is zero.
|
|
|
|
|
|
// If we subtract in both directions and `or` the results, only one can be
|
|
|
|
|
|
// non-zero, so we end up with the absolute value.
|
|
|
|
|
|
Vector256<byte> sac = Avx2.SubtractSaturate(above, upleft); |
|
|
|
|
|
Vector256<byte> sbc = Avx2.SubtractSaturate(left, upleft); |
|
|
|
|
|
Vector256<byte> pa = Avx2.Or(Avx2.SubtractSaturate(upleft, above), sac); |
|
|
|
|
|
Vector256<byte> pb = Avx2.Or(Avx2.SubtractSaturate(upleft, left), sbc); |
|
|
|
|
|
|
|
|
|
|
|
// pc = abs(left + above - upleft - upleft), or abs(left - upleft + above - upleft).
|
|
|
|
|
|
// We've already calculated left - upleft and above - upleft in `sac` and `sbc`.
|
|
|
|
|
|
// If they are both negative or both positive, the absolute value of their
|
|
|
|
|
|
// sum can't possibly be less than `pa` or `pb`, so we'll never use the value.
|
|
|
|
|
|
// We make a mask that sets the value to 255 if they either both got
|
|
|
|
|
|
// saturated to zero or both didn't. Then we calculate the absolute value
|
|
|
|
|
|
// of their difference using saturated subtract and `or`, same as before,
|
|
|
|
|
|
// keeping the value only where the mask isn't set.
|
|
|
|
|
|
Vector256<byte> pm = Avx2.CompareEqual(Avx2.CompareEqual(sac, zero), Avx2.CompareEqual(sbc, zero)); |
|
|
|
|
|
Vector256<byte> pc = Avx2.Or(pm, Avx2.Or(Avx2.SubtractSaturate(pb, pa), Avx2.SubtractSaturate(pa, pb))); |
|
|
|
|
|
|
|
|
|
|
|
// Finally, blend the values together. We start with `upleft` and overwrite on
|
|
|
|
|
|
// tied values so that the `left`, `above`, `upleft` precedence is preserved.
|
|
|
|
|
|
Vector256<byte> minbc = Avx2.Min(pc, pb); |
|
|
|
|
|
Vector256<byte> resbc = Avx2.BlendVariable(upleft, above, Avx2.CompareEqual(minbc, pb)); |
|
|
|
|
|
return Avx2.BlendVariable(resbc, left, Avx2.CompareEqual(Avx2.Min(minbc, pa), pa)); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
private static Vector<byte> PaethPredictor(Vector<byte> left, Vector<byte> above, Vector<byte> upperLeft) |
|
|
private static Vector<byte> PaethPredictor(Vector<byte> left, Vector<byte> above, Vector<byte> upperLeft) |
|
|
{ |
|
|
{ |
|
|
Vector.Widen(left, out Vector<ushort> a1, out Vector<ushort> a2); |
|
|
Vector.Widen(left, out Vector<ushort> a1, out Vector<ushort> a2); |
|
|
@ -185,5 +246,6 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters |
|
|
left: above, |
|
|
left: above, |
|
|
right: upperLeft)); |
|
|
right: upperLeft)); |
|
|
} |
|
|
} |
|
|
|
|
|
#endif
|
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|