diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
index 6105422372..0581993014 100644
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -748,5 +748,82 @@ namespace SixLabors.ImageSharp
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Lerp(float value1, float value2, float amount)
=> ((value2 - value1) * amount) + value1;
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+
+ /// <summary>
+ /// Accumulates 8-bit integers into <paramref name="accumulator"/> by
+ /// widening them to 32-bit integers and performing four additions.
+ /// </summary>
+ /// <remarks>
+ /// <c>byte(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)</c>
+ /// is widened and added onto <paramref name="accumulator"/> as such:
+ /// <code>
+ /// accumulator += i32(1, 2, 3, 4);
+ /// accumulator += i32(5, 6, 7, 8);
+ /// accumulator += i32(9, 10, 11, 12);
+ /// accumulator += i32(13, 14, 15, 16);
+ /// </code>
+ /// </remarks>
+ /// <param name="accumulator">The accumulator destination.</param>
+ /// <param name="values">The values to accumulate.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
+ {
+ Vector.Widen(values, out Vector<ushort> shortLow, out Vector<ushort> shortHigh); // 8-bit -> two vectors of 16-bit lanes
+
+ Vector.Widen(shortLow, out Vector<uint> intLow, out Vector<uint> intHigh); // lower 16-bit half -> two vectors of 32-bit lanes
+ accumulator += intLow;
+ accumulator += intHigh;
+
+ Vector.Widen(shortHigh, out intLow, out intHigh); // upper 16-bit half, reusing the same temporaries
+ accumulator += intLow;
+ accumulator += intHigh;
+ }
+
+ /// <summary>
+ /// Reduces elements of the vector into one sum.
+ /// </summary>
+ /// <param name="accumulator">The accumulator to reduce.</param>
+ /// <returns>The sum of all elements.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int ReduceSum(Vector128<int> accumulator)
+ {
+ if (Ssse3.IsSupported)
+ {
+ Vector128<int> hadd = Ssse3.HorizontalAdd(accumulator, accumulator); // (a0+a1, a2+a3, a0+a1, a2+a3)
+ Vector128<int> swapped = Sse2.Shuffle(hadd, 0x1); // element 0 now holds a2+a3
+ Vector128<int> tmp = Sse2.Add(hadd, swapped); // element 0 = a0+a1+a2+a3
+
+ // Vector128.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
+ return Sse2.ConvertToInt32(tmp);
+ }
+ else
+ {
+ int sum = 0;
+ for (int i = 0; i < Vector128<int>.Count; i++)
+ {
+ sum += accumulator.GetElement(i);
+ }
+
+ return sum;
+ }
+ }
+
+ /// <summary>
+ /// Reduces even elements of the vector into one sum.
+ /// </summary>
+ /// <param name="accumulator">The accumulator to reduce.</param>
+ /// <returns>The sum of even elements.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int EvenReduceSum(Vector256<int> accumulator)
+ {
+ Vector128<int> vsum = Sse2.Add(accumulator.GetLower(), accumulator.GetUpper()); // add upper lane to lower lane
+ vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10)); // add high to low
+
+ // Vector128.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
+ return Sse2.ConvertToInt32(vsum);
+ }
+#endif
}
}
diff --git a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
index d1c214e3d6..0ab1413974 100644
--- a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
@@ -5,6 +5,11 @@ using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
namespace SixLabors.ImageSharp.Formats.Png.Filters
{
///
@@ -79,6 +84,79 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
sum += Numerics.Abs(unchecked((sbyte)res));
}
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported)
+ {
+ Vector256<byte> zero = Vector256<byte>.Zero;
+ Vector256<int> sumAccumulator = Vector256<int>.Zero;
+ Vector256<byte> allBitsSet = Avx2.CompareEqual(sumAccumulator, sumAccumulator).AsByte();
+
+ for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
+ {
+ Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+ Vector256<byte> left = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
+ Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
+
+ Vector256<byte> avg = Avx2.Xor(Avx2.Average(Avx2.Xor(left, allBitsSet), Avx2.Xor(above, allBitsSet)), allBitsSet); // Average rounds up; complementing inputs and output gives floor((left + above) / 2)
+ Vector256<byte> res = Avx2.Subtract(scan, avg);
+
+ Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+ x += Vector256<byte>.Count;
+
+ sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32()); // SAD against zero sums the |res| bytes per 64-bit lane
+ }
+
+ sum += Numerics.EvenReduceSum(sumAccumulator);
+ }
+ else if (Sse2.IsSupported)
+ {
+ Vector128<sbyte> zero8 = Vector128<sbyte>.Zero;
+ Vector128<short> zero16 = Vector128<short>.Zero;
+ Vector128<int> sumAccumulator = Vector128<int>.Zero;
+ Vector128<byte> allBitsSet = Sse2.CompareEqual(sumAccumulator, sumAccumulator).AsByte();
+
+ for (int xLeft = x - bytesPerPixel; x + Vector128<byte>.Count <= scanline.Length; xLeft += Vector128<byte>.Count)
+ {
+ Vector128<byte> scan = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+ Vector128<byte> left = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
+ Vector128<byte> above = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
+
+ Vector128<byte> avg = Sse2.Xor(Sse2.Average(Sse2.Xor(left, allBitsSet), Sse2.Xor(above, allBitsSet)), allBitsSet); // same floor-average trick as the AVX2 branch
+ Vector128<byte> res = Sse2.Subtract(scan, avg);
+
+ Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+ x += Vector128<byte>.Count;
+
+ Vector128<sbyte> absRes;
+ if (Ssse3.IsSupported)
+ {
+ absRes = Ssse3.Abs(res.AsSByte()).AsSByte();
+ }
+ else
+ {
+ Vector128<sbyte> mask = Sse2.CompareGreaterThan(res.AsSByte(), zero8); // all ones where res > 0
+ mask = Sse2.Xor(mask, allBitsSet.AsSByte()); // inverted: all ones where res <= 0
+ absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask); // (res - 1) ^ -1 == -res in the masked lanes
+ }
+
+ Vector128<short> loRes16 = Sse2.UnpackLow(absRes, zero8).AsInt16(); // interleave with zero == zero-extend to 16 bits
+ Vector128<short> hiRes16 = Sse2.UnpackHigh(absRes, zero8).AsInt16();
+
+ Vector128<int> loRes32 = Sse2.UnpackLow(loRes16, zero16).AsInt32();
+ Vector128<int> hiRes32 = Sse2.UnpackHigh(loRes16, zero16).AsInt32();
+ sumAccumulator = Sse2.Add(sumAccumulator, loRes32);
+ sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);
+
+ loRes32 = Sse2.UnpackLow(hiRes16, zero16).AsInt32();
+ hiRes32 = Sse2.UnpackHigh(hiRes16, zero16).AsInt32();
+ sumAccumulator = Sse2.Add(sumAccumulator, loRes32);
+ sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);
+ }
+
+ sum += Numerics.ReduceSum(sumAccumulator);
+ }
+#endif
+
for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
diff --git a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
index fab6788061..e8e0aa7043 100644
--- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
@@ -2,9 +2,15 @@
// Licensed under the Apache License, Version 2.0.
using System;
+using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
namespace SixLabors.ImageSharp.Formats.Png.Filters
{
///
@@ -82,6 +88,53 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
sum += Numerics.Abs(unchecked((sbyte)res));
}
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported)
+ {
+ Vector256<byte> zero = Vector256<byte>.Zero;
+ Vector256<int> sumAccumulator = Vector256<int>.Zero;
+
+ for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
+ {
+ Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+ Vector256<byte> left = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
+ Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
+ Vector256<byte> upperLeft = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, xLeft));
+
+ Vector256<byte> res = Avx2.Subtract(scan, PaethPredictor(left, above, upperLeft));
+ Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+ x += Vector256<byte>.Count;
+
+ sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32()); // SAD against zero sums the |res| bytes per 64-bit lane
+ }
+
+ sum += Numerics.EvenReduceSum(sumAccumulator);
+ }
+ else if (Vector.IsHardwareAccelerated)
+ {
+ Vector<uint> sumAccumulator = Vector<uint>.Zero;
+
+ for (int xLeft = x - bytesPerPixel; x + Vector<byte>.Count <= scanline.Length; xLeft += Vector<byte>.Count)
+ {
+ Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+ Vector<byte> left = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
+ Vector<byte> above = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
+ Vector<byte> upperLeft = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref prevBaseRef, xLeft));
+
+ Vector<byte> res = scan - PaethPredictor(left, above, upperLeft);
+ Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+ x += Vector<byte>.Count;
+
+ Numerics.Accumulate(ref sumAccumulator, Vector.AsVectorByte(Vector.Abs(Vector.AsVectorSByte(res))));
+ }
+
+ for (int i = 0; i < Vector<uint>.Count; i++)
+ {
+ sum += (int)sumAccumulator[i];
+ }
+ }
+#endif
+
for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
@@ -127,5 +180,70 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
return upperLeft;
}
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static Vector256<byte> PaethPredictor(Vector256<byte> left, Vector256<byte> above, Vector256<byte> upleft) // AVX2 Paeth predictor, one result per byte lane
+ {
+ Vector256<byte> zero = Vector256<byte>.Zero;
+
+ // Here, we refactor pa = abs(p - left) = abs(left + above - upleft - left)
+ // to pa = abs(above - upleft). Same deal for pb.
+ // Using saturated subtraction, if the result is negative, the output is zero.
+ // If we subtract in both directions and `or` the results, only one can be
+ // non-zero, so we end up with the absolute value.
+ Vector256<byte> sac = Avx2.SubtractSaturate(above, upleft);
+ Vector256<byte> sbc = Avx2.SubtractSaturate(left, upleft);
+ Vector256<byte> pa = Avx2.Or(Avx2.SubtractSaturate(upleft, above), sac);
+ Vector256<byte> pb = Avx2.Or(Avx2.SubtractSaturate(upleft, left), sbc);
+
+ // pc = abs(left + above - upleft - upleft), or abs(left - upleft + above - upleft).
+ // We've already calculated left - upleft and above - upleft in `sac` and `sbc`.
+ // If they are both negative or both positive, the absolute value of their
+ // sum can't possibly be less than `pa` or `pb`, so we'll never use the value.
+ // We make a mask that sets the value to 255 if they either both got
+ // saturated to zero or both didn't. Then we calculate the absolute value
+ // of their difference using saturated subtract and `or`, same as before,
+ // keeping the value only where the mask isn't set.
+ Vector256<byte> pm = Avx2.CompareEqual(Avx2.CompareEqual(sac, zero), Avx2.CompareEqual(sbc, zero));
+ Vector256<byte> pc = Avx2.Or(pm, Avx2.Or(Avx2.SubtractSaturate(pb, pa), Avx2.SubtractSaturate(pa, pb)));
+
+ // Finally, blend the values together. We start with `upleft` and overwrite on
+ // tied values so that the `left`, `above`, `upleft` precedence is preserved.
+ Vector256<byte> minbc = Avx2.Min(pc, pb);
+ Vector256<byte> resbc = Avx2.BlendVariable(upleft, above, Avx2.CompareEqual(minbc, pb));
+ return Avx2.BlendVariable(resbc, left, Avx2.CompareEqual(Avx2.Min(minbc, pa), pa));
+ }
+
+ private static Vector<byte> PaethPredictor(Vector<byte> left, Vector<byte> above, Vector<byte> upperLeft) // byte-lane wrapper: widen to 16 bits, predict, narrow back
+ {
+ Vector.Widen(left, out Vector<ushort> a1, out Vector<ushort> a2);
+ Vector.Widen(above, out Vector<ushort> b1, out Vector<ushort> b2);
+ Vector.Widen(upperLeft, out Vector<ushort> c1, out Vector<ushort> c2);
+
+ Vector<short> p1 = PaethPredictor(Vector.AsVectorInt16(a1), Vector.AsVectorInt16(b1), Vector.AsVectorInt16(c1));
+ Vector<short> p2 = PaethPredictor(Vector.AsVectorInt16(a2), Vector.AsVectorInt16(b2), Vector.AsVectorInt16(c2));
+ return Vector.AsVectorByte(Vector.Narrow(p1, p2)); // each lane is one of the original byte inputs, so narrowing loses nothing
+ }
+
+ private static Vector<short> PaethPredictor(Vector<short> left, Vector<short> above, Vector<short> upperLeft) // 16-bit lanes leave headroom for left + above - upperLeft
+ {
+ Vector<short> p = left + above - upperLeft;
+ var pa = Vector.Abs(p - left);
+ var pb = Vector.Abs(p - above);
+ var pc = Vector.Abs(p - upperLeft);
+
+ var pa_pb = Vector.LessThanOrEqual(pa, pb);
+ var pa_pc = Vector.LessThanOrEqual(pa, pc);
+ var pb_pc = Vector.LessThanOrEqual(pb, pc);
+
+ return Vector.ConditionalSelect( // left if pa <= pb and pa <= pc, else above if pb <= pc, else upperLeft
+ condition: Vector.BitwiseAnd(pa_pb, pa_pc),
+ left: left,
+ right: Vector.ConditionalSelect(
+ condition: pb_pc,
+ left: above,
+ right: upperLeft));
+ }
+#endif
}
}
diff --git a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
index cb4cfb471f..116154836e 100644
--- a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs
@@ -2,9 +2,15 @@
// Licensed under the Apache License, Version 2.0.
using System;
+using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
namespace SixLabors.ImageSharp.Formats.Png.Filters
{
///
@@ -64,6 +70,49 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
sum += Numerics.Abs(unchecked((sbyte)res));
}
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported)
+ {
+ Vector256<byte> zero = Vector256<byte>.Zero;
+ Vector256<int> sumAccumulator = Vector256<int>.Zero;
+
+ for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
+ {
+ Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+ Vector256<byte> prev = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
+
+ Vector256<byte> res = Avx2.Subtract(scan, prev);
+ Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+ x += Vector256<byte>.Count;
+
+ sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32()); // SAD against zero sums the |res| bytes per 64-bit lane
+ }
+
+ sum += Numerics.EvenReduceSum(sumAccumulator);
+ }
+ else if (Vector.IsHardwareAccelerated)
+ {
+ Vector<uint> sumAccumulator = Vector<uint>.Zero;
+
+ for (int xLeft = x - bytesPerPixel; x + Vector<byte>.Count <= scanline.Length; xLeft += Vector<byte>.Count)
+ {
+ Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+ Vector<byte> prev = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
+
+ Vector<byte> res = scan - prev;
+ Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+ x += Vector<byte>.Count;
+
+ Numerics.Accumulate(ref sumAccumulator, Vector.AsVectorByte(Vector.Abs(Vector.AsVectorSByte(res))));
+ }
+
+ for (int i = 0; i < Vector<uint>.Count; i++)
+ {
+ sum += (int)sumAccumulator[i];
+ }
+ }
+#endif
+
for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
diff --git a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs
index cf553cbb68..e0f35293a4 100644
--- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs
+++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs
@@ -1,10 +1,16 @@
-// Copyright (c) Six Labors.
+// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
+using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
namespace SixLabors.ImageSharp.Formats.Png.Filters
{
///
@@ -57,7 +63,52 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
// Up(x) = Raw(x) - Prior(x)
resultBaseRef = 2;
- for (int x = 0; x < scanline.Length; /* Note: ++x happens in the body to avoid one add operation */)
+ int x = 0;
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported)
+ {
+ Vector256<byte> zero = Vector256<byte>.Zero;
+ Vector256<int> sumAccumulator = Vector256<int>.Zero;
+
+ for (; x + Vector256<byte>.Count <= scanline.Length;)
+ {
+ Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+ Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
+
+ Vector256<byte> res = Avx2.Subtract(scan, above);
+ Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+ x += Vector256<byte>.Count;
+
+ sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32()); // SAD against zero sums the |res| bytes per 64-bit lane
+ }
+
+ sum += Numerics.EvenReduceSum(sumAccumulator);
+ }
+ else if (Vector.IsHardwareAccelerated)
+ {
+ Vector<uint> sumAccumulator = Vector<uint>.Zero;
+
+ for (; x + Vector<byte>.Count <= scanline.Length;)
+ {
+ Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
+ Vector<byte> above = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
+
+ Vector<byte> res = scan - above;
+ Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
+ x += Vector<byte>.Count;
+
+ Numerics.Accumulate(ref sumAccumulator, Vector.AsVectorByte(Vector.Abs(Vector.AsVectorSByte(res))));
+ }
+
+ for (int i = 0; i < Vector<uint>.Count; i++)
+ {
+ sum += (int)sumAccumulator[i];
+ }
+ }
+#endif
+
+ for (; x < scanline.Length; /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
byte above = Unsafe.Add(ref prevBaseRef, x);
diff --git a/tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs
new file mode 100644
index 0000000000..5f7b4f8327
--- /dev/null
+++ b/tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs
@@ -0,0 +1,313 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+// Uncomment this to turn unit tests into benchmarks:
+// #define BENCHMARKING
+using System;
+
+using SixLabors.ImageSharp.Formats.Png;
+using SixLabors.ImageSharp.Formats.Png.Filters;
+using SixLabors.ImageSharp.Tests.Formats.Png.Utils;
+using SixLabors.ImageSharp.Tests.TestUtilities;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace SixLabors.ImageSharp.Tests.Formats.Png
+{
+ [Trait("Format", "Png")]
+ public partial class PngFilterTests : MeasureFixture
+ {
+#if BENCHMARKING
+ public const int Times = 1000000;
+#else
+ public const int Times = 1;
+#endif
+
+ public PngFilterTests(ITestOutputHelper output) // xUnit supplies the per-test output helper
+ : base(output) // forwarded to the MeasureFixture base class
+ {
+ }
+
+ public const int Size = 64;
+
+ [Fact]
+ public void Average() // Average filter vs. reference with SIMD disabled; presumably only the scalar loops run - confirm against FeatureTestRunner
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Average, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.DisableSIMD);
+ }
+
+ [Fact]
+ public void AverageSse2() // AVX2 and SSSE3 masked off; intended to hit the Sse2 branch including its manual abs fallback
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Average, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSSE3);
+ }
+
+ [Fact]
+ public void AverageSsse3() // AVX2 masked off; intended to hit the Sse2 branch with the Ssse3.Abs path
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Average, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
+ }
+
+ [Fact]
+ public void AverageAvx2() // nothing masked off; takes the Avx2 branch when the hardware supports it
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Average, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.AllowAll);
+ }
+
+ [Fact]
+ public void Paeth() // Paeth filter vs. reference with SIMD disabled; presumably only the scalar loops run - confirm against FeatureTestRunner
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Paeth, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.DisableSIMD);
+ }
+
+ [Fact]
+ public void PaethAvx2() // nothing masked off; takes the Avx2 branch when the hardware supports it
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Paeth, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.AllowAll);
+ }
+
+ [Fact]
+ public void PaethVector() // AVX2 masked off; intended to hit the Vector.IsHardwareAccelerated branch
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Paeth, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
+ }
+
+ [Fact]
+ public void Up() // Up filter vs. reference with SIMD disabled; presumably only the scalar loop runs - confirm against FeatureTestRunner
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Up, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.DisableSIMD);
+ }
+
+
+ [Fact]
+ public void UpAvx2() // nothing masked off; takes the Avx2 branch when the hardware supports it
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Up, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.AllowAll);
+ }
+
+ [Fact]
+ public void UpVector() // AVX2 masked off; intended to hit the Vector.IsHardwareAccelerated branch
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Up, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
+ }
+
+ [Fact]
+ public void Sub() // Sub filter vs. reference with SIMD disabled; presumably only the scalar loops run - confirm against FeatureTestRunner
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Sub, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.DisableSIMD);
+ }
+
+ [Fact]
+ public void SubAvx2() // nothing masked off; takes the Avx2 branch when the hardware supports it
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Sub, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.AllowAll);
+ }
+
+ [Fact]
+ public void SubVector() // AVX2 masked off; intended to hit the Vector.IsHardwareAccelerated branch
+ {
+ static void RunTest()
+ {
+ var data = new TestData(PngFilterMethod.Sub, Size);
+ data.TestFilter();
+ }
+
+ FeatureTestRunner.RunWithHwIntrinsicsFeature(
+ RunTest,
+ HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
+ }
+
+ public class TestData // deterministic random scanline pair plus the reference-encoded expectation for one filter
+ {
+ private readonly PngFilterMethod filter;
+ private readonly int bpp;
+ private readonly byte[] previousScanline;
+ private readonly byte[] scanline;
+ private readonly byte[] expectedResult;
+ private readonly int expectedSum;
+ private readonly byte[] resultBuffer;
+
+ public TestData(PngFilterMethod filter, int size, int bpp = 4) // NOTE(review): the fill loop below writes 4 bytes per step, so a bpp other than 4 is not supported as written
+ {
+ this.filter = filter;
+ this.bpp = bpp;
+ this.previousScanline = new byte[size * size * bpp];
+ this.scanline = new byte[size * size * bpp];
+ this.expectedResult = new byte[1 + (size * size * bpp)]; // +1 for the leading filter-type byte
+ this.resultBuffer = new byte[1 + (size * size * bpp)];
+
+ var rng = new Random(12345678); // fixed seed keeps the data identical across runs
+ byte[] tmp = new byte[6];
+ for (int i = 0; i < this.previousScanline.Length; i += bpp)
+ {
+ rng.NextBytes(tmp);
+
+ this.previousScanline[i + 0] = tmp[0];
+ this.previousScanline[i + 1] = tmp[1];
+ this.previousScanline[i + 2] = tmp[2];
+ this.previousScanline[i + 3] = 255;
+
+ this.scanline[i + 0] = tmp[3];
+ this.scanline[i + 1] = tmp[4];
+ this.scanline[i + 2] = tmp[5];
+ this.scanline[i + 3] = 255;
+ }
+
+ switch (this.filter)
+ {
+ case PngFilterMethod.Sub:
+ ReferenceImplementations.EncodeSubFilter(
+ this.scanline, this.expectedResult, this.bpp, out this.expectedSum);
+ break;
+
+ case PngFilterMethod.Up:
+ ReferenceImplementations.EncodeUpFilter(
+ this.scanline, this.previousScanline, this.expectedResult, out this.expectedSum);
+ break;
+
+ case PngFilterMethod.Average:
+ ReferenceImplementations.EncodeAverageFilter(
+ this.scanline, this.previousScanline, this.expectedResult, this.bpp, out this.expectedSum);
+ break;
+
+ case PngFilterMethod.Paeth:
+ ReferenceImplementations.EncodePaethFilter(
+ this.scanline, this.previousScanline, this.expectedResult, this.bpp, out this.expectedSum);
+ break;
+
+ case PngFilterMethod.None:
+ case PngFilterMethod.Adaptive:
+ default:
+ throw new InvalidOperationException();
+ }
+ }
+
+ public void TestFilter() // runs the production filter with the same argument order as the reference call and compares sum and bytes
+ {
+ int sum;
+ switch (this.filter)
+ {
+ case PngFilterMethod.Sub:
+ SubFilter.Encode(this.scanline, this.resultBuffer, this.bpp, out sum);
+ break;
+
+ case PngFilterMethod.Up:
+ UpFilter.Encode(this.scanline, this.previousScanline, this.resultBuffer, out sum);
+ break;
+
+ case PngFilterMethod.Average:
+ AverageFilter.Encode(this.scanline, this.previousScanline, this.resultBuffer, this.bpp, out sum);
+ break;
+
+ case PngFilterMethod.Paeth:
+ PaethFilter.Encode(this.scanline, this.previousScanline, this.resultBuffer, this.bpp, out sum);
+ break;
+
+ case PngFilterMethod.None:
+ case PngFilterMethod.Adaptive:
+ default:
+ throw new InvalidOperationException();
+ }
+
+ Assert.Equal(this.expectedSum, sum);
+ Assert.Equal(this.expectedResult, this.resultBuffer);
+ }
+ }
+ }
+}
diff --git a/tests/ImageSharp.Tests/Formats/Png/ReferenceImplementations.cs b/tests/ImageSharp.Tests/Formats/Png/ReferenceImplementations.cs
new file mode 100644
index 0000000000..dd8ecc096d
--- /dev/null
+++ b/tests/ImageSharp.Tests/Formats/Png/ReferenceImplementations.cs
@@ -0,0 +1,229 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// ReSharper disable InconsistentNaming
+namespace SixLabors.ImageSharp.Tests.Formats.Png.Utils
+{
+ /// <summary>
+ /// This class contains reference implementations to produce verification data for unit tests
+ /// </summary>
+ internal static partial class ReferenceImplementations
+ {
+ /// <summary>
+ /// Encodes the scanline using the Paeth filter (scalar reference, filter type 4)
+ /// </summary>
+ /// <param name="scanline">The scanline to encode</param>
+ /// <param name="previousScanline">The previous scanline.</param>
+ /// <param name="result">The filtered scanline result.</param>
+ /// <param name="bytesPerPixel">The bytes per pixel.</param>
+ /// <param name="sum">The sum of the total variance of the filtered row</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void EncodePaethFilter(Span<byte> scanline, Span<byte> previousScanline, Span<byte> result, int bytesPerPixel, out int sum)
+ {
+ DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline));
+ DebugGuard.MustBeSizedAtLeast(result, scanline, nameof(result));
+
+ ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
+ ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline);
+ ref byte resultBaseRef = ref MemoryMarshal.GetReference(result);
+ sum = 0;
+
+ // Paeth(x) = Raw(x) - PaethPredictor(Raw(x-bpp), Prior(x), Prior(x - bpp))
+ resultBaseRef = 4;
+
+ int x = 0;
+ for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */)
+ {
+ byte scan = Unsafe.Add(ref scanBaseRef, x);
+ byte above = Unsafe.Add(ref prevBaseRef, x);
+ ++x;
+ ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
+ res = (byte)(scan - PaethPredictor(0, above, 0)); // first pixel: no left / upper-left neighbours, so they are treated as 0
+ sum += Numerics.Abs(unchecked((sbyte)res));
+ }
+
+ for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
+ {
+ byte scan = Unsafe.Add(ref scanBaseRef, x);
+ byte left = Unsafe.Add(ref scanBaseRef, xLeft);
+ byte above = Unsafe.Add(ref prevBaseRef, x);
+ byte upperLeft = Unsafe.Add(ref prevBaseRef, xLeft);
+ ++x;
+ ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
+ res = (byte)(scan - PaethPredictor(left, above, upperLeft));
+ sum += Numerics.Abs(unchecked((sbyte)res));
+ }
+
+ sum -= 4; // NOTE(review): subtracts the filter-type value although the loops above never add result[0]; presumably mirrors the production filter - confirm
+ }
+
+ /// <summary>
+ /// Encodes the scanline using the Sub filter (scalar reference, filter type 1)
+ /// </summary>
+ /// <param name="scanline">The scanline to encode</param>
+ /// <param name="result">The filtered scanline result.</param>
+ /// <param name="bytesPerPixel">The bytes per pixel.</param>
+ /// <param name="sum">The sum of the total variance of the filtered row</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void EncodeSubFilter(Span<byte> scanline, Span<byte> result, int bytesPerPixel, out int sum)
+ {
+ DebugGuard.MustBeSizedAtLeast(result, scanline, nameof(result));
+
+ ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
+ ref byte resultBaseRef = ref MemoryMarshal.GetReference(result);
+ sum = 0;
+
+ // Sub(x) = Raw(x) - Raw(x-bpp)
+ resultBaseRef = 1;
+
+ int x = 0;
+ for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */)
+ {
+ byte scan = Unsafe.Add(ref scanBaseRef, x);
+ ++x;
+ ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
+ res = scan; // first pixel has no left neighbour, so the raw byte is emitted
+ sum += Numerics.Abs(unchecked((sbyte)res));
+ }
+
+ for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
+ {
+ byte scan = Unsafe.Add(ref scanBaseRef, x);
+ byte prev = Unsafe.Add(ref scanBaseRef, xLeft);
+ ++x;
+ ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
+ res = (byte)(scan - prev);
+ sum += Numerics.Abs(unchecked((sbyte)res));
+ }
+
+ sum -= 1; // NOTE(review): subtracts the filter-type value although the loops above never add result[0]; presumably mirrors the production filter - confirm
+ }
+
+ /// <summary>
+ /// Encodes the scanline using the Up filter (scalar reference, filter type 2)
+ /// </summary>
+ /// <param name="scanline">The scanline to encode</param>
+ /// <param name="previousScanline">The previous scanline.</param>
+ /// <param name="result">The filtered scanline result.</param>
+ /// <param name="sum">The sum of the total variance of the filtered row</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void EncodeUpFilter(Span<byte> scanline, Span<byte> previousScanline, Span<byte> result, out int sum)
+ {
+ DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline));
+ DebugGuard.MustBeSizedAtLeast(result, scanline, nameof(result));
+
+ ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
+ ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline);
+ ref byte resultBaseRef = ref MemoryMarshal.GetReference(result);
+ sum = 0;
+
+ // Up(x) = Raw(x) - Prior(x)
+ resultBaseRef = 2;
+
+ int x = 0;
+
+ for (; x < scanline.Length; /* Note: ++x happens in the body to avoid one add operation */)
+ {
+ byte scan = Unsafe.Add(ref scanBaseRef, x);
+ byte above = Unsafe.Add(ref prevBaseRef, x);
+ ++x;
+ ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
+ res = (byte)(scan - above);
+ sum += Numerics.Abs(unchecked((sbyte)res));
+ }
+
+ sum -= 2; // NOTE(review): subtracts the filter-type value although the loop above never adds result[0]; presumably mirrors the production filter - confirm
+ }
+
+ /// <summary>
+ /// Encodes the scanline using the Average filter (scalar reference, filter type 3)
+ /// </summary>
+ /// <param name="scanline">The scanline to encode</param>
+ /// <param name="previousScanline">The previous scanline.</param>
+ /// <param name="result">The filtered scanline result.</param>
+ /// <param name="bytesPerPixel">The bytes per pixel.</param>
+ /// <param name="sum">The sum of the total variance of the filtered row</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void EncodeAverageFilter(Span<byte> scanline, Span<byte> previousScanline, Span<byte> result, int bytesPerPixel, out int sum)
+ {
+ DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline));
+ DebugGuard.MustBeSizedAtLeast(result, scanline, nameof(result));
+
+ ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
+ ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline);
+ ref byte resultBaseRef = ref MemoryMarshal.GetReference(result);
+ sum = 0;
+
+ // Average(x) = Raw(x) - floor((Raw(x-bpp)+Prior(x))/2)
+ resultBaseRef = 3;
+
+ int x = 0;
+ for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */)
+ {
+ byte scan = Unsafe.Add(ref scanBaseRef, x);
+ byte above = Unsafe.Add(ref prevBaseRef, x);
+ ++x;
+ ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
+ res = (byte)(scan - (above >> 1)); // first pixel: left neighbour counts as 0, so the average is above / 2
+ sum += Numerics.Abs(unchecked((sbyte)res));
+ }
+
+ for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
+ {
+ byte scan = Unsafe.Add(ref scanBaseRef, x);
+ byte left = Unsafe.Add(ref scanBaseRef, xLeft);
+ byte above = Unsafe.Add(ref prevBaseRef, x);
+ ++x;
+ ref byte res = ref Unsafe.Add(ref resultBaseRef, x);
+ res = (byte)(scan - Average(left, above));
+ sum += Numerics.Abs(unchecked((sbyte)res));
+ }
+
+ sum -= 3; // NOTE(review): subtracts the filter-type value although the loops above never add result[0]; presumably mirrors the production filter - confirm
+ }
+
+ /// <summary>
+ /// Calculates the average value of two bytes
+ /// </summary>
+ /// <param name="left">The left byte</param>
+ /// <param name="above">The above byte</param>
+ /// <returns>The <see cref="int"/></returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int Average(byte left, byte above) => (left + above) >> 1;
+
+ /// <summary>
+ /// Computes a simple linear function of the three neighboring pixels (left, above, upper left), then chooses
+ /// as predictor the neighboring pixel closest to the computed value.
+ /// </summary>
+ /// <param name="left">The left neighbor pixel.</param>
+ /// <param name="above">The above neighbor pixel.</param>
+ /// <param name="upperLeft">The upper left neighbor pixel.</param>
+ /// <returns>
+ /// The <see cref="byte"/>.
+ /// </returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static byte PaethPredictor(byte left, byte above, byte upperLeft)
+ {
+ int p = left + above - upperLeft;
+ int pa = Numerics.Abs(p - left);
+ int pb = Numerics.Abs(p - above);
+ int pc = Numerics.Abs(p - upperLeft);
+
+ if (pa <= pb && pa <= pc) // ties resolve in the order left, above, upperLeft
+ {
+ return left;
+ }
+
+ if (pb <= pc)
+ {
+ return above;
+ }
+
+ return upperLeft;
+ }
+ }
+}