Browse Source

Added more specialized Png filter code

Modified tests accordingly
pull/1630/head
TechPizza 5 years ago
parent
commit
5c7f4a9ab3
  1. 46
      src/ImageSharp/Common/Helpers/Numerics.cs
  2. 10
      src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
  3. 5
      src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
  4. 26
      src/ImageSharp/Formats/Png/Filters/SubFilter.cs
  5. 26
      src/ImageSharp/Formats/Png/Filters/UpFilter.cs
  6. 49
      tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs

46
src/ImageSharp/Common/Helpers/Numerics.cs

@ -749,6 +749,7 @@ namespace SixLabors.ImageSharp
public static float Lerp(float value1, float value2, float amount)
=> ((value2 - value1) * amount) + value1;
#if SUPPORTS_RUNTIME_INTRINSICS
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
{
@ -762,5 +763,50 @@ namespace SixLabors.ImageSharp
accumulator += intLow;
accumulator += intHigh;
}
/// <summary>
/// Adds together every 32-bit element of the vector and returns the scalar total.
/// </summary>
/// <param name="accumulator">The accumulator to reduce.</param>
/// <returns>The sum of all elements.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int ReduceSum(Vector128<int> accumulator)
{
    if (!Ssse3.IsSupported)
    {
        // Software path: walk the lanes and add them one at a time.
        int total = 0;
        int lane = 0;
        while (lane < Vector128<int>.Count)
        {
            total += accumulator.GetElement(lane);
            lane++;
        }

        return total;
    }

    // Pairwise add of adjacent lanes: [a, b, c, d] -> [a+b, c+d, a+b, c+d].
    Vector128<int> pairs = Ssse3.HorizontalAdd(accumulator, accumulator);

    // Control 0x1 places lane 1 (c+d) into lane 0, so lane 0 of the sum holds a+b+c+d.
    Vector128<int> combined = Sse2.Add(pairs, Sse2.Shuffle(pairs, 0x1));

    // Vector128<int>.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
    return Sse2.ConvertToInt32(combined);
}
/// <summary>
/// Reduces even elements of the vector into one sum.
/// </summary>
/// <remarks>
/// Only the elements at indices 0, 2, 4 and 6 contribute to the result; the odd elements are ignored.
/// The PNG filters pass accumulators built from <c>Avx2.SumAbsoluteDifferences(...).AsInt32()</c>.
/// When SSE2 is not available a scalar fallback is used, so the method is safe to call on any hardware.
/// </remarks>
/// <param name="accumulator">The accumulator to reduce.</param>
/// <returns>The sum of even elements.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EvenReduceSum(Vector256<int> accumulator)
{
    if (Sse2.IsSupported)
    {
        // Add upper lane to lower lane: [e0+e4, e1+e5, e2+e6, e3+e7].
        Vector128<int> vsum = Sse2.Add(accumulator.GetLower(), accumulator.GetUpper());

        // Add high to low: lane 0 becomes (e0+e4) + (e2+e6); the odd lanes never reach lane 0.
        vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10));

        // Vector128<int>.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
        return Sse2.ConvertToInt32(vsum);
    }
    else
    {
        // Scalar fallback: the unguarded Sse2 calls would otherwise throw PlatformNotSupportedException.
        int sum = 0;
        for (int i = 0; i < Vector256<int>.Count; i += 2)
        {
            sum += accumulator.GetElement(i);
        }

        return sum;
    }
}
#endif
}
}

10
src/ImageSharp/Formats/Png/Filters/AverageFilter.cs

@ -106,10 +106,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
}
for (int i = 0; i < Vector256<int>.Count; i++)
{
sum += sumAccumulator.GetElement(i);
}
sum += Numerics.EvenReduceSum(sumAccumulator);
}
else if (Sse2.IsSupported)
{
@ -156,10 +153,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);
}
for (int i = 0; i < Vector128<int>.Count; i++)
{
sum += sumAccumulator.GetElement(i);
}
sum += Numerics.ReduceSum(sumAccumulator);
}
#endif

5
src/ImageSharp/Formats/Png/Filters/PaethFilter.cs

@ -108,10 +108,7 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
}
for (int i = 0; i < Vector256<int>.Count; i++)
{
sum += sumAccumulator.GetElement(i);
}
sum += Numerics.EvenReduceSum(sumAccumulator);
}
else if (Vector.IsHardwareAccelerated)
{

26
src/ImageSharp/Formats/Png/Filters/SubFilter.cs

@ -6,6 +6,11 @@ using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Png.Filters
{
/// <summary>
@ -66,7 +71,26 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
}
#if SUPPORTS_RUNTIME_INTRINSICS
if (Vector.IsHardwareAccelerated)
if (Avx2.IsSupported)
{
Vector256<byte> zero = Vector256<byte>.Zero;
Vector256<int> sumAccumulator = Vector256<int>.Zero;
for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
{
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector256<byte> prev = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Vector256<byte> res = Avx2.Subtract(scan, prev);
Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector256<byte>.Count;
sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
}
sum += Numerics.EvenReduceSum(sumAccumulator);
}
else if (Vector.IsHardwareAccelerated)
{
Vector<uint> sumAccumulator = Vector<uint>.Zero;

26
src/ImageSharp/Formats/Png/Filters/UpFilter.cs

@ -6,6 +6,11 @@ using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Png.Filters
{
/// <summary>
@ -61,7 +66,26 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
int x = 0;
#if SUPPORTS_RUNTIME_INTRINSICS
if (Vector.IsHardwareAccelerated)
if (Avx2.IsSupported)
{
Vector256<byte> zero = Vector256<byte>.Zero;
Vector256<int> sumAccumulator = Vector256<int>.Zero;
for (; x + Vector256<byte>.Count <= scanline.Length;)
{
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
Vector256<byte> res = Avx2.Subtract(scan, above);
Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector256<byte>.Count;
sumAccumulator = Avx2.Add(sumAccumulator, Avx2.SumAbsoluteDifferences(Avx2.Abs(res.AsSByte()), zero).AsInt32());
}
sum += Numerics.EvenReduceSum(sumAccumulator);
}
else if (Vector.IsHardwareAccelerated)
{
Vector<uint> sumAccumulator = Vector<uint>.Zero;

49
tests/ImageSharp.Tests/Formats/Png/PngFilterTests.cs

@ -101,7 +101,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
}
[Fact]
public void PaethSimd()
public void PaethAvx2()
{
static void RunTest()
{
@ -114,6 +114,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
HwIntrinsics.AllowAll);
}
[Fact]
public void PaethVector()
{
    // All intrinsics stay allowed except AVX2.
    // NOTE(review): presumably this forces the Vector<T> branch of the Paeth filter - confirm against PaethFilter.
    HwIntrinsics withoutAvx2 = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2;

    FeatureTestRunner.RunWithHwIntrinsicsFeature(FilterAndVerify, withoutAvx2);

    // Static local function: builds the Paeth test data and runs the filter test on it.
    static void FilterAndVerify() => new TestData(PngFilterMethod.Paeth, Size).TestFilter();
}
[Fact]
public void Up()
{
@ -128,8 +142,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
HwIntrinsics.DisableSIMD);
}
[Fact]
public void UpSimd()
public void UpAvx2()
{
static void RunTest()
{
@ -142,6 +157,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
HwIntrinsics.AllowAll);
}
[Fact]
public void UpVector()
{
    // All intrinsics stay allowed except AVX2.
    // NOTE(review): presumably this forces the Vector<T> branch of the Up filter - confirm against UpFilter.
    HwIntrinsics withoutAvx2 = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2;

    FeatureTestRunner.RunWithHwIntrinsicsFeature(FilterAndVerify, withoutAvx2);

    // Static local function: builds the Up test data and runs the filter test on it.
    static void FilterAndVerify() => new TestData(PngFilterMethod.Up, Size).TestFilter();
}
[Fact]
public void Sub()
{
@ -157,7 +186,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
}
[Fact]
public void SubSimd()
public void SubAvx2()
{
static void RunTest()
{
@ -170,6 +199,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png
HwIntrinsics.AllowAll);
}
[Fact]
public void SubVector()
{
    // All intrinsics stay allowed except AVX2.
    // NOTE(review): presumably this forces the Vector<T> branch of the Sub filter - confirm against SubFilter.
    HwIntrinsics withoutAvx2 = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2;

    FeatureTestRunner.RunWithHwIntrinsicsFeature(FilterAndVerify, withoutAvx2);

    // Static local function: builds the Sub test data and runs the filter test on it.
    static void FilterAndVerify() => new TestData(PngFilterMethod.Sub, Size).TestFilter();
}
public class TestData
{
private readonly PngFilterMethod filter;

Loading…
Cancel
Save