Browse Source

Add AVX2 version of Vp8_Sse16X16 and Vp8_Sse16X8

pull/1881/head
Brian Popow 4 years ago
parent
commit
3d00c68a72
  1. 62
      src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
  2. 6
      tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs

62
src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

@ -43,6 +43,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public static int Vp8_Sse16X16(Span<byte> a, Span<byte> b)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
return Vp8_Sse16xN_Avx2(a, b, 4);
}
if (Sse2.IsSupported)
{
return Vp8_Sse16xN_Sse2(a, b, 8);
@ -58,6 +63,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public static int Vp8_Sse16X8(Span<byte> a, Span<byte> b)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
return Vp8_Sse16xN_Avx2(a, b, 2);
}
if (Sse2.IsSupported)
{
return Vp8_Sse16xN_Sse2(a, b, 4);
@ -194,6 +204,39 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return Numerics.ReduceSum(sum);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static int Vp8_Sse16xN_Avx2(Span<byte> a, Span<byte> b, int numPairs)
{
Vector256<int> sum = Vector256<int>.Zero;
int offset = 0;
for (int i = 0; i < numPairs; i++)
{
// Load values.
ref byte aRef = ref MemoryMarshal.GetReference(a);
ref byte bRef = ref MemoryMarshal.GetReference(b);
var a0 = Vector256.Create(
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aRef, offset)),
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aRef, offset + WebpConstants.Bps)));
var b0 = Vector256.Create(
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bRef, offset)),
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bRef, offset + WebpConstants.Bps)));
var a1 = Vector256.Create(
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aRef, offset + (2 * WebpConstants.Bps))),
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref aRef, offset + (3 * WebpConstants.Bps))));
var b1 = Vector256.Create(
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bRef, offset + (2 * WebpConstants.Bps))),
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref bRef, offset + (3 * WebpConstants.Bps))));
Vector256<int> sum1 = SubtractAndAccumulate(a0, b0);
Vector256<int> sum2 = SubtractAndAccumulate(a1, b1);
sum = Avx2.Add(sum, Avx2.Add(sum1, sum2));
offset += 4 * WebpConstants.Bps;
}
return Numerics.ReduceSum(sum);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector128<int> SubtractAndAccumulate(Vector128<byte> a, Vector128<byte> b)
{
@ -212,6 +255,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return Sse2.Add(sum1, sum2);
}
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector256<int> SubtractAndAccumulate(Vector256<byte> a, Vector256<byte> b)
{
// Take abs(a-b) in 8b.
Vector256<byte> ab = Avx2.SubtractSaturate(a, b);
Vector256<byte> ba = Avx2.SubtractSaturate(b, a);
Vector256<byte> absAb = Avx2.Or(ab, ba);
// Zero-extend to 16b.
Vector256<byte> c0 = Avx2.UnpackLow(absAb, Vector256<byte>.Zero);
Vector256<byte> c1 = Avx2.UnpackHigh(absAb, Vector256<byte>.Zero);
// Multiply with self.
Vector256<int> sum1 = Avx2.MultiplyAddAdjacent(c0.AsInt16(), c0.AsInt16());
Vector256<int> sum2 = Avx2.MultiplyAddAdjacent(c1.AsInt16(), c1.AsInt16());
return Avx2.Add(sum1, sum2);
}
#endif
[MethodImpl(InliningOptions.ShortMethod)]

6
tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs

@ -320,12 +320,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Fact]
public void Vp8Sse16X16_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X16Test, HwIntrinsics.DisableSSE2);
[Fact]
public void Vp8Sse16X16_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X16Test, HwIntrinsics.DisableAVX2);
[Fact]
public void Vp8Sse16X8_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X8Test, HwIntrinsics.AllowAll);
[Fact]
public void Vp8Sse16X8_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X8Test, HwIntrinsics.DisableSSE2);
[Fact]
public void Vp8Sse16X8_WithoutAVX2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse16X8Test, HwIntrinsics.DisableAVX2);
[Fact]
public void Vp8Sse4X4_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.AllowAll);

Loading…
Cancel
Save