Browse Source

Merge remote-tracking branch 'origin/master' into bp/sse4X4

# Conflicts:
#	tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs
pull/1817/head
Brian Popow 4 years ago
parent
commit
dcca23609c
  1. 79
      src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
  2. 25
      src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
  3. 24
      src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs
  4. 31
      src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs
  5. 34
      tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs

79
src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

@ -15,6 +15,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
internal static class LossyUtils
{
#if SUPPORTS_RUNTIME_INTRINSICS
// Every 16-bit lane holds 0x00ff. Mean16x4 ANDs its input rows with this mask
// to keep only the low byte of each 16-bit lane.
private static readonly Vector128<byte> Mean16x4Mask = Vector128.Create((short)0x00ff).AsByte();
#endif
// Forwards to GetSse for the two given spans, passing 16 for both size arguments.
[MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8Sse16X16(Span<byte> a, Span<byte> b)
{
    const int blockSize = 16;
    return GetSse(a, b, blockSize, blockSize);
}
@ -975,26 +979,55 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
FilterLoop24(v, offsetPlus4, 1, stride, 8, thresh, ithresh, hevThresh);
}
// Packs both chroma samples into one 32-bit value so they can be processed together:
// u occupies the low 16 bits and v the high 16 bits.
[MethodImpl(InliningOptions.ShortMethod)]
public static uint LoadUv(byte u, byte v)
{
    int packed = (v << 16) | u;
    return (uint)packed;
}
// NOTE(review): this span comes from a flattened diff view. The YuvToBgr / YuvToB / YuvToG / YuvToR
// lines appear to be the removed side of the hunk and the Mean16x4 lines the added side - confirm
// against the real file before relying on the statement order shown here.
//
// Mean16x4 writes four values to dc: for each of four horizontally adjacent 4x4 blocks of input
// (rows WebpConstants.Bps bytes apart), the sum of the block's 16 bytes.
[MethodImpl(InliningOptions.ShortMethod)]
public static void YuvToBgr(int y, int u, int v, Span<byte> bgr)
public static void Mean16x4(Span<byte> input, Span<uint> dc)
{
bgr[0] = (byte)YuvToB(y, u);
bgr[1] = (byte)YuvToG(y, u, v);
bgr[2] = (byte)YuvToR(y, v);
}
[MethodImpl(InliningOptions.ShortMethod)]
public static int YuvToB(int y, int u) => Clip8(MultHi(y, 19077) + MultHi(u, 33050) - 17685);
[MethodImpl(InliningOptions.ShortMethod)]
public static int YuvToG(int y, int u, int v) => Clip8(MultHi(y, 19077) - MultHi(u, 6419) - MultHi(v, 13320) + 8708);
#if SUPPORTS_RUNTIME_INTRINSICS
if (Ssse3.IsSupported)
{
// Read 16 bytes from each of four rows; consecutive rows start WebpConstants.Bps bytes apart.
// The first read reinterprets the span's first element directly, so it is not length-checked here;
// the other three go through Slice(offset, 16), which validates the range.
Vector128<byte> a0 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(input));
Vector128<byte> a1 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps, 16)));
Vector128<byte> a2 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps * 2, 16)));
Vector128<byte> a3 = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(input.Slice(WebpConstants.Bps * 3, 16)));
// Shift every 16-bit lane right by 8 so that only its high byte remains.
Vector128<short> b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte
Vector128<short> b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8);
Vector128<short> b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8);
Vector128<short> b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8);
// Mask every 16-bit lane with 0x00ff so that only its low byte remains.
Vector128<byte> c0 = Sse2.And(a0, Mean16x4Mask); // lo byte
Vector128<byte> c1 = Sse2.And(a1, Mean16x4Mask);
Vector128<byte> c2 = Sse2.And(a2, Mean16x4Mask);
Vector128<byte> c3 = Sse2.And(a3, Mean16x4Mask);
// High + low: each 16-bit lane now holds the sum of two adjacent input bytes.
// The add is done on 32-bit lanes, but each 16-bit half is at most 255 + 255, so nothing carries across.
Vector128<int> d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32());
Vector128<int> d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32());
Vector128<int> d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32());
Vector128<int> d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32());
// Accumulate the four rows into a single vector.
Vector128<int> e0 = Sse2.Add(d0, d1);
Vector128<int> e1 = Sse2.Add(d2, d3);
Vector128<int> f0 = Sse2.Add(e0, e1);
// Add adjacent 16-bit lanes: the low four lanes of the result hold one sum per group of four columns.
Vector128<short> hadd = Ssse3.HorizontalAdd(f0.AsInt16(), f0.AsInt16());
// Interleave the low four 16-bit sums with zero to widen them to 32-bit values.
Vector128<uint> wide = Sse2.UnpackLow(hadd, Vector128<short>.Zero).AsUInt32();
// Store all four sums with one 128-bit write; this write does not check that dc has at least 4 elements.
ref uint outputRef = ref MemoryMarshal.GetReference(dc);
Unsafe.As<uint, Vector128<uint>>(ref outputRef) = wide;
}
else
#endif
{
// Scalar path: sum each of the four 4x4 blocks one after another.
for (int k = 0; k < 4; k++)
{
uint avg = 0;
for (int y = 0; y < 4; y++)
{
for (int x = 0; x < 4; x++)
{
avg += input[x + (y * WebpConstants.Bps)];
}
}
[MethodImpl(InliningOptions.ShortMethod)]
public static int YuvToR(int y, int v) => Clip8(MultHi(y, 19077) + MultHi(v, 26149) - 14234);
dc[k] = avg;
input = input.Slice(4); // go to next 4x4 block.
}
}
}
// Average of two bytes, rounded up: (a + b + 1) / 2.
[MethodImpl(InliningOptions.ShortMethod)]
public static byte Avg2(byte a, byte b)
{
    int roundedSum = a + b + 1;
    return (byte)(roundedSum >> 1);
}
@ -1200,9 +1233,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return WebpLookupTables.Abs0(p1 - p0) > thresh || WebpLookupTables.Abs0(q1 - q0) > thresh;
}
// Multiplies v by coeff and drops the low 8 bits of the product (arithmetic shift).
[MethodImpl(InliningOptions.ShortMethod)]
private static int MultHi(int v, int coeff)
{
    int product = v * coeff;
    return product >> 8;
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Store(Span<byte> dst, int x, int y, int v)
{
@ -1225,13 +1255,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Scales a by the fixed-point factor 35468 / 65536.
[MethodImpl(InliningOptions.ShortMethod)]
private static int Mul2(int a)
{
    int scaled = a * 35468;
    return scaled >> 16;
}
// Converts a value carrying 6 fractional bits to a byte: values whose bits all fall
// inside the 14-bit mask are shifted down by 6, negatives clamp to 0, the rest to 255.
[MethodImpl(InliningOptions.ShortMethod)]
private static byte Clip8(int v)
{
    const int inRangeBits = (256 << 6) - 1;

    if ((v & ~inRangeBits) == 0)
    {
        return (byte)(v >> 6);
    }

    return v < 0 ? (byte)0 : (byte)255;
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Put8x8uv(byte value, Span<byte> dst)
{

25
src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs

@ -357,15 +357,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q = quality;
int kThreshold = 8 + ((17 - 8) * q / 100);
int k;
uint[] dc = new uint[16];
Span<uint> dc = stackalloc uint[16];
Span<ushort> tmp = stackalloc ushort[16];
uint m;
uint m2;
for (k = 0; k < 16; k += 4)
{
this.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.AsSpan(k));
LossyUtils.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.Slice(k, 4));
}
for (m = 0, m2 = 0, k = 0; k < 16; ++k)
for (m = 0, m2 = 0, k = 0; k < 16; k++)
{
m += dc[k];
m2 += dc[k] * dc[k];
@ -823,24 +824,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.Nz[this.nzIdx] = nz;
}
// For each of four horizontally adjacent 4x4 blocks of input (rows WebpConstants.Bps bytes apart),
// stores the sum of the block's 16 bytes in dc[0..3].
private void Mean16x4(Span<byte> input, Span<uint> dc)
{
    const int blockSize = 4;

    for (int block = 0; block < 4; block++)
    {
        // Each block starts blockSize columns to the right of the previous one.
        int blockStart = block * blockSize;
        uint sum = 0;

        for (int row = 0; row < blockSize; row++)
        {
            int rowStart = blockStart + (row * WebpConstants.Bps);
            for (int col = 0; col < blockSize; col++)
            {
                sum += input[rowStart + col];
            }
        }

        dc[block] = sum;
    }
}
private void ImportBlock(Span<byte> src, int srcStride, Span<byte> dst, int w, int h, int size)
{
int dstIdx = 0;

24
src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs

@ -747,21 +747,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
int xStep = 3;
int lastPixelPair = (len - 1) >> 1;
uint tluv = LossyUtils.LoadUv(topU[0], topV[0]); // top-left sample
uint luv = LossyUtils.LoadUv(curU[0], curV[0]); // left-sample
uint tluv = YuvConversion.LoadUv(topU[0], topV[0]); // top-left sample
uint luv = YuvConversion.LoadUv(curU[0], curV[0]); // left-sample
uint uv0 = ((3 * tluv) + luv + 0x00020002u) >> 2;
LossyUtils.YuvToBgr(topY[0], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst);
YuvConversion.YuvToBgr(topY[0], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst);
if (bottomY != null)
{
uv0 = ((3 * luv) + tluv + 0x00020002u) >> 2;
LossyUtils.YuvToBgr(bottomY[0], (int)uv0 & 0xff, (int)(uv0 >> 16), bottomDst);
YuvConversion.YuvToBgr(bottomY[0], (int)uv0 & 0xff, (int)(uv0 >> 16), bottomDst);
}
for (int x = 1; x <= lastPixelPair; x++)
{
uint tuv = LossyUtils.LoadUv(topU[x], topV[x]); // top sample
uint uv = LossyUtils.LoadUv(curU[x], curV[x]); // sample
uint tuv = YuvConversion.LoadUv(topU[x], topV[x]); // top sample
uint uv = YuvConversion.LoadUv(curU[x], curV[x]); // sample
// Precompute invariant values associated with first and second diagonals.
uint avg = tluv + tuv + luv + uv + 0x00080008u;
@ -770,15 +770,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
uv0 = (diag12 + tluv) >> 1;
uint uv1 = (diag03 + tuv) >> 1;
int xMul2 = x * 2;
LossyUtils.YuvToBgr(topY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((xMul2 - 1) * xStep));
LossyUtils.YuvToBgr(topY[xMul2 - 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), topDst.Slice((xMul2 - 0) * xStep));
YuvConversion.YuvToBgr(topY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((xMul2 - 1) * xStep));
YuvConversion.YuvToBgr(topY[xMul2 - 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), topDst.Slice((xMul2 - 0) * xStep));
if (bottomY != null)
{
uv0 = (diag03 + luv) >> 1;
uv1 = (diag12 + uv) >> 1;
LossyUtils.YuvToBgr(bottomY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((xMul2 - 1) * xStep));
LossyUtils.YuvToBgr(bottomY[xMul2 + 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), bottomDst.Slice((xMul2 + 0) * xStep));
YuvConversion.YuvToBgr(bottomY[xMul2 - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((xMul2 - 1) * xStep));
YuvConversion.YuvToBgr(bottomY[xMul2 + 0], (int)(uv1 & 0xff), (int)(uv1 >> 16), bottomDst.Slice((xMul2 + 0) * xStep));
}
tluv = tuv;
@ -788,11 +788,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
if ((len & 1) == 0)
{
uv0 = ((3 * tluv) + luv + 0x00020002u) >> 2;
LossyUtils.YuvToBgr(topY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((len - 1) * xStep));
YuvConversion.YuvToBgr(topY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), topDst.Slice((len - 1) * xStep));
if (bottomY != null)
{
uv0 = ((3 * luv) + tluv + 0x00020002u) >> 2;
LossyUtils.YuvToBgr(bottomY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((len - 1) * xStep));
YuvConversion.YuvToBgr(bottomY[len - 1], (int)(uv0 & 0xff), (int)(uv0 >> 16), bottomDst.Slice((len - 1) * xStep));
}
}
}

31
src/ImageSharp/Formats/Webp/Lossy/YuvConversion.cs

@ -299,5 +299,36 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
uv = (uv + rounding + (128 << (YuvFix + 2))) >> (YuvFix + 2);
return (uv & ~0xff) == 0 ? uv : uv < 0 ? 0 : 255;
}
// Stashes u and v into a single 32-bit value (16 bits each) so both can be processed at once:
// u lands in the low half, v in the high half.
[MethodImpl(InliningOptions.ShortMethod)]
public static uint LoadUv(byte u, byte v)
{
    uint lowHalf = u;
    uint highHalf = (uint)v << 16;
    return lowHalf | highHalf;
}
// Converts one YUV sample and writes it to the first three bytes of bgr
// in blue, green, red order.
[MethodImpl(InliningOptions.ShortMethod)]
public static void YuvToBgr(int y, int u, int v, Span<byte> bgr)
{
    byte red = (byte)YuvToR(y, v);
    byte green = (byte)YuvToG(y, u, v);
    byte blue = (byte)YuvToB(y, u);

    // Write the highest index first, so a too-short span fails before anything is stored.
    bgr[2] = red;
    bgr[1] = green;
    bgr[0] = blue;
}
// Blue channel from y and u, using fixed-point coefficients; the result is clipped by Clip8.
[MethodImpl(InliningOptions.ShortMethod)]
public static int YuvToB(int y, int u)
{
    int lumaTerm = MultHi(y, 19077);
    int chromaTerm = MultHi(u, 33050);
    return Clip8(lumaTerm + chromaTerm - 17685);
}
// Green channel from y, u and v, using fixed-point coefficients; the result is clipped by Clip8.
[MethodImpl(InliningOptions.ShortMethod)]
public static int YuvToG(int y, int u, int v)
{
    int lumaTerm = MultHi(y, 19077);
    int uTerm = MultHi(u, 6419);
    int vTerm = MultHi(v, 13320);
    return Clip8(lumaTerm - uTerm - vTerm + 8708);
}
// Red channel from y and v, using fixed-point coefficients; the result is clipped by Clip8.
[MethodImpl(InliningOptions.ShortMethod)]
public static int YuvToR(int y, int v)
{
    int lumaTerm = MultHi(y, 19077);
    int chromaTerm = MultHi(v, 26149);
    return Clip8(lumaTerm + chromaTerm - 14234);
}
// Returns (v * coeff) with the low 8 bits shifted out.
[MethodImpl(InliningOptions.ShortMethod)]
private static int MultHi(int v, int coeff)
{
    int scaled = v * coeff;
    return scaled >> 8;
}
// Reduces a value with 6 fractional bits to a byte. If no bit outside the 14-bit mask is set
// the value is shifted right by 6; otherwise it saturates to 0 (negative) or 255.
[MethodImpl(InliningOptions.ShortMethod)]
private static byte Clip8(int v)
{
    int outOfRangeBits = ~((256 << 6) - 1);
    bool fits = (v & outOfRangeBits) == 0;

    if (fits)
    {
        return (byte)(v >> 6);
    }

    if (v < 0)
    {
        return 0;
    }

    return 255;
}
}
}

34
tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs

@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System.Linq;
using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
@ -37,6 +38,29 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP
int actual = LossyUtils.Vp8Sse4X4(a, b);
Assert.Equal(expected, actual);
}
// Shared body for the Mean16x4 tests: feeds a fixed 128-byte input to LossyUtils.Mean16x4
// and compares the four resulting sums with known-good values.
private static void RunMean16x4Test()
{
    // arrange
    byte[] input =
    {
        154, 145, 102, 115, 127, 129, 126, 125, 126, 120, 133, 152, 157, 153, 119, 94, 104, 116, 111, 113,
        113, 109, 105, 124, 173, 175, 177, 170, 175, 172, 166, 164, 151, 141, 99, 114, 125, 126, 135, 150,
        133, 115, 127, 149, 141, 168, 100, 54, 110, 117, 115, 116, 119, 115, 117, 130, 174, 174, 174, 157,
        146, 171, 166, 158, 117, 140, 96, 111, 119, 119, 136, 171, 188, 134, 121, 126, 136, 119, 59, 77,
        109, 115, 113, 120, 120, 117, 128, 115, 174, 173, 173, 161, 152, 148, 153, 162, 105, 140, 96, 114,
        115, 122, 141, 173, 190, 190, 142, 106, 151, 78, 66, 141, 110, 117, 123, 136, 118, 124, 127, 114,
        173, 175, 166, 155, 155, 159, 159, 158
    };
    uint[] dc = new uint[4];
    uint[] expectedDc = { 1940, 2139, 2252, 1813 };

    // act
    LossyUtils.Mean16x4(input, dc);

    // assert
    // Assert.Equal compares the arrays element by element and reports the mismatch on failure,
    // whereas Assert.True(SequenceEqual(...)) would only report "False".
    Assert.Equal(expectedDc, dc);
}
private static void RunHadamardTransformTest()
@ -69,6 +93,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP
[Fact]
public void Vp8Sse4X4_Works()
{
    // Runs the shared test body directly in the current process.
    RunVp8Sse4X4Test();
}
[Fact]
public void Mean16x4_Works()
{
    // Runs the shared test body directly in the current process.
    RunMean16x4Test();
}
[Fact]
public void HadamardTransform_Works()
{
    // Runs the shared test body directly in the current process.
    RunHadamardTransformTest();
}
@ -79,12 +106,17 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP
[Fact]
public void Vp8Sse4X4_WithoutHardwareIntrinsics_Works()
{
    // Runs the shared test body through FeatureTestRunner with the HwIntrinsics.DisableHWIntrinsic setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunVp8Sse4X4Test, HwIntrinsics.DisableHWIntrinsic);
}
[Fact]
public void Mean16x4_WithHardwareIntrinsics_Works()
{
    // Runs the shared test body through FeatureTestRunner with the HwIntrinsics.AllowAll setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.AllowAll);
}
[Fact]
public void Mean16x4_WithoutHardwareIntrinsics_Works()
{
    // Runs the shared test body through FeatureTestRunner with the HwIntrinsics.DisableHWIntrinsic setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunMean16x4Test, HwIntrinsics.DisableHWIntrinsic);
}
[Fact]
public void HadamardTransform_WithHardwareIntrinsics_Works()
{
    // Runs the shared test body through FeatureTestRunner with the HwIntrinsics.AllowAll setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.AllowAll);
}
[Fact]
public void HadamardTransform_WithoutHardwareIntrinsics_Works()
{
    // Runs the shared test body through FeatureTestRunner with the HwIntrinsics.DisableHWIntrinsic setting.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunHadamardTransformTest, HwIntrinsics.DisableHWIntrinsic);
}
#endif
}
}

Loading…
Cancel
Save