Browse Source

Fix calculation of mode score for SSE2/AVX2 version

pull/2355/head
Brian Popow 3 years ago
parent
commit
280e3762d2
  1. 18
      src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
  2. 12
      src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs

18
src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

@ -14,7 +14,7 @@ internal static class LossyUtils
{ {
// Note: method name in libwebp reference implementation is called VP8SSE16x16. // Note: method name in libwebp reference implementation is called VP8SSE16x16.
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8_Sse16X16(Span<byte> a, Span<byte> b) public static int Vp8_Sse16x16(Span<byte> a, Span<byte> b)
{ {
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
@ -31,7 +31,7 @@ internal static class LossyUtils
// Note: method name in libwebp reference implementation is called VP8SSE16x8. // Note: method name in libwebp reference implementation is called VP8SSE16x8.
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8_Sse16X8(Span<byte> a, Span<byte> b) public static int Vp8_Sse16x8(Span<byte> a, Span<byte> b)
{ {
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
@ -48,7 +48,7 @@ internal static class LossyUtils
// Note: method name in libwebp reference implementation is called VP8SSE4x4. // Note: method name in libwebp reference implementation is called VP8SSE4x4.
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8_Sse4X4(Span<byte> a, Span<byte> b) public static int Vp8_Sse4x4(Span<byte> a, Span<byte> b)
{ {
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
@ -77,8 +77,8 @@ internal static class LossyUtils
Vector256<byte> b01s = Avx2.UnpackLow(b01.AsByte(), Vector256<byte>.Zero); Vector256<byte> b01s = Avx2.UnpackLow(b01.AsByte(), Vector256<byte>.Zero);
// subtract, square and accumulate. // subtract, square and accumulate.
Vector256<byte> d0 = Avx2.SubtractSaturate(a01s, b01s); Vector256<short> d0 = Avx2.SubtractSaturate(a01s.AsInt16(), b01s.AsInt16());
Vector256<int> e0 = Avx2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); Vector256<int> e0 = Avx2.MultiplyAddAdjacent(d0, d0);
return Numerics.ReduceSum(e0); return Numerics.ReduceSum(e0);
} }
@ -110,10 +110,10 @@ internal static class LossyUtils
Vector128<byte> b23s = Sse2.UnpackLow(b23.AsByte(), Vector128<byte>.Zero); Vector128<byte> b23s = Sse2.UnpackLow(b23.AsByte(), Vector128<byte>.Zero);
// subtract, square and accumulate. // subtract, square and accumulate.
Vector128<byte> d0 = Sse2.SubtractSaturate(a01s, b01s); Vector128<short> d0 = Sse2.SubtractSaturate(a01s.AsInt16(), b01s.AsInt16());
Vector128<byte> d1 = Sse2.SubtractSaturate(a23s, b23s); Vector128<short> d1 = Sse2.SubtractSaturate(a23s.AsInt16(), b23s.AsInt16());
Vector128<int> e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16()); Vector128<int> e0 = Sse2.MultiplyAddAdjacent(d0, d0);
Vector128<int> e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16()); Vector128<int> e1 = Sse2.MultiplyAddAdjacent(d1, d1);
Vector128<int> sum = Sse2.Add(e0, e1); Vector128<int> sum = Sse2.Add(e0, e1);
return Numerics.ReduceSum(sum); return Numerics.ReduceSum(sum);

12
src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs

@ -53,7 +53,7 @@ internal static unsafe class QuantEnc
rdCur.Nz = (uint)ReconstructIntra16(it, dqm, rdCur, tmpDst, mode); rdCur.Nz = (uint)ReconstructIntra16(it, dqm, rdCur, tmpDst, mode);
// Measure RD-score. // Measure RD-score.
rdCur.D = LossyUtils.Vp8_Sse16X16(src, tmpDst); rdCur.D = LossyUtils.Vp8_Sse16x16(src, tmpDst);
rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0; rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0;
rdCur.H = WebpConstants.Vp8FixedCostsI16[mode]; rdCur.H = WebpConstants.Vp8FixedCostsI16[mode];
rdCur.R = it.GetCostLuma16(rdCur, proba, res); rdCur.R = it.GetCostLuma16(rdCur, proba, res);
@ -145,7 +145,7 @@ internal static unsafe class QuantEnc
rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode); rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode);
// Compute RD-score. // Compute RD-score.
rdTmp.D = LossyUtils.Vp8_Sse4X4(src, tmpDst); rdTmp.D = LossyUtils.Vp8_Sse4x4(src, tmpDst);
rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0; rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0;
rdTmp.H = modeCosts[mode]; rdTmp.H = modeCosts[mode];
@ -235,7 +235,7 @@ internal static unsafe class QuantEnc
rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode); rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode);
// Compute RD-score // Compute RD-score
rdUv.D = LossyUtils.Vp8_Sse16X8(src, tmpDst); rdUv.D = LossyUtils.Vp8_Sse16x8(src, tmpDst);
rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas. rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas.
rdUv.H = WebpConstants.Vp8FixedCostsUv[mode]; rdUv.H = WebpConstants.Vp8FixedCostsUv[mode];
rdUv.R = it.GetCostUv(rdUv, proba, res); rdUv.R = it.GetCostUv(rdUv, proba, res);
@ -389,7 +389,7 @@ internal static unsafe class QuantEnc
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{ {
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]); Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]);
long score = (LossyUtils.Vp8_Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16); long score = (LossyUtils.Vp8_Sse16x16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16);
if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit) if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit)
{ {
@ -436,7 +436,7 @@ internal static unsafe class QuantEnc
for (mode = 0; mode < WebpConstants.NumBModes; ++mode) for (mode = 0; mode < WebpConstants.NumBModes; ++mode)
{ {
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]);
long score = (LossyUtils.Vp8_Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4); long score = (LossyUtils.Vp8_Sse4x4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4);
if (score < bestI4Score) if (score < bestI4Score)
{ {
bestI4Mode = mode; bestI4Mode = mode;
@ -485,7 +485,7 @@ internal static unsafe class QuantEnc
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{ {
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]); Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]);
long score = (LossyUtils.Vp8_Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv); long score = (LossyUtils.Vp8_Sse16x8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv);
if (score < bestUvScore) if (score < bestUvScore)
{ {
bestMode = mode; bestMode = mode;

Loading…
Cancel
Save