Browse Source

Fix calculation of mode score for SSE2/AVX2 version

pull/2355/head
Brian Popow 3 years ago
parent
commit
280e3762d2
  1. 18
      src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
  2. 12
      src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs

18
src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

@ -14,7 +14,7 @@ internal static class LossyUtils
{
// Note: method name in libwebp reference implementation is called VP8SSE16x16.
[MethodImpl(InliningOptions.ShortMethod)]
-public static int Vp8_Sse16X16(Span<byte> a, Span<byte> b)
+public static int Vp8_Sse16x16(Span<byte> a, Span<byte> b)
{
if (Avx2.IsSupported)
{
@ -31,7 +31,7 @@ internal static class LossyUtils
// Note: method name in libwebp reference implementation is called VP8SSE16x8.
[MethodImpl(InliningOptions.ShortMethod)]
-public static int Vp8_Sse16X8(Span<byte> a, Span<byte> b)
+public static int Vp8_Sse16x8(Span<byte> a, Span<byte> b)
{
if (Avx2.IsSupported)
{
@ -48,7 +48,7 @@ internal static class LossyUtils
// Note: method name in libwebp reference implementation is called VP8SSE4x4.
[MethodImpl(InliningOptions.ShortMethod)]
-public static int Vp8_Sse4X4(Span<byte> a, Span<byte> b)
+public static int Vp8_Sse4x4(Span<byte> a, Span<byte> b)
{
if (Avx2.IsSupported)
{
@ -77,8 +77,8 @@ internal static class LossyUtils
Vector256<byte> b01s = Avx2.UnpackLow(b01.AsByte(), Vector256<byte>.Zero);
// subtract, square and accumulate.
-Vector256<byte> d0 = Avx2.SubtractSaturate(a01s, b01s);
-Vector256<int> e0 = Avx2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16());
+Vector256<short> d0 = Avx2.SubtractSaturate(a01s.AsInt16(), b01s.AsInt16());
+Vector256<int> e0 = Avx2.MultiplyAddAdjacent(d0, d0);
return Numerics.ReduceSum(e0);
}
@ -110,10 +110,10 @@ internal static class LossyUtils
Vector128<byte> b23s = Sse2.UnpackLow(b23.AsByte(), Vector128<byte>.Zero);
// subtract, square and accumulate.
-Vector128<byte> d0 = Sse2.SubtractSaturate(a01s, b01s);
-Vector128<byte> d1 = Sse2.SubtractSaturate(a23s, b23s);
-Vector128<int> e0 = Sse2.MultiplyAddAdjacent(d0.AsInt16(), d0.AsInt16());
-Vector128<int> e1 = Sse2.MultiplyAddAdjacent(d1.AsInt16(), d1.AsInt16());
+Vector128<short> d0 = Sse2.SubtractSaturate(a01s.AsInt16(), b01s.AsInt16());
+Vector128<short> d1 = Sse2.SubtractSaturate(a23s.AsInt16(), b23s.AsInt16());
+Vector128<int> e0 = Sse2.MultiplyAddAdjacent(d0, d0);
+Vector128<int> e1 = Sse2.MultiplyAddAdjacent(d1, d1);
Vector128<int> sum = Sse2.Add(e0, e1);
return Numerics.ReduceSum(sum);

12
src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs

@ -53,7 +53,7 @@ internal static unsafe class QuantEnc
rdCur.Nz = (uint)ReconstructIntra16(it, dqm, rdCur, tmpDst, mode);
// Measure RD-score.
-rdCur.D = LossyUtils.Vp8_Sse16X16(src, tmpDst);
+rdCur.D = LossyUtils.Vp8_Sse16x16(src, tmpDst);
rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0;
rdCur.H = WebpConstants.Vp8FixedCostsI16[mode];
rdCur.R = it.GetCostLuma16(rdCur, proba, res);
@ -145,7 +145,7 @@ internal static unsafe class QuantEnc
rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode);
// Compute RD-score.
-rdTmp.D = LossyUtils.Vp8_Sse4X4(src, tmpDst);
+rdTmp.D = LossyUtils.Vp8_Sse4x4(src, tmpDst);
rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0;
rdTmp.H = modeCosts[mode];
@ -235,7 +235,7 @@ internal static unsafe class QuantEnc
rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode);
// Compute RD-score
-rdUv.D = LossyUtils.Vp8_Sse16X8(src, tmpDst);
+rdUv.D = LossyUtils.Vp8_Sse16x8(src, tmpDst);
rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas.
rdUv.H = WebpConstants.Vp8FixedCostsUv[mode];
rdUv.R = it.GetCostUv(rdUv, proba, res);
@ -389,7 +389,7 @@ internal static unsafe class QuantEnc
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]);
-long score = (LossyUtils.Vp8_Sse16X16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16);
+long score = (LossyUtils.Vp8_Sse16x16(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsI16[mode] * lambdaDi16);
if (mode > 0 && WebpConstants.Vp8FixedCostsI16[mode] > bitLimit)
{
@ -436,7 +436,7 @@ internal static unsafe class QuantEnc
for (mode = 0; mode < WebpConstants.NumBModes; ++mode)
{
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]);
-long score = (LossyUtils.Vp8_Sse4X4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4);
+long score = (LossyUtils.Vp8_Sse4x4(src, reference) * WebpConstants.RdDistoMult) + (modeCosts[mode] * lambdaDi4);
if (score < bestI4Score)
{
bestI4Mode = mode;
@ -485,7 +485,7 @@ internal static unsafe class QuantEnc
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8UvModeOffsets[mode]);
-long score = (LossyUtils.Vp8_Sse16X8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv);
+long score = (LossyUtils.Vp8_Sse16x8(src, reference) * WebpConstants.RdDistoMult) + (WebpConstants.Vp8FixedCostsUv[mode] * lambdaDuv);
if (score < bestUvScore)
{
bestMode = mode;

Loading…
Cancel
Save