From 3a03fad75eaa8464d1bd84cccd307014f9417497 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 14:51:51 +0100 Subject: [PATCH 1/9] Add sse41 version of quantize block --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 176 ++++++++++++++---- 1 file changed, 144 insertions(+), 32 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 2ed4381660..02087ceda4 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -3,13 +3,17 @@ using System; using System.Runtime.CompilerServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp.Formats.Webp.Lossy { /// /// Quantization methods. /// - internal static class QuantEnc + internal static unsafe class QuantEnc { private static readonly byte[] Zigzag = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; @@ -17,6 +21,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private const int MaxLevel = 2047; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 MaxCoeff2047 = Vector128.Create((short)MaxLevel); + + private static readonly Vector128 CstLo = Vector128.Create(0, 1, 2, 3, 8, 9, 254, 255, 10, 11, 4, 5, 6, 7, 12, 13); + + private static readonly Vector128 Cst7 = Vector128.Create(254, 255, 254, 255, 254, 255, 254, 255, 14, 15, 254, 255, 254, 255, 254, 255); + + private static readonly Vector128 CstHi = Vector128.Create(2, 3, 8, 9, 10, 11, 4, 5, 254, 255, 6, 7, 12, 13, 14, 15); + + private static readonly Vector128 Cst8 = Vector128.Create(254, 255, 254, 255, 254, 255, 0, 1, 254, 255, 254, 255, 254, 255, 254, 255); +#endif + // Diffusion weights. We under-correct a bit (15/16th of the error is actually // diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0. private const int C1 = 7; // fraction of error sent to the 4x4 block below @@ -486,51 +502,147 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy [MethodImpl(InliningOptions.ShortMethod)] public static int Quantize2Blocks(Span input, Span output, Vp8Matrix mtx) { - int nz = QuantizeBlock(input, output, mtx) << 0; - nz |= QuantizeBlock(input.Slice(1 * 16), output.Slice(1 * 16), mtx) << 1; + int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), mtx) << 0; + nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), mtx) << 1; return nz; } public static int QuantizeBlock(Span input, Span output, Vp8Matrix mtx) { - int last = -1; - int n; - for (n = 0; n < 16; ++n) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse41.IsSupported) { - int j = Zigzag[n]; - bool sign = input[j] < 0; - uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]); - if (coeff > mtx.ZThresh[j]) +#pragma warning disable SA1503 // Braces should not be omitted + fixed (ushort* mtxIqPtr = mtx.IQ) + fixed (ushort* mtxQPtr = mtx.Q) + fixed (uint* biasQPtr = mtx.Bias) + fixed (short* inputPtr = input) + fixed (short* outputPtr = output) { - uint q = mtx.Q[j]; - uint iQ = mtx.IQ[j]; - uint b = mtx.Bias[j]; - int level = QuantDiv(coeff, iQ, b); - if (level > MaxLevel) + // Load all inputs. + Vector128 input0 = Sse2.LoadVector128(inputPtr); + Vector128 input8 = Sse2.LoadVector128(inputPtr + 8); + Vector128 iq0 = Sse2.LoadVector128(mtxIqPtr); + Vector128 iq8 = Sse2.LoadVector128(mtxIqPtr + 8); + Vector128 q0 = Sse2.LoadVector128(mtxQPtr); + Vector128 q8 = Sse2.LoadVector128(mtxQPtr + 8); + + // coeff = abs(in) + Vector128 coeff0 = Ssse3.Abs(input0); + Vector128 coeff8 = Ssse3.Abs(input8); + + // out = (coeff * iQ + B) >> QFIX + // doing calculations with 32b precision (QFIX=17) + // out = (coeff * iQ) + Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); + Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); + Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); + Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); + Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); + Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); + Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); + Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); + + // out = (coeff * iQ + B) + Vector128 bias00 = Sse2.LoadVector128(biasQPtr); + Vector128 bias04 = Sse2.LoadVector128(biasQPtr + 4); + Vector128 bias08 = Sse2.LoadVector128(biasQPtr + 8); + Vector128 bias12 = Sse2.LoadVector128(biasQPtr + 12); + out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); + out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); + out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); + out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); + + // out = QUANTDIV(coeff, iQ, B, QFIX) + out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); + out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); + out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); + out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); + + // pack result as 16b + Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); + Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); + + // if (coeff > 2047) coeff = 2047 + out0 = Sse2.Min(out0, MaxCoeff2047); + out8 = Sse2.Min(out8, MaxCoeff2047); + + // put sign back + out0 = Ssse3.Sign(out0, input0); + out8 = Ssse3.Sign(out8, input8); + + // in = out * Q + input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); + input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); + + // in = out * Q + Sse2.Store(inputPtr, input0); + Sse2.Store(inputPtr + 8, input8); + + // zigzag the output before storing it. The re-ordering is: + // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 + // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 + // There's only two misplaced entries ([8] and [7]) that are crossing the + // reg's boundaries. + // We use pshufb instead of pshuflo/pshufhi. + Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); + Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 + Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); + Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 + Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); + Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + Sse2.Store(outputPtr, outZ0.AsInt16()); + Sse2.Store(outputPtr + 8, outZ8.AsInt16()); + Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); + + // Detect if all 'out' values are zeroes or not. + Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); + return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; + } +#pragma warning restore SA1503 // Braces should not be omitted + } + else +#endif + { + int last = -1; + int n; + for (n = 0; n < 16; ++n) + { + int j = Zigzag[n]; + bool sign = input[j] < 0; + uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]); + if (coeff > mtx.ZThresh[j]) { - level = MaxLevel; - } + uint q = mtx.Q[j]; + uint iQ = mtx.IQ[j]; + uint b = mtx.Bias[j]; + int level = QuantDiv(coeff, iQ, b); + if (level > MaxLevel) + { + level = MaxLevel; + } - if (sign) - { - level = -level; - } + if (sign) + { + level = -level; + } - input[j] = (short)(level * (int)q); - output[n] = (short)level; - if (level != 0) + input[j] = (short)(level * (int)q); + output[n] = (short)level; + if (level != 0) + { + last = n; + } + } + else { - last = n; + output[n] = 0; + input[j] = 0; } } - else - { - output[n] = 0; - input[j] = 0; - } - } - return last >= 0 ? 1 : 0; + return last >= 0 ? 1 : 0; + } } // Quantize as usual, but also compute and return the quantization error. From 020134ad8c15e58621635d4ca4b5fb4c6acdbe89 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 14:52:11 +0100 Subject: [PATCH 2/9] Add QuantizeBlock sse tests --- .../Formats/Webp/Lossy/Vp8Matrix.cs | 9 +++ .../Formats/WebP/QuantEncTests.cs | 56 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs index 4276b887f0..e525e388b8 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs @@ -34,6 +34,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.Sharpen = new short[16]; } + public Vp8Matrix(ushort[] q, ushort[] iq, uint[] bias, uint[] zThresh, short[] sharpen) + { + this.Q = q; + this.IQ = iq; + this.Bias = bias; + this.ZThresh = zThresh; + this.Sharpen = sharpen; + } + /// /// Gets the quantizer steps. /// diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs new file mode 100644 index 0000000000..280a7902ae --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -0,0 +1,56 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System.Linq; +using SixLabors.ImageSharp.Formats.Webp.Lossy; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Formats.WebP +{ + [Trait("Format", "Webp")] + public class QuantEncTests + { + private static void RunQuantizeBlockTest() + { + // arrange + short[] input = { 378, 777, -851, 888, 259, 148, 0, -111, -185, -185, -74, -37, 148, 74, 111, 74 }; + short[] output = new short[16]; + ushort[] q = { 42, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37 }; + ushort[] iq = { 3120, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542 }; + uint[] bias = + { + 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, + 55296, 55296 + }; + uint[] zthresh = { 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21 }; + short[] expectedOutput = { 9, 21, 7, -5, 4, -23, 24, 0, -5, 4, 2, -2, -3, -1, 3, 2 }; + int expectedResult = 1; + var vp8Matrix = new Vp8Matrix(q, iq, bias, zthresh, new short[16]); + + // act + int actualResult = QuantEnc.QuantizeBlock(input, output, vp8Matrix); + + // assert + Assert.True(output.SequenceEqual(expectedOutput)); + Assert.Equal(expectedResult, actualResult); + } + + [Fact] + public void QuantizeBlock_Works() => RunQuantizeBlockTest(); + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void QuantizeBlock_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.AllowAll); + + [Fact] + public void QuantizeBlock_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2); + + [Fact] + public void QuantizeBlock_WithoutSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSSE3); + + [Fact] + public void QuantizeBlock_WithoutSSE2AndSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3); +#endif + } +} From a628909b8da58e9dbd10bfa3b70e9c8ce66ddc1d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 7 Nov 2021 15:02:08 +0100 Subject: [PATCH 3/9] Add coeff = abs(in) + sharpen --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 02087ceda4..b812909b20 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -516,6 +516,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy fixed (ushort* mtxIqPtr = mtx.IQ) fixed (ushort* mtxQPtr = mtx.Q) fixed (uint* biasQPtr = mtx.Bias) + fixed (short* sharpenPtr = mtx.Sharpen) fixed (short* inputPtr = input) fixed (short* outputPtr = output) { @@ -531,6 +532,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 coeff0 = Ssse3.Abs(input0); Vector128 coeff8 = Ssse3.Abs(input8); + // coeff = abs(in) + sharpen + Vector128 sharpen0 = Sse2.LoadVector128(sharpenPtr); + Vector128 sharpen8 = Sse2.LoadVector128(sharpenPtr + 8); + Sse2.Add(coeff0.AsInt16(), sharpen0); + Sse2.Add(coeff8.AsInt16(), sharpen8); + // out = (coeff * iQ + B) >> QFIX // doing calculations with 32b precision (QFIX=17) // out = (coeff * iQ) From 5c6e08b80c39f3cd4e24774ee66b5b011c41aa00 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 16:02:06 +0100 Subject: [PATCH 4/9] Avoid pinning of vp8 matrix data --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 169 +++++++++--------- 1 file changed, 85 insertions(+), 84 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index f935bd3ee0..b300b7b5c2 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -3,6 +3,7 @@ using System; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -537,99 +538,99 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy if (Sse41.IsSupported) { #pragma warning disable SA1503 // Braces should not be omitted - fixed (ushort* mtxIqPtr = mtx.IQ) - fixed (ushort* mtxQPtr = mtx.Q) - fixed (uint* biasQPtr = mtx.Bias) - fixed (short* sharpenPtr = mtx.Sharpen) + // Load all inputs. + Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); + Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); + Vector128 iq0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(0, 8))); + Vector128 iq8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(8, 8))); + Vector128 q0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(0, 8))); + Vector128 q8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(8, 8))); + + // coeff = abs(in) + Vector128 coeff0 = Ssse3.Abs(input0); + Vector128 coeff8 = Ssse3.Abs(input8); + + // coeff = abs(in) + sharpen + Vector128 sharpen0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(0, 8))); + Vector128 sharpen8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(8, 8))); + Sse2.Add(coeff0.AsInt16(), sharpen0); + Sse2.Add(coeff8.AsInt16(), sharpen8); + + // out = (coeff * iQ + B) >> QFIX + // doing calculations with 32b precision (QFIX=17) + // out = (coeff * iQ) + Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); + Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); + Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); + Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); + Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); + Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); + Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); + Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); + + // out = (coeff * iQ + B) + Vector128 bias00 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(0, 4))); + Vector128 bias04 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(4, 4))); + Vector128 bias08 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(8, 4))); + Vector128 bias12 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(12, 4))); + out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); + out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); + out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); + out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); + + // out = QUANTDIV(coeff, iQ, B, QFIX) + out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); + out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); + out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); + out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); + + // pack result as 16b + Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); + Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); + + // if (coeff > 2047) coeff = 2047 + out0 = Sse2.Min(out0, MaxCoeff2047); + out8 = Sse2.Min(out8, MaxCoeff2047); + + // put sign back + out0 = Ssse3.Sign(out0, input0); + out8 = Ssse3.Sign(out8, input8); + + // in = out * Q + input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); + input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); + fixed (short* inputPtr = input) - fixed (short* outputPtr = output) { - // Load all inputs. - Vector128 input0 = Sse2.LoadVector128(inputPtr); - Vector128 input8 = Sse2.LoadVector128(inputPtr + 8); - Vector128 iq0 = Sse2.LoadVector128(mtxIqPtr); - Vector128 iq8 = Sse2.LoadVector128(mtxIqPtr + 8); - Vector128 q0 = Sse2.LoadVector128(mtxQPtr); - Vector128 q8 = Sse2.LoadVector128(mtxQPtr + 8); - - // coeff = abs(in) - Vector128 coeff0 = Ssse3.Abs(input0); - Vector128 coeff8 = Ssse3.Abs(input8); - - // coeff = abs(in) + sharpen - Vector128 sharpen0 = Sse2.LoadVector128(sharpenPtr); - Vector128 sharpen8 = Sse2.LoadVector128(sharpenPtr + 8); - Sse2.Add(coeff0.AsInt16(), sharpen0); - Sse2.Add(coeff8.AsInt16(), sharpen8); - - // out = (coeff * iQ + B) >> QFIX - // doing calculations with 32b precision (QFIX=17) - // out = (coeff * iQ) - Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); - Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); - Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); - Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); - Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); - Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); - Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); - Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); - - // out = (coeff * iQ + B) - Vector128 bias00 = Sse2.LoadVector128(biasQPtr); - Vector128 bias04 = Sse2.LoadVector128(biasQPtr + 4); - Vector128 bias08 = Sse2.LoadVector128(biasQPtr + 8); - Vector128 bias12 = Sse2.LoadVector128(biasQPtr + 12); - out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); - out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); - out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); - out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); - - // out = QUANTDIV(coeff, iQ, B, QFIX) - out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); - out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); - out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); - out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); - - // pack result as 16b - Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); - Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); - - // if (coeff > 2047) coeff = 2047 - out0 = Sse2.Min(out0, MaxCoeff2047); - out8 = Sse2.Min(out8, MaxCoeff2047); - - // put sign back - out0 = Ssse3.Sign(out0, input0); - out8 = Ssse3.Sign(out8, input8); - - // in = out * Q - input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); - input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); - // in = out * Q Sse2.Store(inputPtr, input0); Sse2.Store(inputPtr + 8, input8); + } - // zigzag the output before storing it. The re-ordering is: - // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 - // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 - // There's only two misplaced entries ([8] and [7]) that are crossing the - // reg's boundaries. - // We use pshufb instead of pshuflo/pshufhi. - Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); - Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 - Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); - Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 - Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); - Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + // zigzag the output before storing it. The re-ordering is: + // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 + // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 + // There's only two misplaced entries ([8] and [7]) that are crossing the + // reg's boundaries. + // We use pshufb instead of pshuflo/pshufhi. + Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); + Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 + Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); + Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 + Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); + Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + + fixed (short* outputPtr = output) + { Sse2.Store(outputPtr, outZ0.AsInt16()); Sse2.Store(outputPtr + 8, outZ8.AsInt16()); - Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); - - // Detect if all 'out' values are zeroes or not. - Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); - return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; } + + Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); + + // Detect if all 'out' values are zeroes or not. + Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); + return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; #pragma warning restore SA1503 // Braces should not be omitted } else From 0c0812de82648be40a35dc63a9b6c914bdcbbbf7 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 16:58:40 +0100 Subject: [PATCH 5/9] Avoid pinning input and output data --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index b300b7b5c2..6e25dc003c 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -600,12 +600,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); - fixed (short* inputPtr = input) - { - // in = out * Q - Sse2.Store(inputPtr, input0); - Sse2.Store(inputPtr + 8, input8); - } + // in = out * Q + ref short inputRef = ref MemoryMarshal.GetReference(input); + Unsafe.As>(ref inputRef) = input0; + Unsafe.As>(ref Unsafe.Add(ref inputRef, 8)) = input8; // zigzag the output before storing it. The re-ordering is: // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 @@ -620,11 +618,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); - fixed (short* outputPtr = output) - { - Sse2.Store(outputPtr, outZ0.AsInt16()); - Sse2.Store(outputPtr + 8, outZ8.AsInt16()); - } + ref short outputRef = ref MemoryMarshal.GetReference(output); + Unsafe.As>(ref outputRef) = outZ0.AsInt16(); + Unsafe.As>(ref Unsafe.Add(ref outputRef, 8)) = outZ8.AsInt16(); Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); From cffa4b0c366a3d80b7e5c315127ae0a27f1ddb8d Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 8 Nov 2021 17:00:18 +0100 Subject: [PATCH 6/9] Only test with and without HardwareIntrinsics --- tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index 280a7902ae..d0cdfc1ded 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -44,13 +44,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP public void QuantizeBlock_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.AllowAll); [Fact] - public void QuantizeBlock_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2); - - [Fact] - public void QuantizeBlock_WithoutSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSSE3); - - [Fact] - public void QuantizeBlock_WithoutSSE2AndSSSE3_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableSSE2 | HwIntrinsics.DisableSSSE3); + public void QuantizeBlock_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableHWIntrinsic); #endif } } From cb513a905c52e843440f14c70e40fe9192737e91 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 11:05:18 +0100 Subject: [PATCH 7/9] Use fixed sized arrays in Vp8Matrix --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 20 ++++---- .../Formats/Webp/Lossy/Vp8Encoder.cs | 8 +--- .../Formats/Webp/Lossy/Vp8Matrix.cs | 47 +++++-------------- .../Formats/Webp/Lossy/Vp8SegmentInfo.cs | 12 ++--- .../Formats/WebP/QuantEncTests.cs | 17 ++++--- 5 files changed, 41 insertions(+), 63 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 6e25dc003c..4c3a2ff5e3 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -541,18 +541,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Load all inputs. Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); - Vector128 iq0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(0, 8))); - Vector128 iq8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.IQ.AsSpan(8, 8))); - Vector128 q0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(0, 8))); - Vector128 q8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Q.AsSpan(8, 8))); + Vector128 iq0 = Unsafe.As>(ref mtx.IQ[0]); + Vector128 iq8 = Unsafe.As>(ref mtx.IQ[8]); + Vector128 q0 = Unsafe.As>(ref mtx.Q[0]); + Vector128 q8 = Unsafe.As>(ref mtx.Q[8]); // coeff = abs(in) Vector128 coeff0 = Ssse3.Abs(input0); Vector128 coeff8 = Ssse3.Abs(input8); // coeff = abs(in) + sharpen - Vector128 sharpen0 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(0, 8))); - Vector128 sharpen8 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Sharpen.AsSpan(8, 8))); + Vector128 sharpen0 = Unsafe.As>(ref mtx.Sharpen[0]); + Vector128 sharpen8 = Unsafe.As>(ref mtx.Sharpen[8]); Sse2.Add(coeff0.AsInt16(), sharpen0); Sse2.Add(coeff8.AsInt16(), sharpen8); @@ -569,10 +569,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); // out = (coeff * iQ + B) - Vector128 bias00 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(0, 4))); - Vector128 bias04 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(4, 4))); - Vector128 bias08 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(8, 4))); - Vector128 bias12 = Unsafe.As>(ref MemoryMarshal.GetReference(mtx.Bias.AsSpan(12, 4))); + Vector128 bias00 = Unsafe.As>(ref mtx.Bias[0]); + Vector128 bias04 = Unsafe.As>(ref mtx.Bias[4]); + Vector128 bias08 = Unsafe.As>(ref mtx.Bias[8]); + Vector128 bias12 = Unsafe.As>(ref mtx.Bias[12]); out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 728574682f..8a4115d216 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -502,7 +502,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.ResetStats(); } - private void AdjustFilterStrength() + private unsafe void AdjustFilterStrength() { if (this.filterStrength > 0) { @@ -806,7 +806,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy proba.NbSkip = 0; } - private void SetupMatrices(Vp8SegmentInfo[] dqm) + private unsafe void SetupMatrices(Vp8SegmentInfo[] dqm) { int tlambdaScale = this.method >= WebpEncodingMethod.Default ? this.spatialNoiseShaping : 0; for (int i = 0; i < dqm.Length; i++) @@ -814,10 +814,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vp8SegmentInfo m = dqm[i]; int q = m.Quant; - m.Y1 = new Vp8Matrix(); - m.Y2 = new Vp8Matrix(); - m.Uv = new Vp8Matrix(); - m.Y1.Q[0] = WebpLookupTables.DcTable[Numerics.Clamp(q + this.DqY1Dc, 0, 127)]; m.Y1.Q[1] = WebpLookupTables.AcTable[Numerics.Clamp(q, 0, 127)]; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs index e525e388b8..66c91e44ad 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs @@ -3,7 +3,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal class Vp8Matrix + internal unsafe struct Vp8Matrix { private static readonly int[][] BiasMatrices = { @@ -23,50 +23,29 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private const int SharpenBits = 11; /// - /// Initializes a new instance of the class. + /// The quantizer steps. /// - public Vp8Matrix() - { - this.Q = new ushort[16]; - this.IQ = new ushort[16]; - this.Bias = new uint[16]; - this.ZThresh = new uint[16]; - this.Sharpen = new short[16]; - } - - public Vp8Matrix(ushort[] q, ushort[] iq, uint[] bias, uint[] zThresh, short[] sharpen) - { - this.Q = q; - this.IQ = iq; - this.Bias = bias; - this.ZThresh = zThresh; - this.Sharpen = sharpen; - } - - /// - /// Gets the quantizer steps. - /// - public ushort[] Q { get; } + public fixed ushort Q[16]; /// - /// Gets the reciprocals, fixed point. + /// The reciprocals, fixed point. /// - public ushort[] IQ { get; } + public fixed ushort IQ[16]; /// - /// Gets the rounding bias. + /// The rounding bias. /// - public uint[] Bias { get; } + public fixed uint Bias[16]; /// - /// Gets the value below which a coefficient is zeroed. + /// The value below which a coefficient is zeroed. /// - public uint[] ZThresh { get; } + public fixed uint ZThresh[16]; /// - /// Gets the frequency boosters for slight sharpening. + /// The frequency boosters for slight sharpening. /// - public short[] Sharpen { get; } + public fixed short Sharpen[16]; /// /// Returns the average quantizer. @@ -81,7 +60,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int isAcCoeff = i > 0 ? 1 : 0; int bias = BiasMatrices[type][isAcCoeff]; this.IQ[i] = (ushort)((1 << WebpConstants.QFix) / this.Q[i]); - this.Bias[i] = (uint)this.BIAS(bias); + this.Bias[i] = (uint)BIAS(bias); // zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is: // * zero if coeff <= zthresh @@ -115,6 +94,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return (sum + 8) >> 4; } - private int BIAS(int b) => b << (WebpConstants.QFix - 8); + private static int BIAS(int b) => b << (WebpConstants.QFix - 8); } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs index cf2a5c1775..71983055c0 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs @@ -8,19 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy internal class Vp8SegmentInfo { /// - /// Gets or sets the quantization matrix y1. + /// Gets the quantization matrix y1. /// - public Vp8Matrix Y1 { get; set; } + public Vp8Matrix Y1; /// - /// Gets or sets the quantization matrix y2. + /// Gets the quantization matrix y2. /// - public Vp8Matrix Y2 { get; set; } + public Vp8Matrix Y2; /// - /// Gets or sets the quantization matrix uv. + /// Gets the quantization matrix uv. /// - public Vp8Matrix Uv { get; set; } + public Vp8Matrix Uv; /// /// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness. diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index d0cdfc1ded..7465c42cef 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -11,22 +11,25 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP [Trait("Format", "Webp")] public class QuantEncTests { - private static void RunQuantizeBlockTest() + private static unsafe void RunQuantizeBlockTest() { // arrange short[] input = { 378, 777, -851, 888, 259, 148, 0, -111, -185, -185, -74, -37, 148, 74, 111, 74 }; short[] output = new short[16]; ushort[] q = { 42, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37 }; ushort[] iq = { 3120, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542 }; - uint[] bias = - { - 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, - 55296, 55296 - }; + uint[] bias = { 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296 }; uint[] zthresh = { 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21 }; short[] expectedOutput = { 9, 21, 7, -5, 4, -23, 24, 0, -5, 4, 2, -2, -3, -1, 3, 2 }; int expectedResult = 1; - var vp8Matrix = new Vp8Matrix(q, iq, bias, zthresh, new short[16]); + Vp8Matrix vp8Matrix = default; + for (int i = 0; i < 16; i++) + { + vp8Matrix.Q[i] = q[i]; + vp8Matrix.IQ[i] = iq[i]; + vp8Matrix.Bias[i] = bias[i]; + vp8Matrix.ZThresh[i] = zthresh[i]; + } // act int actualResult = QuantEnc.QuantizeBlock(input, output, vp8Matrix); From 42c2cf7a799af7c5a6b504ec6233fc6a7308c030 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 13:40:40 +0100 Subject: [PATCH 8/9] Disable SA1401 in file: Fields should be private --- src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs index 71983055c0..2ce383d9e1 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs @@ -10,6 +10,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// /// Gets the quantization matrix y1. /// +#pragma warning disable SA1401 // Fields should be private public Vp8Matrix Y1; /// @@ -21,6 +22,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// Gets the quantization matrix uv. /// public Vp8Matrix Uv; +#pragma warning restore SA1401 // Fields should be private /// /// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness. From 8160a0eeb6a7bb5e8dc65ca1827a754d5a0e1e81 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 9 Nov 2021 13:40:54 +0100 Subject: [PATCH 9/9] Pass Vp8Matrix as ref --- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 34 +++++++++---------- .../Formats/WebP/QuantEncTests.cs | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 4c3a2ff5e3..97ef27d259 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -315,14 +315,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } Vp8Encoding.FTransformWht(tmp, dcTmp, scratch); - nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24; + nz |= QuantizeBlock(dcTmp, rd.YDcLevels, ref dqm.Y2) << 24; for (n = 0; n < 16; n += 2) { // Zero-out the first coeff, so that: a) nz is correct below, and // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. tmp[n * 16] = tmp[(n + 1) * 16] = 0; - nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), ref dqm.Y1) << n; } // Transform back. @@ -343,7 +343,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy tmp.Clear(); scratch.Clear(); Vp8Encoding.FTransform(src, reference, tmp, scratch); - int nz = QuantizeBlock(tmp, levels, dqm.Y1); + int nz = QuantizeBlock(tmp, levels, ref dqm.Y1); Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch); return nz; @@ -370,11 +370,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy scratch); } - CorrectDcValues(it, dqm.Uv, tmp, rd); + CorrectDcValues(it, ref dqm.Uv, tmp, rd); for (n = 0; n < 8; n += 2) { - nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), ref dqm.Uv) << n; } for (n = 0; n < 8; n += 2) @@ -525,19 +525,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } [MethodImpl(InliningOptions.ShortMethod)] - public static int Quantize2Blocks(Span input, Span output, Vp8Matrix mtx) + public static int Quantize2Blocks(Span input, Span output, ref Vp8Matrix mtx) { - int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), mtx) << 0; - nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), mtx) << 1; + int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), ref mtx) << 0; + nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), ref mtx) << 1; return nz; } - public static int QuantizeBlock(Span input, Span output, Vp8Matrix mtx) + public static int QuantizeBlock(Span input, Span output, ref Vp8Matrix mtx) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse41.IsSupported) { -#pragma warning disable SA1503 // Braces should not be omitted // Load all inputs. Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); @@ -624,10 +623,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); - // Detect if all 'out' values are zeroes or not. + // Detect if all 'out' values are zeros or not. Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; -#pragma warning restore SA1503 // Braces should not be omitted } else #endif @@ -675,7 +673,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Quantize as usual, but also compute and return the quantization error. // Error is already divided by DSHIFT. - public static int QuantizeSingle(Span v, Vp8Matrix mtx) + public static int QuantizeSingle(Span v, ref Vp8Matrix mtx) { int v0 = v[0]; bool sign = v0 < 0; @@ -696,7 +694,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return (sign ? -v0 : v0) >> DSCALE; } - public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) + public static void CorrectDcValues(Vp8EncIterator it, ref Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) { #pragma warning disable SA1005 // Single line comments should begin with single space // | top[0] | top[1] @@ -713,13 +711,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Span left = it.LeftDerr.AsSpan(ch, 2); Span c = tmp.Slice(ch * 4 * 16, 4 * 16); c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE)); - int err0 = QuantizeSingle(c, mtx); + int err0 = QuantizeSingle(c, ref mtx); c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE)); - int err1 = QuantizeSingle(c.Slice(1 * 16), mtx); + int err1 = QuantizeSingle(c.Slice(1 * 16), ref mtx); c[2 * 16] += (short)(((C1 * err0) + (C2 * left[1])) >> (DSHIFT - DSCALE)); - int err2 = QuantizeSingle(c.Slice(2 * 16), mtx); + int err2 = QuantizeSingle(c.Slice(2 * 16), ref mtx); c[3 * 16] += (short)(((C1 * err1) + (C2 * err2)) >> (DSHIFT - DSCALE)); - int err3 = QuantizeSingle(c.Slice(3 * 16), mtx); + int err3 = QuantizeSingle(c.Slice(3 * 16), ref mtx); rd.Derr[ch, 0] = err1; rd.Derr[ch, 1] = err2; diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index 7465c42cef..55738199b7 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -32,7 +32,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP } // act - int actualResult = QuantEnc.QuantizeBlock(input, output, vp8Matrix); + int actualResult = QuantEnc.QuantizeBlock(input, output, ref vp8Matrix); // assert Assert.True(output.SequenceEqual(expectedOutput));