diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 18d7494f0f..97ef27d259 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -3,13 +3,18 @@ using System; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp.Formats.Webp.Lossy { /// /// Quantization methods. /// - internal static class QuantEnc + internal static unsafe class QuantEnc { private static readonly byte[] Zigzag = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; @@ -17,6 +22,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private const int MaxLevel = 2047; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 MaxCoeff2047 = Vector128.Create((short)MaxLevel); + + private static readonly Vector128 CstLo = Vector128.Create(0, 1, 2, 3, 8, 9, 254, 255, 10, 11, 4, 5, 6, 7, 12, 13); + + private static readonly Vector128 Cst7 = Vector128.Create(254, 255, 254, 255, 254, 255, 254, 255, 14, 15, 254, 255, 254, 255, 254, 255); + + private static readonly Vector128 CstHi = Vector128.Create(2, 3, 8, 9, 10, 11, 4, 5, 254, 255, 6, 7, 12, 13, 14, 15); + + private static readonly Vector128 Cst8 = Vector128.Create(254, 255, 254, 255, 254, 255, 0, 1, 254, 255, 254, 255, 254, 255, 254, 255); +#endif + // Diffusion weights. We under-correct a bit (15/16th of the error is actually // diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0. private const int C1 = 7; // fraction of error sent to the 4x4 block below @@ -298,14 +315,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } Vp8Encoding.FTransformWht(tmp, dcTmp, scratch); - nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24; + nz |= QuantizeBlock(dcTmp, rd.YDcLevels, ref dqm.Y2) << 24; for (n = 0; n < 16; n += 2) { // Zero-out the first coeff, so that: a) nz is correct below, and // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. tmp[n * 16] = tmp[(n + 1) * 16] = 0; - nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), ref dqm.Y1) << n; } // Transform back. @@ -326,7 +343,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy tmp.Clear(); scratch.Clear(); Vp8Encoding.FTransform(src, reference, tmp, scratch); - int nz = QuantizeBlock(tmp, levels, dqm.Y1); + int nz = QuantizeBlock(tmp, levels, ref dqm.Y1); Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch); return nz; @@ -353,11 +370,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy scratch); } - CorrectDcValues(it, dqm.Uv, tmp, rd); + CorrectDcValues(it, ref dqm.Uv, tmp, rd); for (n = 0; n < 8; n += 2) { - nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), ref dqm.Uv) << n; } for (n = 0; n < 8; n += 2) @@ -508,58 +525,155 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } [MethodImpl(InliningOptions.ShortMethod)] - public static int Quantize2Blocks(Span input, Span output, Vp8Matrix mtx) + public static int Quantize2Blocks(Span input, Span output, ref Vp8Matrix mtx) { - int nz = QuantizeBlock(input, output, mtx) << 0; - nz |= QuantizeBlock(input.Slice(1 * 16), output.Slice(1 * 16), mtx) << 1; + int nz = QuantizeBlock(input.Slice(0, 16), output.Slice(0, 16), ref mtx) << 0; + nz |= QuantizeBlock(input.Slice(1 * 16, 16), output.Slice(1 * 16, 16), ref mtx) << 1; return nz; } - public static int QuantizeBlock(Span input, Span output, Vp8Matrix mtx) + public static int QuantizeBlock(Span input, Span output, ref Vp8Matrix mtx) { - int last = -1; - int n; - for (n = 0; n < 16; ++n) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse41.IsSupported) { - int j = Zigzag[n]; - bool sign = input[j] < 0; - uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]); - if (coeff > mtx.ZThresh[j]) + // Load all inputs. + Vector128 input0 = Unsafe.As>(ref MemoryMarshal.GetReference(input)); + Vector128 input8 = Unsafe.As>(ref MemoryMarshal.GetReference(input.Slice(8, 8))); + Vector128 iq0 = Unsafe.As>(ref mtx.IQ[0]); + Vector128 iq8 = Unsafe.As>(ref mtx.IQ[8]); + Vector128 q0 = Unsafe.As>(ref mtx.Q[0]); + Vector128 q8 = Unsafe.As>(ref mtx.Q[8]); + + // coeff = abs(in) + Vector128 coeff0 = Ssse3.Abs(input0); + Vector128 coeff8 = Ssse3.Abs(input8); + + // coeff = abs(in) + sharpen + Vector128 sharpen0 = Unsafe.As>(ref mtx.Sharpen[0]); + Vector128 sharpen8 = Unsafe.As>(ref mtx.Sharpen[8]); + Sse2.Add(coeff0.AsInt16(), sharpen0); + Sse2.Add(coeff8.AsInt16(), sharpen8); + + // out = (coeff * iQ + B) >> QFIX + // doing calculations with 32b precision (QFIX=17) + // out = (coeff * iQ) + Vector128 coeffiQ0H = Sse2.MultiplyHigh(coeff0, iq0); + Vector128 coeffiQ0L = Sse2.MultiplyLow(coeff0, iq0); + Vector128 coeffiQ8H = Sse2.MultiplyHigh(coeff8, iq8); + Vector128 coeffiQ8L = Sse2.MultiplyLow(coeff8, iq8); + Vector128 out00 = Sse2.UnpackLow(coeffiQ0L, coeffiQ0H); + Vector128 out04 = Sse2.UnpackHigh(coeffiQ0L, coeffiQ0H); + Vector128 out08 = Sse2.UnpackLow(coeffiQ8L, coeffiQ8H); + Vector128 out12 = Sse2.UnpackHigh(coeffiQ8L, coeffiQ8H); + + // out = (coeff * iQ + B) + Vector128 bias00 = Unsafe.As>(ref mtx.Bias[0]); + Vector128 bias04 = Unsafe.As>(ref mtx.Bias[4]); + Vector128 bias08 = Unsafe.As>(ref mtx.Bias[8]); + Vector128 bias12 = Unsafe.As>(ref mtx.Bias[12]); + out00 = Sse2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16(); + out04 = Sse2.Add(out04.AsInt32(), bias04.AsInt32()).AsUInt16(); + out08 = Sse2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16(); + out12 = Sse2.Add(out12.AsInt32(), bias12.AsInt32()).AsUInt16(); + + // out = QUANTDIV(coeff, iQ, B, QFIX) + out00 = Sse2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16(); + out04 = Sse2.ShiftRightArithmetic(out04.AsInt32(), WebpConstants.QFix).AsUInt16(); + out08 = Sse2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16(); + out12 = Sse2.ShiftRightArithmetic(out12.AsInt32(), WebpConstants.QFix).AsUInt16(); + + // pack result as 16b + Vector128 out0 = Sse2.PackSignedSaturate(out00.AsInt32(), out04.AsInt32()); + Vector128 out8 = Sse2.PackSignedSaturate(out08.AsInt32(), out12.AsInt32()); + + // if (coeff > 2047) coeff = 2047 + out0 = Sse2.Min(out0, MaxCoeff2047); + out8 = Sse2.Min(out8, MaxCoeff2047); + + // put sign back + out0 = Ssse3.Sign(out0, input0); + out8 = Ssse3.Sign(out8, input8); + + // in = out * Q + input0 = Sse2.MultiplyLow(out0, q0.AsInt16()); + input8 = Sse2.MultiplyLow(out8, q8.AsInt16()); + + // in = out * Q + ref short inputRef = ref MemoryMarshal.GetReference(input); + Unsafe.As>(ref inputRef) = input0; + Unsafe.As>(ref Unsafe.Add(ref inputRef, 8)) = input8; + + // zigzag the output before storing it. The re-ordering is: + // 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15 + // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15 + // There's only two misplaced entries ([8] and [7]) that are crossing the + // reg's boundaries. + // We use pshufb instead of pshuflo/pshufhi. + Vector128 tmpLo = Ssse3.Shuffle(out0.AsByte(), CstLo); + Vector128 tmp7 = Ssse3.Shuffle(out0.AsByte(), Cst7); // extract #7 + Vector128 tmpHi = Ssse3.Shuffle(out8.AsByte(), CstHi); + Vector128 tmp8 = Ssse3.Shuffle(out8.AsByte(), Cst8); // extract #8 + Vector128 outZ0 = Sse2.Or(tmpLo, tmp8); + Vector128 outZ8 = Sse2.Or(tmpHi, tmp7); + + ref short outputRef = ref MemoryMarshal.GetReference(output); + Unsafe.As>(ref outputRef) = outZ0.AsInt16(); + Unsafe.As>(ref Unsafe.Add(ref outputRef, 8)) = outZ8.AsInt16(); + + Vector128 packedOutput = Sse2.PackSignedSaturate(outZ0.AsInt16(), outZ8.AsInt16()); + + // Detect if all 'out' values are zeros or not. + Vector128 cmpeq = Sse2.CompareEqual(packedOutput, Vector128.Zero); + return Sse2.MoveMask(cmpeq) != 0xffff ? 1 : 0; + } + else +#endif + { + int last = -1; + int n; + for (n = 0; n < 16; ++n) { - uint q = mtx.Q[j]; - uint iQ = mtx.IQ[j]; - uint b = mtx.Bias[j]; - int level = QuantDiv(coeff, iQ, b); - if (level > MaxLevel) + int j = Zigzag[n]; + bool sign = input[j] < 0; + uint coeff = (uint)((sign ? -input[j] : input[j]) + mtx.Sharpen[j]); + if (coeff > mtx.ZThresh[j]) { - level = MaxLevel; - } + uint q = mtx.Q[j]; + uint iQ = mtx.IQ[j]; + uint b = mtx.Bias[j]; + int level = QuantDiv(coeff, iQ, b); + if (level > MaxLevel) + { + level = MaxLevel; + } - if (sign) - { - level = -level; - } + if (sign) + { + level = -level; + } - input[j] = (short)(level * (int)q); - output[n] = (short)level; - if (level != 0) + input[j] = (short)(level * (int)q); + output[n] = (short)level; + if (level != 0) + { + last = n; + } + } + else { - last = n; + output[n] = 0; + input[j] = 0; } } - else - { - output[n] = 0; - input[j] = 0; - } - } - return last >= 0 ? 1 : 0; + return last >= 0 ? 1 : 0; + } } // Quantize as usual, but also compute and return the quantization error. // Error is already divided by DSHIFT. - public static int QuantizeSingle(Span v, Vp8Matrix mtx) + public static int QuantizeSingle(Span v, ref Vp8Matrix mtx) { int v0 = v[0]; bool sign = v0 < 0; @@ -580,7 +694,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return (sign ? -v0 : v0) >> DSCALE; } - public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) + public static void CorrectDcValues(Vp8EncIterator it, ref Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) { #pragma warning disable SA1005 // Single line comments should begin with single space // | top[0] | top[1] @@ -597,13 +711,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Span left = it.LeftDerr.AsSpan(ch, 2); Span c = tmp.Slice(ch * 4 * 16, 4 * 16); c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE)); - int err0 = QuantizeSingle(c, mtx); + int err0 = QuantizeSingle(c, ref mtx); c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE)); - int err1 = QuantizeSingle(c.Slice(1 * 16), mtx); + int err1 = QuantizeSingle(c.Slice(1 * 16), ref mtx); c[2 * 16] += (short)(((C1 * err0) + (C2 * left[1])) >> (DSHIFT - DSCALE)); - int err2 = QuantizeSingle(c.Slice(2 * 16), mtx); + int err2 = QuantizeSingle(c.Slice(2 * 16), ref mtx); c[3 * 16] += (short)(((C1 * err1) + (C2 * err2)) >> (DSHIFT - DSCALE)); - int err3 = QuantizeSingle(c.Slice(3 * 16), mtx); + int err3 = QuantizeSingle(c.Slice(3 * 16), ref mtx); rd.Derr[ch, 0] = err1; rd.Derr[ch, 1] = err2; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 728574682f..8a4115d216 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -502,7 +502,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.ResetStats(); } - private void AdjustFilterStrength() + private unsafe void AdjustFilterStrength() { if (this.filterStrength > 0) { @@ -806,7 +806,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy proba.NbSkip = 0; } - private void SetupMatrices(Vp8SegmentInfo[] dqm) + private unsafe void SetupMatrices(Vp8SegmentInfo[] dqm) { int tlambdaScale = this.method >= WebpEncodingMethod.Default ? this.spatialNoiseShaping : 0; for (int i = 0; i < dqm.Length; i++) @@ -814,10 +814,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vp8SegmentInfo m = dqm[i]; int q = m.Quant; - m.Y1 = new Vp8Matrix(); - m.Y2 = new Vp8Matrix(); - m.Uv = new Vp8Matrix(); - m.Y1.Q[0] = WebpLookupTables.DcTable[Numerics.Clamp(q + this.DqY1Dc, 0, 127)]; m.Y1.Q[1] = WebpLookupTables.AcTable[Numerics.Clamp(q, 0, 127)]; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs index 4276b887f0..66c91e44ad 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Matrix.cs @@ -3,7 +3,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal class Vp8Matrix + internal unsafe struct Vp8Matrix { private static readonly int[][] BiasMatrices = { @@ -23,41 +23,29 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private const int SharpenBits = 11; /// - /// Initializes a new instance of the class. + /// The quantizer steps. /// - public Vp8Matrix() - { - this.Q = new ushort[16]; - this.IQ = new ushort[16]; - this.Bias = new uint[16]; - this.ZThresh = new uint[16]; - this.Sharpen = new short[16]; - } - - /// - /// Gets the quantizer steps. - /// - public ushort[] Q { get; } + public fixed ushort Q[16]; /// - /// Gets the reciprocals, fixed point. + /// The reciprocals, fixed point. /// - public ushort[] IQ { get; } + public fixed ushort IQ[16]; /// - /// Gets the rounding bias. + /// The rounding bias. /// - public uint[] Bias { get; } + public fixed uint Bias[16]; /// - /// Gets the value below which a coefficient is zeroed. + /// The value below which a coefficient is zeroed. /// - public uint[] ZThresh { get; } + public fixed uint ZThresh[16]; /// - /// Gets the frequency boosters for slight sharpening. + /// The frequency boosters for slight sharpening. /// - public short[] Sharpen { get; } + public fixed short Sharpen[16]; /// /// Returns the average quantizer. @@ -72,7 +60,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int isAcCoeff = i > 0 ? 1 : 0; int bias = BiasMatrices[type][isAcCoeff]; this.IQ[i] = (ushort)((1 << WebpConstants.QFix) / this.Q[i]); - this.Bias[i] = (uint)this.BIAS(bias); + this.Bias[i] = (uint)BIAS(bias); // zthresh is the exact value such that QUANTDIV(coeff, iQ, B) is: // * zero if coeff <= zthresh @@ -106,6 +94,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return (sum + 8) >> 4; } - private int BIAS(int b) => b << (WebpConstants.QFix - 8); + private static int BIAS(int b) => b << (WebpConstants.QFix - 8); } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs index cf2a5c1775..2ce383d9e1 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8SegmentInfo.cs @@ -8,19 +8,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy internal class Vp8SegmentInfo { /// - /// Gets or sets the quantization matrix y1. + /// Gets the quantization matrix y1. /// - public Vp8Matrix Y1 { get; set; } +#pragma warning disable SA1401 // Fields should be private + public Vp8Matrix Y1; /// - /// Gets or sets the quantization matrix y2. + /// Gets the quantization matrix y2. /// - public Vp8Matrix Y2 { get; set; } + public Vp8Matrix Y2; /// - /// Gets or sets the quantization matrix uv. + /// Gets the quantization matrix uv. /// - public Vp8Matrix Uv { get; set; } + public Vp8Matrix Uv; +#pragma warning restore SA1401 // Fields should be private /// /// Gets or sets the quant-susceptibility, range [-127,127]. Zero is neutral. Lower values indicate a lower risk of blurriness. diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs new file mode 100644 index 0000000000..55738199b7 --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -0,0 +1,53 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System.Linq; +using SixLabors.ImageSharp.Formats.Webp.Lossy; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Formats.WebP +{ + [Trait("Format", "Webp")] + public class QuantEncTests + { + private static unsafe void RunQuantizeBlockTest() + { + // arrange + short[] input = { 378, 777, -851, 888, 259, 148, 0, -111, -185, -185, -74, -37, 148, 74, 111, 74 }; + short[] output = new short[16]; + ushort[] q = { 42, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37 }; + ushort[] iq = { 3120, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542, 3542 }; + uint[] bias = { 49152, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296, 55296 }; + uint[] zthresh = { 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21 }; + short[] expectedOutput = { 9, 21, 7, -5, 4, -23, 24, 0, -5, 4, 2, -2, -3, -1, 3, 2 }; + int expectedResult = 1; + Vp8Matrix vp8Matrix = default; + for (int i = 0; i < 16; i++) + { + vp8Matrix.Q[i] = q[i]; + vp8Matrix.IQ[i] = iq[i]; + vp8Matrix.Bias[i] = bias[i]; + vp8Matrix.ZThresh[i] = zthresh[i]; + } + + // act + int actualResult = QuantEnc.QuantizeBlock(input, output, ref vp8Matrix); + + // assert + Assert.True(output.SequenceEqual(expectedOutput)); + Assert.Equal(expectedResult, actualResult); + } + + [Fact] + public void QuantizeBlock_Works() => RunQuantizeBlockTest(); + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void QuantizeBlock_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.AllowAll); + + [Fact] + public void QuantizeBlock_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunQuantizeBlockTest, HwIntrinsics.DisableHWIntrinsic); +#endif + } +}