diff --git a/shared-infrastructure b/shared-infrastructure index 33cb12ca7..a042aba17 160000 --- a/shared-infrastructure +++ b/shared-infrastructure @@ -1 +1 @@ -Subproject commit 33cb12ca77f919b44de56f344d2627cc2a108c3a +Subproject commit a042aba176cdb840d800c6ed4cfe41a54fb7b1e3 diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs index 9d49b8c45..27bb2fc3c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8.cs @@ -337,6 +337,64 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } } + /// + /// Transpose the block inplace by swapping each element above the main diagonal with its mirror element below it. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public void TransposeInplace() + { + ref short elemRef = ref Unsafe.As(ref this); + + // row #0 + Swap(ref Unsafe.Add(ref elemRef, 1), ref Unsafe.Add(ref elemRef, 8)); + Swap(ref Unsafe.Add(ref elemRef, 2), ref Unsafe.Add(ref elemRef, 16)); + Swap(ref Unsafe.Add(ref elemRef, 3), ref Unsafe.Add(ref elemRef, 24)); + Swap(ref Unsafe.Add(ref elemRef, 4), ref Unsafe.Add(ref elemRef, 32)); + Swap(ref Unsafe.Add(ref elemRef, 5), ref Unsafe.Add(ref elemRef, 40)); + Swap(ref Unsafe.Add(ref elemRef, 6), ref Unsafe.Add(ref elemRef, 48)); + Swap(ref Unsafe.Add(ref elemRef, 7), ref Unsafe.Add(ref elemRef, 56)); + + // row #1 + Swap(ref Unsafe.Add(ref elemRef, 10), ref Unsafe.Add(ref elemRef, 17)); + Swap(ref Unsafe.Add(ref elemRef, 11), ref Unsafe.Add(ref elemRef, 25)); + Swap(ref Unsafe.Add(ref elemRef, 12), ref Unsafe.Add(ref elemRef, 33)); + Swap(ref Unsafe.Add(ref elemRef, 13), ref Unsafe.Add(ref elemRef, 41)); + Swap(ref Unsafe.Add(ref elemRef, 14), ref Unsafe.Add(ref elemRef, 49)); + Swap(ref Unsafe.Add(ref elemRef, 15), ref Unsafe.Add(ref elemRef, 57)); + + // row #2 + Swap(ref Unsafe.Add(ref elemRef, 19), ref Unsafe.Add(ref elemRef, 26)); + Swap(ref Unsafe.Add(ref elemRef, 20), ref Unsafe.Add(ref elemRef, 34)); + Swap(ref Unsafe.Add(ref elemRef, 21), ref 
Unsafe.Add(ref elemRef, 42)); + Swap(ref Unsafe.Add(ref elemRef, 22), ref Unsafe.Add(ref elemRef, 50)); + Swap(ref Unsafe.Add(ref elemRef, 23), ref Unsafe.Add(ref elemRef, 58)); + + // row #3 + Swap(ref Unsafe.Add(ref elemRef, 28), ref Unsafe.Add(ref elemRef, 35)); + Swap(ref Unsafe.Add(ref elemRef, 29), ref Unsafe.Add(ref elemRef, 43)); + Swap(ref Unsafe.Add(ref elemRef, 30), ref Unsafe.Add(ref elemRef, 51)); + Swap(ref Unsafe.Add(ref elemRef, 31), ref Unsafe.Add(ref elemRef, 59)); + + // row #4 + Swap(ref Unsafe.Add(ref elemRef, 37), ref Unsafe.Add(ref elemRef, 44)); + Swap(ref Unsafe.Add(ref elemRef, 38), ref Unsafe.Add(ref elemRef, 52)); + Swap(ref Unsafe.Add(ref elemRef, 39), ref Unsafe.Add(ref elemRef, 60)); + + // row #5 + Swap(ref Unsafe.Add(ref elemRef, 46), ref Unsafe.Add(ref elemRef, 53)); + Swap(ref Unsafe.Add(ref elemRef, 47), ref Unsafe.Add(ref elemRef, 61)); + + // row #6 + Swap(ref Unsafe.Add(ref elemRef, 55), ref Unsafe.Add(ref elemRef, 62)); + + static void Swap(ref short a, ref short b) + { + short tmp = a; + a = b; + b = tmp; + } + } + /// /// Calculate the total sum of absolute differences of elements in 'a' and 'b'. 
/// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs index bc9a53ea0..6f104351c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/HuffmanScanDecoder.cs @@ -151,6 +151,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder if (this.componentsCount == this.frame.ComponentCount) { this.ParseBaselineDataInterleaved(); + this.spectralConverter.CommitConversion(); } else { @@ -501,7 +502,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder { i += r; s = buffer.Receive(s); - Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i++]) = (short)s; + Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[i++]) = (short)s; } else { @@ -570,7 +571,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder if (s != 0) { s = buffer.Receive(s); - Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[i]) = (short)(s << low); + Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[i]) = (short)(s << low); } else { @@ -646,7 +647,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder do { - ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]); + ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[k]); if (coef != 0) { buffer.CheckBits(); @@ -672,7 +673,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder if ((s != 0) && (k < 64)) { - Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]) = (short)s; + Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[k]) = (short)s; } } } @@ -681,7 +682,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder { for (; k <= end; k++) { - ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.ZigZagOrder[k]); + ref short coef = ref Unsafe.Add(ref blockDataRef, ZigZag.TransposingOrder[k]); if (coef != 0) { diff --git 
a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs index 085cd4a29..15f212b40 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs @@ -18,11 +18,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder /// public Block8x8F SourceBlock; - /// - /// Temporal block to store intermediate computation results. - /// - public Block8x8F WorkspaceBlock; - /// /// The quantization table as . /// @@ -45,7 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder this.subSamplingDivisors = component.SubSamplingDivisors; this.SourceBlock = default; - this.WorkspaceBlock = default; } /// @@ -71,7 +65,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder // Dequantize: block.MultiplyInPlace(ref this.DequantiazationTable); - FastFloatingPointDCT.TransformIDCT(ref block, ref this.WorkspaceBlock); + FastFloatingPointDCT.TransformIDCT(ref block); // To conform better to libjpeg we actually NEED TO loose precision here. // This is because they store blocks as Int16 between all the operations. diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs index e975b11fb..4e74f6226 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter.cs @@ -13,6 +13,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder /// internal abstract class SpectralConverter { + /// + /// Gets a value indicating whether this converter has converted spectral + /// data of the current image or not. + /// + protected bool Converted { get; private set; } + /// /// Injects jpeg image decoding metadata. 
/// @@ -33,6 +39,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder /// public abstract void ConvertStrideBaseline(); + /// + /// Marks current converter state as 'converted'. + /// + /// + /// This must be called only for baseline interleaved jpegs. + /// + public void CommitConversion() + { + DebugGuard.IsFalse(this.Converted, nameof(this.Converted), $"{nameof(this.CommitConversion)} must be called only once"); + + this.Converted = true; + } + /// /// Gets the color converter. /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs index ec7f3e5c3..2e965e0ac 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/SpectralConverter{TPixel}.cs @@ -3,6 +3,7 @@ using System; using System.Buffers; +using System.Linq; using System.Numerics; using System.Threading; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; @@ -29,8 +30,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder private Buffer2D pixelBuffer; - private int blockRowsPerStep; - private int pixelRowsPerStep; private int pixelRowCounter; @@ -41,8 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder this.cancellationToken = cancellationToken; } - private bool Converted => this.pixelRowCounter >= this.pixelBuffer.Height; - public Buffer2D GetPixelBuffer() { if (!this.Converted) @@ -52,7 +49,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder for (int step = 0; step < steps; step++) { this.cancellationToken.ThrowIfCancellationRequested(); - this.ConvertNextStride(step); + this.ConvertStride(step); } } @@ -65,18 +62,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder MemoryAllocator allocator = this.configuration.MemoryAllocator; // iteration data - IJpegComponent c0 = frame.Components[0]; + int 
majorBlockWidth = frame.Components.Max((component) => component.SizeInBlocks.Width); + int majorVerticalSamplingFactor = frame.Components.Max((component) => component.SamplingFactors.Height); const int blockPixelHeight = 8; - this.blockRowsPerStep = c0.SamplingFactors.Height; - this.pixelRowsPerStep = this.blockRowsPerStep * blockPixelHeight; + this.pixelRowsPerStep = majorVerticalSamplingFactor * blockPixelHeight; // pixel buffer for resulting image this.pixelBuffer = allocator.Allocate2D(frame.PixelWidth, frame.PixelHeight); this.paddedProxyPixelRow = allocator.Allocate(frame.PixelWidth + 3); // component processors from spectral to Rgba32 - var postProcessorBufferSize = new Size(c0.SizeInBlocks.Width * 8, this.pixelRowsPerStep); + const int blockPixelWidth = 8; + var postProcessorBufferSize = new Size(majorBlockWidth * blockPixelWidth, this.pixelRowsPerStep); this.componentProcessors = new JpegComponentPostProcessor[frame.Components.Length]; for (int i = 0; i < this.componentProcessors.Length; i++) { @@ -84,7 +82,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder } // single 'stride' rgba32 buffer for conversion between spectral and TPixel - // this.rgbaBuffer = allocator.Allocate(frame.PixelWidth); this.rgbBuffer = allocator.Allocate(frame.PixelWidth * 3); // color converter from Rgba32 to TPixel @@ -95,18 +92,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder public override void ConvertStrideBaseline() { // Convert next pixel stride using single spectral `stride' - // Note that zero passing eliminates the need of virtual call from JpegComponentPostProcessor - this.ConvertNextStride(spectralStep: 0); + // Note that passing zero eliminates the need for a virtual call + // from JpegComponentPostProcessor + this.ConvertStride(spectralStep: 0); - // Clear spectral stride - this is VERY important as jpeg possibly won't fill entire buffer each stride - // Which leads to decoding artifacts - // Note that this code clears all buffers of 
the post processors, it's their responsibility to allocate only single stride foreach (JpegComponentPostProcessor cpp in this.componentProcessors) { cpp.ClearSpectralBuffers(); } } + /// public void Dispose() { if (this.componentProcessors != null) @@ -121,7 +117,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder this.paddedProxyPixelRow?.Dispose(); } - private void ConvertNextStride(int spectralStep) + private void ConvertStride(int spectralStep) { int maxY = Math.Min(this.pixelBuffer.Height, this.pixelRowCounter + this.pixelRowsPerStep); diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs index ab9462632..94864005e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs @@ -2,9 +2,6 @@ // Licensed under the Apache License, Version 2.0. #if SUPPORTS_RUNTIME_INTRINSICS -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -12,149 +9,147 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal static partial class FastFloatingPointDCT { -#pragma warning disable SA1310, SA1311, IDE1006 // naming rules violation warnings +#pragma warning disable SA1310, SA1311, IDE1006 // naming rule violation warnings private static readonly Vector256 mm256_F_0_7071 = Vector256.Create(0.707106781f); private static readonly Vector256 mm256_F_0_3826 = Vector256.Create(0.382683433f); private static readonly Vector256 mm256_F_0_5411 = Vector256.Create(0.541196100f); private static readonly Vector256 mm256_F_1_3065 = Vector256.Create(1.306562965f); - private static readonly Vector256 mm256_F_1_1758 = Vector256.Create(1.175876f); - private static readonly Vector256 mm256_F_n1_9615 = Vector256.Create(-1.961570560f); - private static 
readonly Vector256 mm256_F_n0_3901 = Vector256.Create(-0.390180644f); - private static readonly Vector256 mm256_F_n0_8999 = Vector256.Create(-0.899976223f); - private static readonly Vector256 mm256_F_n2_5629 = Vector256.Create(-2.562915447f); - private static readonly Vector256 mm256_F_0_2986 = Vector256.Create(0.298631336f); - private static readonly Vector256 mm256_F_2_0531 = Vector256.Create(2.053119869f); - private static readonly Vector256 mm256_F_3_0727 = Vector256.Create(3.072711026f); - private static readonly Vector256 mm256_F_1_5013 = Vector256.Create(1.501321110f); - private static readonly Vector256 mm256_F_n1_8477 = Vector256.Create(-1.847759065f); - private static readonly Vector256 mm256_F_0_7653 = Vector256.Create(0.765366865f); + private static readonly Vector256 mm256_F_1_4142 = Vector256.Create(1.414213562f); + private static readonly Vector256 mm256_F_1_8477 = Vector256.Create(1.847759065f); + private static readonly Vector256 mm256_F_n1_0823 = Vector256.Create(-1.082392200f); + private static readonly Vector256 mm256_F_n2_6131 = Vector256.Create(-2.613125930f); #pragma warning restore SA1310, SA1311, IDE1006 /// /// Apply floating point FDCT inplace using simd operations. /// - /// Input matrix. - private static void ForwardTransform_Avx(ref Block8x8F block) + /// Input block. 
+ private static void FDCT8x8_Avx(ref Block8x8F block) { DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation."); // First pass - process rows block.TransposeInplace(); - FDCT8x8_Avx(ref block); + FDCT8x8_1D_Avx(ref block); // Second pass - process columns block.TransposeInplace(); - FDCT8x8_Avx(ref block); + FDCT8x8_1D_Avx(ref block); + + // Applies 1D floating point FDCT inplace across the V0..V7 vectors of the block + static void FDCT8x8_1D_Avx(ref Block8x8F block) + { + Vector256 tmp0 = Avx.Add(block.V0, block.V7); + Vector256 tmp7 = Avx.Subtract(block.V0, block.V7); + Vector256 tmp1 = Avx.Add(block.V1, block.V6); + Vector256 tmp6 = Avx.Subtract(block.V1, block.V6); + Vector256 tmp2 = Avx.Add(block.V2, block.V5); + Vector256 tmp5 = Avx.Subtract(block.V2, block.V5); + Vector256 tmp3 = Avx.Add(block.V3, block.V4); + Vector256 tmp4 = Avx.Subtract(block.V3, block.V4); + + // Even part + Vector256 tmp10 = Avx.Add(tmp0, tmp3); + Vector256 tmp13 = Avx.Subtract(tmp0, tmp3); + Vector256 tmp11 = Avx.Add(tmp1, tmp2); + Vector256 tmp12 = Avx.Subtract(tmp1, tmp2); + + block.V0 = Avx.Add(tmp10, tmp11); + block.V4 = Avx.Subtract(tmp10, tmp11); + + Vector256 z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071); + block.V2 = Avx.Add(tmp13, z1); + block.V6 = Avx.Subtract(tmp13, z1); + + // Odd part + tmp10 = Avx.Add(tmp4, tmp5); + tmp11 = Avx.Add(tmp5, tmp6); + tmp12 = Avx.Add(tmp6, tmp7); + + Vector256 z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826); + Vector256 z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10); + Vector256 z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12); + Vector256 z3 = Avx.Multiply(tmp11, mm256_F_0_7071); + + Vector256 z11 = Avx.Add(tmp7, z3); + Vector256 z13 = Avx.Subtract(tmp7, z3); + + block.V5 = Avx.Add(z13, z2); + block.V3 = Avx.Subtract(z13, z2); + block.V1 = Avx.Add(z11, z4); + block.V7 = Avx.Subtract(z11, z4); + } } /// - /// Apply 1D floating point FDCT inplace using AVX operations on 8x8 matrix. 
+ /// Apply floating point IDCT inplace using simd operations. /// - /// - /// Requires Avx support. - /// - /// Input matrix. - public static void FDCT8x8_Avx(ref Block8x8F block) + /// Transposed input block. + private static void IDCT8x8_Avx(ref Block8x8F transposedBlock) { DebugGuard.IsTrue(Avx.IsSupported, "Avx support is required to execute this operation."); - Vector256 tmp0 = Avx.Add(block.V0, block.V7); - Vector256 tmp7 = Avx.Subtract(block.V0, block.V7); - Vector256 tmp1 = Avx.Add(block.V1, block.V6); - Vector256 tmp6 = Avx.Subtract(block.V1, block.V6); - Vector256 tmp2 = Avx.Add(block.V2, block.V5); - Vector256 tmp5 = Avx.Subtract(block.V2, block.V5); - Vector256 tmp3 = Avx.Add(block.V3, block.V4); - Vector256 tmp4 = Avx.Subtract(block.V3, block.V4); - - // Even part - Vector256 tmp10 = Avx.Add(tmp0, tmp3); - Vector256 tmp13 = Avx.Subtract(tmp0, tmp3); - Vector256 tmp11 = Avx.Add(tmp1, tmp2); - Vector256 tmp12 = Avx.Subtract(tmp1, tmp2); - - block.V0 = Avx.Add(tmp10, tmp11); - block.V4 = Avx.Subtract(tmp10, tmp11); - - Vector256 z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071); - block.V2 = Avx.Add(tmp13, z1); - block.V6 = Avx.Subtract(tmp13, z1); - - // Odd part - tmp10 = Avx.Add(tmp4, tmp5); - tmp11 = Avx.Add(tmp5, tmp6); - tmp12 = Avx.Add(tmp6, tmp7); - - Vector256 z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826); - Vector256 z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10); - Vector256 z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12); - Vector256 z3 = Avx.Multiply(tmp11, mm256_F_0_7071); - - Vector256 z11 = Avx.Add(tmp7, z3); - Vector256 z13 = Avx.Subtract(tmp7, z3); - - block.V5 = Avx.Add(z13, z2); - block.V3 = Avx.Subtract(z13, z2); - block.V1 = Avx.Add(z11, z4); - block.V7 = Avx.Subtract(z11, z4); - } - - /// - /// Combined operation of and - /// using AVX commands. 
- /// - /// Source - /// Destination - public static void IDCT8x8_Avx(ref Block8x8F s, ref Block8x8F d) - { - Debug.Assert(Avx.IsSupported, "AVX is required to execute this method"); - - Vector256 my1 = s.V1; - Vector256 my7 = s.V7; - Vector256 mz0 = Avx.Add(my1, my7); - - Vector256 my3 = s.V3; - Vector256 mz2 = Avx.Add(my3, my7); - Vector256 my5 = s.V5; - Vector256 mz1 = Avx.Add(my3, my5); - Vector256 mz3 = Avx.Add(my1, my5); - - Vector256 mz4 = Avx.Multiply(Avx.Add(mz0, mz1), mm256_F_1_1758); - - mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz2, mm256_F_n1_9615); - mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, mz3, mm256_F_n0_3901); - mz0 = Avx.Multiply(mz0, mm256_F_n0_8999); - mz1 = Avx.Multiply(mz1, mm256_F_n2_5629); - - Vector256 mb3 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my7, mm256_F_0_2986), mz2); - Vector256 mb2 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my5, mm256_F_2_0531), mz3); - Vector256 mb1 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz1, my3, mm256_F_3_0727), mz2); - Vector256 mb0 = Avx.Add(SimdUtils.HwIntrinsics.MultiplyAdd(mz0, my1, mm256_F_1_5013), mz3); - - Vector256 my2 = s.V2; - Vector256 my6 = s.V6; - mz4 = Avx.Multiply(Avx.Add(my2, my6), mm256_F_0_5411); - Vector256 my0 = s.V0; - Vector256 my4 = s.V4; - mz0 = Avx.Add(my0, my4); - mz1 = Avx.Subtract(my0, my4); - mz2 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my6, mm256_F_n1_8477); - mz3 = SimdUtils.HwIntrinsics.MultiplyAdd(mz4, my2, mm256_F_0_7653); - - my0 = Avx.Add(mz0, mz3); - my3 = Avx.Subtract(mz0, mz3); - my1 = Avx.Add(mz1, mz2); - my2 = Avx.Subtract(mz1, mz2); - - d.V0 = Avx.Add(my0, mb0); - d.V7 = Avx.Subtract(my0, mb0); - d.V1 = Avx.Add(my1, mb1); - d.V6 = Avx.Subtract(my1, mb1); - d.V2 = Avx.Add(my2, mb2); - d.V5 = Avx.Subtract(my2, mb2); - d.V3 = Avx.Add(my3, mb3); - d.V4 = Avx.Subtract(my3, mb3); + // First pass - process columns + IDCT8x8_1D_Avx(ref transposedBlock); + + // Second pass - process rows + transposedBlock.TransposeInplace(); + IDCT8x8_1D_Avx(ref 
transposedBlock); + + // Applies 1D floating point IDCT inplace + static void IDCT8x8_1D_Avx(ref Block8x8F block) + { + // Even part + Vector256 tmp0 = block.V0; + Vector256 tmp1 = block.V2; + Vector256 tmp2 = block.V4; + Vector256 tmp3 = block.V6; + + Vector256 z5 = tmp0; + Vector256 tmp10 = Avx.Add(z5, tmp2); + Vector256 tmp11 = Avx.Subtract(z5, tmp2); + + Vector256 tmp13 = Avx.Add(tmp1, tmp3); + Vector256 tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142); + + tmp0 = Avx.Add(tmp10, tmp13); + tmp3 = Avx.Subtract(tmp10, tmp13); + tmp1 = Avx.Add(tmp11, tmp12); + tmp2 = Avx.Subtract(tmp11, tmp12); + + // Odd part + Vector256 tmp4 = block.V1; + Vector256 tmp5 = block.V3; + Vector256 tmp6 = block.V5; + Vector256 tmp7 = block.V7; + + Vector256 z13 = Avx.Add(tmp6, tmp5); + Vector256 z10 = Avx.Subtract(tmp6, tmp5); + Vector256 z11 = Avx.Add(tmp4, tmp7); + Vector256 z12 = Avx.Subtract(tmp4, tmp7); + + tmp7 = Avx.Add(z11, z13); + tmp11 = Avx.Multiply(Avx.Subtract(z11, z13), mm256_F_1_4142); + + z5 = Avx.Multiply(Avx.Add(z10, z12), mm256_F_1_8477); + + tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, mm256_F_n1_0823); + tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, mm256_F_n2_6131); + + tmp6 = Avx.Subtract(tmp12, tmp7); + tmp5 = Avx.Subtract(tmp11, tmp6); + tmp4 = Avx.Subtract(tmp10, tmp5); + + block.V0 = Avx.Add(tmp0, tmp7); + block.V7 = Avx.Subtract(tmp0, tmp7); + block.V1 = Avx.Add(tmp1, tmp6); + block.V6 = Avx.Subtract(tmp1, tmp6); + block.V2 = Avx.Add(tmp2, tmp5); + block.V5 = Avx.Subtract(tmp2, tmp5); + block.V3 = Avx.Add(tmp3, tmp4); + block.V4 = Avx.Subtract(tmp3, tmp4); + } } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index 6963c3636..81bfe2135 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -3,6 +3,7 @@ using 
System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; #if SUPPORTS_RUNTIME_INTRINSICS using System.Runtime.Intrinsics.X86; #endif @@ -15,102 +16,202 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// internal static partial class FastFloatingPointDCT { -#pragma warning disable SA1310 // FieldNamesMustNotContainUnderscore - private const float C_1_175876 = 1.175875602f; - private const float C_1_961571 = -1.961570560f; - private const float C_0_390181 = -0.390180644f; - private const float C_0_899976 = -0.899976223f; - private const float C_2_562915 = -2.562915447f; - private const float C_0_298631 = 0.298631336f; - private const float C_2_053120 = 2.053119869f; - private const float C_3_072711 = 3.072711026f; - private const float C_1_501321 = 1.501321110f; - private const float C_0_541196 = 0.541196100f; - private const float C_1_847759 = -1.847759065f; - private const float C_0_765367 = 0.765366865f; - - private const float C_0_125 = 0.1250f; - -#pragma warning disable SA1311, IDE1006 // naming rules violation warnings - private static readonly Vector4 mm128_F_0_7071 = new Vector4(0.707106781f); - private static readonly Vector4 mm128_F_0_3826 = new Vector4(0.382683433f); - private static readonly Vector4 mm128_F_0_5411 = new Vector4(0.541196100f); - private static readonly Vector4 mm128_F_1_3065 = new Vector4(1.306562965f); -#pragma warning restore SA1311, IDE1006 - -#pragma warning restore SA1310 // FieldNamesMustNotContainUnderscore +#pragma warning disable SA1310, SA1311, IDE1006 // naming rules violation warnings + private static readonly Vector4 mm128_F_0_7071 = new(0.707106781f); + private static readonly Vector4 mm128_F_0_3826 = new(0.382683433f); + private static readonly Vector4 mm128_F_0_5411 = new(0.541196100f); + private static readonly Vector4 mm128_F_1_3065 = new(1.306562965f); + + private static readonly Vector4 mm128_F_1_4142 = new(1.414213562f); + private static readonly Vector4 mm128_F_1_8477 
= new(1.847759065f); + private static readonly Vector4 mm128_F_n1_0823 = new(-1.082392200f); + private static readonly Vector4 mm128_F_n2_6131 = new(-2.613125930f); +#pragma warning restore SA1310, SA1311, IDE1006 /// - /// Gets reciprocal coefficients for jpeg quantization tables calculation. + /// Gets adjustment table for quantization tables. /// /// /// - /// Current FDCT implementation expects its results to be multiplied by - /// a reciprocal quantization table. To get 8x8 reciprocal block values in this - /// table must be divided by quantization table values scaled with quality settings. + /// Current IDCT and FDCT implementations are based on Arai, Agui, + /// and Nakajima's algorithm. Both DCT methods do not + /// produce finished DCT output; the final step is fused into the + /// quantization step. Quantization and de-quantization coefficients + /// must be multiplied by these values. /// /// - /// These values were calculates with this formula: - /// - /// value[row * 8 + col] = scalefactor[row] * scalefactor[col] * 8; - /// - /// Where: + /// Given values were generated by formula: /// + /// scalefactor[row] * scalefactor[col], where /// scalefactor[0] = 1 - /// - /// /// scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 /// - /// Values are also scaled by 8 so DCT code won't do extra division/multiplication. 
/// /// - internal static readonly float[] DctReciprocalAdjustmentCoefficients = new float[] + private static readonly float[] AdjustmentCoefficients = new float[] { - 0.125f, 0.09011998f, 0.09567086f, 0.10630376f, 0.125f, 0.15909483f, 0.23096988f, 0.45306373f, - 0.09011998f, 0.064972885f, 0.068974845f, 0.07664074f, 0.09011998f, 0.11470097f, 0.16652f, 0.32664075f, - 0.09567086f, 0.068974845f, 0.07322331f, 0.081361376f, 0.09567086f, 0.121765904f, 0.17677669f, 0.34675997f, - 0.10630376f, 0.07664074f, 0.081361376f, 0.09040392f, 0.10630376f, 0.13529903f, 0.19642374f, 0.38529903f, - 0.125f, 0.09011998f, 0.09567086f, 0.10630376f, 0.125f, 0.15909483f, 0.23096988f, 0.45306373f, - 0.15909483f, 0.11470097f, 0.121765904f, 0.13529903f, 0.15909483f, 0.2024893f, 0.2939689f, 0.5766407f, - 0.23096988f, 0.16652f, 0.17677669f, 0.19642374f, 0.23096988f, 0.2939689f, 0.4267767f, 0.8371526f, - 0.45306373f, 0.32664075f, 0.34675997f, 0.38529903f, 0.45306373f, 0.5766407f, 0.8371526f, 1.642134f, + 1f, 1.3870399f, 1.306563f, 1.1758755f, 1f, 0.78569496f, 0.5411961f, 0.27589938f, + 1.3870399f, 1.9238797f, 1.812255f, 1.6309863f, 1.3870399f, 1.0897902f, 0.7506606f, 0.38268346f, + 1.306563f, 1.812255f, 1.707107f, 1.5363555f, 1.306563f, 1.02656f, 0.7071068f, 0.36047992f, + 1.1758755f, 1.6309863f, 1.5363555f, 1.3826833f, 1.1758755f, 0.9238795f, 0.63637924f, 0.32442334f, + 1f, 1.3870399f, 1.306563f, 1.1758755f, 1f, 0.78569496f, 0.5411961f, 0.27589938f, + 0.78569496f, 1.0897902f, 1.02656f, 0.9238795f, 0.78569496f, 0.61731654f, 0.42521507f, 0.21677275f, + 0.5411961f, 0.7506606f, 0.7071068f, 0.63637924f, 0.5411961f, 0.42521507f, 0.29289323f, 0.14931567f, + 0.27589938f, 0.38268346f, 0.36047992f, 0.32442334f, 0.27589938f, 0.21677275f, 0.14931567f, 0.076120466f, }; /// - /// Adjusts given quantization table to be complient with FDCT implementation. + /// Adjusts given quantization table for usage with . + /// + /// Quantization table to adjust. 
+ public static void AdjustToIDCT(ref Block8x8F quantTable) + { + ref float tableRef = ref Unsafe.As(ref quantTable); + ref float multipliersRef = ref MemoryMarshal.GetReference(AdjustmentCoefficients); + for (nint i = 0; i < Block8x8F.Size; i++) + { + tableRef = 0.125f * tableRef * Unsafe.Add(ref multipliersRef, i); + tableRef = ref Unsafe.Add(ref tableRef, 1); + } + + // Spectral macroblocks are transposed before quantization + // so we must transpose quantization table + quantTable.TransposeInplace(); + } + + /// + /// Adjusts given quantization table for usage with . + /// + /// Quantization table to adjust. + public static void AdjustToFDCT(ref Block8x8F quantTable) + { + ref float tableRef = ref Unsafe.As(ref quantTable); + ref float multipliersRef = ref MemoryMarshal.GetReference(AdjustmentCoefficients); + for (nint i = 0; i < Block8x8F.Size; i++) + { + tableRef = 0.125f / (tableRef * Unsafe.Add(ref multipliersRef, i)); + tableRef = ref Unsafe.Add(ref tableRef, 1); + } + } + + /// + /// Apply 2D floating point IDCT inplace. /// /// - /// See docs for explanation. + /// Input block must be dequantized before this method with table + /// adjusted by . /// - /// Quantization table to adjust. - public static void AdjustToFDCT(ref Block8x8F quantizationtable) + /// Input block. + public static void TransformIDCT(ref Block8x8F block) { - for (int i = 0; i < Block8x8F.Size; i++) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) { - quantizationtable[i] = DctReciprocalAdjustmentCoefficients[i] / quantizationtable[i]; + IDCT8x8_Avx(ref block); + } + else +#endif + { + IDCT_Vector4(ref block); } } /// - /// Apply 2D floating point FDCT inplace. + /// Apply 2D floating point FDCT inplace. /// - /// Input matrix. + /// + /// Input block must be quantized after this method with table adjusted + /// by . + /// + /// Input block. 
public static void TransformFDCT(ref Block8x8F block) { #if SUPPORTS_RUNTIME_INTRINSICS if (Avx.IsSupported) { - ForwardTransform_Avx(ref block); + FDCT8x8_Avx(ref block); } else #endif if (Vector.IsHardwareAccelerated) { - ForwardTransform_Vector4(ref block); + FDCT_Vector4(ref block); } else { - ForwardTransform_Scalar(ref block); + FDCT_Scalar(ref block); + } + } + + /// + /// Apply floating point IDCT inplace using API. + /// + /// Input block. + private static void IDCT_Vector4(ref Block8x8F transposedBlock) + { + DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware."); + + // First pass - process columns + IDCT8x4_Vector4(ref transposedBlock.V0L); + IDCT8x4_Vector4(ref transposedBlock.V0R); + + // Second pass - process rows + transposedBlock.TransposeInplace(); + IDCT8x4_Vector4(ref transposedBlock.V0L); + IDCT8x4_Vector4(ref transposedBlock.V0R); + + // Applies 1D floating point IDCT inplace on 8x4 part of 8x8 block + static void IDCT8x4_Vector4(ref Vector4 vecRef) + { + // Even part + Vector4 tmp0 = Unsafe.Add(ref vecRef, 0 * 2); + Vector4 tmp1 = Unsafe.Add(ref vecRef, 2 * 2); + Vector4 tmp2 = Unsafe.Add(ref vecRef, 4 * 2); + Vector4 tmp3 = Unsafe.Add(ref vecRef, 6 * 2); + + Vector4 z5 = tmp0; + Vector4 tmp10 = z5 + tmp2; + Vector4 tmp11 = z5 - tmp2; + + Vector4 tmp13 = tmp1 + tmp3; + Vector4 tmp12 = ((tmp1 - tmp3) * mm128_F_1_4142) - tmp13; + + tmp0 = tmp10 + tmp13; + tmp3 = tmp10 - tmp13; + tmp1 = tmp11 + tmp12; + tmp2 = tmp11 - tmp12; + + // Odd part + Vector4 tmp4 = Unsafe.Add(ref vecRef, 1 * 2); + Vector4 tmp5 = Unsafe.Add(ref vecRef, 3 * 2); + Vector4 tmp6 = Unsafe.Add(ref vecRef, 5 * 2); + Vector4 tmp7 = Unsafe.Add(ref vecRef, 7 * 2); + + Vector4 z13 = tmp6 + tmp5; + Vector4 z10 = tmp6 - tmp5; + Vector4 z11 = tmp4 + tmp7; + Vector4 z12 = tmp4 - tmp7; + + tmp7 = z11 + z13; + tmp11 = (z11 - z13) * mm128_F_1_4142; + + z5 = (z10 + z12) * mm128_F_1_8477; + + tmp10 = (z12 * mm128_F_n1_0823) + 
z5; + tmp12 = (z10 * mm128_F_n2_6131) + z5; + + tmp6 = tmp12 - tmp7; + tmp5 = tmp11 - tmp6; + tmp4 = tmp10 - tmp5; + + Unsafe.Add(ref vecRef, 0 * 2) = tmp0 + tmp7; + Unsafe.Add(ref vecRef, 7 * 2) = tmp0 - tmp7; + Unsafe.Add(ref vecRef, 1 * 2) = tmp1 + tmp6; + Unsafe.Add(ref vecRef, 6 * 2) = tmp1 - tmp6; + Unsafe.Add(ref vecRef, 2 * 2) = tmp2 + tmp5; + Unsafe.Add(ref vecRef, 5 * 2) = tmp2 - tmp5; + Unsafe.Add(ref vecRef, 3 * 2) = tmp3 + tmp4; + Unsafe.Add(ref vecRef, 4 * 2) = tmp3 - tmp4; } } @@ -120,8 +221,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Ported from libjpeg-turbo https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/jfdctflt.c. /// - /// Input matrix. - private static void ForwardTransform_Scalar(ref Block8x8F block) + /// Input block. + private static void FDCT_Scalar(ref Block8x8F block) { const int dctSize = 8; @@ -130,17 +231,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components float z1, z2, z3, z4, z5, z11, z13; // First pass - process rows - ref float dataRef = ref Unsafe.As(ref block); + ref float blockRef = ref Unsafe.As(ref block); for (int ctr = 7; ctr >= 0; ctr--) { - tmp0 = Unsafe.Add(ref dataRef, 0) + Unsafe.Add(ref dataRef, 7); - tmp7 = Unsafe.Add(ref dataRef, 0) - Unsafe.Add(ref dataRef, 7); - tmp1 = Unsafe.Add(ref dataRef, 1) + Unsafe.Add(ref dataRef, 6); - tmp6 = Unsafe.Add(ref dataRef, 1) - Unsafe.Add(ref dataRef, 6); - tmp2 = Unsafe.Add(ref dataRef, 2) + Unsafe.Add(ref dataRef, 5); - tmp5 = Unsafe.Add(ref dataRef, 2) - Unsafe.Add(ref dataRef, 5); - tmp3 = Unsafe.Add(ref dataRef, 3) + Unsafe.Add(ref dataRef, 4); - tmp4 = Unsafe.Add(ref dataRef, 3) - Unsafe.Add(ref dataRef, 4); + tmp0 = Unsafe.Add(ref blockRef, 0) + Unsafe.Add(ref blockRef, 7); + tmp7 = Unsafe.Add(ref blockRef, 0) - Unsafe.Add(ref blockRef, 7); + tmp1 = Unsafe.Add(ref blockRef, 1) + Unsafe.Add(ref blockRef, 6); + tmp6 = Unsafe.Add(ref blockRef, 1) - Unsafe.Add(ref blockRef, 6); + tmp2 = Unsafe.Add(ref blockRef, 2) + Unsafe.Add(ref 
blockRef, 5); + tmp5 = Unsafe.Add(ref blockRef, 2) - Unsafe.Add(ref blockRef, 5); + tmp3 = Unsafe.Add(ref blockRef, 3) + Unsafe.Add(ref blockRef, 4); + tmp4 = Unsafe.Add(ref blockRef, 3) - Unsafe.Add(ref blockRef, 4); // Even part tmp10 = tmp0 + tmp3; @@ -148,12 +249,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - Unsafe.Add(ref dataRef, 0) = tmp10 + tmp11; - Unsafe.Add(ref dataRef, 4) = tmp10 - tmp11; + Unsafe.Add(ref blockRef, 0) = tmp10 + tmp11; + Unsafe.Add(ref blockRef, 4) = tmp10 - tmp11; z1 = (tmp12 + tmp13) * 0.707106781f; - Unsafe.Add(ref dataRef, 2) = tmp13 + z1; - Unsafe.Add(ref dataRef, 6) = tmp13 - z1; + Unsafe.Add(ref blockRef, 2) = tmp13 + z1; + Unsafe.Add(ref blockRef, 6) = tmp13 - z1; // Odd part tmp10 = tmp4 + tmp5; @@ -168,26 +269,26 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components z11 = tmp7 + z3; z13 = tmp7 - z3; - Unsafe.Add(ref dataRef, 5) = z13 + z2; - Unsafe.Add(ref dataRef, 3) = z13 - z2; - Unsafe.Add(ref dataRef, 1) = z11 + z4; - Unsafe.Add(ref dataRef, 7) = z11 - z4; + Unsafe.Add(ref blockRef, 5) = z13 + z2; + Unsafe.Add(ref blockRef, 3) = z13 - z2; + Unsafe.Add(ref blockRef, 1) = z11 + z4; + Unsafe.Add(ref blockRef, 7) = z11 - z4; - dataRef = ref Unsafe.Add(ref dataRef, dctSize); + blockRef = ref Unsafe.Add(ref blockRef, dctSize); } // Second pass - process columns - dataRef = ref Unsafe.As(ref block); + blockRef = ref Unsafe.As(ref block); for (int ctr = 7; ctr >= 0; ctr--) { - tmp0 = Unsafe.Add(ref dataRef, dctSize * 0) + Unsafe.Add(ref dataRef, dctSize * 7); - tmp7 = Unsafe.Add(ref dataRef, dctSize * 0) - Unsafe.Add(ref dataRef, dctSize * 7); - tmp1 = Unsafe.Add(ref dataRef, dctSize * 1) + Unsafe.Add(ref dataRef, dctSize * 6); - tmp6 = Unsafe.Add(ref dataRef, dctSize * 1) - Unsafe.Add(ref dataRef, dctSize * 6); - tmp2 = Unsafe.Add(ref dataRef, dctSize * 2) + Unsafe.Add(ref dataRef, dctSize * 5); - tmp5 = Unsafe.Add(ref dataRef, dctSize * 2) - Unsafe.Add(ref dataRef, 
dctSize * 5); - tmp3 = Unsafe.Add(ref dataRef, dctSize * 3) + Unsafe.Add(ref dataRef, dctSize * 4); - tmp4 = Unsafe.Add(ref dataRef, dctSize * 3) - Unsafe.Add(ref dataRef, dctSize * 4); + tmp0 = Unsafe.Add(ref blockRef, dctSize * 0) + Unsafe.Add(ref blockRef, dctSize * 7); + tmp7 = Unsafe.Add(ref blockRef, dctSize * 0) - Unsafe.Add(ref blockRef, dctSize * 7); + tmp1 = Unsafe.Add(ref blockRef, dctSize * 1) + Unsafe.Add(ref blockRef, dctSize * 6); + tmp6 = Unsafe.Add(ref blockRef, dctSize * 1) - Unsafe.Add(ref blockRef, dctSize * 6); + tmp2 = Unsafe.Add(ref blockRef, dctSize * 2) + Unsafe.Add(ref blockRef, dctSize * 5); + tmp5 = Unsafe.Add(ref blockRef, dctSize * 2) - Unsafe.Add(ref blockRef, dctSize * 5); + tmp3 = Unsafe.Add(ref blockRef, dctSize * 3) + Unsafe.Add(ref blockRef, dctSize * 4); + tmp4 = Unsafe.Add(ref blockRef, dctSize * 3) - Unsafe.Add(ref blockRef, dctSize * 4); // Even part tmp10 = tmp0 + tmp3; @@ -195,12 +296,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - Unsafe.Add(ref dataRef, dctSize * 0) = tmp10 + tmp11; - Unsafe.Add(ref dataRef, dctSize * 4) = tmp10 - tmp11; + Unsafe.Add(ref blockRef, dctSize * 0) = tmp10 + tmp11; + Unsafe.Add(ref blockRef, dctSize * 4) = tmp10 - tmp11; z1 = (tmp12 + tmp13) * 0.707106781f; - Unsafe.Add(ref dataRef, dctSize * 2) = tmp13 + z1; - Unsafe.Add(ref dataRef, dctSize * 6) = tmp13 - z1; + Unsafe.Add(ref blockRef, dctSize * 2) = tmp13 + z1; + Unsafe.Add(ref blockRef, dctSize * 6) = tmp13 - z1; // Odd part tmp10 = tmp4 + tmp5; @@ -215,12 +316,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components z11 = tmp7 + z3; z13 = tmp7 - z3; - Unsafe.Add(ref dataRef, dctSize * 5) = z13 + z2; - Unsafe.Add(ref dataRef, dctSize * 3) = z13 - z2; - Unsafe.Add(ref dataRef, dctSize * 1) = z11 + z4; - Unsafe.Add(ref dataRef, dctSize * 7) = z11 - z4; + Unsafe.Add(ref blockRef, dctSize * 5) = z13 + z2; + Unsafe.Add(ref blockRef, dctSize * 3) = z13 - z2; + Unsafe.Add(ref blockRef, 
dctSize * 1) = z11 + z4; + Unsafe.Add(ref blockRef, dctSize * 7) = z11 - z4; - dataRef = ref Unsafe.Add(ref dataRef, 1); + blockRef = ref Unsafe.Add(ref blockRef, 1); } } @@ -230,11 +331,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// This implementation must be called only if hardware supports 4 /// floating point numbers vector. Otherwise explicit scalar - /// implementation is faster - /// because it does not rely on matrix transposition. + /// implementation is faster + /// because it does not rely on block transposition. /// - /// Input matrix. - private static void ForwardTransform_Vector4(ref Block8x8F block) + /// Input block. + public static void FDCT_Vector4(ref Block8x8F block) { DebugGuard.IsTrue(Vector.IsHardwareAccelerated, "Scalar implementation should be called for non-accelerated hardware."); @@ -247,209 +348,50 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components block.TransposeInplace(); FDCT8x4_Vector4(ref block.V0L); FDCT8x4_Vector4(ref block.V0R); - } - /// - /// Apply 1D floating point FDCT inplace on 8x4 part of 8x8 matrix. - /// - /// - /// Implemented using Vector4 API operations for either scalar or sse hardware implementation. - /// Must be called on both 8x4 matrix parts for the full FDCT transform. 
- /// - /// Input reference to the first - private static void FDCT8x4_Vector4(ref Vector4 blockRef) - { - Vector4 tmp0 = Unsafe.Add(ref blockRef, 0) + Unsafe.Add(ref blockRef, 14); - Vector4 tmp7 = Unsafe.Add(ref blockRef, 0) - Unsafe.Add(ref blockRef, 14); - Vector4 tmp1 = Unsafe.Add(ref blockRef, 2) + Unsafe.Add(ref blockRef, 12); - Vector4 tmp6 = Unsafe.Add(ref blockRef, 2) - Unsafe.Add(ref blockRef, 12); - Vector4 tmp2 = Unsafe.Add(ref blockRef, 4) + Unsafe.Add(ref blockRef, 10); - Vector4 tmp5 = Unsafe.Add(ref blockRef, 4) - Unsafe.Add(ref blockRef, 10); - Vector4 tmp3 = Unsafe.Add(ref blockRef, 6) + Unsafe.Add(ref blockRef, 8); - Vector4 tmp4 = Unsafe.Add(ref blockRef, 6) - Unsafe.Add(ref blockRef, 8); - - // Even part - Vector4 tmp10 = tmp0 + tmp3; - Vector4 tmp13 = tmp0 - tmp3; - Vector4 tmp11 = tmp1 + tmp2; - Vector4 tmp12 = tmp1 - tmp2; - - Unsafe.Add(ref blockRef, 0) = tmp10 + tmp11; - Unsafe.Add(ref blockRef, 8) = tmp10 - tmp11; - - Vector4 z1 = (tmp12 + tmp13) * mm128_F_0_7071; - Unsafe.Add(ref blockRef, 4) = tmp13 + z1; - Unsafe.Add(ref blockRef, 12) = tmp13 - z1; - - // Odd part - tmp10 = tmp4 + tmp5; - tmp11 = tmp5 + tmp6; - tmp12 = tmp6 + tmp7; - - Vector4 z5 = (tmp10 - tmp12) * mm128_F_0_3826; - Vector4 z2 = (mm128_F_0_5411 * tmp10) + z5; - Vector4 z4 = (mm128_F_1_3065 * tmp12) + z5; - Vector4 z3 = tmp11 * mm128_F_0_7071; - - Vector4 z11 = tmp7 + z3; - Vector4 z13 = tmp7 - z3; - - Unsafe.Add(ref blockRef, 10) = z13 + z2; - Unsafe.Add(ref blockRef, 6) = z13 - z2; - Unsafe.Add(ref blockRef, 2) = z11 + z4; - Unsafe.Add(ref blockRef, 14) = z11 - z4; - } + // Applies 1D floating point FDCT inplace on 8x4 part of 8x8 block + static void FDCT8x4_Vector4(ref Vector4 vecRef) + { + Vector4 tmp0 = Unsafe.Add(ref vecRef, 0) + Unsafe.Add(ref vecRef, 14); + Vector4 tmp7 = Unsafe.Add(ref vecRef, 0) - Unsafe.Add(ref vecRef, 14); + Vector4 tmp1 = Unsafe.Add(ref vecRef, 2) + Unsafe.Add(ref vecRef, 12); + Vector4 tmp6 = Unsafe.Add(ref vecRef, 2) - Unsafe.Add(ref 
vecRef, 12); + Vector4 tmp2 = Unsafe.Add(ref vecRef, 4) + Unsafe.Add(ref vecRef, 10); + Vector4 tmp5 = Unsafe.Add(ref vecRef, 4) - Unsafe.Add(ref vecRef, 10); + Vector4 tmp3 = Unsafe.Add(ref vecRef, 6) + Unsafe.Add(ref vecRef, 8); + Vector4 tmp4 = Unsafe.Add(ref vecRef, 6) - Unsafe.Add(ref vecRef, 8); - /// - /// Apply floating point IDCT inplace. - /// Ported from https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L239. - /// - /// Input matrix. - /// Matrix to store temporal results. - public static void TransformIDCT(ref Block8x8F block, ref Block8x8F temp) - { - block.TransposeInplace(); - IDCT8x8(ref block, ref temp); - temp.TransposeInplace(); - IDCT8x8(ref temp, ref block); + // Even part + Vector4 tmp10 = tmp0 + tmp3; + Vector4 tmp13 = tmp0 - tmp3; + Vector4 tmp11 = tmp1 + tmp2; + Vector4 tmp12 = tmp1 - tmp2; - // TODO: This can be fused into quantization table step - block.MultiplyInPlace(C_0_125); - } + Unsafe.Add(ref vecRef, 0) = tmp10 + tmp11; + Unsafe.Add(ref vecRef, 8) = tmp10 - tmp11; - /// - /// Performs 8x8 matrix Inverse Discrete Cosine Transform - /// - /// Source - /// Destination - private static void IDCT8x8(ref Block8x8F s, ref Block8x8F d) - { -#if SUPPORTS_RUNTIME_INTRINSICS - if (Avx.IsSupported) - { - IDCT8x8_Avx(ref s, ref d); - } - else -#endif - { - IDCT8x4_LeftPart(ref s, ref d); - IDCT8x4_RightPart(ref s, ref d); - } - } + Vector4 z1 = (tmp12 + tmp13) * mm128_F_0_7071; + Unsafe.Add(ref vecRef, 4) = tmp13 + z1; + Unsafe.Add(ref vecRef, 12) = tmp13 - z1; - /// - /// Do IDCT internal operations on the left part of the block. 
Original src: - /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 - /// - /// The source block - /// Destination block - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void IDCT8x4_LeftPart(ref Block8x8F s, ref Block8x8F d) - { - Vector4 my1 = s.V1L; - Vector4 my7 = s.V7L; - Vector4 mz0 = my1 + my7; - - Vector4 my3 = s.V3L; - Vector4 mz2 = my3 + my7; - Vector4 my5 = s.V5L; - Vector4 mz1 = my3 + my5; - Vector4 mz3 = my1 + my5; - - Vector4 mz4 = (mz0 + mz1) * C_1_175876; - - mz2 = (mz2 * C_1_961571) + mz4; - mz3 = (mz3 * C_0_390181) + mz4; - mz0 = mz0 * C_0_899976; - mz1 = mz1 * C_2_562915; - - Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; - Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; - Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; - Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; - - Vector4 my2 = s.V2L; - Vector4 my6 = s.V6L; - mz4 = (my2 + my6) * C_0_541196; - Vector4 my0 = s.V0L; - Vector4 my4 = s.V4L; - mz0 = my0 + my4; - mz1 = my0 - my4; - - mz2 = mz4 + (my6 * C_1_847759); - mz3 = mz4 + (my2 * C_0_765367); - - my0 = mz0 + mz3; - my3 = mz0 - mz3; - my1 = mz1 + mz2; - my2 = mz1 - mz2; - - d.V0L = my0 + mb0; - d.V7L = my0 - mb0; - d.V1L = my1 + mb1; - d.V6L = my1 - mb1; - d.V2L = my2 + mb2; - d.V5L = my2 - mb2; - d.V3L = my3 + mb3; - d.V4L = my3 - mb3; - } + // Odd part + tmp10 = tmp4 + tmp5; + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; - /// - /// Do IDCT internal operations on the right part of the block. 
- /// Original src: - /// https://github.com/norishigefukushima/dct_simd/blob/master/dct/dct8x8_simd.cpp#L261 - /// - /// The source block - /// The destination block - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void IDCT8x4_RightPart(ref Block8x8F s, ref Block8x8F d) - { - Vector4 my1 = s.V1R; - Vector4 my7 = s.V7R; - Vector4 mz0 = my1 + my7; - - Vector4 my3 = s.V3R; - Vector4 mz2 = my3 + my7; - Vector4 my5 = s.V5R; - Vector4 mz1 = my3 + my5; - Vector4 mz3 = my1 + my5; - - Vector4 mz4 = (mz0 + mz1) * C_1_175876; - - mz2 = (mz2 * C_1_961571) + mz4; - mz3 = (mz3 * C_0_390181) + mz4; - mz0 = mz0 * C_0_899976; - mz1 = mz1 * C_2_562915; - - Vector4 mb3 = (my7 * C_0_298631) + mz0 + mz2; - Vector4 mb2 = (my5 * C_2_053120) + mz1 + mz3; - Vector4 mb1 = (my3 * C_3_072711) + mz1 + mz2; - Vector4 mb0 = (my1 * C_1_501321) + mz0 + mz3; - - Vector4 my2 = s.V2R; - Vector4 my6 = s.V6R; - mz4 = (my2 + my6) * C_0_541196; - Vector4 my0 = s.V0R; - Vector4 my4 = s.V4R; - mz0 = my0 + my4; - mz1 = my0 - my4; - - mz2 = mz4 + (my6 * C_1_847759); - mz3 = mz4 + (my2 * C_0_765367); - - my0 = mz0 + mz3; - my3 = mz0 - mz3; - my1 = mz1 + mz2; - my2 = mz1 - mz2; - - d.V0R = my0 + mb0; - d.V7R = my0 - mb0; - d.V1R = my1 + mb1; - d.V6R = my1 - mb1; - d.V2R = my2 + mb2; - d.V5R = my2 - mb2; - d.V3R = my3 + mb3; - d.V4R = my3 - mb3; + Vector4 z5 = (tmp10 - tmp12) * mm128_F_0_3826; + Vector4 z2 = (mm128_F_0_5411 * tmp10) + z5; + Vector4 z4 = (mm128_F_1_3065 * tmp12) + z5; + Vector4 z3 = tmp11 * mm128_F_0_7071; + + Vector4 z11 = tmp7 + z3; + Vector4 z13 = tmp7 - z3; + + Unsafe.Add(ref vecRef, 10) = z13 + z2; + Unsafe.Add(ref vecRef, 6) = z13 - z2; + Unsafe.Add(ref vecRef, 2) = z11 + z4; + Unsafe.Add(ref vecRef, 14) = z11 - z4; + } } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs index e519a8a1d..ab80b3ae6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs +++ 
b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.cs @@ -35,5 +35,34 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }; + + /// + /// Gets span of zig-zag with fused transpose step ordering indices. + /// + /// + /// When reading corrupted data, the Huffman decoders could attempt + /// to reference an entry beyond the end of this array (if the decoded + /// zero run length reaches past the end of the block). To prevent + /// wild stores without adding an inner-loop test, we put some extra + /// "63"s after the real entries. This will cause the extra coefficient + /// to be stored in location 63 of the block, not somewhere random. + /// The worst case would be a run-length of 15, which means we need 16 + /// fake entries. + /// + public static ReadOnlySpan TransposingOrder => new byte[] + { + 0, 8, 1, 2, 9, 16, 24, 17, + 10, 3, 4, 11, 18, 25, 32, 40, + 33, 26, 19, 12, 5, 6, 13, 20, + 27, 34, 41, 48, 56, 49, 42, 35, + 28, 21, 14, 7, 15, 22, 29, 36, + 43, 50, 57, 58, 51, 44, 37, 30, + 23, 31, 38, 45, 52, 59, 60, 53, + 46, 39, 47, 54, 61, 62, 55, 63, + + // Extra entries for safety in decoder + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63 + }; } } diff --git a/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs index 9a9e5eb79..73763f4ab 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs @@ -942,6 +942,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg break; } } + + // Adjusting table for IDCT step during decompression + FastFloatingPointDCT.AdjustToIDCT(ref table); } } diff --git a/src/ImageSharp/Formats/Webp/EntropyIx.cs b/src/ImageSharp/Formats/Webp/EntropyIx.cs index c72ddeb42..98e8b7e16 100644 --- a/src/ImageSharp/Formats/Webp/EntropyIx.cs +++ b/src/ImageSharp/Formats/Webp/EntropyIx.cs @@ -6,7 +6,7 @@ namespace SixLabors.ImageSharp.Formats.Webp /// /// These five modes 
are evaluated and their respective entropy is computed. /// - internal enum EntropyIx + internal enum EntropyIx : byte { Direct = 0, diff --git a/src/ImageSharp/Formats/Webp/HistoIx.cs b/src/ImageSharp/Formats/Webp/HistoIx.cs index 68b00394b..83522f9da 100644 --- a/src/ImageSharp/Formats/Webp/HistoIx.cs +++ b/src/ImageSharp/Formats/Webp/HistoIx.cs @@ -3,7 +3,7 @@ namespace SixLabors.ImageSharp.Formats.Webp { - internal enum HistoIx + internal enum HistoIx : byte { HistoAlpha = 0, diff --git a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs index dc546f8ac..c394a8caa 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs @@ -2,11 +2,13 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Buffers; using System.Collections.Generic; +using SixLabors.ImageSharp.Memory; namespace SixLabors.ImageSharp.Formats.Webp.Lossless { - internal class BackwardReferenceEncoder + internal static class BackwardReferenceEncoder { /// /// Maximum bit length. 
@@ -41,6 +43,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int quality, int lz77TypesToTry, ref int cacheBits, + MemoryAllocator memoryAllocator, Vp8LHashChain hashChain, Vp8LBackwardRefs best, Vp8LBackwardRefs worst) @@ -69,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless BackwardReferencesLz77(width, height, bgra, 0, hashChain, worst); break; case Vp8LLz77Type.Lz77Box: - hashChainBox = new Vp8LHashChain(width * height); + hashChainBox = new Vp8LHashChain(memoryAllocator, width * height); BackwardReferencesLz77Box(width, height, bgra, 0, hashChain, hashChainBox, worst); break; } @@ -100,7 +103,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if ((lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard || lz77TypeBest == (int)Vp8LLz77Type.Lz77Box) && quality >= 25) { Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox; - BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst); + BackwardReferencesTraceBackwards(width, height, memoryAllocator, bgra, cacheBits, hashChainTmp, best, worst); var histo = new Vp8LHistogram(worst, cacheBits); double bitCostTrace = histo.EstimateBits(stats, bitsEntropy); if (bitCostTrace < bitCostBest) @@ -111,6 +114,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless BackwardReferences2DLocality(width, best); + hashChainBox?.Dispose(); + return best; } @@ -234,6 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static void BackwardReferencesTraceBackwards( int xSize, int ySize, + MemoryAllocator memoryAllocator, ReadOnlySpan bgra, int cacheBits, Vp8LHashChain hashChain, @@ -241,22 +247,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vp8LBackwardRefs refsDst) { int distArraySize = xSize * ySize; - ushort[] distArray = new ushort[distArraySize]; + using IMemoryOwner distArrayBuffer = memoryAllocator.Allocate(distArraySize); + Span distArray = distArrayBuffer.GetSpan(); - 
BackwardReferencesHashChainDistanceOnly(xSize, ySize, bgra, cacheBits, hashChain, refsSrc, distArray); + BackwardReferencesHashChainDistanceOnly(xSize, ySize, memoryAllocator, bgra, cacheBits, hashChain, refsSrc, distArrayBuffer); int chosenPathSize = TraceBackwards(distArray, distArraySize); - Span chosenPath = distArray.AsSpan(distArraySize - chosenPathSize); + Span chosenPath = distArray.Slice(distArraySize - chosenPathSize); BackwardReferencesHashChainFollowChosenPath(bgra, cacheBits, chosenPath, chosenPathSize, hashChain, refsDst); } private static void BackwardReferencesHashChainDistanceOnly( int xSize, int ySize, + MemoryAllocator memoryAllocator, ReadOnlySpan bgra, int cacheBits, Vp8LHashChain hashChain, Vp8LBackwardRefs refs, - ushort[] distArray) + IMemoryOwner distArrayBuffer) { int pixCount = xSize * ySize; bool useColorCache = cacheBits > 0; @@ -275,22 +283,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } costModel.Build(xSize, cacheBits, refs); - var costManager = new CostManager(distArray, pixCount, costModel); + using var costManager = new CostManager(memoryAllocator, distArrayBuffer, pixCount, costModel); + Span costManagerCosts = costManager.Costs.GetSpan(); + Span distArray = distArrayBuffer.GetSpan(); // We loop one pixel at a time, but store all currently best points to non-processed locations from this point. distArray[0] = 0; // Add first pixel as literal. - AddSingleLiteralWithCostModel(bgra, colorCache, costModel, 0, useColorCache, 0.0f, costManager.Costs, distArray); + AddSingleLiteralWithCostModel(bgra, colorCache, costModel, 0, useColorCache, 0.0f, costManagerCosts, distArray); for (int i = 1; i < pixCount; i++) { - float prevCost = costManager.Costs[i - 1]; + float prevCost = costManagerCosts[i - 1]; int offset = hashChain.FindOffset(i); int len = hashChain.FindLength(i); // Try adding the pixel as a literal. 
- AddSingleLiteralWithCostModel(bgra, colorCache, costModel, i, useColorCache, prevCost, costManager.Costs, distArray); + AddSingleLiteralWithCostModel(bgra, colorCache, costModel, i, useColorCache, prevCost, costManagerCosts, distArray); // If we are dealing with a non-literal. if (len >= 2) @@ -334,7 +344,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless costManager.UpdateCostAtIndex(j - 1, false); costManager.UpdateCostAtIndex(j, false); - costManager.PushInterval(costManager.Costs[j - 1] + offsetCost, j, lenJ); + costManager.PushInterval(costManagerCosts[j - 1] + offsetCost, j, lenJ); reach = j + lenJ - 1; } } @@ -346,7 +356,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } - private static int TraceBackwards(ushort[] distArray, int distArraySize) + private static int TraceBackwards(Span distArray, int distArraySize) { int chosenPathSize = 0; int pathPos = distArraySize; @@ -426,8 +436,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int idx, bool useColorCache, float prevCost, - float[] cost, - ushort[] distArray) + Span cost, + Span distArray) { double costVal = prevCost; uint color = bgra[idx]; @@ -617,7 +627,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } - hashChain.OffsetLength[0] = 0; + Span hashChainOffsetLength = hashChain.OffsetLength.GetSpan(); + hashChainOffsetLength[0] = 0; for (i = 1; i < pixelCount; i++) { int ind; @@ -695,19 +706,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if (bestLength <= MinLength) { - hashChain.OffsetLength[i] = 0; + hashChainOffsetLength[i] = 0; bestOffsetPrev = 0; bestLengthPrev = 0; } else { - hashChain.OffsetLength[i] = (uint)((bestOffset << MaxLengthBits) | bestLength); + hashChainOffsetLength[i] = (uint)((bestOffset << MaxLengthBits) | bestLength); bestOffsetPrev = bestOffset; bestLengthPrev = bestLength; } } - hashChain.OffsetLength[0] = 0; + hashChainOffsetLength[0] = 0; BackwardReferencesLz77(xSize, ySize, bgra, cacheBits, hashChain, refs); } diff --git 
a/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs b/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs index 94c7bd847..c121a41a1 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/CostManager.cs @@ -1,7 +1,10 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; +using System.Buffers; using System.Collections.Generic; +using SixLabors.ImageSharp.Memory; namespace SixLabors.ImageSharp.Formats.Webp.Lossless { @@ -10,20 +13,29 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// It caches the different CostCacheInterval, caches the different /// GetLengthCost(costModel, k) in costCache and the CostInterval's. /// - internal class CostManager + internal sealed class CostManager : IDisposable { private CostInterval head; - public CostManager(ushort[] distArray, int pixCount, CostModel costModel) + private const int FreeIntervalsStartCount = 25; + + private readonly Stack freeIntervals = new(FreeIntervalsStartCount); + + public CostManager(MemoryAllocator memoryAllocator, IMemoryOwner distArray, int pixCount, CostModel costModel) { int costCacheSize = pixCount > BackwardReferenceEncoder.MaxLength ? BackwardReferenceEncoder.MaxLength : pixCount; this.CacheIntervals = new List(); this.CostCache = new List(); - this.Costs = new float[pixCount]; + this.Costs = memoryAllocator.Allocate(pixCount); this.DistArray = distArray; this.Count = 0; + for (int i = 0; i < FreeIntervalsStartCount; i++) + { + this.freeIntervals.Push(new CostInterval()); + } + // Fill in the cost cache. this.CacheIntervalsSize++; this.CostCache.Add(costModel.GetLengthCost(0)); @@ -64,10 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } // Set the initial costs high for every pixel as we will keep the minimum. 
- for (int i = 0; i < pixCount; i++) - { - this.Costs[i] = 1e38f; - } + this.Costs.GetSpan().Fill(1e38f); } /// @@ -82,9 +91,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless public int CacheIntervalsSize { get; } - public float[] Costs { get; } + public IMemoryOwner Costs { get; } - public ushort[] DistArray { get; } + public IMemoryOwner DistArray { get; } public List CacheIntervals { get; } @@ -128,6 +137,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless // interval logic, just serialize it right away. This constant is empirical. int skipDistance = 10; + Span costs = this.Costs.GetSpan(); + Span distArray = this.DistArray.GetSpan(); if (len < skipDistance) { for (int j = position; j < position + len; j++) @@ -135,10 +146,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int k = j - position; float costTmp = (float)(distanceCost + this.CostCache[k]); - if (this.Costs[j] > costTmp) + if (costs[j] > costTmp) { - this.Costs[j] = costTmp; - this.DistArray[j] = (ushort)(k + 1); + costs[j] = costTmp; + distArray[j] = (ushort)(k + 1); } } @@ -201,10 +212,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.InsertInterval(interval, interval.Cost, interval.Index, end, endOriginal); break; } - else - { - interval.End = start; - } + + interval.End = start; } } @@ -226,6 +235,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.ConnectIntervals(interval.Previous, interval.Next); this.Count--; + + interval.Next = null; + interval.Previous = null; + this.freeIntervals.Push(interval); } private void InsertInterval(CostInterval intervalIn, float cost, int position, int start, int end) @@ -236,13 +249,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } // TODO: should we use COST_CACHE_INTERVAL_SIZE_MAX? 
- var intervalNew = new CostInterval() + CostInterval intervalNew; + if (this.freeIntervals.Count > 0) { - Cost = cost, - Start = start, - End = end, - Index = position - }; + intervalNew = this.freeIntervals.Pop(); + intervalNew.Cost = cost; + intervalNew.Start = start; + intervalNew.End = end; + intervalNew.Index = position; + } + else + { + intervalNew = new CostInterval() { Cost = cost, Start = start, End = end, Index = position }; + } this.PositionOrphanInterval(intervalNew, intervalIn); this.Count++; @@ -297,12 +316,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// private void UpdateCost(int i, int position, float cost) { + Span costs = this.Costs.GetSpan(); + Span distArray = this.DistArray.GetSpan(); int k = i - position; - if (this.Costs[i] > cost) + if (costs[i] > cost) { - this.Costs[i] = cost; - this.DistArray[i] = (ushort)(k + 1); + costs[i] = cost; + distArray[i] = (ushort)(k + 1); } } + + /// + public void Dispose() => this.Costs.Dispose(); } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/HTreeGroup.cs b/src/ImageSharp/Formats/Webp/Lossless/HTreeGroup.cs index a038248f1..6c2217eb6 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HTreeGroup.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HTreeGroup.cs @@ -13,16 +13,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// - UsePackedTable: few enough literal symbols, so all the bit codes can fit into a small look-up table PackedTable[] /// The common literal base, if applicable, is stored in 'LiteralArb'. 
/// - internal class HTreeGroup + internal struct HTreeGroup { public HTreeGroup(uint packedTableSize) { this.HTrees = new List(WebpConstants.HuffmanCodesPerMetaCode); this.PackedTable = new HuffmanCode[packedTableSize]; - for (int i = 0; i < packedTableSize; i++) - { - this.PackedTable[i] = new HuffmanCode(); - } + this.IsTrivialCode = false; + this.IsTrivialLiteral = false; + this.LiteralArb = 0; + this.UsePackedTable = false; } /// diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanCode.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanCode.cs index f75c64de1..efb928356 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanCode.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanCode.cs @@ -9,7 +9,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// A classic way to do entropy coding where a smaller number of bits are used for more frequent codes. /// [DebuggerDisplay("BitsUsed: {BitsUsed}, Value: {Value}")] - internal class HuffmanCode + internal struct HuffmanCode { /// /// Gets or sets the number of bits used for this symbol. diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs index 0376311ed..07fec7f99 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs @@ -9,7 +9,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// Represents the Huffman tree. /// [DebuggerDisplay("TotalCount = {TotalCount}, Value = {Value}, Left = {PoolIndexLeft}, Right = {PoolIndexRight}")] - internal struct HuffmanTree : IDeepCloneable + internal struct HuffmanTree { /// /// Initializes a new instance of the struct. @@ -57,7 +57,5 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return t1.Value < t2.Value ? 
-1 : 1; } - - public IDeepCloneable DeepClone() => new HuffmanTree(this); } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs index 5db01ca1c..56f2ee9ce 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Webp.Lossless { @@ -218,8 +219,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless while (treeSize > 1) { // Finish when we have only one root. - treePool[treePoolSize++] = (HuffmanTree)tree[treeSize - 1].DeepClone(); - treePool[treePoolSize++] = (HuffmanTree)tree[treeSize - 2].DeepClone(); + treePool[treePoolSize++] = tree[treeSize - 1]; + treePool[treePoolSize++] = tree[treeSize - 2]; int count = treePool[treePoolSize - 1].TotalCount + treePool[treePoolSize - 2].TotalCount; treeSize -= 2; @@ -238,7 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int startIdx = endIdx + num - 1; for (int i = startIdx; i >= endIdx; i--) { - tree[i] = (HuffmanTree)tree[i - 1].DeepClone(); + tree[i] = tree[i - 1]; } tree[k].TotalCount = count; @@ -307,9 +308,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless public static int BuildHuffmanTable(Span table, int rootBits, int[] codeLengths, int codeLengthsSize) { - Guard.MustBeGreaterThan(rootBits, 0, nameof(rootBits)); - Guard.NotNull(codeLengths, nameof(codeLengths)); - Guard.MustBeGreaterThan(codeLengthsSize, 0, nameof(codeLengthsSize)); + DebugGuard.MustBeGreaterThan(rootBits, 0, nameof(rootBits)); + DebugGuard.NotNull(codeLengths, nameof(codeLengths)); + DebugGuard.MustBeGreaterThan(codeLengthsSize, 0, nameof(codeLengthsSize)); // sorted[codeLengthsSize] is a pre-allocated array for sorting symbols by code length. 
int[] sorted = new int[codeLengthsSize]; @@ -467,27 +468,27 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless break; } - else if (repetitions < 11) + + if (repetitions < 11) { tokens[pos].Code = 17; tokens[pos].ExtraBits = (byte)(repetitions - 3); pos++; break; } - else if (repetitions < 139) + + if (repetitions < 139) { tokens[pos].Code = 18; tokens[pos].ExtraBits = (byte)(repetitions - 11); pos++; break; } - else - { - tokens[pos].Code = 18; - tokens[pos].ExtraBits = 0x7f; // 138 repeated 0s - pos++; - repetitions -= 138; - } + + tokens[pos].Code = 18; + tokens[pos].ExtraBits = 0x7f; // 138 repeated 0s + pos++; + repetitions -= 138; } return pos; @@ -519,20 +520,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless break; } - else if (repetitions < 7) + + if (repetitions < 7) { tokens[pos].Code = 16; tokens[pos].ExtraBits = (byte)(repetitions - 3); pos++; break; } - else - { - tokens[pos].Code = 16; - tokens[pos].ExtraBits = 3; - pos++; - repetitions -= 6; - } + + tokens[pos].Code = 16; + tokens[pos].ExtraBits = 3; + pos++; + repetitions -= 6; } return pos; @@ -541,7 +541,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// Get the actual bit values for a tree of bit depths. /// - /// The hiffman tree. + /// The huffman tree. private static void ConvertBitDepthsToSymbols(HuffmanTreeCode tree) { // 0 bit-depth means that the symbol does not exist. @@ -628,7 +628,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// private static void ReplicateValue(Span table, int step, int end, HuffmanCode code) { - Guard.IsTrue(end % step == 0, nameof(end), "end must be a multiple of step"); + DebugGuard.IsTrue(end % step == 0, nameof(end), "end must be a multiple of step"); do { @@ -656,6 +656,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// Heuristics for selecting the stride ranges to collapse. 
/// + [MethodImpl(InliningOptions.ShortMethod)] private static bool ValuesShouldBeCollapsedToStrideAverage(int a, int b) => Math.Abs(a - b) < 4; } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs index 6cd109121..96cdc3cbc 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs @@ -6,7 +6,7 @@ using System.Diagnostics; namespace SixLabors.ImageSharp.Formats.Webp.Lossless { [DebuggerDisplay("Mode: {Mode}, Len: {Len}, BgraOrDistance: {BgraOrDistance}")] - internal class PixOrCopy + internal sealed class PixOrCopy { public PixOrCopyMode Mode { get; set; } diff --git a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopyMode.cs b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopyMode.cs index 0d7023ffc..26099b902 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopyMode.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopyMode.cs @@ -3,7 +3,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { - internal enum PixOrCopyMode + internal enum PixOrCopyMode : byte { Literal, diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LBackwardRefs.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LBackwardRefs.cs index 502728b15..fca4ec59f 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LBackwardRefs.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LBackwardRefs.cs @@ -7,7 +7,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { internal class Vp8LBackwardRefs { - public Vp8LBackwardRefs() => this.Refs = new List(); + public Vp8LBackwardRefs(int pixels) => this.Refs = new List(pixels); /// /// Gets or sets the common block-size. 
diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index da815a479..adabd0ac3 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -124,19 +124,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.EncodedData = memoryAllocator.Allocate(pixelCount); this.Palette = memoryAllocator.Allocate(WebpConstants.MaxPaletteSize); this.Refs = new Vp8LBackwardRefs[3]; - this.HashChain = new Vp8LHashChain(pixelCount); + this.HashChain = new Vp8LHashChain(memoryAllocator, pixelCount); // We round the block size up, so we're guaranteed to have at most MaxRefsBlockPerImage blocks used: int refsBlockSize = ((pixelCount - 1) / MaxRefsBlockPerImage) + 1; for (int i = 0; i < this.Refs.Length; i++) { - this.Refs[i] = new Vp8LBackwardRefs + this.Refs[i] = new Vp8LBackwardRefs(pixelCount) { BlockSize = refsBlockSize < MinBlockSize ? MinBlockSize : refsBlockSize }; } } + // RFC 1951 will calm you down if you are worried about this funny sequence. + // This sequence is tuned from that, but more weighted for lower symbol count, + // and more spiking histograms. + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan StorageOrder => new byte[] { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + // This uses C#'s compiler optimization to refer to assembly's static data directly. private static ReadOnlySpan Order => new byte[] { 1, 2, 0, 3 }; @@ -515,7 +521,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } // Calculate backward references from BGRA image. - this.HashChain.Fill(this.memoryAllocator, bgra, this.quality, width, height, lowEffort); + this.HashChain.Fill(bgra, this.quality, width, height, lowEffort); Vp8LBitWriter bitWriterBest = config.SubConfigs.Count > 1 ? 
this.bitWriter.Clone() : this.bitWriter; Vp8LBitWriter bwInit = this.bitWriter; @@ -529,6 +535,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.quality, subConfig.Lz77, ref cacheBits, + this.memoryAllocator, this.HashChain, this.Refs[0], this.Refs[1]); @@ -735,7 +742,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } // Calculate backward references from the image pixels. - hashChain.Fill(this.memoryAllocator, bgra, quality, width, height, lowEffort); + hashChain.Fill(bgra, quality, width, height, lowEffort); Vp8LBackwardRefs refs = BackwardReferenceEncoder.GetBackwardReferences( width, @@ -744,6 +751,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless quality, (int)Vp8LLz77Type.Lz77Standard | (int)Vp8LLz77Type.Lz77Rle, ref cacheBits, + this.memoryAllocator, hashChain, refsTmp1, refsTmp2); @@ -940,16 +948,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private void StoreHuffmanTreeOfHuffmanTreeToBitMask(byte[] codeLengthBitDepth) { - // RFC 1951 will calm you down if you are worried about this funny sequence. - // This sequence is tuned from that, but more weighted for lower symbol count, - // and more spiking histograms. 
- byte[] storageOrder = { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - // Throw away trailing zeros: int codesToStore = WebpConstants.CodeLengthCodes; for (; codesToStore > 4; codesToStore--) { - if (codeLengthBitDepth[storageOrder[codesToStore - 1]] != 0) + if (codeLengthBitDepth[StorageOrder[codesToStore - 1]] != 0) { break; } @@ -958,7 +961,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.bitWriter.PutBits((uint)codesToStore - 4, 4); for (int i = 0; i < codesToStore; i++) { - this.bitWriter.PutBits(codeLengthBitDepth[storageOrder[i]], 3); + this.bitWriter.PutBits(codeLengthBitDepth[StorageOrder[i]], 3); } } @@ -1802,6 +1805,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.BgraScratch.Dispose(); this.Palette.Dispose(); this.TransformData.Dispose(); + this.HashChain.Dispose(); } } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHashChain.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHashChain.cs index 977a094bd..1bc7613a9 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHashChain.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHashChain.cs @@ -8,7 +8,7 @@ using SixLabors.ImageSharp.Memory; namespace SixLabors.ImageSharp.Formats.Webp.Lossless { - internal class Vp8LHashChain + internal sealed class Vp8LHashChain : IDisposable { private const uint HashMultiplierHi = 0xc6a4a793u; @@ -28,14 +28,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// private const int WindowSize = (1 << WindowSizeBits) - 120; + private readonly MemoryAllocator memoryAllocator; + /// /// Initializes a new instance of the class. /// + /// The memory allocator. /// The size off the chain. 
- public Vp8LHashChain(int size) + public Vp8LHashChain(MemoryAllocator memoryAllocator, int size) { - this.OffsetLength = new uint[size]; - this.OffsetLength.AsSpan().Fill(0xcdcdcdcd); + this.memoryAllocator = memoryAllocator; + this.OffsetLength = this.memoryAllocator.Allocate(size, AllocationOptions.Clean); this.Size = size; } @@ -45,16 +48,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// These 20 bits are the limit defined by GetWindowSizeForHashChain (through WindowSize = 1 << 20). /// The lower 12 bits contain the length of the match. /// - public uint[] OffsetLength { get; } + public IMemoryOwner OffsetLength { get; } /// /// Gets the size of the hash chain. - /// This is the maximum size of the hash_chain that can be constructed. + /// This is the maximum size of the hashchain that can be constructed. /// Typically this is the pixel count (width x height) for a given image. /// public int Size { get; } - public void Fill(MemoryAllocator memoryAllocator, ReadOnlySpan bgra, int quality, int xSize, int ySize, bool lowEffort) + public void Fill(ReadOnlySpan bgra, int quality, int xSize, int ySize, bool lowEffort) { int size = xSize * ySize; int iterMax = GetMaxItersForQuality(quality); @@ -63,20 +66,21 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if (size <= 2) { - this.OffsetLength[0] = 0; + this.OffsetLength.GetSpan()[0] = 0; return; } - using IMemoryOwner hashToFirstIndexBuffer = memoryAllocator.Allocate(HashSize); + using IMemoryOwner hashToFirstIndexBuffer = this.memoryAllocator.Allocate(HashSize); + using IMemoryOwner chainBuffer = this.memoryAllocator.Allocate(size, AllocationOptions.Clean); Span hashToFirstIndex = hashToFirstIndexBuffer.GetSpan(); + Span chain = chainBuffer.GetSpan(); // Initialize hashToFirstIndex array to -1. hashToFirstIndex.Fill(-1); - int[] chain = new int[size]; - // Fill the chain linking pixels with the same hash. 
bool bgraComp = bgra.Length > 1 && bgra[0] == bgra[1]; + Span tmp = stackalloc uint[2]; for (pos = 0; pos < size - 2;) { uint hashCode; @@ -85,7 +89,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { // Consecutive pixels with the same color will share the same hash. // We therefore use a different hash: the color and its repetition length. - uint[] tmp = new uint[2]; + tmp.Clear(); uint len = 1; tmp[0] = bgra[pos]; @@ -134,7 +138,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless // Find the best match interval at each pixel, defined by an offset to the // pixel and a length. The right-most pixel cannot match anything to the right // (hence a best length of 0) and the left-most pixel nothing to the left (hence an offset of 0). - this.OffsetLength[0] = this.OffsetLength[size - 1] = 0; + Span offsetLength = this.OffsetLength.GetSpan(); + offsetLength[0] = offsetLength[size - 1] = 0; for (int basePosition = size - 2; basePosition > 0;) { int maxLen = LosslessUtils.MaxFindCopyLength(size - 1 - basePosition); @@ -208,7 +213,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless uint maxBasePosition = (uint)basePosition; while (true) { - this.OffsetLength[basePosition] = (bestDistance << BackwardReferenceEncoder.MaxLengthBits) | (uint)bestLength; + offsetLength[basePosition] = (bestDistance << BackwardReferenceEncoder.MaxLengthBits) | (uint)bestLength; --basePosition; // Stop if we don't have a match or if we are out of bounds. 
@@ -242,10 +247,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } [MethodImpl(InliningOptions.ShortMethod)] - public int FindLength(int basePosition) => (int)(this.OffsetLength[basePosition] & ((1U << BackwardReferenceEncoder.MaxLengthBits) - 1)); + public int FindLength(int basePosition) => (int)(this.OffsetLength.GetSpan()[basePosition] & ((1U << BackwardReferenceEncoder.MaxLengthBits) - 1)); [MethodImpl(InliningOptions.ShortMethod)] - public int FindOffset(int basePosition) => (int)(this.OffsetLength[basePosition] >> BackwardReferenceEncoder.MaxLengthBits); + public int FindOffset(int basePosition) => (int)(this.OffsetLength.GetSpan()[basePosition] >> BackwardReferenceEncoder.MaxLengthBits); /// /// Calculates the hash for a pixel pair. @@ -280,5 +285,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return maxWindowSize > WindowSize ? WindowSize : maxWindowSize; } + + /// + public void Dispose() => this.OffsetLength.Dispose(); } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs index 4f7a4eb3d..82bd32a02 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs @@ -65,15 +65,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless FixedTableSize + 2704 }; - private static readonly byte[] CodeLengthCodeOrder = { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - private static readonly int NumCodeLengthCodes = CodeLengthCodeOrder.Length; - private static readonly byte[] LiteralMap = - { - 0, 1, 1, 1, 0 - }; - /// /// Initializes a new instance of the class. /// @@ -87,6 +80,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.configuration = configuration; } + // This uses C#'s compiler optimization to refer to assembly's static data directly. 
+ private static ReadOnlySpan CodeLengthCodeOrder => new byte[] { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan LiteralMap => new byte[] { 0, 1, 1, 1, 0 }; + /// /// Decodes the image from the stream using the bitreader. /// @@ -834,10 +833,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private void BuildPackedTable(HTreeGroup hTreeGroup) { - for (uint code = 0; code < HuffmanUtils.HuffmanPackedTableSize; ++code) + for (uint code = 0; code < HuffmanUtils.HuffmanPackedTableSize; code++) { uint bits = code; - HuffmanCode huff = hTreeGroup.PackedTable[bits]; + ref HuffmanCode huff = ref hTreeGroup.PackedTable[bits]; HuffmanCode hCode = hTreeGroup.HTrees[HuffIndex.Green][bits]; if (hCode.Value >= WebpConstants.NumLiteralCodes) { @@ -848,10 +847,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { huff.BitsUsed = 0; huff.Value = 0; - bits >>= AccumulateHCode(hCode, 8, huff); - bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Red][bits], 16, huff); - bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Blue][bits], 0, huff); - bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Alpha][bits], 24, huff); + bits >>= AccumulateHCode(hCode, 8, ref huff); + bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Red][bits], 16, ref huff); + bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Blue][bits], 0, ref huff); + bits >>= AccumulateHCode(hTreeGroup.HTrees[HuffIndex.Alpha][bits], 24, ref huff); } } } @@ -992,7 +991,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } [MethodImpl(InliningOptions.ShortMethod)] - private static int AccumulateHCode(HuffmanCode hCode, int shift, HuffmanCode huff) + private static int AccumulateHCode(HuffmanCode hCode, int shift, ref HuffmanCode huff) { huff.BitsUsed += hCode.BitsUsed; huff.Value |= hCode.Value << shift; diff --git 
a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs index bf47b01bc..c89411435 100644 --- a/src/ImageSharp/Formats/Webp/WebpLookupTables.cs +++ b/src/ImageSharp/Formats/Webp/WebpLookupTables.cs @@ -239,7 +239,8 @@ namespace SixLabors.ImageSharp.Formats.Webp } }; - public static readonly byte[] Norm = + // This uses C#'s compiler optimization to refer to assembly's static data directly. + public static ReadOnlySpan Norm => new byte[] { // renorm_sizes[i] = 8 - log2(i) 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, diff --git a/src/ImageSharp/Processing/Extensions/Normalization/HistogramEqualizationExtensions.cs b/src/ImageSharp/Processing/Extensions/Normalization/HistogramEqualizationExtensions.cs index a8ac3376a..c1046f82d 100644 --- a/src/ImageSharp/Processing/Extensions/Normalization/HistogramEqualizationExtensions.cs +++ b/src/ImageSharp/Processing/Extensions/Normalization/HistogramEqualizationExtensions.cs @@ -16,7 +16,7 @@ namespace SixLabors.ImageSharp.Processing /// The image this method extends. /// The to allow chaining of operations. public static IImageProcessingContext HistogramEqualization(this IImageProcessingContext source) => - HistogramEqualization(source, HistogramEqualizationOptions.Default); + HistogramEqualization(source, new HistogramEqualizationOptions()); /// /// Equalizes the histogram of an image to increases the contrast. diff --git a/src/ImageSharp/Processing/Processors/Normalization/HistogramEqualizationOptions.cs b/src/ImageSharp/Processing/Processors/Normalization/HistogramEqualizationOptions.cs index 602dc0c4b..1b8723e4f 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/HistogramEqualizationOptions.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/HistogramEqualizationOptions.cs @@ -8,11 +8,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// public class HistogramEqualizationOptions { - /// - /// Gets the default instance. 
- /// - public static HistogramEqualizationOptions Default { get; } = new HistogramEqualizationOptions(); - /// /// Gets or sets the histogram equalization method to use. Defaults to global histogram equalization. /// diff --git a/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel}.cs index b82ce71bb..f54489348 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/EuclideanPixelMap{TPixel}.cs @@ -22,7 +22,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Quantization where TPixel : unmanaged, IPixel { private Rgba32[] rgbaPalette; - private readonly ColorDistanceCache cache; + + // Do not make this readonly! Struct value would be always copied on non-readonly method calls. + private ColorDistanceCache cache; private readonly Configuration configuration; /// diff --git a/tests/ImageSharp.Benchmarks/Codecs/DecodeBmp.cs b/tests/ImageSharp.Benchmarks/Codecs/Bmp/DecodeBmp.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/DecodeBmp.cs rename to tests/ImageSharp.Benchmarks/Codecs/Bmp/DecodeBmp.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeBmp.cs b/tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmp.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/EncodeBmp.cs rename to tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmp.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeBmpMultiple.cs b/tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmpMultiple.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/EncodeBmpMultiple.cs rename to tests/ImageSharp.Benchmarks/Codecs/Bmp/EncodeBmpMultiple.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/DecodeGif.cs b/tests/ImageSharp.Benchmarks/Codecs/Gif/DecodeGif.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/DecodeGif.cs rename to 
tests/ImageSharp.Benchmarks/Codecs/Gif/DecodeGif.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeGif.cs b/tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGif.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/EncodeGif.cs rename to tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGif.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeGifMultiple.cs b/tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGifMultiple.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/EncodeGifMultiple.cs rename to tests/ImageSharp.Benchmarks/Codecs/Gif/EncodeGifMultiple.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/CmykColorConversion.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs rename to tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/CmykColorConversion.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/ColorConversionBenchmark.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs rename to tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/ColorConversionBenchmark.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/GrayscaleColorConversion.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs rename to tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/GrayscaleColorConversion.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/RgbColorConversion.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs rename to 
tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/RgbColorConversion.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs rename to tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrColorConversion.cs diff --git a/tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrForwardConverterBenchmark.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Format/Jpeg/Components/Encoder/YCbCrForwardConverterBenchmark.cs rename to tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YCbCrForwardConverterBenchmark.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YccKColorConverter.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs rename to tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversion/YccKColorConverter.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg.cs new file mode 100644 index 000000000..9665ca42d --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/DecodeJpeg.cs @@ -0,0 +1,82 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System.IO; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg; +using SixLabors.ImageSharp.Tests; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + public class DecodeJpeg + { + private JpegDecoder decoder; + + private MemoryStream preloadedImageStream; + + private void GenericSetup(string imageSubpath) + { + this.decoder = new JpegDecoder(); + byte[] bytes = File.ReadAllBytes(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, imageSubpath)); + this.preloadedImageStream = new MemoryStream(bytes); + } + + private void GenericBechmark() + { + this.preloadedImageStream.Position = 0; + using Image img = this.decoder.Decode(Configuration.Default, this.preloadedImageStream); + } + + [GlobalSetup(Target = nameof(JpegBaselineInterleaved444))] + public void SetupBaselineInterleaved444() => + this.GenericSetup(TestImages.Jpeg.Baseline.Winter444_Interleaved); + + [GlobalSetup(Target = nameof(JpegBaselineInterleaved420))] + public void SetupBaselineInterleaved420() => + this.GenericSetup(TestImages.Jpeg.Baseline.Hiyamugi); + + [GlobalSetup(Target = nameof(JpegBaseline400))] + public void SetupBaselineSingleComponent() => + this.GenericSetup(TestImages.Jpeg.Baseline.Jpeg400); + + [GlobalSetup(Target = nameof(JpegProgressiveNonInterleaved420))] + public void SetupProgressiveNoninterleaved420() => + this.GenericSetup(TestImages.Jpeg.Progressive.Winter420_NonInterleaved); + + [GlobalCleanup] + public void Cleanup() + { + this.preloadedImageStream.Dispose(); + this.preloadedImageStream = null; + } + + [Benchmark(Description = "Baseline 4:4:4 Interleaved")] + public void JpegBaselineInterleaved444() => this.GenericBechmark(); + + [Benchmark(Description = "Baseline 4:2:0 Interleaved")] + public void JpegBaselineInterleaved420() => this.GenericBechmark(); + + [Benchmark(Description = "Baseline 4:0:0 (grayscale)")] + public void JpegBaseline400() => this.GenericBechmark(); + + [Benchmark(Description = "Progressive 4:2:0 
Non-Interleaved")] + public void JpegProgressiveNonInterleaved420() => this.GenericBechmark(); + } +} + + +/* +BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042.1348 (20H2/October2020Update) +Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores +.NET SDK=6.0.100-preview.3.21202.5 + [Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT + DefaultJob : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT + + +| Method | Mean | Error | StdDev | +|------------------------------------ |----------:|----------:|----------:| +| 'Baseline 4:4:4 Interleaved' | 11.127 ms | 0.0659 ms | 0.0550 ms | +| 'Baseline 4:2:0 Interleaved' | 8.458 ms | 0.0289 ms | 0.0256 ms | +| 'Baseline 4:0:0 (grayscale)' | 1.550 ms | 0.0050 ms | 0.0044 ms | +| 'Progressive 4:2:0 Non-Interleaved' | 13.220 ms | 0.0449 ms | 0.0398 ms | +*/ diff --git a/tests/ImageSharp.Benchmarks/Codecs/DecodeFilteredPng.cs b/tests/ImageSharp.Benchmarks/Codecs/Png/DecodeFilteredPng.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/DecodeFilteredPng.cs rename to tests/ImageSharp.Benchmarks/Codecs/Png/DecodeFilteredPng.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/DecodePng.cs b/tests/ImageSharp.Benchmarks/Codecs/Png/DecodePng.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/DecodePng.cs rename to tests/ImageSharp.Benchmarks/Codecs/Png/DecodePng.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs b/tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs rename to tests/ImageSharp.Benchmarks/Codecs/Png/EncodeIndexedPng.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodePng.cs b/tests/ImageSharp.Benchmarks/Codecs/Png/EncodePng.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/EncodePng.cs rename to tests/ImageSharp.Benchmarks/Codecs/Png/EncodePng.cs diff --git 
a/tests/ImageSharp.Benchmarks/Codecs/DecodeTga.cs b/tests/ImageSharp.Benchmarks/Codecs/Tga/DecodeTga.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/DecodeTga.cs rename to tests/ImageSharp.Benchmarks/Codecs/Tga/DecodeTga.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeTga.cs b/tests/ImageSharp.Benchmarks/Codecs/Tga/EncodeTga.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/EncodeTga.cs rename to tests/ImageSharp.Benchmarks/Codecs/Tga/EncodeTga.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/DecodeTiff.cs b/tests/ImageSharp.Benchmarks/Codecs/Tiff/DecodeTiff.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/DecodeTiff.cs rename to tests/ImageSharp.Benchmarks/Codecs/Tiff/DecodeTiff.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeTiff.cs b/tests/ImageSharp.Benchmarks/Codecs/Tiff/EncodeTiff.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/EncodeTiff.cs rename to tests/ImageSharp.Benchmarks/Codecs/Tiff/EncodeTiff.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/Webp/DecodeWebp.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/DecodeWebp.cs rename to tests/ImageSharp.Benchmarks/Codecs/Webp/DecodeWebp.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs b/tests/ImageSharp.Benchmarks/Codecs/Webp/EncodeWebp.cs similarity index 100% rename from tests/ImageSharp.Benchmarks/Codecs/EncodeWebp.cs rename to tests/ImageSharp.Benchmarks/Codecs/Webp/EncodeWebp.cs diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index e5dc0ba01..8f5f10f19 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -183,9 +183,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Assert.Equal(expected, actual); } + // This method has only 2 implementations: + // 1. 
AVX + // 2. Scalar FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic); + HwIntrinsics.AllowAll | HwIntrinsics.DisableHWIntrinsic); } private static float[] Create8x8ColorCropTestData() diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs index 3737cce80..b13a196cb 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8Tests.cs @@ -276,5 +276,31 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed, HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2); } + + [Fact] + public void TransposeInplace() + { + static void RunTest() + { + short[] expected = Create8x8ShortData(); + ReferenceImplementations.Transpose8x8(expected); + + var block8x8 = default(Block8x8); + block8x8.LoadFrom(Create8x8ShortData()); + + block8x8.TransposeInplace(); + + short[] actual = new short[64]; + block8x8.CopyTo(actual); + + Assert.Equal(expected, actual); + } + + // This method has only 1 implementation: + // 1. Scalar + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.DisableHWIntrinsic); + } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs index 0a49d20cd..85f30d28d 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs @@ -2,9 +2,6 @@ // Licensed under the Apache License, Version 2.0. 
using System; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -17,6 +14,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Trait("Format", "Jpg")] public static class DCTTests { + private const int MaxAllowedValue = short.MaxValue; + private const int MinAllowedValue = short.MinValue; + + internal static Block8x8F CreateBlockFromScalar(float value) + { + Block8x8F result = default; + for (int i = 0; i < Block8x8F.Size; i++) + { + result[i] = value; + } + + return result; + } + public class FastFloatingPoint : JpegFixture { public FastFloatingPoint(ITestOutputHelper output) @@ -24,130 +35,75 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { } - // Reference tests [Theory] [InlineData(1)] [InlineData(2)] [InlineData(3)] public void LLM_TransformIDCT_CompareToNonOptimized(int seed) { - float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); + float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); var srcBlock = Block8x8F.Load(sourceArray); + // reference Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock); - var temp = default(Block8x8F); - FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp); - - this.CompareBlocks(expected, srcBlock, 1f); - } - - [Theory] - [InlineData(1)] - [InlineData(2)] - [InlineData(3)] - public void LLM_TransformIDCT_CompareToAccurate(int seed) - { - float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); + // testee + // Part of the IDCT calculations is fused into the quantization step + // We must multiply input block with adjusted no-quantization matrix + // before applying IDCT + // Dequantization using unit matrix - no values are upscaled + Block8x8F dequantMatrix = CreateBlockFromScalar(1); - var srcBlock = 
Block8x8F.Load(sourceArray); + // This step is needed to apply adjusting multipliers to the input block + FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); - Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock); + // IDCT implementation transforms blocks after transposition + srcBlock.TransposeInplace(); + srcBlock.MultiplyInPlace(ref dequantMatrix); - var temp = default(Block8x8F); - FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp); + // IDCT calculation + FastFloatingPointDCT.TransformIDCT(ref srcBlock); this.CompareBlocks(expected, srcBlock, 1f); } - // Inverse transform - [Theory] - [InlineData(1)] - [InlineData(2)] - public void IDCT8x4_LeftPart(int seed) - { - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); - var srcBlock = default(Block8x8F); - srcBlock.LoadFrom(src); - - var destBlock = default(Block8x8F); - - var expectedDest = new float[64]; - - // reference - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest); - - // testee - FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock); - - var actualDest = new float[64]; - destBlock.ScaledCopyTo(actualDest); - - Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); - } - [Theory] [InlineData(1)] [InlineData(2)] - public void IDCT8x4_RightPart(int seed) + [InlineData(3)] + public void LLM_TransformIDCT_CompareToAccurate(int seed) { - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); - var srcBlock = default(Block8x8F); - srcBlock.LoadFrom(src); + float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); - var destBlock = default(Block8x8F); - - var expectedDest = new float[64]; + var srcBlock = Block8x8F.Load(sourceArray); // reference - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); + Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock); // testee - 
FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock); - - var actualDest = new float[64]; - destBlock.ScaledCopyTo(actualDest); - - Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); - } - - [Theory] - [InlineData(1)] - [InlineData(2)] - public void IDCT8x8_Avx(int seed) - { -#if SUPPORTS_RUNTIME_INTRINSICS - if (!Avx.IsSupported) - { - this.Output.WriteLine("No AVX present, skipping test!"); - return; - } - - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); - Block8x8F srcBlock = default; - srcBlock.LoadFrom(src); + // Part of the IDCT calculations is fused into the quantization step + // We must multiply input block with adjusted no-quantization matrix + // before applying IDCT + // Dequantization using unit matrix - no values are upscaled + Block8x8F dequantMatrix = CreateBlockFromScalar(1); - Block8x8F destBlock = default; + // This step is needed to apply adjusting multipliers to the input block + FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); - float[] expectedDest = new float[64]; + // IDCT implementation transforms blocks after transposition + srcBlock.TransposeInplace(); + srcBlock.MultiplyInPlace(ref dequantMatrix); - // reference, left part - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest); + // IDCT calculation + FastFloatingPointDCT.TransformIDCT(ref srcBlock); - // reference, right part - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); - - // testee, whole 8x8 - FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock); - - float[] actualDest = new float[64]; - destBlock.ScaledCopyTo(actualDest); - - Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); -#endif + this.CompareBlocks(expected, srcBlock, 1f); } + // Inverse transform + // This test covers entire IDCT conversion chain + // This test checks all hardware implementations [Theory] [InlineData(1)] [InlineData(2)] @@ -157,41 +113,53
@@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { int seed = FeatureTestRunner.Deserialize(serialized); - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + Span src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); var srcBlock = default(Block8x8F); srcBlock.LoadFrom(src); - var expectedDest = new float[64]; - var temp1 = new float[64]; - var temp2 = default(Block8x8F); + float[] expectedDest = new float[64]; + float[] temp = new float[64]; // reference - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1); + ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp); // testee - FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp2); + // Part of the IDCT calculations is fused into the quantization step + // We must multiply input block with adjusted no-quantization matrix + // before applying IDCT + Block8x8F dequantMatrix = CreateBlockFromScalar(1); + + // Dequantization using unit matrix - no values are upscaled + // as quant matrix is all 1's + // This step is needed to apply adjusting multipliers to the input block + FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); + srcBlock.MultiplyInPlace(ref dequantMatrix); + + // IDCT implementation transforms blocks after transposition + srcBlock.TransposeInplace(); - var actualDest = new float[64]; - srcBlock.ScaledCopyTo(actualDest); + // IDCT calculation + FastFloatingPointDCT.TransformIDCT(ref srcBlock); + + float[] actualDest = srcBlock.ToArray(); Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); } - // 3 paths: + // 4 paths: // 1. AllowAll - call avx/fma implementation - // 2. DisableFMA - call avx implementation without fma acceleration - // 3. DisableAvx - call fallback code of Vector4 implementation - // - // DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result + // 2. 
DisableFMA - call avx without fma implementation + // 3. DisableAvx - call sse Vector4 implementation + // 4. DisableHWIntrinsic - call scalar fallback implementation FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, seed, - HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX); + HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic); } // Forward transform - // This test covers entire FDCT conversions chain - // This test checks all implementations: intrinsic and scalar fallback + // This test covers entire FDCT conversion chain + // This test checks all hardware implementations [Theory] [InlineData(1)] [InlineData(2)] @@ -201,7 +169,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { int seed = FeatureTestRunner.Deserialize(serialized); - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + Span src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); var block = default(Block8x8F); block.LoadFrom(src); @@ -212,23 +180,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); // testee - // Part of the FDCT calculations is fused into the quantization step - // We must multiply transformed block with reciprocal values from FastFloatingPointDCT.ANN_DCT_reciprocalAdjustmen FastFloatingPointDCT.TransformFDCT(ref block); - for (int i = 0; i < 64; i++) - { - block[i] = block[i] * FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients[i]; - } + + // Part of the FDCT calculations is fused into the quantization step + // We must multiply transformed block with adjusted no-quantization matrix + // after applying FDCT + Block8x8F quantMatrix = CreateBlockFromScalar(1); + FastFloatingPointDCT.AdjustToFDCT(ref quantMatrix); + block.MultiplyInPlace(ref quantMatrix); float[] actualDest = block.ToArray(); Assert.Equal(expectedDest, actualDest, new 
ApproximateFloatComparer(1f)); } - // 3 paths: + // 4 paths: // 1. AllowAll - call avx/fma implementation - // 2. DisableFMA - call avx implementation without fma acceleration - // 3. DisableAvx - call sse implementation + // 2. DisableFMA - call avx without fma implementation + // 3. DisableAvx - call sse Vector4 implementation // 4. DisableHWIntrinsic - call scalar fallback implementation FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Images.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Images.cs index d12240cba..ef817154d 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Images.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Images.cs @@ -20,6 +20,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg TestImages.Jpeg.Baseline.Jpeg420Small, TestImages.Jpeg.Issues.Fuzz.AccessViolationException922, TestImages.Jpeg.Baseline.Jpeg444, + TestImages.Jpeg.Baseline.Jpeg422, TestImages.Jpeg.Baseline.Bad.BadEOF, TestImages.Jpeg.Baseline.MultiScanBaselineCMYK, TestImages.Jpeg.Baseline.YcckSubsample1222, @@ -100,6 +101,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [TestImages.Jpeg.Baseline.Bad.BadEOF] = 0.38f / 100, [TestImages.Jpeg.Baseline.Bad.BadRST] = 0.0589f / 100, + [TestImages.Jpeg.Baseline.Jpeg422] = 0.0013f / 100, [TestImages.Jpeg.Baseline.Testorig420] = 0.38f / 100, [TestImages.Jpeg.Baseline.Jpeg420Small] = 0.287f / 100, [TestImages.Jpeg.Baseline.Turtle420] = 1.0f / 100, diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs index 5e42c6c8f..7b3e20aa2 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Metadata.cs @@ -56,7 +56,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { TestImages.Jpeg.Progressive.Fb, 75 }, { TestImages.Jpeg.Issues.IncorrectQuality845, 98 }, { 
TestImages.Jpeg.Baseline.ForestBridgeDifferentComponentsQuality, 89 }, - { TestImages.Jpeg.Progressive.Winter, 80 } + { TestImages.Jpeg.Progressive.Winter420_NonInterleaved, 80 } }; [Theory] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs b/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs index a76b2bf2e..1bdfc6eca 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs @@ -172,7 +172,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils bool failed = false; - for (int i = 0; i < 64; i++) + for (int i = 0; i < Block8x8F.Size; i++) { float expected = a[i]; float actual = b[i]; diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs b/tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs index adbd695c0..5c00b39af 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Utils/LibJpegTools.ComponentData.cs @@ -48,6 +48,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils public short MaxVal { get; private set; } = short.MinValue; + internal void MakeBlock(Block8x8 block, int y, int x) + { + block.TransposeInplace(); + this.MakeBlock(block.ToArray(), y, x); + } + internal void MakeBlock(short[] data, int y, int x) { this.MinVal = Math.Min(this.MinVal, data.Min()); @@ -66,11 +72,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils Span blockRow = data.GetRowSpan(y - startIndex); for (int x = 0; x < this.WidthInBlocks; x++) { - short[] block = blockRow[x].ToArray(); - - // x coordinate stays the same - we load entire stride - // y coordinate is tricky as we load single stride to full buffer - offset is needed - this.MakeBlock(block, y, x); + this.MakeBlock(blockRow[x], y, x); } } } @@ -83,8 +85,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils Span blockRow = data.GetRowSpan(y); for (int x = 0; x < this.WidthInBlocks; x++) { - 
short[] block = blockRow[x].ToArray(); - this.MakeBlock(block, y, x); + this.MakeBlock(blockRow[x], y, x); } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs b/tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs index c9741521c..8dc1c83d4 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Utils/ReferenceImplementations.cs @@ -40,6 +40,23 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils } } + /// + /// Transpose 8x8 block stored linearly in a (inplace) + /// + internal static void Transpose8x8(Span data) + { + for (int i = 1; i < 8; i++) + { + int i8 = i * 8; + for (int j = 0; j < i; j++) + { + short tmp = data[i8 + j]; + data[i8 + j] = data[(j * 8) + i]; + data[(j * 8) + i] = tmp; + } + } + } + /// /// Transpose 8x8 block stored linearly in a /// diff --git a/tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs index 39046438a..b67ad85ee 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/ZigZagTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. 
+using System; using SixLabors.ImageSharp.Formats.Jpeg.Components; using Xunit; @@ -9,8 +10,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Trait("Format", "Jpg")] public class ZigZagTests { - [Fact] - public void ZigZagCanHandleAllPossibleCoefficients() + private static void CanHandleAllPossibleCoefficients(ReadOnlySpan order) { // Mimic the behaviour of the huffman scan decoder using all possible byte values short[] block = new short[64]; @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg if (s != 0) { i += r; - block[ZigZag.ZigZagOrder[i++]] = (short)s; + block[order[i++]] = (short)s; } else { @@ -40,5 +40,13 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } } } + + [Fact] + public static void ZigZagCanHandleAllPossibleCoefficients() => + CanHandleAllPossibleCoefficients(ZigZag.ZigZagOrder); + + [Fact] + public static void TrasposingZigZagCanHandleAllPossibleCoefficients() => + CanHandleAllPossibleCoefficients(ZigZag.TransposingOrder); } } diff --git a/tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs index 5306a8c78..f7eef0d85 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/ColorSpaceTransformUtilsTests.cs @@ -5,7 +5,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossless; using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; -namespace SixLabors.ImageSharp.Tests.Formats.WebP +namespace SixLabors.ImageSharp.Tests.Formats.Webp { [Trait("Format", "Webp")] public class ColorSpaceTransformUtilsTests diff --git a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs index d176a5933..907b18300 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LossyUtilsTests.cs @@ -6,7 +6,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy; using 
SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; -namespace SixLabors.ImageSharp.Tests.Formats.WebP +namespace SixLabors.ImageSharp.Tests.Formats.Webp { [Trait("Format", "Webp")] public class LossyUtilsTests @@ -38,7 +38,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.WebP int actual = LossyUtils.Vp8_Sse4X4(a, b); Assert.Equal(expected, actual); - } + } private static void RunMean16x4Test() { diff --git a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs index 55738199b..80b5f0a53 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/QuantEncTests.cs @@ -6,7 +6,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy; using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; -namespace SixLabors.ImageSharp.Tests.Formats.WebP +namespace SixLabors.ImageSharp.Tests.Formats.Webp { [Trait("Format", "Webp")] public class QuantEncTests diff --git a/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs b/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs index 17c9beb9b..6bcb4f21f 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/Vp8EncodingTests.cs @@ -6,7 +6,7 @@ using SixLabors.ImageSharp.Formats.Webp.Lossy; using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; -namespace SixLabors.ImageSharp.Tests.Formats.WebP +namespace SixLabors.ImageSharp.Tests.Formats.Webp { [Trait("Format", "Webp")] public class Vp8EncodingTests diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs index 116c5adc3..e00364913 100644 --- a/tests/ImageSharp.Tests/TestImages.cs +++ b/tests/ImageSharp.Tests/TestImages.cs @@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests public const string Fb = "Jpg/progressive/fb.jpg"; public const string Progress = "Jpg/progressive/progress.jpg"; public const string Festzug = "Jpg/progressive/Festzug.jpg"; - public const string Winter = 
"Jpg/progressive/winter.jpg"; + public const string Winter420_NonInterleaved = "Jpg/progressive/winter420_noninterleaved.jpg"; public static class Bad { @@ -213,6 +213,7 @@ namespace SixLabors.ImageSharp.Tests public const string ArithmeticCoding = "Jpg/baseline/arithmetic_coding.jpg"; public const string ArithmeticCodingProgressive = "Jpg/progressive/arithmetic_progressive.jpg"; public const string Lossless = "Jpg/baseline/lossless.jpg"; + public const string Winter444_Interleaved = "Jpg/baseline/winter444_interleaved.jpg"; public static readonly string[] All = { diff --git a/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png new file mode 100644 index 000000000..018ecda7a --- /dev/null +++ b/tests/Images/External/ReferenceOutput/JpegDecoderTests/DecodeBaselineJpeg_jpeg422.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:733cc46271c4402974db2536a55e6ecae3110856df73031ca48dad03745d852d +size 35375 diff --git a/tests/Images/Input/Jpg/baseline/winter444_interleaved.jpg b/tests/Images/Input/Jpg/baseline/winter444_interleaved.jpg new file mode 100644 index 000000000..9ae834389 --- /dev/null +++ b/tests/Images/Input/Jpg/baseline/winter444_interleaved.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73b1deb4e2fb8027f6bb4fb293e5b2615c80b3ac0a7f99fd90118fd340a9fd12 +size 283330 diff --git a/tests/Images/Input/Jpg/progressive/winter.jpg b/tests/Images/Input/Jpg/progressive/winter420_noninterleaved.jpg similarity index 100% rename from tests/Images/Input/Jpg/progressive/winter.jpg rename to tests/Images/Input/Jpg/progressive/winter420_noninterleaved.jpg