Separated scaled IDCT methods

4 years ago · 52f507d793
9 changed files with 139 additions and 120 deletions
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DirectComponentProcessor.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DirectComponentProcessor.cs
@ -47,7 +47,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
                    workspaceBlock.MultiplyInPlace(ref dequantTable);

                    // Convert from spectral to color
-                    FastFloatingPointDCT.TransformIDCT(ref workspaceBlock);
+                    FloatingPointDCT.TransformIDCT(ref workspaceBlock);

                    // To conform better to libjpeg we actually NEED TO lose precision here.
                    // This is because they store blocks as Int16 between all the operations.
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor2.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor2.cs
@ -45,7 +45,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
                    workspaceBlock.LoadFrom(ref blockRow[xBlock]);

                    // IDCT/Normalization/Range
-                    TransformIDCT_4x4(ref workspaceBlock, ref dequantTable, normalizationValue, maximumValue);
+                    ScaledFloatingPointDCT.TransformIDCT_4x4(ref workspaceBlock, ref dequantTable, normalizationValue, maximumValue);

                    // Save to the intermediate buffer
                    int xColorBufferStart = xBlock * this.BlockAreaSize.Width;
@ -59,66 +59,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
            }
        }

-        public static void TransformIDCT_4x4(ref Block8x8F block, ref Block8x8F dequantTable, float normalizationValue, float maxValue)
-        {
-            const int DCTSIZE = 8;
-            const float FIX_0_541196100 = 0.541196100f;
-            const float FIX_0_765366865 = 0.765366865f;
-            const float FIX_1_847759065 = 1.847759065f;
-
-            // input block is transposed so term indices must be tranposed too
-            float tmp0, tmp2, tmp10, tmp12;
-            float z1, z2, z3;
-
-            for (int ctr = 0; ctr < 4; ctr++)
-            {
-                // Even part
-                tmp0 = block[ctr * DCTSIZE] * dequantTable[ctr * DCTSIZE];
-                tmp2 = block[(ctr * DCTSIZE) + 2] * dequantTable[(ctr * DCTSIZE) + 2];
-
-                tmp10 = tmp0 + tmp2;
-                tmp12 = tmp0 - tmp2;
-
-                // Odd part
-                z2 = block[(ctr * DCTSIZE) + 1] * dequantTable[(ctr * DCTSIZE) + 1];
-                z3 = block[(ctr * DCTSIZE) + 3] * dequantTable[(ctr * DCTSIZE) + 3];
-
-                z1 = (z2 + z3) * FIX_0_541196100;
-                tmp0 = z1 + (z2 * FIX_0_765366865);
-                tmp2 = z1 - (z3 * FIX_1_847759065);
-
-                /* Final output stage */
-                block[ctr + 4] = tmp10 + tmp0;
-                block[ctr + 28] = tmp10 - tmp0;
-                block[ctr + 12] = tmp12 + tmp2;
-                block[ctr + 20] = tmp12 - tmp2;
-            }
-
-            for (int ctr = 0; ctr < 4; ctr++)
-            {
-                // Even part
-                tmp0 = block[(ctr * 8) + 0 + 4];
-                tmp2 = block[(ctr * 8) + 2 + 4];
-
-                tmp10 = tmp0 + tmp2;
-                tmp12 = tmp0 - tmp2;
-
-                // Odd part
-                z2 = block[(ctr * 8) + 1 + 4];
-                z3 = block[(ctr * 8) + 3 + 4];
-
-                z1 = (z2 + z3) * FIX_0_541196100;
-                tmp0 = z1 + (z2 * FIX_0_765366865);
-                tmp2 = z1 - (z3 * FIX_1_847759065);
-
-                /* Final output stage */
-                block[(ctr * 8) + 0] = (float)Math.Round(Numerics.Clamp(tmp10 + tmp0 + normalizationValue, 0, maxValue));
-                block[(ctr * 8) + 3] = (float)Math.Round(Numerics.Clamp(tmp10 - tmp0 + normalizationValue, 0, maxValue));
-                block[(ctr * 8) + 1] = (float)Math.Round(Numerics.Clamp(tmp12 + tmp2 + normalizationValue, 0, maxValue));
-                block[(ctr * 8) + 2] = (float)Math.Round(Numerics.Clamp(tmp12 - tmp2 + normalizationValue, 0, maxValue));
-            }
-        }
-
        [MethodImpl(InliningOptions.ShortMethod)]
        public static void ScaledCopyTo(ref Block8x8F block, ref float destRef, int destStrideWidth, int horizontalScale, int verticalScale)
        {
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor4.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor4.cs
@ -45,7 +45,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
                    workspaceBlock.LoadFrom(ref blockRow[xBlock]);

                    // IDCT/Normalization/Range
-                    TransformIDCT_2x2(ref workspaceBlock, ref dequantTable, normalizationValue, maximumValue);
+                    ScaledFloatingPointDCT.TransformIDCT_2x2(ref workspaceBlock, ref dequantTable, normalizationValue, maximumValue);

                    // Save to the intermediate buffer
                    int xColorBufferStart = xBlock * this.BlockAreaSize.Width;
@ -59,36 +59,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
            }
        }

-        public static void TransformIDCT_2x2(ref Block8x8F block, ref Block8x8F dequantTable, float normalizationValue, float maxValue)
-        {
-            // input block is transposed so term indices must be tranposed too
-            float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-
-            // 0
-            //   => 0 1
-            // 8
-            tmp4 = block[0] * dequantTable[0];
-            tmp5 = block[1] * dequantTable[1];
-            tmp0 = tmp4 + tmp5;
-            tmp2 = tmp4 - tmp5;
-
-            // 1
-            //   => 8 9
-            // 9
-            tmp4 = block[8] * dequantTable[8];
-            tmp5 = block[9] * dequantTable[9];
-            tmp1 = tmp4 + tmp5;
-            tmp3 = tmp4 - tmp5;
-
-            // Row 0
-            block[0] = (float)Math.Round(Numerics.Clamp(tmp0 + tmp1 + normalizationValue, 0, maxValue));
-            block[1] = (float)Math.Round(Numerics.Clamp(tmp0 - tmp1 + normalizationValue, 0, maxValue));
-
-            // Row 1
-            block[8] = (float)Math.Round(Numerics.Clamp(tmp2 + tmp3 + normalizationValue, 0, maxValue));
-            block[9] = (float)Math.Round(Numerics.Clamp(tmp2 - tmp3 + normalizationValue, 0, maxValue));
-        }
-
        [MethodImpl(InliningOptions.ShortMethod)]
        public static void ScaledCopyTo(ref Block8x8F block, ref float destRef, int destStrideWidth, int horizontalScale, int verticalScale)
        {
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ComponentProcessors/DownScalingComponentProcessor8.cs
@ -38,7 +38,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder

                for (int xBlock = 0; xBlock < spectralBuffer.Width; xBlock++)
                {
-                    float dc = TransformIDCT_1x1(blockRow[xBlock][0], this.dcDequantizer, normalizationValue, maximumValue);
+                    float dc = ScaledFloatingPointDCT.TransformIDCT_1x1(blockRow[xBlock][0], this.dcDequantizer, normalizationValue, maximumValue);

                    // Save to the intermediate buffer
                    int xColorBufferStart = xBlock * this.BlockAreaSize.Width;
@ -52,13 +52,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
            }
        }

-        [MethodImpl(InliningOptions.ShortMethod)]
-        public static float TransformIDCT_1x1(float dc, float dequantizer, float normalizationValue, float maxValue)
-        {
-            dc *= dequantizer;
-            return (float)Math.Round(Numerics.Clamp(dc + normalizationValue, 0, maxValue));
-        }
-
        [MethodImpl(InliningOptions.ShortMethod)]
        public static void ScaledCopyTo(float value, ref float destRef, int destStrideWidth, int horizontalScale, int verticalScale)
        {
--- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
@ -139,8 +139,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        public void Encode444<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
            where TPixel : unmanaged, IPixel<TPixel>
        {
-            FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
-            FastFloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);
+            FloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
+            FloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);

            this.huffmanTables = HuffmanLut.TheHuffmanLut;

@ -202,8 +202,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        public void Encode420<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
            where TPixel : unmanaged, IPixel<TPixel>
        {
-            FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
-            FastFloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);
+            FloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
+            FloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);

            this.huffmanTables = HuffmanLut.TheHuffmanLut;

@ -271,7 +271,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        public void EncodeGrayscale<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken)
            where TPixel : unmanaged, IPixel<TPixel>
        {
-            FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
+            FloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);

            this.huffmanTables = HuffmanLut.TheHuffmanLut;

@ -319,7 +319,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
        public void EncodeRgb<TPixel>(Image<TPixel> pixels, ref Block8x8F quantTable, CancellationToken cancellationToken)
            where TPixel : unmanaged, IPixel<TPixel>
        {
-            FastFloatingPointDCT.AdjustToFDCT(ref quantTable);
+            FloatingPointDCT.AdjustToFDCT(ref quantTable);

            this.huffmanTables = HuffmanLut.TheHuffmanLut;

@ -391,7 +391,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
            block.AddInPlace(-128f);

            // Discrete cosine transform
-            FastFloatingPointDCT.TransformFDCT(ref block);
+            FloatingPointDCT.TransformFDCT(ref block);

            // Quantization
            Block8x8F.Quantize(ref block, ref spectralBlock, ref quant);
--- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs
@ -7,7 +7,7 @@ using System.Runtime.Intrinsics.X86;

 namespace SixLabors.ImageSharp.Formats.Jpeg.Components
 {
-    internal static partial class FastFloatingPointDCT
+    internal static partial class FloatingPointDCT
    {
 #pragma warning disable SA1310, SA1311, IDE1006 // naming rule violation warnings
        private static readonly Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
--- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs
@ -12,9 +12,12 @@ using System.Runtime.Intrinsics.X86;
 namespace SixLabors.ImageSharp.Formats.Jpeg.Components
 {
    /// <summary>
-    /// Contains inaccurate, but fast forward and inverse DCT implementations.
+    /// Contains floating point forward and inverse DCT implementations.
    /// </summary>
-    internal static partial class FastFloatingPointDCT
+    /// <remarks>
+    /// Based on "Arai, Agui and Nakajima" algorithm.
+    /// </remarks>
+    internal static partial class FloatingPointDCT
    {
 #pragma warning disable SA1310, SA1311, IDE1006 // naming rules violation warnings
        private static readonly Vector4 mm128_F_0_7071 = new(0.707106781f);
@ -70,8 +73,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
            ref float multipliersRef = ref MemoryMarshal.GetReference<float>(AdjustmentCoefficients);
            for (nint i = 0; i < Block8x8F.Size; i++)
            {
-                tableRef = 0.125f * tableRef * Unsafe.Add(ref multipliersRef, i);
-                tableRef = ref Unsafe.Add(ref tableRef, 1);
+                ref float elemRef = ref Unsafe.Add(ref tableRef, i);
+                elemRef = 0.125f * elemRef * Unsafe.Add(ref multipliersRef, i);
            }

            // Spectral macroblocks are transposed before quantization
@ -89,8 +92,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
            ref float multipliersRef = ref MemoryMarshal.GetReference<float>(AdjustmentCoefficients);
            for (nint i = 0; i < Block8x8F.Size; i++)
            {
-                tableRef = 0.125f / (tableRef * Unsafe.Add(ref multipliersRef, i));
-                tableRef = ref Unsafe.Add(ref tableRef, 1);
+                ref float elemRef = ref Unsafe.Add(ref tableRef, i);
+                elemRef = 0.125f / (elemRef * Unsafe.Add(ref multipliersRef, i));
            }

            // Spectral macroblocks are not transposed before quantization
@ -103,7 +106,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// Apply 2D floating point IDCT inplace.
        /// </summary>
        /// <remarks>
-        /// Input block must be dequantized before this method with table
+        /// Input block must be dequantized with quantization table
        /// adjusted by <see cref="AdjustToIDCT"/>.
        /// </remarks>
        /// <param name="block">Input block.</param>
@ -125,8 +128,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// Apply 2D floating point FDCT inplace.
        /// </summary>
        /// <remarks>
-        /// Input block must be quantized after this method with table adjusted
-        /// by <see cref="AdjustToFDCT"/>.
+        /// Input block must be quantized after this method with quantization
+        /// table adjusted by <see cref="AdjustToFDCT"/>.
        /// </remarks>
        /// <param name="block">Input block.</param>
        public static void TransformFDCT(ref Block8x8F block)
@ -221,7 +224,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
        /// Apply floating point FDCT inplace using <see cref="Vector4"/> API.
        /// </summary>
        /// <param name="block">Input block.</param>
-        public static void FDCT_Vector4(ref Block8x8F block)
+        private static void FDCT_Vector4(ref Block8x8F block)
        {
            // First pass - process columns
            FDCT8x4_Vector4(ref block.V0L);
--- a/src/ImageSharp/Formats/Jpeg/Components/ScaledFloatingPointDCT.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/ScaledFloatingPointDCT.cs
@ -0,0 +1,113 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+
+namespace SixLabors.ImageSharp.Formats.Jpeg.Components
+{
+    /// <summary>
+    /// Contains floating point inverse DCT implementations with built-in scaling.
+    /// </summary>
+    /// <remarks>
+    /// Based on "Loeffler, Ligtenberg, and Moschytz" algorithm.
+    /// </remarks>
+    internal static class ScaledFloatingPointDCT
+    {
+        /// <summary>
+        /// Adjusts given quantization table for usage with IDCT algorithms
+        /// from <see cref="ScaledFloatingPointDCT"/>.
+        /// </summary>
+        /// <param name="quantTable">Quantization table to adjust.</param>
+        public static void AdjustToIDCT(ref Block8x8F quantTable)
+        {
+            ref float tableRef = ref Unsafe.As<Block8x8F, float>(ref quantTable);
+            for (nint i = 0; i < Block8x8F.Size; i++)
+            {
+                ref float elemRef = ref Unsafe.Add(ref tableRef, i);
+                elemRef = 0.125f * elemRef;
+            }
+
+            // Spectral macroblocks are transposed before quantization
+            // so we must transpose quantization table
+            quantTable.TransposeInplace();
+        }
+
+        public static float TransformIDCT_1x1(float dc, float dequantizer, float normalizationValue, float maxValue)
+            => MathF.Round(Numerics.Clamp((dc * dequantizer) + normalizationValue, 0, maxValue));
+
+        public static void TransformIDCT_2x2(ref Block8x8F block, ref Block8x8F dequantTable, float normalizationValue, float maxValue)
+        {
+            float tmp4 = block[0] * dequantTable[0];
+            float tmp5 = block[1] * dequantTable[1];
+            float tmp0 = tmp4 + tmp5;
+            float tmp2 = tmp4 - tmp5;
+
+            tmp4 = block[8] * dequantTable[8];
+            tmp5 = block[9] * dequantTable[9];
+            float tmp1 = tmp4 + tmp5;
+            float tmp3 = tmp4 - tmp5;
+
+            block[0] = MathF.Round(Numerics.Clamp(tmp0 + tmp1 + normalizationValue, 0, maxValue));
+            block[1] = MathF.Round(Numerics.Clamp(tmp0 - tmp1 + normalizationValue, 0, maxValue));
+            block[8] = MathF.Round(Numerics.Clamp(tmp2 + tmp3 + normalizationValue, 0, maxValue));
+            block[9] = MathF.Round(Numerics.Clamp(tmp2 - tmp3 + normalizationValue, 0, maxValue));
+        }
+
+        public static void TransformIDCT_4x4(ref Block8x8F block, ref Block8x8F dequantTable, float normalizationValue, float maxValue)
+        {
+            const int DCTSIZE = 8;
+            const float FIX_0_541196100 = 0.541196100f;
+            const float FIX_0_765366865 = 0.765366865f;
+            const float FIX_1_847759065 = 1.847759065f;
+
+            for (int ctr = 0; ctr < 4; ctr++)
+            {
+                // Even part
+                float tmp0 = block[ctr * DCTSIZE] * dequantTable[ctr * DCTSIZE];
+                float tmp2 = block[(ctr * DCTSIZE) + 2] * dequantTable[(ctr * DCTSIZE) + 2];
+
+                float tmp10 = tmp0 + tmp2;
+                float tmp12 = tmp0 - tmp2;
+
+                // Odd part
+                float z2 = block[(ctr * DCTSIZE) + 1] * dequantTable[(ctr * DCTSIZE) + 1];
+                float z3 = block[(ctr * DCTSIZE) + 3] * dequantTable[(ctr * DCTSIZE) + 3];
+
+                float z1 = (z2 + z3) * FIX_0_541196100;
+                tmp0 = z1 + (z2 * FIX_0_765366865);
+                tmp2 = z1 - (z3 * FIX_1_847759065);
+
+                /* Final output stage */
+                block[ctr + 4] = tmp10 + tmp0;
+                block[ctr + 28] = tmp10 - tmp0;
+                block[ctr + 12] = tmp12 + tmp2;
+                block[ctr + 20] = tmp12 - tmp2;
+            }
+
+            for (int ctr = 0; ctr < 4; ctr++)
+            {
+                // Even part
+                float tmp0 = block[(ctr * 8) + 0 + 4];
+                float tmp2 = block[(ctr * 8) + 2 + 4];
+
+                float tmp10 = tmp0 + tmp2;
+                float tmp12 = tmp0 - tmp2;
+
+                // Odd part
+                float z2 = block[(ctr * 8) + 1 + 4];
+                float z3 = block[(ctr * 8) + 3 + 4];
+
+                float z1 = (z2 + z3) * FIX_0_541196100;
+                tmp0 = z1 + (z2 * FIX_0_765366865);
+                tmp2 = z1 - (z3 * FIX_1_847759065);
+
+                /* Final output stage */
+                block[(ctr * 8) + 0] = MathF.Round(Numerics.Clamp(tmp10 + tmp0 + normalizationValue, 0, maxValue));
+                block[(ctr * 8) + 3] = MathF.Round(Numerics.Clamp(tmp10 - tmp0 + normalizationValue, 0, maxValue));
+                block[(ctr * 8) + 1] = MathF.Round(Numerics.Clamp(tmp12 + tmp2 + normalizationValue, 0, maxValue));
+                block[(ctr * 8) + 2] = MathF.Round(Numerics.Clamp(tmp12 - tmp2 + normalizationValue, 0, maxValue));
+            }
+        }
+    }
+}
--- a/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
+++ b/src/ImageSharp/Formats/Jpeg/JpegDecoderCore.cs
@ -1027,7 +1027,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg
                }

                // Adjusting table for IDCT step during decompression
-                FastFloatingPointDCT.AdjustToIDCT(ref table);
+                FloatingPointDCT.AdjustToIDCT(ref table);
            }
        }