Fixed compilation errors, fixed tests

5 years ago · af0b8ac3dd
2 changed files with 92 additions and 127 deletions
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
@ -18,11 +18,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
        /// </summary>
        public Block8x8F SourceBlock;

-        /// <summary>
-        /// Temporal block to store intermediate computation results.
-        /// </summary>
-        public Block8x8F WorkspaceBlock;
-
        /// <summary>
        /// The quantization table as <see cref="Block8x8F"/>.
        /// </summary>
@ -45,7 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
            this.subSamplingDivisors = component.SubSamplingDivisors;

            this.SourceBlock = default;
-            this.WorkspaceBlock = default;
        }

        /// <summary>
@ -71,7 +65,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
            // Dequantize:
            block.MultiplyInPlace(ref this.DequantiazationTable);

-            FastFloatingPointDCT.TransformIDCT(ref block, ref this.WorkspaceBlock);
+            FastFloatingPointDCT.TransformIDCT(ref block);

            // To conform better to libjpeg we actually NEED TO lose precision here.
            // This is because they store blocks as Int16 between all the operations.
--- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
@ -2,9 +2,6 @@
 // Licensed under the Apache License, Version 2.0.

 using System;
-#if SUPPORTS_RUNTIME_INTRINSICS
-using System.Runtime.Intrinsics.X86;
-#endif
 using SixLabors.ImageSharp.Formats.Jpeg.Components;
 using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils;
 using SixLabors.ImageSharp.Tests.TestUtilities;
@ -17,6 +14,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
    [Trait("Format", "Jpg")]
    public static class DCTTests
    {
+        private const int MaxAllowedValue = short.MaxValue;
+        private const int MinAllowedValue = short.MinValue;
+
+        internal static Block8x8F CreateBlockFromScalar(float value)
+        {
+            Block8x8F result = default;
+            for (int i = 0; i < Block8x8F.Size; i++)
+            {
+                result[i] = value;
+            }
+
+            return result;
+        }
+
        public class FastFloatingPoint : JpegFixture
        {
            public FastFloatingPoint(ITestOutputHelper output)
@ -24,130 +35,77 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
            {
            }

-            // Reference tests
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
            [InlineData(3)]
            public void LLM_TransformIDCT_CompareToNonOptimized(int seed)
            {
-                float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
+                float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);

                var srcBlock = Block8x8F.Load(sourceArray);

+                // reference
                Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock);

-                var temp = default(Block8x8F);
-                FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
-
-                this.CompareBlocks(expected, srcBlock, 1f);
-            }
-
-            [Theory]
-            [InlineData(1)]
-            [InlineData(2)]
-            [InlineData(3)]
-            public void LLM_TransformIDCT_CompareToAccurate(int seed)
-            {
-                float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
+                // testee
+                // Part of the IDCT calculations is fused into the quantization step
+                // We must multiply input block with adjusted no-quantization matrix
+                // before applying IDCT
+                Block8x8F dequantMatrix = CreateBlockFromScalar(1);

-                var srcBlock = Block8x8F.Load(sourceArray);
+                // Dequantization using unit matrix - no values are upscaled
+                // as quant matrix is all 1's
+                // This step is needed to apply adjusting multipliers to the input block
+                FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
+                srcBlock.MultiplyInPlace(ref dequantMatrix);

-                Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);
+                // IDCT implementation transforms blocks after transposition
+                srcBlock.TransposeInplace();

-                var temp = default(Block8x8F);
-                FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
+                // IDCT calculation
+                FastFloatingPointDCT.TransformIDCT(ref srcBlock);

                this.CompareBlocks(expected, srcBlock, 1f);
            }

-            // Inverse transform
-            [Theory]
-            [InlineData(1)]
-            [InlineData(2)]
-            public void IDCT8x4_LeftPart(int seed)
-            {
-                Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
-                var srcBlock = default(Block8x8F);
-                srcBlock.LoadFrom(src);
-
-                var destBlock = default(Block8x8F);
-
-                var expectedDest = new float[64];
-
-                // reference
-                ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
-
-                // testee
-                FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock);
-
-                var actualDest = new float[64];
-                destBlock.ScaledCopyTo(actualDest);
-
-                Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
-            }
-
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
-            public void IDCT8x4_RightPart(int seed)
+            [InlineData(3)]
+            public void LLM_TransformIDCT_CompareToAccurate(int seed)
            {
-                Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
-                var srcBlock = default(Block8x8F);
-                srcBlock.LoadFrom(src);
+                float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);

-                var destBlock = default(Block8x8F);
-
-                var expectedDest = new float[64];
+                var srcBlock = Block8x8F.Load(sourceArray);

                // reference
-                ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
+                Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);

                // testee
-                FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock);
-
-                var actualDest = new float[64];
-                destBlock.ScaledCopyTo(actualDest);
-
-                Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
-            }
-
-            [Theory]
-            [InlineData(1)]
-            [InlineData(2)]
-            public void IDCT8x8_Avx(int seed)
-            {
-#if SUPPORTS_RUNTIME_INTRINSICS
-                if (!Avx.IsSupported)
-                {
-                    this.Output.WriteLine("No AVX present, skipping test!");
-                    return;
-                }
-
-                Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
-                Block8x8F srcBlock = default;
-                srcBlock.LoadFrom(src);
+                // Part of the IDCT calculations is fused into the quantization step
+                // We must multiply input block with adjusted no-quantization matrix
+                // before applying IDCT
+                Block8x8F dequantMatrix = CreateBlockFromScalar(1);

-                Block8x8F destBlock = default;
+                // Dequantization using unit matrix - no values are upscaled
+                // as quant matrix is all 1's
+                // This step is needed to apply adjusting multipliers to the input block
+                FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
+                srcBlock.MultiplyInPlace(ref dequantMatrix);

-                float[] expectedDest = new float[64];
+                // IDCT implementation transforms blocks after transposition
+                srcBlock.TransposeInplace();

-                // reference, left part
-                ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
+                // IDCT calculation
+                FastFloatingPointDCT.TransformIDCT(ref srcBlock);

-                // reference, right part
-                ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
-
-                // testee, whole 8x8
-                FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock);
-
-                float[] actualDest = new float[64];
-                destBlock.ScaledCopyTo(actualDest);
-
-                Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
-#endif
+                this.CompareBlocks(expected, srcBlock, 1f);
            }

+            // Inverse transform
+            // This test covers entire IDCT conversion chain
+            // This test checks all hardware implementations
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
@ -157,41 +115,53 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
                {
                    int seed = FeatureTestRunner.Deserialize<int>(serialized);

-                    Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
+                    Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
                    var srcBlock = default(Block8x8F);
                    srcBlock.LoadFrom(src);

-                    var expectedDest = new float[64];
-                    var temp1 = new float[64];
-                    var temp2 = default(Block8x8F);
+                    float[] expectedDest = new float[64];
+                    float[] temp = new float[64];

                    // reference
-                    ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1);
+                    ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp);

                    // testee
-                    FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp2);
+                    // Part of the IDCT calculations is fused into the quantization step
+                    // We must multiply input block with adjusted no-quantization matrix
+                    // before applying IDCT
+                    Block8x8F dequantMatrix = CreateBlockFromScalar(1);
+
+                    // Dequantization using unit matrix - no values are upscaled
+                    // as quant matrix is all 1's
+                    // This step is needed to apply adjusting multipliers to the input block
+                    FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
+                    srcBlock.MultiplyInPlace(ref dequantMatrix);
+
+                    // IDCT implementation transforms blocks after transposition
+                    srcBlock.TransposeInplace();

-                    var actualDest = new float[64];
-                    srcBlock.ScaledCopyTo(actualDest);
+                    // IDCT calculation
+                    FastFloatingPointDCT.TransformIDCT(ref srcBlock);
+
+                    float[] actualDest = srcBlock.ToArray();

                    Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
                }

-                // 3 paths:
+                // 4 paths:
                // 1. AllowAll - call avx/fma implementation
-                // 2. DisableFMA - call avx implementation without fma acceleration
-                // 3. DisableAvx - call fallback code of Vector4 implementation
-                //
-                // DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result
+                // 2. DisableFMA - call avx without fma implementation
+                // 3. DisableAvx - call sse Vector4 implementation
+                // 4. DisableHWIntrinsic - call scalar fallback implementation
                FeatureTestRunner.RunWithHwIntrinsicsFeature(
                    RunTest,
                    seed,
-                    HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX);
+                    HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic);
            }

            // Forward transform
-            // This test covers entire FDCT conversions chain
-            // This test checks all implementations: intrinsic and scalar fallback
+            // This test covers entire FDCT conversion chain
+            // This test checks all hardware implementations
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
@ -201,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
                {
                    int seed = FeatureTestRunner.Deserialize<int>(serialized);

-                    Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
+                    Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
                    var block = default(Block8x8F);
                    block.LoadFrom(src);

@ -212,23 +182,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
                    ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true);

                    // testee
-                    // Part of the FDCT calculations is fused into the quantization step
-                    // We must multiply transformed block with reciprocal values from FastFloatingPointDCT.ANN_DCT_reciprocalAdjustmen
                    FastFloatingPointDCT.TransformFDCT(ref block);
-                    for (int i = 0; i < 64; i++)
-                    {
-                        block[i] = block[i] * FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients[i];
-                    }
+
+                    // Part of the FDCT calculations is fused into the quantization step
+                    // We must multiply transformed block with adjusted no-quantization matrix
+                    // after applying FDCT
+                    Block8x8F quantMatrix = CreateBlockFromScalar(1);
+                    FastFloatingPointDCT.AdjustToFDCT(ref quantMatrix);
+                    block.MultiplyInPlace(ref quantMatrix);

                    float[] actualDest = block.ToArray();

                    Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f));
                }

-                // 3 paths:
+                // 4 paths:
                // 1. AllowAll - call avx/fma implementation
-                // 2. DisableFMA - call avx implementation without fma acceleration
-                // 3. DisableAvx - call sse implementation
+                // 2. DisableFMA - call avx without fma implementation
+                // 3. DisableAvx - call sse Vector4 implementation
                // 4. DisableHWIntrinsic - call scalar fallback implementation
                FeatureTestRunner.RunWithHwIntrinsicsFeature(
                    RunTest,