Fixed compilation errors, fixed tests

5 years ago · af0b8ac3dd
2 changed files with 92 additions and 127 deletions
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
@ -18,11 +18,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
        /// </summary>
        public Block8x8F SourceBlock;
        /// <summary>
        /// Temporal block to store intermediate computation results.
        /// </summary>
        public Block8x8F WorkspaceBlock;
        /// <summary>
        /// The quantization table as <see cref="Block8x8F"/>.
        /// </summary>
@ -45,7 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
            this.subSamplingDivisors = component.SubSamplingDivisors;
            this.SourceBlock = default;
            this.WorkspaceBlock = default;
        }
        /// <summary>
@ -71,7 +65,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
            // Dequantize:
            block.MultiplyInPlace(ref this.DequantiazationTable);
-            FastFloatingPointDCT.TransformIDCT(ref block, ref this.WorkspaceBlock);
+            FastFloatingPointDCT.TransformIDCT(ref block);
            // To conform better to libjpeg we actually NEED TO lose precision here.
            // This is because they store blocks as Int16 between all the operations.
--- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
@ -2,9 +2,6 @@
 // Licensed under the Apache License, Version 2.0.
 using System;
 #if SUPPORTS_RUNTIME_INTRINSICS
 using System.Runtime.Intrinsics.X86;
 #endif
 using SixLabors.ImageSharp.Formats.Jpeg.Components;
 using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils;
 using SixLabors.ImageSharp.Tests.TestUtilities;
@ -17,6 +14,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
    [Trait("Format", "Jpg")]
    public static class DCTTests
    {
        private const int MaxAllowedValue = short.MaxValue;
        private const int MinAllowedValue = short.MinValue;
        /// <summary>
        /// Creates a <see cref="Block8x8F"/> in which every element holds the same value.
        /// </summary>
        /// <param name="value">The value written to each of the <see cref="Block8x8F.Size"/> elements.</param>
        /// <returns>The filled block.</returns>
        internal static Block8x8F CreateBlockFromScalar(float value)
        {
            var filled = default(Block8x8F);

            // Walk every element index once and overwrite it with the scalar.
            int index = 0;
            while (index < Block8x8F.Size)
            {
                filled[index] = value;
                index++;
            }

            return filled;
        }
        public class FastFloatingPoint : JpegFixture
        {
            public FastFloatingPoint(ITestOutputHelper output)
@ -24,130 +35,77 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
            {
            }
            // Reference tests
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
            [InlineData(3)]
            public void LLM_TransformIDCT_CompareToNonOptimized(int seed)
            {
-                float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
+                float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
                var srcBlock = Block8x8F.Load(sourceArray);
                // reference
                Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock);
-                var temp = default(Block8x8F);
+                // testee
-                FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
+                // Part of the IDCT calculations is fused into the quantization step
-
+                // We must multiply input block with adjusted no-quantization matrix
-                this.CompareBlocks(expected, srcBlock, 1f);
+                // before applying IDCT
-            }
+                Block8x8F dequantMatrix = CreateBlockFromScalar(1);
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
            [InlineData(3)]
            public void LLM_TransformIDCT_CompareToAccurate(int seed)
            {
                float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
-                var srcBlock = Block8x8F.Load(sourceArray);
+                // Dequantization using unit matrix - no values are upscaled
                // as quant matrix is all 1's
                // This step is needed to apply adjusting multipliers to the input block
                FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
                srcBlock.MultiplyInPlace(ref dequantMatrix);
-                Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);
+                // IDCT implementation transforms blocks after transposition
                srcBlock.TransposeInplace();
-                var temp = default(Block8x8F);
+                // IDCT calculation
-                FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
+                FastFloatingPointDCT.TransformIDCT(ref srcBlock);
                this.CompareBlocks(expected, srcBlock, 1f);
            }
            // Inverse transform
            /// <summary>
            /// Compares the output of <c>FastFloatingPointDCT.IDCT8x4_LeftPart</c> with the
            /// reference <c>IDCT2D8x4_32f</c> implementation for seeded random input.
            /// </summary>
            /// <param name="seed">Seed passed to the random 8x8 test data generator.</param>
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
            public void IDCT8x4_LeftPart(int seed)
            {
                Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
                var srcBlock = default(Block8x8F);
                srcBlock.LoadFrom(src);
                var destBlock = default(Block8x8F);
                var expectedDest = new float[64];

                // reference
                ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);

                // testee
                FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock);
                var actualDest = new float[64];
                destBlock.ScaledCopyTo(actualDest);

                // xUnit takes the expected value first and the actual value second;
                // keeping that order makes failure messages label the two arrays correctly.
                Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f));
            }
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
-            public void IDCT8x4_RightPart(int seed)
+            [InlineData(3)]
            public void LLM_TransformIDCT_CompareToAccurate(int seed)
            {
-                Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
+                float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
                var srcBlock = default(Block8x8F);
                srcBlock.LoadFrom(src);
-                var destBlock = default(Block8x8F);
+                var srcBlock = Block8x8F.Load(sourceArray);
                var expectedDest = new float[64];
                // reference
-                ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
+                Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);
                // testee
-                FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock);
+                // Part of the IDCT calculations is fused into the quantization step
-
+                // We must multiply input block with adjusted no-quantization matrix
-                var actualDest = new float[64];
+                // before applying IDCT
-                destBlock.ScaledCopyTo(actualDest);
+                Block8x8F dequantMatrix = CreateBlockFromScalar(1);
                Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
            }
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
            public void IDCT8x8_Avx(int seed)
            {
 #if SUPPORTS_RUNTIME_INTRINSICS
                if (!Avx.IsSupported)
                {
                    this.Output.WriteLine("No AVX present, skipping test!");
                    return;
                }
                Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
                Block8x8F srcBlock = default;
                srcBlock.LoadFrom(src);
-                Block8x8F destBlock = default;
+                // Dequantization using unit matrix - no values are upscaled
                // as quant matrix is all 1's
                // This step is needed to apply adjusting multipliers to the input block
                FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
                srcBlock.MultiplyInPlace(ref dequantMatrix);
-                float[] expectedDest = new float[64];
+                // IDCT implementation transforms blocks after transposition
                srcBlock.TransposeInplace();
-                // reference, left part
+                // IDCT calculation
-                ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
+                FastFloatingPointDCT.TransformIDCT(ref srcBlock);
-                // reference, right part
+                this.CompareBlocks(expected, srcBlock, 1f);
                ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
                // testee, whole 8x8
                FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock);
                float[] actualDest = new float[64];
                destBlock.ScaledCopyTo(actualDest);
                Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
 #endif
            }
            // Inverse transform
            // This test covers entire IDCT conversion chain
            // This test checks all hardware implementations
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
@ -157,41 +115,53 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
                {
                    int seed = FeatureTestRunner.Deserialize<int>(serialized);
-                    Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
+                    Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
                    var srcBlock = default(Block8x8F);
                    srcBlock.LoadFrom(src);
-                    var expectedDest = new float[64];
+                    float[] expectedDest = new float[64];
-                    var temp1 = new float[64];
+                    float[] temp = new float[64];
                    var temp2 = default(Block8x8F);
                    // reference
-                    ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1);
+                    ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp);
                    // testee
-                    FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp2);
+                    // Part of the IDCT calculations is fused into the quantization step
                    // We must multiply input block with adjusted no-quantization matrix
                    // before applying IDCT
                    Block8x8F dequantMatrix = CreateBlockFromScalar(1);
                    // Dequantization using unit matrix - no values are upscaled
                    // as quant matrix is all 1's
                    // This step is needed to apply adjusting multipliers to the input block
                    FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
                    srcBlock.MultiplyInPlace(ref dequantMatrix);
                    // IDCT implementation transforms blocks after transposition
                    srcBlock.TransposeInplace();
-                    var actualDest = new float[64];
+                    // IDCT calculation
-                    srcBlock.ScaledCopyTo(actualDest);
+                    FastFloatingPointDCT.TransformIDCT(ref srcBlock);
                    float[] actualDest = srcBlock.ToArray();
                    Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
                }
-                // 3 paths:
+                // 4 paths:
                // 1. AllowAll - call avx/fma implementation
-                // 2. DisableFMA - call avx implementation without fma acceleration
+                // 2. DisableFMA - call avx without fma implementation
-                // 3. DisableAvx - call fallback code of Vector4 implementation
+                // 3. DisableAvx - call sse Vector4 implementation
-                //
+                // 4. DisableHWIntrinsic - call scalar fallback implementation
                // DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result
                FeatureTestRunner.RunWithHwIntrinsicsFeature(
                    RunTest,
                    seed,
-                    HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX);
+                    HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic);
            }
            // Forward transform
-            // This test covers entire FDCT conversions chain
+            // This test covers entire FDCT conversion chain
-            // This test checks all implementations: intrinsic and scalar fallback
+            // This test checks all hardware implementations
            [Theory]
            [InlineData(1)]
            [InlineData(2)]
@ -201,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
                {
                    int seed = FeatureTestRunner.Deserialize<int>(serialized);
-                    Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed);
+                    Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
                    var block = default(Block8x8F);
                    block.LoadFrom(src);
@ -212,23 +182,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
                    ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true);
                    // testee
                    // Part of the FDCT calculations is fused into the quantization step
                    // We must multiply transformed block with reciprocal values from FastFloatingPointDCT.ANN_DCT_reciprocalAdjustmen
                    FastFloatingPointDCT.TransformFDCT(ref block);
-                    for (int i = 0; i < 64; i++)
+
-                    {
+                    // Part of the IDCT calculations is fused into the quantization step
-                        block[i] = block[i] * FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients[i];
+                    // We must multiply input block with adjusted no-quantization matrix
-                    }
+                    // after applying FDCT
                    Block8x8F quantMatrix = CreateBlockFromScalar(1);
                    FastFloatingPointDCT.AdjustToFDCT(ref quantMatrix);
                    block.MultiplyInPlace(ref quantMatrix);
                    float[] actualDest = block.ToArray();
                    Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f));
                }
-                // 3 paths:
+                // 4 paths:
                // 1. AllowAll - call avx/fma implementation
-                // 2. DisableFMA - call avx implementation without fma acceleration
+                // 2. DisableFMA - call avx without fma implementation
-                // 3. DisableAvx - call sse implementation
+                // 3. DisableAvx - call sse Vector4 implementation
                // 4. DisableHWIntrinsic - call scalar fallback implementation
                FeatureTestRunner.RunWithHwIntrinsicsFeature(
                    RunTest,