From af0b8ac3dd10e7dadd88a4baa84ab298ef2c4b59 Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Sun, 21 Nov 2021 21:46:11 +0300 Subject: [PATCH] Fixed compilation errors, fixed tests --- .../Decoder/JpegBlockPostProcessor.cs | 8 +- .../ImageSharp.Tests/Formats/Jpg/DCTTests.cs | 211 ++++++++---------- 2 files changed, 92 insertions(+), 127 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs index 085cd4a291..15f212b400 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs @@ -18,11 +18,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder /// public Block8x8F SourceBlock; - /// - /// Temporal block to store intermediate computation results. - /// - public Block8x8F WorkspaceBlock; - /// /// The quantization table as . /// @@ -45,7 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder this.subSamplingDivisors = component.SubSamplingDivisors; this.SourceBlock = default; - this.WorkspaceBlock = default; } /// @@ -71,7 +65,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder // Dequantize: block.MultiplyInPlace(ref this.DequantiazationTable); - FastFloatingPointDCT.TransformIDCT(ref block, ref this.WorkspaceBlock); + FastFloatingPointDCT.TransformIDCT(ref block); // To conform better to libjpeg we actually NEED TO loose precision here. // This is because they store blocks as Int16 between all the operations. diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs index 0a49d20cd4..3a6eb4f8bf 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs @@ -2,9 +2,6 @@ // Licensed under the Apache License, Version 2.0. 
using System; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -17,6 +14,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Trait("Format", "Jpg")] public static class DCTTests { + private const int MaxAllowedValue = short.MaxValue; + private const int MinAllowedValue = short.MinValue; + + internal static Block8x8F CreateBlockFromScalar(float value) + { + Block8x8F result = default; + for (int i = 0; i < Block8x8F.Size; i++) + { + result[i] = value; + } + + return result; + } + public class FastFloatingPoint : JpegFixture { public FastFloatingPoint(ITestOutputHelper output) @@ -24,130 +35,77 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { } - // Reference tests [Theory] [InlineData(1)] [InlineData(2)] [InlineData(3)] public void LLM_TransformIDCT_CompareToNonOptimized(int seed) { - float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); + float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); var srcBlock = Block8x8F.Load(sourceArray); + // reference Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock); - var temp = default(Block8x8F); - FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp); - - this.CompareBlocks(expected, srcBlock, 1f); - } - - [Theory] - [InlineData(1)] - [InlineData(2)] - [InlineData(3)] - public void LLM_TransformIDCT_CompareToAccurate(int seed) - { - float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); + // testee + // Part of the IDCT calculations is fused into the quantization step + // We must multiply input block with adjusted no-quantization matrix + // before applying IDCT + Block8x8F dequantMatrix = CreateBlockFromScalar(1); - var srcBlock = Block8x8F.Load(sourceArray); + // Dequantization using unit matrix - no values 
are upscaled + // as quant matrix is all 1's + // This step is needed to apply adjusting multipliers to the input block + FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); + srcBlock.MultiplyInPlace(ref dequantMatrix); - Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock); + // IDCT implementation transforms blocks after transposition + srcBlock.TransposeInplace(); - var temp = default(Block8x8F); - FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp); + // IDCT calculation + FastFloatingPointDCT.TransformIDCT(ref srcBlock); this.CompareBlocks(expected, srcBlock, 1f); } - // Inverse transform - [Theory] - [InlineData(1)] - [InlineData(2)] - public void IDCT8x4_LeftPart(int seed) - { - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); - var srcBlock = default(Block8x8F); - srcBlock.LoadFrom(src); - - var destBlock = default(Block8x8F); - - var expectedDest = new float[64]; - - // reference - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest); - - // testee - FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock); - - var actualDest = new float[64]; - destBlock.ScaledCopyTo(actualDest); - - Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); - } - [Theory] [InlineData(1)] [InlineData(2)] - public void IDCT8x4_RightPart(int seed) + [InlineData(3)] + public void LLM_TransformIDCT_CompareToAccurate(int seed) { - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); - var srcBlock = default(Block8x8F); - srcBlock.LoadFrom(src); + float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); - var destBlock = default(Block8x8F); - - var expectedDest = new float[64]; + var srcBlock = Block8x8F.Load(sourceArray); // reference - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); + Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock); // 
testee - FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock); - - var actualDest = new float[64]; - destBlock.ScaledCopyTo(actualDest); - - Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); - } - - [Theory] - [InlineData(1)] - [InlineData(2)] - public void IDCT8x8_Avx(int seed) - { -#if SUPPORTS_RUNTIME_INTRINSICS - if (!Avx.IsSupported) - { - this.Output.WriteLine("No AVX present, skipping test!"); - return; - } - - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); - Block8x8F srcBlock = default; - srcBlock.LoadFrom(src); + // Part of the IDCT calculations is fused into the quantization step + // We must multiply input block with adjusted no-quantization matrix + // before applying IDCT + Block8x8F dequantMatrix = CreateBlockFromScalar(1); - Block8x8F destBlock = default; + // Dequantization using unit matrix - no values are upscaled + // as quant matrix is all 1's + // This step is needed to apply adjusting multipliers to the input block + FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); + srcBlock.MultiplyInPlace(ref dequantMatrix); - float[] expectedDest = new float[64]; + // IDCT implementation transforms blocks after transposition + srcBlock.TransposeInplace(); - // reference, left part - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest); + // IDCT calculation + FastFloatingPointDCT.TransformIDCT(ref srcBlock); - // reference, right part - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); - - // testee, whole 8x8 - FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock); - - float[] actualDest = new float[64]; - destBlock.ScaledCopyTo(actualDest); - - Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); -#endif + this.CompareBlocks(expected, srcBlock, 1f); } + // Inverse transform + // This test covers entire IDCT conversion chain + // This test checks all hardware implementations [Theory] 
[InlineData(1)] [InlineData(2)] @@ -157,41 +115,53 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { int seed = FeatureTestRunner.Deserialize(serialized); - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + Span src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); var srcBlock = default(Block8x8F); srcBlock.LoadFrom(src); - var expectedDest = new float[64]; - var temp1 = new float[64]; - var temp2 = default(Block8x8F); + float[] expectedDest = new float[64]; + float[] temp = new float[64]; // reference - ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1); + ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp); // testee - FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp2); + // Part of the IDCT calculations is fused into the quantization step + // We must multiply input block with adjusted no-quantization matrix + // before applying IDCT + Block8x8F dequantMatrix = CreateBlockFromScalar(1); + + // Dequantization using unit matrix - no values are upscaled + // as quant matrix is all 1's + // This step is needed to apply adjusting multipliers to the input block + FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); + srcBlock.MultiplyInPlace(ref dequantMatrix); + + // IDCT implementation transforms blocks after transposition + srcBlock.TransposeInplace(); - var actualDest = new float[64]; - srcBlock.ScaledCopyTo(actualDest); + // IDCT calculation + FastFloatingPointDCT.TransformIDCT(ref srcBlock); + + float[] actualDest = srcBlock.ToArray(); Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); } - // 3 paths: + // 4 paths: // 1. AllowAll - call avx/fma implementation - // 2. DisableFMA - call avx implementation without fma acceleration - // 3. DisableAvx - call fallback code of Vector4 implementation - // - // DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result + // 2. 
DisableFMA - call avx without fma implementation + // 3. DisableAvx - call sse Vector4 implementation + // 4. DisableHWIntrinsic - call scalar fallback implementation FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, seed, - HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX); + HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic); } // Forward transform - // This test covers entire FDCT conversions chain - // This test checks all implementations: intrinsic and scalar fallback + // This test covers entire FDCT conversion chain + // This test checks all hardware implementations [Theory] [InlineData(1)] [InlineData(2)] @@ -201,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { int seed = FeatureTestRunner.Deserialize(serialized); - Span src = Create8x8RoundedRandomFloatData(-200, 200, seed); + Span src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); var block = default(Block8x8F); block.LoadFrom(src); @@ -212,23 +182,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); // testee - // Part of the FDCT calculations is fused into the quantization step - // We must multiply transformed block with reciprocal values from FastFloatingPointDCT.ANN_DCT_reciprocalAdjustmen FastFloatingPointDCT.TransformFDCT(ref block); - for (int i = 0; i < 64; i++) - { - block[i] = block[i] * FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients[i]; - } + + // Part of the FDCT calculations is fused into the quantization step + // We must multiply the transformed block with adjusted no-quantization matrix + // after applying FDCT + Block8x8F quantMatrix = CreateBlockFromScalar(1); + FastFloatingPointDCT.AdjustToFDCT(ref quantMatrix); + block.MultiplyInPlace(ref quantMatrix); float[] actualDest = block.ToArray(); Assert.Equal(expectedDest, actualDest, new 
ApproximateFloatComparer(1f)); } - // 3 paths: + // 4 paths: // 1. AllowAll - call avx/fma implementation - // 2. DisableFMA - call avx implementation without fma acceleration - // 3. DisableAvx - call sse implementation + // 2. DisableFMA - call avx without fma implementation + // 3. DisableAvx - call sse Vector4 implementation // 4. DisableHWIntrinsic - call scalar fallback implementation FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest,