diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
index 085cd4a291..15f212b400 100644
--- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
+++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs
@@ -18,11 +18,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
///
public Block8x8F SourceBlock;
- ///
- /// Temporal block to store intermediate computation results.
- ///
- public Block8x8F WorkspaceBlock;
-
///
/// The quantization table as .
///
@@ -45,7 +40,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
this.subSamplingDivisors = component.SubSamplingDivisors;
this.SourceBlock = default;
- this.WorkspaceBlock = default;
}
///
@@ -71,7 +65,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder
// Dequantize:
block.MultiplyInPlace(ref this.DequantiazationTable);
- FastFloatingPointDCT.TransformIDCT(ref block, ref this.WorkspaceBlock);
+ FastFloatingPointDCT.TransformIDCT(ref block);
// To conform better to libjpeg we actually NEED TO loose precision here.
// This is because they store blocks as Int16 between all the operations.
diff --git a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
index 0a49d20cd4..3a6eb4f8bf 100644
--- a/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Jpg/DCTTests.cs
@@ -2,9 +2,6 @@
// Licensed under the Apache License, Version 2.0.
using System;
-#if SUPPORTS_RUNTIME_INTRINSICS
-using System.Runtime.Intrinsics.X86;
-#endif
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils;
using SixLabors.ImageSharp.Tests.TestUtilities;
@@ -17,6 +14,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
[Trait("Format", "Jpg")]
public static class DCTTests
{
+ private const int MaxAllowedValue = short.MaxValue;
+ private const int MinAllowedValue = short.MinValue;
+
+ internal static Block8x8F CreateBlockFromScalar(float value)
+ {
+ Block8x8F result = default;
+ for (int i = 0; i < Block8x8F.Size; i++)
+ {
+ result[i] = value;
+ }
+
+ return result;
+ }
+
public class FastFloatingPoint : JpegFixture
{
public FastFloatingPoint(ITestOutputHelper output)
@@ -24,130 +35,77 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
}
- // Reference tests
[Theory]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
public void LLM_TransformIDCT_CompareToNonOptimized(int seed)
{
- float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
+ float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var srcBlock = Block8x8F.Load(sourceArray);
+ // reference
Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock);
- var temp = default(Block8x8F);
- FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
-
- this.CompareBlocks(expected, srcBlock, 1f);
- }
-
- [Theory]
- [InlineData(1)]
- [InlineData(2)]
- [InlineData(3)]
- public void LLM_TransformIDCT_CompareToAccurate(int seed)
- {
- float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed);
+ // testee
+ // Part of the IDCT calculations is fused into the quantization step
+ // We must multiply input block with adjusted no-quantization matrix
+ // before applying IDCT
+ Block8x8F dequantMatrix = CreateBlockFromScalar(1);
- var srcBlock = Block8x8F.Load(sourceArray);
+ // Dequantization using unit matrix - no values are upscaled
+ // as quant matrix is all 1's
+ // This step is needed to apply adjusting multipliers to the input block
+ FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
+ srcBlock.MultiplyInPlace(ref dequantMatrix);
- Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);
+ // IDCT implementation transforms blocks after transposition
+ srcBlock.TransposeInplace();
- var temp = default(Block8x8F);
- FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp);
+ // IDCT calculation
+ FastFloatingPointDCT.TransformIDCT(ref srcBlock);
this.CompareBlocks(expected, srcBlock, 1f);
}
- // Inverse transform
- [Theory]
- [InlineData(1)]
- [InlineData(2)]
- public void IDCT8x4_LeftPart(int seed)
- {
- Span src = Create8x8RoundedRandomFloatData(-200, 200, seed);
- var srcBlock = default(Block8x8F);
- srcBlock.LoadFrom(src);
-
- var destBlock = default(Block8x8F);
-
- var expectedDest = new float[64];
-
- // reference
- ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
-
- // testee
- FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock);
-
- var actualDest = new float[64];
- destBlock.ScaledCopyTo(actualDest);
-
- Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
- }
-
[Theory]
[InlineData(1)]
[InlineData(2)]
- public void IDCT8x4_RightPart(int seed)
+ [InlineData(3)]
+ public void LLM_TransformIDCT_CompareToAccurate(int seed)
{
- Span src = Create8x8RoundedRandomFloatData(-200, 200, seed);
- var srcBlock = default(Block8x8F);
- srcBlock.LoadFrom(src);
+ float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
- var destBlock = default(Block8x8F);
-
- var expectedDest = new float[64];
+ var srcBlock = Block8x8F.Load(sourceArray);
// reference
- ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
+ Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock);
// testee
- FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock);
-
- var actualDest = new float[64];
- destBlock.ScaledCopyTo(actualDest);
-
- Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
- }
-
- [Theory]
- [InlineData(1)]
- [InlineData(2)]
- public void IDCT8x8_Avx(int seed)
- {
-#if SUPPORTS_RUNTIME_INTRINSICS
- if (!Avx.IsSupported)
- {
- this.Output.WriteLine("No AVX present, skipping test!");
- return;
- }
-
- Span src = Create8x8RoundedRandomFloatData(-200, 200, seed);
- Block8x8F srcBlock = default;
- srcBlock.LoadFrom(src);
+ // Part of the IDCT calculations is fused into the quantization step
+ // We must multiply input block with adjusted no-quantization matrix
+ // before applying IDCT
+ Block8x8F dequantMatrix = CreateBlockFromScalar(1);
- Block8x8F destBlock = default;
+ // Dequantization using unit matrix - no values are upscaled
+ // as quant matrix is all 1's
+ // This step is needed to apply adjusting multipliers to the input block
+ FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
+ srcBlock.MultiplyInPlace(ref dequantMatrix);
- float[] expectedDest = new float[64];
+ // IDCT implementation transforms blocks after transposition
+ srcBlock.TransposeInplace();
- // reference, left part
- ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest);
+ // IDCT calculation
+ FastFloatingPointDCT.TransformIDCT(ref srcBlock);
- // reference, right part
- ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4));
-
- // testee, whole 8x8
- FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock);
-
- float[] actualDest = new float[64];
- destBlock.ScaledCopyTo(actualDest);
-
- Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
-#endif
+ this.CompareBlocks(expected, srcBlock, 1f);
}
+ // Inverse transform
+ // This test covers entire IDCT conversion chain
+ // This test checks all hardware implementations
[Theory]
[InlineData(1)]
[InlineData(2)]
@@ -157,41 +115,53 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
int seed = FeatureTestRunner.Deserialize(serialized);
- Span src = Create8x8RoundedRandomFloatData(-200, 200, seed);
+ Span src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var srcBlock = default(Block8x8F);
srcBlock.LoadFrom(src);
- var expectedDest = new float[64];
- var temp1 = new float[64];
- var temp2 = default(Block8x8F);
+ float[] expectedDest = new float[64];
+ float[] temp = new float[64];
// reference
- ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1);
+ ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp);
// testee
- FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp2);
+ // Part of the IDCT calculations is fused into the quantization step
+ // We must multiply input block with adjusted no-quantization matrix
+ // before applying IDCT
+ Block8x8F dequantMatrix = CreateBlockFromScalar(1);
+
+ // Dequantization using unit matrix - no values are upscaled
+ // as quant matrix is all 1's
+ // This step is needed to apply adjusting multipliers to the input block
+ FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix);
+ srcBlock.MultiplyInPlace(ref dequantMatrix);
+
+ // IDCT implementation transforms blocks after transposition
+ srcBlock.TransposeInplace();
- var actualDest = new float[64];
- srcBlock.ScaledCopyTo(actualDest);
+ // IDCT calculation
+ FastFloatingPointDCT.TransformIDCT(ref srcBlock);
+
+ float[] actualDest = srcBlock.ToArray();
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f));
}
- // 3 paths:
+ // 4 paths:
// 1. AllowAll - call avx/fma implementation
- // 2. DisableFMA - call avx implementation without fma acceleration
- // 3. DisableAvx - call fallback code of Vector4 implementation
- //
- // DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result
+ // 2. DisableFMA - call avx without fma implementation
+ // 3. DisableAvx - call sse Vector4 implementation
+ // 4. DisableHWIntrinsic - call scalar fallback implementation
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
seed,
- HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX);
+ HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic);
}
// Forward transform
- // This test covers entire FDCT conversions chain
- // This test checks all implementations: intrinsic and scalar fallback
+ // This test covers entire FDCT conversion chain
+ // This test checks all hardware implementations
[Theory]
[InlineData(1)]
[InlineData(2)]
@@ -201,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
int seed = FeatureTestRunner.Deserialize(serialized);
- Span src = Create8x8RoundedRandomFloatData(-200, 200, seed);
+ Span src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed);
var block = default(Block8x8F);
block.LoadFrom(src);
@@ -212,23 +182,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true);
// testee
- // Part of the FDCT calculations is fused into the quantization step
- // We must multiply transformed block with reciprocal values from FastFloatingPointDCT.ANN_DCT_reciprocalAdjustmen
FastFloatingPointDCT.TransformFDCT(ref block);
- for (int i = 0; i < 64; i++)
- {
- block[i] = block[i] * FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients[i];
- }
+
+ // Part of the FDCT calculations is fused into the quantization step
+ // We must multiply transformed block with adjusted no-quantization matrix
+ // after applying FDCT
+ Block8x8F quantMatrix = CreateBlockFromScalar(1);
+ FastFloatingPointDCT.AdjustToFDCT(ref quantMatrix);
+ block.MultiplyInPlace(ref quantMatrix);
float[] actualDest = block.ToArray();
Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f));
}
- // 3 paths:
+ // 4 paths:
// 1. AllowAll - call avx/fma implementation
- // 2. DisableFMA - call avx implementation without fma acceleration
- // 3. DisableAvx - call sse implementation
+ // 2. DisableFMA - call avx without fma implementation
+ // 3. DisableAvx - call sse Vector4 implementation
// 4. DisableHWIntrinsic - call scalar fallback implementation
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,