|
|
|
@ -2,9 +2,6 @@ |
|
|
|
// Licensed under the Apache License, Version 2.0.
|
|
|
|
|
|
|
|
using System; |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
using System.Runtime.Intrinsics.X86; |
|
|
|
#endif
|
|
|
|
using SixLabors.ImageSharp.Formats.Jpeg.Components; |
|
|
|
using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; |
|
|
|
using SixLabors.ImageSharp.Tests.TestUtilities; |
|
|
|
@ -17,6 +14,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg |
|
|
|
[Trait("Format", "Jpg")] |
|
|
|
public static class DCTTests |
|
|
|
{ |
|
|
|
private const int MaxAllowedValue = short.MaxValue; |
|
|
|
private const int MinAllowedValue = short.MinValue; |
|
|
|
|
|
|
|
internal static Block8x8F CreateBlockFromScalar(float value) |
|
|
|
{ |
|
|
|
Block8x8F result = default; |
|
|
|
for (int i = 0; i < Block8x8F.Size; i++) |
|
|
|
{ |
|
|
|
result[i] = value; |
|
|
|
} |
|
|
|
|
|
|
|
return result; |
|
|
|
} |
|
|
|
|
|
|
|
public class FastFloatingPoint : JpegFixture |
|
|
|
{ |
|
|
|
public FastFloatingPoint(ITestOutputHelper output) |
|
|
|
@ -24,130 +35,77 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg |
|
|
|
{ |
|
|
|
} |
|
|
|
|
|
|
|
// Reference tests
|
|
|
|
[Theory] |
|
|
|
[InlineData(1)] |
|
|
|
[InlineData(2)] |
|
|
|
[InlineData(3)] |
|
|
|
public void LLM_TransformIDCT_CompareToNonOptimized(int seed) |
|
|
|
{ |
|
|
|
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); |
|
|
|
float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); |
|
|
|
|
|
|
|
var srcBlock = Block8x8F.Load(sourceArray); |
|
|
|
|
|
|
|
// reference
|
|
|
|
Block8x8F expected = ReferenceImplementations.LLM_FloatingPoint_DCT.TransformIDCT(ref srcBlock); |
|
|
|
|
|
|
|
var temp = default(Block8x8F); |
|
|
|
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp); |
|
|
|
|
|
|
|
this.CompareBlocks(expected, srcBlock, 1f); |
|
|
|
} |
|
|
|
|
|
|
|
[Theory] |
|
|
|
[InlineData(1)] |
|
|
|
[InlineData(2)] |
|
|
|
[InlineData(3)] |
|
|
|
public void LLM_TransformIDCT_CompareToAccurate(int seed) |
|
|
|
{ |
|
|
|
float[] sourceArray = Create8x8RoundedRandomFloatData(-1000, 1000, seed); |
|
|
|
// testee
|
|
|
|
// Part of the IDCT calculations is fused into the quantization step
|
|
|
|
// We must multiply input block with adjusted no-quantization matrix
|
|
|
|
// before applying IDCT
|
|
|
|
Block8x8F dequantMatrix = CreateBlockFromScalar(1); |
|
|
|
|
|
|
|
var srcBlock = Block8x8F.Load(sourceArray); |
|
|
|
// Dequantization using unit matrix - no values are upscaled
|
|
|
|
// as quant matrix is all 1's
|
|
|
|
// This step is needed to apply adjusting multipliers to the input block
|
|
|
|
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); |
|
|
|
srcBlock.MultiplyInPlace(ref dequantMatrix); |
|
|
|
|
|
|
|
Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock); |
|
|
|
// IDCT implementation tranforms blocks after transposition
|
|
|
|
srcBlock.TransposeInplace(); |
|
|
|
|
|
|
|
var temp = default(Block8x8F); |
|
|
|
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp); |
|
|
|
// IDCT calculation
|
|
|
|
FastFloatingPointDCT.TransformIDCT(ref srcBlock); |
|
|
|
|
|
|
|
this.CompareBlocks(expected, srcBlock, 1f); |
|
|
|
} |
|
|
|
|
|
|
|
// Inverse transform
|
|
|
|
[Theory] |
|
|
|
[InlineData(1)] |
|
|
|
[InlineData(2)] |
|
|
|
public void IDCT8x4_LeftPart(int seed) |
|
|
|
{ |
|
|
|
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed); |
|
|
|
var srcBlock = default(Block8x8F); |
|
|
|
srcBlock.LoadFrom(src); |
|
|
|
|
|
|
|
var destBlock = default(Block8x8F); |
|
|
|
|
|
|
|
var expectedDest = new float[64]; |
|
|
|
|
|
|
|
// reference
|
|
|
|
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest); |
|
|
|
|
|
|
|
// testee
|
|
|
|
FastFloatingPointDCT.IDCT8x4_LeftPart(ref srcBlock, ref destBlock); |
|
|
|
|
|
|
|
var actualDest = new float[64]; |
|
|
|
destBlock.ScaledCopyTo(actualDest); |
|
|
|
|
|
|
|
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); |
|
|
|
} |
|
|
|
|
|
|
|
[Theory] |
|
|
|
[InlineData(1)] |
|
|
|
[InlineData(2)] |
|
|
|
public void IDCT8x4_RightPart(int seed) |
|
|
|
[InlineData(3)] |
|
|
|
public void LLM_TransformIDCT_CompareToAccurate(int seed) |
|
|
|
{ |
|
|
|
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed); |
|
|
|
var srcBlock = default(Block8x8F); |
|
|
|
srcBlock.LoadFrom(src); |
|
|
|
float[] sourceArray = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); |
|
|
|
|
|
|
|
var destBlock = default(Block8x8F); |
|
|
|
|
|
|
|
var expectedDest = new float[64]; |
|
|
|
var srcBlock = Block8x8F.Load(sourceArray); |
|
|
|
|
|
|
|
// reference
|
|
|
|
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); |
|
|
|
Block8x8F expected = ReferenceImplementations.AccurateDCT.TransformIDCT(ref srcBlock); |
|
|
|
|
|
|
|
// testee
|
|
|
|
FastFloatingPointDCT.IDCT8x4_RightPart(ref srcBlock, ref destBlock); |
|
|
|
|
|
|
|
var actualDest = new float[64]; |
|
|
|
destBlock.ScaledCopyTo(actualDest); |
|
|
|
|
|
|
|
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); |
|
|
|
} |
|
|
|
|
|
|
|
[Theory] |
|
|
|
[InlineData(1)] |
|
|
|
[InlineData(2)] |
|
|
|
public void IDCT8x8_Avx(int seed) |
|
|
|
{ |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
if (!Avx.IsSupported) |
|
|
|
{ |
|
|
|
this.Output.WriteLine("No AVX present, skipping test!"); |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed); |
|
|
|
Block8x8F srcBlock = default; |
|
|
|
srcBlock.LoadFrom(src); |
|
|
|
// Part of the IDCT calculations is fused into the quantization step
|
|
|
|
// We must multiply input block with adjusted no-quantization matrix
|
|
|
|
// before applying IDCT
|
|
|
|
Block8x8F dequantMatrix = CreateBlockFromScalar(1); |
|
|
|
|
|
|
|
Block8x8F destBlock = default; |
|
|
|
// Dequantization using unit matrix - no values are upscaled
|
|
|
|
// as quant matrix is all 1's
|
|
|
|
// This step is needed to apply adjusting multipliers to the input block
|
|
|
|
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); |
|
|
|
srcBlock.MultiplyInPlace(ref dequantMatrix); |
|
|
|
|
|
|
|
float[] expectedDest = new float[64]; |
|
|
|
// IDCT implementation tranforms blocks after transposition
|
|
|
|
srcBlock.TransposeInplace(); |
|
|
|
|
|
|
|
// reference, left part
|
|
|
|
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src, expectedDest); |
|
|
|
// IDCT calculation
|
|
|
|
FastFloatingPointDCT.TransformIDCT(ref srcBlock); |
|
|
|
|
|
|
|
// reference, right part
|
|
|
|
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D8x4_32f(src.Slice(4), expectedDest.AsSpan(4)); |
|
|
|
|
|
|
|
// testee, whole 8x8
|
|
|
|
FastFloatingPointDCT.IDCT8x8_Avx(ref srcBlock, ref destBlock); |
|
|
|
|
|
|
|
float[] actualDest = new float[64]; |
|
|
|
destBlock.ScaledCopyTo(actualDest); |
|
|
|
|
|
|
|
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); |
|
|
|
#endif
|
|
|
|
this.CompareBlocks(expected, srcBlock, 1f); |
|
|
|
} |
|
|
|
|
|
|
|
// Inverse transform
|
|
|
|
// This test covers entire IDCT conversion chain
|
|
|
|
// This test checks all hardware implementations
|
|
|
|
[Theory] |
|
|
|
[InlineData(1)] |
|
|
|
[InlineData(2)] |
|
|
|
@ -157,41 +115,53 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg |
|
|
|
{ |
|
|
|
int seed = FeatureTestRunner.Deserialize<int>(serialized); |
|
|
|
|
|
|
|
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed); |
|
|
|
Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); |
|
|
|
var srcBlock = default(Block8x8F); |
|
|
|
srcBlock.LoadFrom(src); |
|
|
|
|
|
|
|
var expectedDest = new float[64]; |
|
|
|
var temp1 = new float[64]; |
|
|
|
var temp2 = default(Block8x8F); |
|
|
|
float[] expectedDest = new float[64]; |
|
|
|
float[] temp = new float[64]; |
|
|
|
|
|
|
|
// reference
|
|
|
|
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp1); |
|
|
|
ReferenceImplementations.LLM_FloatingPoint_DCT.IDCT2D_llm(src, expectedDest, temp); |
|
|
|
|
|
|
|
// testee
|
|
|
|
FastFloatingPointDCT.TransformIDCT(ref srcBlock, ref temp2); |
|
|
|
// Part of the IDCT calculations is fused into the quantization step
|
|
|
|
// We must multiply input block with adjusted no-quantization matrix
|
|
|
|
// before applying IDCT
|
|
|
|
Block8x8F dequantMatrix = CreateBlockFromScalar(1); |
|
|
|
|
|
|
|
// Dequantization using unit matrix - no values are upscaled
|
|
|
|
// as quant matrix is all 1's
|
|
|
|
// This step is needed to apply adjusting multipliers to the input block
|
|
|
|
FastFloatingPointDCT.AdjustToIDCT(ref dequantMatrix); |
|
|
|
srcBlock.MultiplyInPlace(ref dequantMatrix); |
|
|
|
|
|
|
|
// IDCT implementation tranforms blocks after transposition
|
|
|
|
srcBlock.TransposeInplace(); |
|
|
|
|
|
|
|
var actualDest = new float[64]; |
|
|
|
srcBlock.ScaledCopyTo(actualDest); |
|
|
|
// IDCT calculation
|
|
|
|
FastFloatingPointDCT.TransformIDCT(ref srcBlock); |
|
|
|
|
|
|
|
float[] actualDest = srcBlock.ToArray(); |
|
|
|
|
|
|
|
Assert.Equal(actualDest, expectedDest, new ApproximateFloatComparer(1f)); |
|
|
|
} |
|
|
|
|
|
|
|
// 3 paths:
|
|
|
|
// 4 paths:
|
|
|
|
// 1. AllowAll - call avx/fma implementation
|
|
|
|
// 2. DisableFMA - call avx implementation without fma acceleration
|
|
|
|
// 3. DisableAvx - call fallback code of Vector4 implementation
|
|
|
|
//
|
|
|
|
// DisableSSE isn't needed because fallback Vector4 code will compile to either sse or fallback code with same result
|
|
|
|
// 2. DisableFMA - call avx without fma implementation
|
|
|
|
// 3. DisableAvx - call sse Vector4 implementation
|
|
|
|
// 4. DisableHWIntrinsic - call scalar fallback implementation
|
|
|
|
FeatureTestRunner.RunWithHwIntrinsicsFeature( |
|
|
|
RunTest, |
|
|
|
seed, |
|
|
|
HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX); |
|
|
|
HwIntrinsics.AllowAll | HwIntrinsics.DisableFMA | HwIntrinsics.DisableAVX | HwIntrinsics.DisableHWIntrinsic); |
|
|
|
} |
|
|
|
|
|
|
|
// Forward transform
|
|
|
|
// This test covers entire FDCT conversions chain
|
|
|
|
// This test checks all implementations: intrinsic and scalar fallback
|
|
|
|
// This test covers entire FDCT conversion chain
|
|
|
|
// This test checks all hardware implementations
|
|
|
|
[Theory] |
|
|
|
[InlineData(1)] |
|
|
|
[InlineData(2)] |
|
|
|
@ -201,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg |
|
|
|
{ |
|
|
|
int seed = FeatureTestRunner.Deserialize<int>(serialized); |
|
|
|
|
|
|
|
Span<float> src = Create8x8RoundedRandomFloatData(-200, 200, seed); |
|
|
|
Span<float> src = Create8x8RoundedRandomFloatData(MinAllowedValue, MaxAllowedValue, seed); |
|
|
|
var block = default(Block8x8F); |
|
|
|
block.LoadFrom(src); |
|
|
|
|
|
|
|
@ -212,23 +182,24 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg |
|
|
|
ReferenceImplementations.LLM_FloatingPoint_DCT.FDCT2D_llm(src, expectedDest, temp1, downscaleBy8: true); |
|
|
|
|
|
|
|
// testee
|
|
|
|
// Part of the FDCT calculations is fused into the quantization step
|
|
|
|
// We must multiply transformed block with reciprocal values from FastFloatingPointDCT.ANN_DCT_reciprocalAdjustmen
|
|
|
|
FastFloatingPointDCT.TransformFDCT(ref block); |
|
|
|
for (int i = 0; i < 64; i++) |
|
|
|
{ |
|
|
|
block[i] = block[i] * FastFloatingPointDCT.DctReciprocalAdjustmentCoefficients[i]; |
|
|
|
} |
|
|
|
|
|
|
|
// Part of the IDCT calculations is fused into the quantization step
|
|
|
|
// We must multiply input block with adjusted no-quantization matrix
|
|
|
|
// after applying FDCT
|
|
|
|
Block8x8F quantMatrix = CreateBlockFromScalar(1); |
|
|
|
FastFloatingPointDCT.AdjustToFDCT(ref quantMatrix); |
|
|
|
block.MultiplyInPlace(ref quantMatrix); |
|
|
|
|
|
|
|
float[] actualDest = block.ToArray(); |
|
|
|
|
|
|
|
Assert.Equal(expectedDest, actualDest, new ApproximateFloatComparer(1f)); |
|
|
|
} |
|
|
|
|
|
|
|
// 3 paths:
|
|
|
|
// 4 paths:
|
|
|
|
// 1. AllowAll - call avx/fma implementation
|
|
|
|
// 2. DisableFMA - call avx implementation without fma acceleration
|
|
|
|
// 3. DisableAvx - call sse implementation
|
|
|
|
// 2. DisableFMA - call avx without fma implementation
|
|
|
|
// 3. DisableAvx - call sse Vector4 implementation
|
|
|
|
// 4. DisableHWIntrinsic - call scalar fallback implementation
|
|
|
|
FeatureTestRunner.RunWithHwIntrinsicsFeature( |
|
|
|
RunTest, |
|
|
|
|