From 4fd912b9dd84f6a5c8774f110d719f188488f55f Mon Sep 17 00:00:00 2001 From: Dmitry Pentin Date: Mon, 13 Sep 2021 09:21:35 +0300 Subject: [PATCH] Fixed Ssse3 zig-zag implementation --- .../Formats/Jpeg/Components/Block8x8F.cs | 4 +- .../Jpeg/Components/ZigZag.Intrinsic.cs | 228 ++++++++++-------- .../Formats/Jpg/Block8x8FTests.cs | 49 ++-- .../Formats/Jpg/Utils/JpegFixture.cs | 32 +++ .../FeatureTesting/FeatureTestRunner.cs | 46 ++++ 5 files changed, 241 insertions(+), 118 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index d93375f398..24177c5564 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -414,12 +414,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components if (Avx2.IsSupported) { MultiplyIntoInt16_Avx2(ref block, ref qt, ref dest); - ZigZag.ApplyZigZagOrderingAvx(ref dest, ref dest); + ZigZag.ApplyZigZagOrderingAvx(ref dest); } else if (Ssse3.IsSupported) { MultiplyIntoInt16_Sse2(ref block, ref qt, ref dest); - ZigZag.ApplyZigZagOrderingSse(ref dest, ref dest); + ZigZag.ApplyZigZagOrderingSse(ref dest); } else #endif diff --git a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs index abe02d0404..eb15c8b551 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/ZigZag.Intrinsic.cs @@ -21,6 +21,47 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// Gets shuffle vectors for /// zig zag implementation. 
/// + private static ReadOnlySpan SseShuffleMasks1 => new byte[] + { + // row0 + 0, 1, 2, 3, _, _, _, _, _, _, 4, 5, 6, 7, _, _, + _, _, _, _, 0, 1, _, _, 2, 3, _, _, _, _, 4, 5, + _, _, _, _, _, _, 0, 1, _, _, _, _, _, _, _, _, + + // row1 + _, _, _, _, _, _, _, _, _, _, _, _, 8, 9, 10, 11, + 2, 3, _, _, _, _, _, _, 4, 5, _, _, _, _, _, _, + _, _, 0, 1, _, _, 2, 3, _, _, _, _, _, _, _, _, + + // row2 + _, _, _, _, _, _, 2, 3, _, _, _, _, _, _, 4, 5, + _, _, _, _, _, _, _, _, 0, 1, _, _, 2, 3, _, _, + + // row3 + _, _, _, _, _, _, 12, 13, 14, 15, _, _, _, _, _, _, + _, _, _, _, 10, 11, _, _, _, _, 12, 13, _, _, _, _, + _, _, 8, 9, _, _, _, _, _, _, _, _, 10, 11, _, _, + 6, 7, _, _, _, _, _, _, _, _, _, _, _, _, 8, 9, + + // row4 + _, _, 4, 5, _, _, _, _, _, _, _, _, 6, 7, _, _, + _, _, _, _, 2, 3, _, _, _, _, 4, 5, _, _, _, _, + _, _, _, _, _, _, 0, 1, 2, 3, _, _, _, _, _, _, + + // row5 + _, _, 12, 13, _, _, 14, 15, _, _, _, _, _, _, _, _, + 10, 11, _, _, _, _, _, _, 12, 13, _, _, _, _, _, _, + + // row6 + _, _, _, _, _, _, _, _, 12, 13, _, _, 14, 15, _, _, + _, _, _, _, _, _, 10, 11, _, _, _, _, _, _, 12, 13, + 4, 5, 6, 7, _, _, _, _, _, _, _, _, _, _, _, _, + + // row7 + 10, 11, _, _, _, _, 12, 13, _, _, 14, 15, _, _, _, _, + _, _, 8, 9, 10, 11, _, _, _, _, _, _, 12, 13, 14, 15 + }; + private static ReadOnlySpan SseShuffleMasks => new byte[] { // row0 @@ -56,7 +97,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components // row4 // E F G H - // 6, 7, _, _, _, _, _, _, _, _, _, _, _, _, 8, 9, + 6, 7, _, _, _, _, _, _, _, _, _, _, _, _, 8, 9, _, _, 4, 5, _, _, _, _, _, _, _, _, 6, 7, _, _, _, _, _, _, 2, 3, _, _, _, _, 4, 5, _, _, _, _, _, _, _, _, _, _, 0, 1, 2, 3, _, _, _, _, _, _, @@ -152,112 +193,99 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Requires Ssse3 support. /// - /// Input matrix. + /// Input matrix. /// Matrix to store the result. Can be a reference to input matrix. 
- public static unsafe void ApplyZigZagOrderingSse(ref Block8x8 source, ref Block8x8 dest) + public static unsafe void ApplyZigZagOrderingSse(ref Block8x8 block) { DebugGuard.IsTrue(Ssse3.IsSupported, "Ssse3 support is required to run this operation!"); - fixed (byte* maskPtr = SseShuffleMasks) + fixed (byte* maskPtr = SseShuffleMasks1) { - Vector128 rowA = source.V0.AsByte(); - Vector128 rowB = source.V1.AsByte(); - Vector128 rowC = source.V2.AsByte(); - Vector128 rowD = source.V3.AsByte(); - Vector128 rowE = source.V4.AsByte(); - Vector128 rowF = source.V5.AsByte(); - Vector128 rowG = source.V6.AsByte(); - Vector128 rowH = source.V7.AsByte(); - - // row0 - Vector128 row0A = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (0 * 16))).AsInt16(); - Vector128 row0B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (1 * 16))).AsInt16(); - Vector128 row0 = Sse2.Or(row0A, row0B); - Vector128 row0C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (2 * 16))).AsInt16(); - row0 = Sse2.Or(row0, row0C); - - // row1 - Vector128 row1A = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (3 * 16))).AsInt16(); - Vector128 row1B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (4 * 16))).AsInt16(); - Vector128 row1 = Sse2.Or(row1A, row1B); - Vector128 row1C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (5 * 16))).AsInt16(); - row1 = Sse2.Or(row1, row1C); - Vector128 row1D = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (6 * 16))).AsInt16(); - row1 = Sse2.Or(row1, row1D); - Vector128 row1E = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (7 * 16))).AsInt16(); - row1 = Sse2.Or(row1, row1E); + Vector128 rowA = block.V0.AsByte(); + Vector128 rowB = block.V1.AsByte(); + Vector128 rowC = block.V2.AsByte(); + Vector128 rowD = block.V3.AsByte(); + Vector128 rowE = block.V4.AsByte(); + Vector128 rowF = block.V5.AsByte(); + Vector128 rowG = block.V6.AsByte(); + Vector128 rowH = block.V7.AsByte(); + + // row0 - A0 A1 B0 C0 B1 A2 A3 B2 + Vector128 rowA0 = Ssse3.Shuffle(rowA, 
Sse2.LoadVector128(maskPtr + (16 * 0))).AsInt16(); + Vector128 rowB0 = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (16 * 1))).AsInt16(); + Vector128 row0 = Sse2.Or(rowA0, rowB0); + Vector128 rowC0 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 2))).AsInt16(); + row0 = Sse2.Or(row0, rowC0); + + // row1 - C1 D0 E0 D1 C2 B3 A4 A5 + Vector128 rowA1 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 3))).AsInt16(); + Vector128 rowC1 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 4))).AsInt16(); + Vector128 row1 = Sse2.Or(rowA1, rowC1); + Vector128 rowD1 = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (16 * 5))).AsInt16(); + row1 = Sse2.Or(row1, rowD1); + row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 3), 5).AsInt16(); + row1 = Sse2.Insert(row1.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 0), 2).AsInt16(); // row2 - Vector128 row2B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (8 * 16))).AsInt16(); - Vector128 row2C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (9 * 16))).AsInt16(); - Vector128 row2 = Sse2.Or(row2B, row2C); - Vector128 row2D = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (10 * 16))).AsInt16(); - row2 = Sse2.Or(row2, row2D); - Vector128 row2E = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (11 * 16))).AsInt16(); - row2 = Sse2.Or(row2, row2E); - Vector128 row2F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (12 * 16))).AsInt16(); - row2 = Sse2.Or(row2, row2F); - Vector128 row2G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (13 * 16))).AsInt16(); - row2 = Sse2.Or(row2, row2G); + Vector128 rowE2 = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (16 * 6))).AsInt16(); + Vector128 rowF2 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 7))).AsInt16(); + Vector128 row2 = Sse2.Or(rowE2, rowF2); + row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 4), 0).AsInt16(); + row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowC.AsUInt16(), 3), 1).AsInt16(); + row2 = 
Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 2), 2).AsInt16(); + row2 = Sse2.Insert(row2.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 0), 5).AsInt16(); // row3 - Vector128 row3A = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (14 * 16))).AsInt16().AsInt16(); - Vector128 row3B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (15 * 16))).AsInt16().AsInt16(); - Vector128 row3 = Sse2.Or(row3A, row3B); - Vector128 row3C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 16))).AsInt16(); - row3 = Sse2.Or(row3, row3C); - Vector128 row3D_row4E_shuffleMask = Sse2.LoadVector128(maskPtr + (17 * 16)); - Vector128 row3D = Ssse3.Shuffle(rowD, row3D_row4E_shuffleMask).AsInt16(); - row3 = Sse2.Or(row3, row3D); + Vector128 rowA3 = Ssse3.Shuffle(rowA, Sse2.LoadVector128(maskPtr + (16 * 8))).AsInt16().AsInt16(); + Vector128 rowB3 = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (16 * 9))).AsInt16().AsInt16(); + Vector128 row3 = Sse2.Or(rowA3, rowB3); + Vector128 rowC3 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 10))).AsInt16(); + row3 = Sse2.Or(row3, rowC3); + Vector128 shuffleRowD3EF = Sse2.LoadVector128(maskPtr + (16 * 11)); + Vector128 rowD3 = Ssse3.Shuffle(rowD, shuffleRowD3EF).AsInt16(); + row3 = Sse2.Or(row3, rowD3); // row4 - Vector128 row4E = Ssse3.Shuffle(rowE, row3D_row4E_shuffleMask).AsInt16(); - Vector128 row4F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (18 * 16))).AsInt16(); - Vector128 row4 = Sse2.Or(row4E, row4F); - Vector128 row4G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (19 * 16))).AsInt16(); - row4 = Sse2.Or(row4, row4G); - Vector128 row4H = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (20 * 16))).AsInt16(); - row4 = Sse2.Or(row4, row4H); + Vector128 rowE4 = Ssse3.Shuffle(rowE, shuffleRowD3EF).AsInt16(); + Vector128 rowF4 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 12))).AsInt16(); + Vector128 row4 = Sse2.Or(rowE4, rowF4); + Vector128 rowG4 = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (16 
* 13))).AsInt16(); + row4 = Sse2.Or(row4, rowG4); + Vector128 rowH4 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 14))).AsInt16(); + row4 = Sse2.Or(row4, rowH4); // row5 - Vector128 row5B = Ssse3.Shuffle(rowB, Sse2.LoadVector128(maskPtr + (21 * 16))).AsInt16(); - Vector128 row5C = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (22 * 16))).AsInt16(); - Vector128 row5 = Sse2.Or(row5B, row5C); - Vector128 row5D = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (23 * 16))).AsInt16(); - row5 = Sse2.Or(row5, row5D); - Vector128 row5E = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (24 * 16))).AsInt16(); - row5 = Sse2.Or(row5, row5E); - Vector128 row5F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (25 * 16))).AsInt16(); - row5 = Sse2.Or(row5, row5F); - Vector128 row5G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (26 * 16))).AsInt16(); - row5 = Sse2.Or(row5, row5G); + Vector128 rowC5 = Ssse3.Shuffle(rowC, Sse2.LoadVector128(maskPtr + (16 * 15))).AsInt16(); + Vector128 rowD5 = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (16 * 16))).AsInt16(); + Vector128 row5 = Sse2.Or(rowC5, rowD5); + row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowB.AsUInt16(), 7), 2).AsInt16(); + row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowE.AsUInt16(), 5), 5).AsInt16(); + row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 4), 6).AsInt16(); + row5 = Sse2.Insert(row5.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 3), 7).AsInt16(); // row6 - Vector128 row6D = Ssse3.Shuffle(rowD, Sse2.LoadVector128(maskPtr + (27 * 16))).AsInt16(); - Vector128 row6E = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (28 * 16))).AsInt16(); - Vector128 row6 = Sse2.Or(row6D, row6E); - Vector128 row6F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (29 * 16))).AsInt16(); - row6 = Sse2.Or(row6, row6F); - Vector128 row6G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (30 * 16))).AsInt16(); - row6 = Sse2.Or(row6, row6G); - Vector128 row6H = Ssse3.Shuffle(rowH, 
Sse2.LoadVector128(maskPtr + (31 * 16))).AsInt16(); - row6 = Sse2.Or(row6, row6H); + Vector128 rowE6 = Ssse3.Shuffle(rowE, Sse2.LoadVector128(maskPtr + (16 * 17))).AsInt16(); + Vector128 rowF6 = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (16 * 18))).AsInt16(); + Vector128 row6 = Sse2.Or(rowE6, rowF6); + Vector128 rowH6 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 19))).AsInt16(); + row6 = Sse2.Or(row6, rowH6); + row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowD.AsUInt16(), 7), 5).AsInt16(); + row6 = Sse2.Insert(row6.AsUInt16(), Sse2.Extract(rowG.AsUInt16(), 4), 2).AsInt16(); // row7 - Vector128 row7F = Ssse3.Shuffle(rowF, Sse2.LoadVector128(maskPtr + (32 * 16))).AsInt16(); - Vector128 row7G = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (33 * 16))).AsInt16(); - Vector128 row7 = Sse2.Or(row7F, row7G); - Vector128 row7H = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (35 * 16))).AsInt16(); - row7 = Sse2.Or(row7, row7H); - - dest.V0 = row0; - dest.V1 = row1; - dest.V2 = row2; - dest.V3 = row3; - dest.V4 = row4; - dest.V5 = row5; - dest.V6 = row6; - dest.V7 = row7; + Vector128 rowG7 = Ssse3.Shuffle(rowG, Sse2.LoadVector128(maskPtr + (16 * 20))).AsInt16(); + Vector128 rowH7 = Ssse3.Shuffle(rowH, Sse2.LoadVector128(maskPtr + (16 * 21))).AsInt16(); + Vector128 row7 = Sse2.Or(rowG7, rowH7); + row7 = Sse2.Insert(row7.AsUInt16(), Sse2.Extract(rowF.AsUInt16(), 7), 4).AsInt16(); + + block.V0 = row0; + block.V1 = row1; + block.V2 = row2; + block.V3 = row3; + block.V4 = row4; + block.V5 = row5; + block.V6 = row6; + block.V7 = row7; } } @@ -267,18 +295,18 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Requires Avx2 support. /// - /// Input matrix. + /// Input matrix. /// Matrix to store the result. Can be a reference to input matrix. 
- public static unsafe void ApplyZigZagOrderingAvx(ref Block8x8 source, ref Block8x8 dest) + public static unsafe void ApplyZigZagOrderingAvx(ref Block8x8 block) { DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!"); fixed (byte* shuffleVectorsPtr = AvxShuffleMasks) { - Vector256 rowsAB = source.V01.AsByte(); - Vector256 rowsCD = source.V23.AsByte(); - Vector256 rowsEF = source.V45.AsByte(); - Vector256 rowsGH = source.V67.AsByte(); + Vector256 rowsAB = block.V01.AsByte(); + Vector256 rowsCD = block.V23.AsByte(); + Vector256 rowsEF = block.V45.AsByte(); + Vector256 rowsGH = block.V67.AsByte(); // rows 0 1 Vector256 rows_AB01_EF01_CD23_shuffleMask = Avx.LoadVector256(shuffleVectorsPtr + (0 * 32)).AsInt32(); @@ -333,10 +361,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 row67 = Avx2.Or(Avx2.Or(row67_CD, row67_EF), row67_GH); - dest.V01 = row01.AsInt16(); - dest.V23 = row23.AsInt16(); - dest.V45 = row45.AsInt16(); - dest.V67 = row67.AsInt16(); + block.V01 = row01.AsInt16(); + block.V23 = row23.AsInt16(); + block.V45 = row45.AsInt16(); + block.V67 = row67.AsInt16(); } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 89ef74d8b7..40e42acb31 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -4,7 +4,9 @@ // Uncomment this to turn unit tests into benchmarks: // #define BENCHMARKING using System; -using System.Diagnostics; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; @@ -247,30 +249,45 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.CompareBlocks(expected, actual, 0); } - // TODO: intrinsic tests [Theory] [InlineData(1, 2)] [InlineData(2, 1)] public void Quantize(int srcSeed, int qtSeed) { - Block8x8F source = 
CreateRandomFloatBlock(-2000, 2000, srcSeed); - Block8x8F quant = CreateRandomFloatBlock(-2000, 2000, qtSeed); + static void RunTest(string srcSeedSerialized, string qtSeedSerialized) + { + int srcSeed = FeatureTestRunner.Deserialize(srcSeedSerialized); + int qtSeed = FeatureTestRunner.Deserialize(qtSeedSerialized); - // Reference implementation quantizes given block via division - Block8x8 expected = default; - ReferenceImplementations.Quantize(ref source, ref expected, ref quant, ZigZag.ZigZagOrder); + Block8x8F source = CreateRandomFloatBlock(-2000, 2000, srcSeed); - // Actual current implementation quantizes given block via multiplication - // With quantization table reciprocal - for (int i = 0; i < Block8x8F.Size; i++) - { - quant[i] = 1f / quant[i]; - } + // Quantization code is used only in jpeg where it's guaranteed that + // quantization values are greater than 1 + // Quantize method supports negative numbers but very small numbers can cause troubles + Block8x8F quant = CreateRandomFloatBlock(1, 2000, qtSeed); + + // Reference implementation quantizes given block via division + Block8x8 expected = default; + ReferenceImplementations.Quantize(ref source, ref expected, ref quant, ZigZag.ZigZagOrder); + + // Actual current implementation quantizes given block via multiplication + // With quantization table reciprocal + for (int i = 0; i < Block8x8F.Size; i++) + { + quant[i] = 1f / quant[i]; + } - Block8x8 actual = default; - Block8x8F.Quantize(ref source, ref actual, ref quant); + Block8x8 actual = default; + Block8x8F.Quantize(ref source, ref actual, ref quant); - this.CompareBlocks(expected, actual, 1); + Assert.True(CompareBlocks(expected, actual, 1, out int diff), $"Blocks are not equal, diff={diff}"); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + srcSeed, + qtSeed, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); } [Fact] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs
b/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs index ccb7f6f1eb..1cf9bc4aef 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Utils/JpegFixture.cs @@ -190,6 +190,38 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg.Utils Assert.False(failed); } + internal static bool CompareBlocks(Block8x8 a, Block8x8 b, int tolerance, out int diff) + { + bool res = CompareBlocks(a.AsFloatBlock(), b.AsFloatBlock(), tolerance + 1e-5f, out float fdiff); + diff = (int)fdiff; + return res; + } + + internal static bool CompareBlocks(Block8x8F a, Block8x8F b, float tolerance, out float diff) => + CompareBlocks(a.ToArray(), b.ToArray(), tolerance, out diff); + + internal static bool CompareBlocks(Span a, Span b, float tolerance, out float diff) + { + var comparer = new ApproximateFloatComparer(tolerance); + bool failed = false; + + diff = 0; + + for (int i = 0; i < 64; i++) + { + float expected = a[i]; + float actual = b[i]; + diff += Math.Abs(expected - actual); + + if (!comparer.Equals(expected, actual)) + { + failed = true; + } + } + + return !failed; + } + internal static JpegDecoderCore ParseJpegStream(string testFileName, bool metaDataOnly = false) { byte[] bytes = TestFile.Create(testFileName).Bytes; diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index fa0f02ca1f..0d2f3fcefb 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -301,6 +301,52 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities } } + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The value to pass as a parameter #0 to the test action. + /// The value to pass as a parameter #1 to the test action. + /// The intrinsics features. 
+ public static void RunWithHwIntrinsicsFeature( + Action action, + T arg0, + T arg1, + HwIntrinsics intrinsics) + where T : IConvertible + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + arg0.ToString(), + arg1.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(arg0.ToString(), arg1.ToString()); + } + } + } + internal static Dictionary ToFeatureKeyValueCollection(this HwIntrinsics intrinsics) { // Loop through and translate the given values into COMPlus equivaluents