From 3091072e382e6a6205ecf999f4518ef6ba4dff2c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 15:42:51 +0100 Subject: [PATCH 001/104] Add AVX backed Block8x8F Transpose method --- .../Jpeg/Components/Block8x8F.Generated.cs | 6 +- .../Jpeg/Components/Block8x8F.Generated.tt | 6 +- .../Formats/Jpeg/Components/Block8x8F.cs | 83 +++++++++++++++++++ .../Jpeg/Components/FastFloatingPointDCT.cs | 6 +- .../BlockOperations/Block8x8F_Transpose.cs | 45 ++++++++++ .../Formats/Jpg/Block8x8FTests.cs | 22 ++++- 6 files changed, 157 insertions(+), 11 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs index f6f5903684..10cbee5e6f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs @@ -10,12 +10,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Transpose the block into the destination block. + /// + /// Fallback method to transpose a block into the destination block on non AVX supported CPUs. /// /// The destination block [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeInto(ref Block8x8F d) + public void TransposeIntoFallback(ref Block8x8F d) { d.V0L.X = V0L.X; d.V1L.X = V0L.Y; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt index 6ee0540213..f47d9106ee 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt @@ -23,12 +23,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Transpose the block into the destination block. 
+ /// + /// Fallback method to transpose a block into the destination block on non AVX supported CPUs. /// /// The destination block [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeInto(ref Block8x8F d) + public void TransposeIntoFallback(ref Block8x8F d) { <# PushIndent(" "); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index b7835d6706..d698e90b3c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -6,6 +6,10 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif using System.Text; // ReSharper disable InconsistentNaming @@ -596,5 +600,84 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components DebugGuard.MustBeLessThan(idx, Size, nameof(idx)); DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx)); } + + /// + /// Transpose the block into the destination block. + /// + /// The destination block + [MethodImpl(InliningOptions.ShortMethod)] + public void TransposeInto(ref Block8x8F d) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + this.TransposeIntoAvx(ref d); + } + else +#endif + { + this.TransposeIntoFallback(ref d); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + /// + /// AVX-only variant for executing . 
+ /// + /// + [MethodImpl(InliningOptions.ShortMethod)] + public void TransposeIntoAvx(ref Block8x8F d) + { + ref Vector256 r0 = ref Unsafe.As>(ref this.V0L); + ref Vector256 r1 = ref Unsafe.As>(ref this.V1L); + ref Vector256 r2 = ref Unsafe.As>(ref this.V2L); + ref Vector256 r3 = ref Unsafe.As>(ref this.V3L); + ref Vector256 r4 = ref Unsafe.As>(ref this.V4L); + ref Vector256 r5 = ref Unsafe.As>(ref this.V5L); + ref Vector256 r6 = ref Unsafe.As>(ref this.V6L); + ref Vector256 r7 = ref Unsafe.As>(ref this.V7L); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t2 = Avx.UnpackLow(r2, r3); + Vector256 t3 = Avx.UnpackHigh(r2, r3); + Vector256 t4 = Avx.UnpackLow(r4, r5); + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t6 = Avx.UnpackLow(r6, r7); + Vector256 t7 = Avx.UnpackHigh(r6, r7); + + // Controls generated via + // _MM_SHUFFLE(fp3, fp2, fp1, fp0)(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) + const byte Control1_0_1_0 = 0b1_00_01_00; // 1, 0, 1, 0 + const byte Control3_2_3_2 = 0b11_10_11_10; // 3, 2, 3, 2 + + r0 = Avx.Shuffle(t0, t2, Control1_0_1_0); + r1 = Avx.Shuffle(t0, t2, Control3_2_3_2); + r2 = Avx.Shuffle(t1, t3, Control1_0_1_0); + r3 = Avx.Shuffle(t1, t3, Control3_2_3_2); + r4 = Avx.Shuffle(t4, t6, Control1_0_1_0); + r5 = Avx.Shuffle(t4, t6, Control3_2_3_2); + r6 = Avx.Shuffle(t5, t7, Control1_0_1_0); + r7 = Avx.Shuffle(t5, t7, Control3_2_3_2); + + t0 = Avx.Permute2x128(r0, r4, 0x20); + t1 = Avx.Permute2x128(r1, r5, 0x20); + t2 = Avx.Permute2x128(r2, r6, 0x20); + t3 = Avx.Permute2x128(r3, r7, 0x20); + t4 = Avx.Permute2x128(r0, r4, 0x31); + t5 = Avx.Permute2x128(r1, r5, 0x31); + t6 = Avx.Permute2x128(r2, r6, 0x31); + t7 = Avx.Permute2x128(r3, r7, 0x31); + + Unsafe.As>(ref d.V0L) = t0; + Unsafe.As>(ref d.V1L) = t1; + Unsafe.As>(ref d.V2L) = t2; + Unsafe.As>(ref d.V3L) = t3; + Unsafe.As>(ref d.V4L) = t4; + Unsafe.As>(ref d.V5L) = t5; + Unsafe.As>(ref d.V6L) = t6; + Unsafe.As>(ref d.V7L) = t7; + } 
+#endif } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index ee06f2bdeb..d0b373609b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System.Numerics; @@ -50,8 +50,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// Temporary block provided by the caller public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) { - // TODO: Transpose is a bottleneck now. We need full AVX support to optimize it: - // https://github.com/dotnet/corefx/issues/22940 src.TransposeInto(ref temp); IDCT8x4_LeftPart(ref temp, ref dest); @@ -340,4 +338,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components dest.MultiplyInplace(C_0_125); } } -} \ No newline at end of file +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs new file mode 100644 index 0000000000..d21e848357 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -0,0 +1,45 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + public class Block8x8F_Transpose + { + private static readonly Block8x8F Source = Create8x8FloatData(); + + [Benchmark] + public void TransposeIntoVector4() + { + var dest = default(Block8x8F); + Source.TransposeIntoFallback(ref dest); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Benchmark] + public void TransposeIntoAvx() + { + var dest = default(Block8x8F); + Source.TransposeIntoAvx(ref dest); + } +#endif + + private static Block8x8F Create8x8FloatData() + { + var result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[(i * 8) + j] = (i * 10) + j; + } + } + + var source = default(Block8x8F); + source.LoadFrom(result); + return source; + } + } +} diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 722521f98d..050b1889ad 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -172,7 +172,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg source.LoadFrom(Create8x8FloatData()); var dest = default(Block8x8F); - source.TransposeInto(ref dest); + source.TransposeIntoFallback(ref dest); float[] actual = new float[64]; dest.ScaledCopyTo(actual); @@ -180,6 +180,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Assert.Equal(expected, actual); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void TransposeIntoAvx() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); + + var dest = default(Block8x8F); + source.TransposeIntoAvx(ref dest); + + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); + + Assert.Equal(expected, actual); + } +#endif + private class BufferHolder { public 
Block8x8F Buffer; From 398e901840c8809942f41ca5feba7dd88e67551d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 17:42:50 +0100 Subject: [PATCH 002/104] Add variant 2 --- .../Formats/Jpeg/Components/Block8x8F.cs | 118 ++++++++++++++---- 1 file changed, 97 insertions(+), 21 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index d698e90b3c..683308e354 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -628,14 +628,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components [MethodImpl(InliningOptions.ShortMethod)] public void TransposeIntoAvx(ref Block8x8F d) { - ref Vector256 r0 = ref Unsafe.As>(ref this.V0L); - ref Vector256 r1 = ref Unsafe.As>(ref this.V1L); - ref Vector256 r2 = ref Unsafe.As>(ref this.V2L); - ref Vector256 r3 = ref Unsafe.As>(ref this.V3L); - ref Vector256 r4 = ref Unsafe.As>(ref this.V4L); - ref Vector256 r5 = ref Unsafe.As>(ref this.V5L); - ref Vector256 r6 = ref Unsafe.As>(ref this.V6L); - ref Vector256 r7 = ref Unsafe.As>(ref this.V7L); +#if avxvariant1 + Vector256 r0 = Unsafe.As>(ref this.V0L); + Vector256 r1 = Unsafe.As>(ref this.V1L); + Vector256 r2 = Unsafe.As>(ref this.V2L); + Vector256 r3 = Unsafe.As>(ref this.V3L); + Vector256 r4 = Unsafe.As>(ref this.V4L); + Vector256 r5 = Unsafe.As>(ref this.V5L); + Vector256 r6 = Unsafe.As>(ref this.V6L); + Vector256 r7 = Unsafe.As>(ref this.V7L); Vector256 t0 = Avx.UnpackLow(r0, r1); Vector256 t1 = Avx.UnpackHigh(r0, r1); @@ -646,11 +647,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 t6 = Avx.UnpackLow(r6, r7); Vector256 t7 = Avx.UnpackHigh(r6, r7); - // Controls generated via - // _MM_SHUFFLE(fp3, fp2, fp1, fp0)(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) - const byte Control1_0_1_0 = 0b1_00_01_00; // 1, 0, 1, 0 - const byte Control3_2_3_2 = 0b11_10_11_10; // 3, 2, 3, 2 - + // Controls 
generated via _MM_SHUFFLE + const byte Control1_0_1_0 = 0b1000100; + const byte Control3_2_3_2 = 0b11101110; r0 = Avx.Shuffle(t0, t2, Control1_0_1_0); r1 = Avx.Shuffle(t0, t2, Control3_2_3_2); r2 = Avx.Shuffle(t1, t3, Control1_0_1_0); @@ -660,14 +659,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components r6 = Avx.Shuffle(t5, t7, Control1_0_1_0); r7 = Avx.Shuffle(t5, t7, Control3_2_3_2); - t0 = Avx.Permute2x128(r0, r4, 0x20); - t1 = Avx.Permute2x128(r1, r5, 0x20); - t2 = Avx.Permute2x128(r2, r6, 0x20); - t3 = Avx.Permute2x128(r3, r7, 0x20); - t4 = Avx.Permute2x128(r0, r4, 0x31); - t5 = Avx.Permute2x128(r1, r5, 0x31); - t6 = Avx.Permute2x128(r2, r6, 0x31); - t7 = Avx.Permute2x128(r3, r7, 0x31); + const byte Control0x20 = 0b100000; + const byte Control0x31 = 0b110001; + t0 = Avx.Permute2x128(r0, r4, Control0x20); + t1 = Avx.Permute2x128(r1, r5, Control0x20); + t2 = Avx.Permute2x128(r2, r6, Control0x20); + t3 = Avx.Permute2x128(r3, r7, Control0x20); + t4 = Avx.Permute2x128(r0, r4, Control0x31); + t5 = Avx.Permute2x128(r1, r5, Control0x31); + t6 = Avx.Permute2x128(r2, r6, Control0x31); + t7 = Avx.Permute2x128(r3, r7, Control0x31); Unsafe.As>(ref d.V0L) = t0; Unsafe.As>(ref d.V1L) = t1; @@ -677,6 +678,81 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Unsafe.As>(ref d.V5L) = t5; Unsafe.As>(ref d.V6L) = t6; Unsafe.As>(ref d.V7L) = t7; +#else + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref this.V0L).ToVector256(), + Unsafe.As>(ref this.V4L), + 1); + + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref this.V1L).ToVector256(), + Unsafe.As>(ref this.V5L), + 1); + + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref this.V2L).ToVector256(), + Unsafe.As>(ref this.V6L), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref this.V3L).ToVector256(), + Unsafe.As>(ref this.V7L), + 1); + + Vector256 r4 = Avx.InsertVector128( + Unsafe.As>(ref this.V0R).ToVector256(), + Unsafe.As>(ref this.V4R), + 1); + + Vector256 r5 = Avx.InsertVector128( + 
Unsafe.As>(ref this.V1R).ToVector256(), + Unsafe.As>(ref this.V5R), + 1); + + Vector256 r6 = Avx.InsertVector128( + Unsafe.As>(ref this.V2R).ToVector256(), + Unsafe.As>(ref this.V6R), + 1); + + Vector256 r7 = Avx.InsertVector128( + Unsafe.As>(ref this.V3R).ToVector256(), + Unsafe.As>(ref this.V7R), + 1); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t2 = Avx.UnpackLow(r2, r3); + Vector256 t3 = Avx.UnpackHigh(r2, r3); + Vector256 t4 = Avx.UnpackLow(r4, r5); + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t6 = Avx.UnpackLow(r6, r7); + Vector256 t7 = Avx.UnpackHigh(r6, r7); + + Vector256 v = Avx.Shuffle(t0, t2, 0x4E); + r0 = Avx.Blend(t0, v, 0xCC); + r1 = Avx.Blend(t2, v, 0x33); + + v = Avx.Shuffle(t1, t3, 0x4E); + r2 = Avx.Blend(t1, v, 0xCC); + r3 = Avx.Blend(t3, v, 0x33); + + v = Avx.Shuffle(t4, t6, 0x4E); + r4 = Avx.Blend(t4, v, 0xCC); + r5 = Avx.Blend(t6, v, 0x33); + + v = Avx.Shuffle(t5, t7, 0x4E); + r6 = Avx.Blend(t5, v, 0xCC); + r7 = Avx.Blend(t7, v, 0x33); + + Unsafe.As>(ref d.V0L) = r0; + Unsafe.As>(ref d.V1L) = r1; + Unsafe.As>(ref d.V2L) = r2; + Unsafe.As>(ref d.V3L) = r3; + Unsafe.As>(ref d.V4L) = r4; + Unsafe.As>(ref d.V5L) = r5; + Unsafe.As>(ref d.V6L) = r6; + Unsafe.As>(ref d.V7L) = r7; +#endif } #endif } From 24641e74cd52b110fe8047950ec3e938150eb2af Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 20:38:32 +0100 Subject: [PATCH 003/104] Update tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs Co-authored-by: Anton Firszov --- .../Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs index d21e848357..ae1b23df92 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs +++ 
b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations { private static readonly Block8x8F Source = Create8x8FloatData(); - [Benchmark] + [Benchmark(Baseline=true)] public void TransposeIntoVector4() { var dest = default(Block8x8F); From ce74b9e820f931536397611731de9f47b0651c5e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 22:23:37 +0100 Subject: [PATCH 004/104] Use interleaving to prevent stack spills --- .../Formats/Jpeg/Components/Block8x8F.cs | 92 +++---------------- 1 file changed, 15 insertions(+), 77 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 683308e354..547e116230 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -628,57 +628,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components [MethodImpl(InliningOptions.ShortMethod)] public void TransposeIntoAvx(ref Block8x8F d) { -#if avxvariant1 - Vector256 r0 = Unsafe.As>(ref this.V0L); - Vector256 r1 = Unsafe.As>(ref this.V1L); - Vector256 r2 = Unsafe.As>(ref this.V2L); - Vector256 r3 = Unsafe.As>(ref this.V3L); - Vector256 r4 = Unsafe.As>(ref this.V4L); - Vector256 r5 = Unsafe.As>(ref this.V5L); - Vector256 r6 = Unsafe.As>(ref this.V6L); - Vector256 r7 = Unsafe.As>(ref this.V7L); - - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t1 = Avx.UnpackHigh(r0, r1); - Vector256 t2 = Avx.UnpackLow(r2, r3); - Vector256 t3 = Avx.UnpackHigh(r2, r3); - Vector256 t4 = Avx.UnpackLow(r4, r5); - Vector256 t5 = Avx.UnpackHigh(r4, r5); - Vector256 t6 = Avx.UnpackLow(r6, r7); - Vector256 t7 = Avx.UnpackHigh(r6, r7); - - // Controls generated via _MM_SHUFFLE - const byte Control1_0_1_0 = 0b1000100; - const byte Control3_2_3_2 = 0b11101110; - r0 = Avx.Shuffle(t0, t2, Control1_0_1_0); - r1 = Avx.Shuffle(t0, t2, 
Control3_2_3_2); - r2 = Avx.Shuffle(t1, t3, Control1_0_1_0); - r3 = Avx.Shuffle(t1, t3, Control3_2_3_2); - r4 = Avx.Shuffle(t4, t6, Control1_0_1_0); - r5 = Avx.Shuffle(t4, t6, Control3_2_3_2); - r6 = Avx.Shuffle(t5, t7, Control1_0_1_0); - r7 = Avx.Shuffle(t5, t7, Control3_2_3_2); - - const byte Control0x20 = 0b100000; - const byte Control0x31 = 0b110001; - t0 = Avx.Permute2x128(r0, r4, Control0x20); - t1 = Avx.Permute2x128(r1, r5, Control0x20); - t2 = Avx.Permute2x128(r2, r6, Control0x20); - t3 = Avx.Permute2x128(r3, r7, Control0x20); - t4 = Avx.Permute2x128(r0, r4, Control0x31); - t5 = Avx.Permute2x128(r1, r5, Control0x31); - t6 = Avx.Permute2x128(r2, r6, Control0x31); - t7 = Avx.Permute2x128(r3, r7, Control0x31); - - Unsafe.As>(ref d.V0L) = t0; - Unsafe.As>(ref d.V1L) = t1; - Unsafe.As>(ref d.V2L) = t2; - Unsafe.As>(ref d.V3L) = t3; - Unsafe.As>(ref d.V4L) = t4; - Unsafe.As>(ref d.V5L) = t5; - Unsafe.As>(ref d.V6L) = t6; - Unsafe.As>(ref d.V7L) = t7; -#else Vector256 r0 = Avx.InsertVector128( Unsafe.As>(ref this.V0L).ToVector256(), Unsafe.As>(ref this.V4L), @@ -720,39 +669,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components 1); Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t1 = Avx.UnpackHigh(r0, r1); Vector256 t2 = Avx.UnpackLow(r2, r3); - Vector256 t3 = Avx.UnpackHigh(r2, r3); + Vector256 v = Avx.Shuffle(t0, t2, 0x4E); + Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); + Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); + Vector256 t4 = Avx.UnpackLow(r4, r5); - Vector256 t5 = Avx.UnpackHigh(r4, r5); Vector256 t6 = Avx.UnpackLow(r6, r7); - Vector256 t7 = Avx.UnpackHigh(r6, r7); - - Vector256 v = Avx.Shuffle(t0, t2, 0x4E); - r0 = Avx.Blend(t0, v, 0xCC); - r1 = Avx.Blend(t2, v, 0x33); + v = Avx.Shuffle(t4, t6, 0x4E); + Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); + Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t3 = Avx.UnpackHigh(r2, r3); v = Avx.Shuffle(t1, t3, 0x4E); - r2 = Avx.Blend(t1, v, 
0xCC); - r3 = Avx.Blend(t3, v, 0x33); - - v = Avx.Shuffle(t4, t6, 0x4E); - r4 = Avx.Blend(t4, v, 0xCC); - r5 = Avx.Blend(t6, v, 0x33); + Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); + Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t7 = Avx.UnpackHigh(r6, r7); v = Avx.Shuffle(t5, t7, 0x4E); - r6 = Avx.Blend(t5, v, 0xCC); - r7 = Avx.Blend(t7, v, 0x33); - - Unsafe.As>(ref d.V0L) = r0; - Unsafe.As>(ref d.V1L) = r1; - Unsafe.As>(ref d.V2L) = r2; - Unsafe.As>(ref d.V3L) = r3; - Unsafe.As>(ref d.V4L) = r4; - Unsafe.As>(ref d.V5L) = r5; - Unsafe.As>(ref d.V6L) = r6; - Unsafe.As>(ref d.V7L) = r7; -#endif + Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); + Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); } #endif } From a62347e761a0ef0393934c7f2483608266253239 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 23:10:53 +0100 Subject: [PATCH 005/104] Update Block8x8FTests.cs --- tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 050b1889ad..73a68063c0 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void TransposeInto() + public void TransposeIntoFallback() { float[] expected = Create8x8FloatData(); ReferenceImplementations.Transpose8x8(expected); From 0c34ce36c6708f27abbdc0c0d2667870269fbfb3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 9 Oct 2020 17:02:13 +0100 Subject: [PATCH 006/104] First pass at HW feature tests Designed to fail to ensure RemoteExecutor is running. 
--- .../Formats/Jpg/Block8x8FTests.cs | 30 ++- .../FeatureTesting/FeatureTestRunner.cs | 232 ++++++++++++++++++ 2 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 73a68063c0..a09472b46f 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -8,7 +8,7 @@ using System.Diagnostics; using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -200,6 +200,34 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } #endif + [Fact] + public void TransposeInto() + { + static void RunTest() + { + // Just testing this fails in CI. RemoteExecutor is not working on my machine. + Assert.True(false); + + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); + + var dest = default(Block8x8F); + source.TransposeInto(ref dest); + + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); + + Assert.Equal(expected, actual); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); + } + private class BufferHolder { public Block8x8F Buffer; diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs new file mode 100644 index 0000000000..8b5ed8d48b --- /dev/null +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -0,0 +1,232 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Numerics; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using Microsoft.DotNet.RemoteExecutor; +using Xunit; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities +{ + /// + /// Allows the testing against specific feature sets. + /// + public static class FeatureTestRunner + { + private static readonly char[] SplitChars = new[] { ',', ' ' }; + + /// + /// Allows the deserialization of parameters passed to the feature test. + /// + /// + /// This is required because does not allow + /// marshalling of fields so we cannot pass a wrapped + /// allowing automatic deserialization. + /// + /// + /// + /// The type to deserialize to. + /// The string value to deserialize. + /// The value. + public static T Deserialize(string value) + where T : IXunitSerializable + => BasicSerializer.Deserialize(value); + + // TODO: Write runner test and use this. 
+ private static void AssertHwIntrinsicsFeatureDisabled(HwIntrinsics intrinsics) + { + switch (intrinsics) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Vector.IsHardwareAccelerated); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. 
+ public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (string intrinsic in intrinsics.ToFeatureCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic != nameof(HwIntrinsics.AllowAll)) + { + processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + } + + RemoteExecutor.Invoke( + action, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (string intrinsic in intrinsics.ToFeatureCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic != nameof(HwIntrinsics.AllowAll)) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + } + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + } + + private static IEnumerable ToFeatureCollection(this HwIntrinsics intrinsics) + { + // Loop through and translate the given values into COMPlus equivaluents + var features = new List(); + var split = intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries).ToArray(); + foreach (string intrinsic in split) + { + switch (intrinsic) + { + case nameof(HwIntrinsics.DisableSIMD): + features.Add("FeatureSIMD"); + break; + + case nameof(HwIntrinsics.AllowAll): + + // Not a COMPlus value. We filter in calling method. 
+ features.Add(nameof(HwIntrinsics.AllowAll)); + break; + + default: + features.Add(intrinsic.Replace("Disable", "Enable")); + break; + } + } + + return features; + } + } + + /// + /// See + /// + /// ends up impacting all SIMD support(including System.Numerics) + /// but not things like , , and . + /// + /// + [Flags] + public enum HwIntrinsics + { + // Use flags so we can pass multiple values without using params. + DisableSIMD = 0, + DisableHWIntrinsic = 1 << 0, + DisableSSE = 1 << 1, + DisableSSE2 = 1 << 2, + DisableAES = 1 << 3, + DisablePCLMULQDQ = 1 << 4, + DisableSSE3 = 1 << 5, + DisableSSSE3 = 1 << 6, + DisableSSE41 = 1 << 7, + DisableSSE42 = 1 << 8, + DisablePOPCNT = 1 << 9, + DisableAVX = 1 << 10, + DisableFMA = 1 << 11, + DisableAVX2 = 1 << 12, + DisableBMI1 = 1 << 13, + DisableBMI2 = 1 << 14, + DisableLZCNT = 1 << 15, + AllowAll = 1 << 16 + } +} From a15cf770f9dc5c40c3c761a0bec8611027e10d3f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 9 Oct 2020 17:09:02 +0100 Subject: [PATCH 007/104] Fix build --- tests/Directory.Build.targets | 4 ++-- .../TestUtilities/FeatureTesting/FeatureTestRunner.cs | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index e9e93a855f..4edc9fdff3 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -30,8 +30,8 @@ - - + + diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 8b5ed8d48b..172c03d5a0 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -174,8 +174,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities { // Loop through and translate the given values into COMPlus equivaluents var features = new List(); - var split = intrinsics.ToString("G").Split(SplitChars, 
StringSplitOptions.RemoveEmptyEntries).ToArray(); - foreach (string intrinsic in split) + foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) { switch (intrinsic) { From 10250ffc34f638b81099db36b5a1d6760ba50f73 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 9 Oct 2020 17:16:30 +0100 Subject: [PATCH 008/104] Test windows only --- .github/workflows/build-and-test.yml | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 0e093a8347..8fc2dd2bc9 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -14,26 +14,26 @@ jobs: strategy: matrix: options: - - os: ubuntu-latest - framework: netcoreapp3.1 - runtime: -x64 - codecov: false - - os: windows-latest - framework: netcoreapp3.1 - runtime: -x64 - codecov: true + # - os: ubuntu-latest + # framework: netcoreapp3.1 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: netcoreapp3.1 + # runtime: -x64 + # codecov: true - os: windows-latest framework: netcoreapp2.1 runtime: -x64 codecov: false - - os: windows-latest - framework: net472 - runtime: -x64 - codecov: false - - os: windows-latest - framework: net472 - runtime: -x86 - codecov: false + # - os: windows-latest + # framework: net472 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: net472 + # runtime: -x86 + # codecov: false runs-on: ${{matrix.options.os}} if: "!contains(github.event.head_commit.message, '[skip ci]')" From 37b0585febdce8465211b15f2786f22afadfb259 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 9 Oct 2020 18:15:59 +0100 Subject: [PATCH 009/104] Use single test, enable runners --- .github/workflows/build-and-test.yml | 32 +++++++------- .../Formats/Jpg/Block8x8FTests.cs | 42 ------------------- 2 files changed, 16 insertions(+), 58 deletions(-) diff --git 
a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 8fc2dd2bc9..0e093a8347 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -14,26 +14,26 @@ jobs: strategy: matrix: options: - # - os: ubuntu-latest - # framework: netcoreapp3.1 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: netcoreapp3.1 - # runtime: -x64 - # codecov: true + - os: ubuntu-latest + framework: netcoreapp3.1 + runtime: -x64 + codecov: false + - os: windows-latest + framework: netcoreapp3.1 + runtime: -x64 + codecov: true - os: windows-latest framework: netcoreapp2.1 runtime: -x64 codecov: false - # - os: windows-latest - # framework: net472 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: net472 - # runtime: -x86 - # codecov: false + - os: windows-latest + framework: net472 + runtime: -x64 + codecov: false + - os: windows-latest + framework: net472 + runtime: -x86 + codecov: false runs-on: ${{matrix.options.os}} if: "!contains(github.event.head_commit.message, '[skip ci]')" diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index a09472b46f..5482380885 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -5,7 +5,6 @@ // #define BENCHMARKING using System; using System.Diagnostics; - using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -162,52 +161,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // PrintLinearData((Span)mirror); } - [Fact] - public void TransposeIntoFallback() - { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); - - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); - - var dest = default(Block8x8F); - source.TransposeIntoFallback(ref 
dest); - - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); - - Assert.Equal(expected, actual); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - [Fact] - public void TransposeIntoAvx() - { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); - - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); - - var dest = default(Block8x8F); - source.TransposeIntoAvx(ref dest); - - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); - - Assert.Equal(expected, actual); - } -#endif - [Fact] public void TransposeInto() { static void RunTest() { - // Just testing this fails in CI. RemoteExecutor is not working on my machine. - Assert.True(false); - float[] expected = Create8x8FloatData(); ReferenceImplementations.Transpose8x8(expected); From a7626e16dc36bd01eabe66c0ad3a41376de25b18 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 14:13:05 +0100 Subject: [PATCH 010/104] Revert to 8e5a59f --- tests/Directory.Build.targets | 4 +- .../Formats/Jpg/Block8x8FTests.cs | 48 ++-- .../FeatureTesting/FeatureTestRunner.cs | 231 ------------------ 3 files changed, 33 insertions(+), 250 deletions(-) delete mode 100644 tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 4edc9fdff3..e9e93a855f 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -30,8 +30,8 @@ - - + + diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 5482380885..73a68063c0 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -5,9 +5,10 @@ // #define BENCHMARKING using System; using System.Diagnostics; + using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; -using SixLabors.ImageSharp.Tests.TestUtilities; 
+ using Xunit; using Xunit.Abstractions; @@ -162,29 +163,42 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void TransposeInto() + public void TransposeIntoFallback() { - static void RunTest() - { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); - var dest = default(Block8x8F); - source.TransposeInto(ref dest); + var dest = default(Block8x8F); + source.TransposeIntoFallback(ref dest); - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); - Assert.Equal(expected, actual); - } + Assert.Equal(expected, actual); + } - FeatureTestRunner.RunWithHwIntrinsicsFeature( - RunTest, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void TransposeIntoAvx() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); + + var dest = default(Block8x8F); + source.TransposeIntoAvx(ref dest); + + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); + + Assert.Equal(expected, actual); } +#endif private class BufferHolder { diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs deleted file mode 100644 index 172c03d5a0..0000000000 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. 
- -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Numerics; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif -using Microsoft.DotNet.RemoteExecutor; -using Xunit; -using Xunit.Abstractions; - -namespace SixLabors.ImageSharp.Tests.TestUtilities -{ - /// - /// Allows the testing against specific feature sets. - /// - public static class FeatureTestRunner - { - private static readonly char[] SplitChars = new[] { ',', ' ' }; - - /// - /// Allows the deserialization of parameters passed to the feature test. - /// - /// - /// This is required because does not allow - /// marshalling of fields so we cannot pass a wrapped - /// allowing automatic deserialization. - /// - /// - /// - /// The type to deserialize to. - /// The string value to deserialize. - /// The value. - public static T Deserialize(string value) - where T : IXunitSerializable - => BasicSerializer.Deserialize(value); - - // TODO: Write runner test and use this. 
- private static void AssertHwIntrinsicsFeatureDisabled(HwIntrinsics intrinsics) - { - switch (intrinsics) - { - case HwIntrinsics.DisableSIMD: - Assert.False(Vector.IsHardwareAccelerated); - break; -#if SUPPORTS_RUNTIME_INTRINSICS - case HwIntrinsics.DisableHWIntrinsic: - Assert.False(Vector.IsHardwareAccelerated); - break; - case HwIntrinsics.DisableSSE: - Assert.False(Sse.IsSupported); - break; - case HwIntrinsics.DisableSSE2: - Assert.False(Sse2.IsSupported); - break; - case HwIntrinsics.DisableAES: - Assert.False(Aes.IsSupported); - break; - case HwIntrinsics.DisablePCLMULQDQ: - Assert.False(Pclmulqdq.IsSupported); - break; - case HwIntrinsics.DisableSSE3: - Assert.False(Sse3.IsSupported); - break; - case HwIntrinsics.DisableSSSE3: - Assert.False(Ssse3.IsSupported); - break; - case HwIntrinsics.DisableSSE41: - Assert.False(Sse41.IsSupported); - break; - case HwIntrinsics.DisableSSE42: - Assert.False(Sse42.IsSupported); - break; - case HwIntrinsics.DisablePOPCNT: - Assert.False(Popcnt.IsSupported); - break; - case HwIntrinsics.DisableAVX: - Assert.False(Avx.IsSupported); - break; - case HwIntrinsics.DisableFMA: - Assert.False(Fma.IsSupported); - break; - case HwIntrinsics.DisableAVX2: - Assert.False(Avx2.IsSupported); - break; - case HwIntrinsics.DisableBMI1: - Assert.False(Bmi1.IsSupported); - break; - case HwIntrinsics.DisableBMI2: - Assert.False(Bmi2.IsSupported); - break; - case HwIntrinsics.DisableLZCNT: - Assert.False(Lzcnt.IsSupported); - break; -#endif - } - } - - /// - /// Runs the given test within an environment - /// where the given features. - /// - /// The test action to run. - /// The intrinsics features. 
- public static void RunWithHwIntrinsicsFeature( - Action action, - HwIntrinsics intrinsics) - { - if (!RemoteExecutor.IsSupported) - { - return; - } - - foreach (string intrinsic in intrinsics.ToFeatureCollection()) - { - var processStartInfo = new ProcessStartInfo(); - if (intrinsic != nameof(HwIntrinsics.AllowAll)) - { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; - } - - RemoteExecutor.Invoke( - action, - new RemoteInvokeOptions - { - StartInfo = processStartInfo - }) - .Dispose(); - } - } - - /// - /// Runs the given test within an environment - /// where the given features. - /// - /// The test action to run. - /// The intrinsics features. - /// The value to pass as a parameter to the test action. - public static void RunWithHwIntrinsicsFeature( - Action action, - HwIntrinsics intrinsics, - T serializable) - where T : IXunitSerializable - { - if (!RemoteExecutor.IsSupported) - { - return; - } - - foreach (string intrinsic in intrinsics.ToFeatureCollection()) - { - var processStartInfo = new ProcessStartInfo(); - if (intrinsic != nameof(HwIntrinsics.AllowAll)) - { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; - } - - RemoteExecutor.Invoke( - action, - BasicSerializer.Serialize(serializable), - new RemoteInvokeOptions - { - StartInfo = processStartInfo - }) - .Dispose(); - } - } - - private static IEnumerable ToFeatureCollection(this HwIntrinsics intrinsics) - { - // Loop through and translate the given values into COMPlus equivaluents - var features = new List(); - foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) - { - switch (intrinsic) - { - case nameof(HwIntrinsics.DisableSIMD): - features.Add("FeatureSIMD"); - break; - - case nameof(HwIntrinsics.AllowAll): - - // Not a COMPlus value. We filter in calling method. 
- features.Add(nameof(HwIntrinsics.AllowAll)); - break; - - default: - features.Add(intrinsic.Replace("Disable", "Enable")); - break; - } - } - - return features; - } - } - - /// - /// See - /// - /// ends up impacting all SIMD support(including System.Numerics) - /// but not things like , , and . - /// - /// - [Flags] - public enum HwIntrinsics - { - // Use flags so we can pass multiple values without using params. - DisableSIMD = 0, - DisableHWIntrinsic = 1 << 0, - DisableSSE = 1 << 1, - DisableSSE2 = 1 << 2, - DisableAES = 1 << 3, - DisablePCLMULQDQ = 1 << 4, - DisableSSE3 = 1 << 5, - DisableSSSE3 = 1 << 6, - DisableSSE41 = 1 << 7, - DisableSSE42 = 1 << 8, - DisablePOPCNT = 1 << 9, - DisableAVX = 1 << 10, - DisableFMA = 1 << 11, - DisableAVX2 = 1 << 12, - DisableBMI1 = 1 << 13, - DisableBMI2 = 1 << 14, - DisableLZCNT = 1 << 15, - AllowAll = 1 << 16 - } -} From f08b68247d09c9cafe3cacacaa8de6927fca86f3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 14:46:04 +0100 Subject: [PATCH 011/104] Use Ubuntu for coverage. 
Touch #1376 --- .github/workflows/build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 0e093a8347..1422606a6d 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -18,7 +18,7 @@ jobs: framework: netcoreapp3.1 runtime: -x64 codecov: false - - os: windows-latest + - os: ubuntu-latest framework: netcoreapp3.1 runtime: -x64 codecov: true From 90624b71fbb7b4bd2e84e38a5465f35db311826f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 14:57:54 +0100 Subject: [PATCH 012/104] Enable windows netcore 3.1 --- .github/workflows/build-and-test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 1422606a6d..c8f3997946 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -17,11 +17,11 @@ jobs: - os: ubuntu-latest framework: netcoreapp3.1 runtime: -x64 - codecov: false - - os: ubuntu-latest + codecov: true + - os: windows-latest framework: netcoreapp3.1 runtime: -x64 - codecov: true + codecov: false - os: windows-latest framework: netcoreapp2.1 runtime: -x64 From f1f4d49c01c00216a356f8033878c4a7f9c7517b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 15:31:51 +0100 Subject: [PATCH 013/104] Try bumping SDK --- tests/Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index e9e93a855f..acdf684328 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -31,7 +31,7 @@ - + From e7988d21d248ae0984d08b14bfb6103461158d0a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 16:48:06 +0100 Subject: [PATCH 014/104] Update deterministic workaround --- Directory.Build.targets | 2 +- 
src/Directory.Build.targets | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 4e7ab9e6b7..5d27adc652 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -18,7 +18,7 @@ - + diff --git a/src/Directory.Build.targets b/src/Directory.Build.targets index d1875262d3..9b8be05b56 100644 --- a/src/Directory.Build.targets +++ b/src/Directory.Build.targets @@ -21,16 +21,25 @@ - + + $([System.IO.Path]::Combine('$(IntermediateOutputPath)','$(TargetFrameworkMoniker).AssemblyAttributes$(DefaultLanguageSourceExtension)')) + + + + + + + + + DependsOnTargets="InitializeSourceRootMappedPaths" + Returns="@(_LocalTopLevelSourceRoot)" + Condition="'$(DeterministicSourcePaths)' == 'true'"> <_LocalTopLevelSourceRoot Include="@(SourceRoot)" Condition="'%(SourceRoot.NestedRoot)' == ''"/> - + false @@ -62,7 +71,7 @@ - + @@ -74,7 +83,7 @@ SkipUnchangedFiles = "true" DestinationFolder="..\..\" /> - + - + From 9bbf90837306b84bbd432e6cba632fcd0f7fb40c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 17:03:39 +0100 Subject: [PATCH 015/104] Revert SDK bump --- Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 5d27adc652..4e7ab9e6b7 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -18,7 +18,7 @@ - + From 8d3f5e79624041fc87ba04ad2c5f1a3a30ffe6a8 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 18:06:51 +0100 Subject: [PATCH 016/104] Use coverlet nightlies --- Directory.Build.props | 1 + tests/Directory.Build.targets | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Directory.Build.props b/Directory.Build.props index 0f9c5bdde2..c4610d0ed3 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -120,6 +120,7 @@ https://api.nuget.org/v3/index.json; 
https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-eng/nuget/v3/index.json; + https://www.myget.org/F/coverlet-dev/api/v3/index.json; true $(MSBuildThisFileDirectory)shared-infrastructure/SixLabors.snk diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index acdf684328..9ee4e2a2e2 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -28,7 +28,7 @@ - + From f71d7a9cb30d0124e7949f44c88bc3022b6a1ae5 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 19:07:42 +0100 Subject: [PATCH 017/104] Update Directory.Build.targets --- tests/Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 9ee4e2a2e2..335f3d106c 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -31,7 +31,7 @@ - + From 87c7d3f89c5a563befde2b759b8e8c966ba75562 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 23:06:32 +0100 Subject: [PATCH 018/104] Update test 3rd party libraries --- tests/Directory.Build.targets | 6 +++--- tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs | 6 +++--- .../ReferenceCodecs/MagickReferenceDecoder.cs | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 335f3d106c..b4e815eb42 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -29,12 +29,12 @@ - + - + - + diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs index 0f76d99317..58ed31e610 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Image image, bool useExactComparer = true, float compareTolerance = 0.01f) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, 
ImageSharp.PixelFormats.IPixel { string path = TestImageProvider.GetFilePathOrNull(provider); if (path == null) @@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga } public static Image DecodeWithMagick(Configuration configuration, FileInfo fileInfo) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { using (var magickImage = new MagickImage(fileInfo)) { @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Assert.True(result.TryGetSinglePixelSpan(out Span resultPixels)); - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { byte[] data = pixels.ToByteArray(PixelMapping.RGBA); diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index de8278a33e..bb8407f19b 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs public static MagickReferenceDecoder Instance { get; } = new MagickReferenceDecoder(); private static void FromRgba32Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -33,7 +33,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } private static void FromRgba64Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -48,17 +48,17 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } public Task> 
DecodeAsync(Configuration configuration, Stream stream, CancellationToken cancellationToken) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel => Task.FromResult(this.Decode(configuration, stream)); public Image Decode(Configuration configuration, Stream stream) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { using var magickImage = new MagickImage(stream); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { if (magickImage.Depth == 8) { From 8f94dc29c7a06c3694d6bb18c07a5d0dda81f43c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 23:13:55 +0100 Subject: [PATCH 019/104] Update Test SDK. --- tests/Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index b4e815eb42..7a2ed70615 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -31,7 +31,7 @@ - + From cb87ae166480321a3cf28df5d7411170ddc5b388 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 23:42:15 +0100 Subject: [PATCH 020/104] Tweak decode cancel timings --- tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs index 78218aec90..0884215491 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs @@ -129,10 +129,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [InlineData(TestImages.Jpeg.Baseline.Jpeg420Small, 0)] 
[InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 1)] - [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 10)] + [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 15)] [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 30)] [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 1)] - [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 10)] + [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 15)] [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 30)] public async Task Decode_IsCancellable(string fileName, int cancellationDelayMs) { From ad21822ff9a63ff0a3267b58b2b7555ca534c4ea Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 23:42:22 +0100 Subject: [PATCH 021/104] Bump src deps. --- Directory.Build.targets | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 4e7ab9e6b7..91d5a5d1fd 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -24,16 +24,12 @@ - + - - + From e356bdecac2d344d010797fe19f38a4ae01226bb Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 00:19:23 +0100 Subject: [PATCH 022/104] Fix missing dll.config file The file only exists if there are binding redirects. 
--- tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs index 1375b5763e..48728faf0e 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs @@ -170,7 +170,10 @@ namespace SixLabors.ImageSharp.Tests } string testProjectConfigPath = TestAssemblyFile.FullName + ".config"; - File.Copy(testProjectConfigPath, remoteExecutorConfigPath); + if (File.Exists(testProjectConfigPath)) + { + File.Copy(testProjectConfigPath, remoteExecutorConfigPath); + } if (Is64BitProcess) { From 31f7480c29c42d649606fa2db2b6c781dfe327fb Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 00:19:35 +0100 Subject: [PATCH 023/104] Update xunit runner --- tests/Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 7a2ed70615..f9efbf79fe 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -37,7 +37,7 @@ - + From c15a552f9aaf2019783f453c83081d5adcd4bd93 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 18:04:03 +0100 Subject: [PATCH 024/104] Try System.Drawing BMP Decoder everywhere. --- .../TestUtilities/TestEnvironment.Formats.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs index 6e204e2d48..5370506dda 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs @@ -69,7 +69,10 @@ namespace SixLabors.ImageSharp.Tests cfg.ConfigureCodecs( BmpFormat.Instance, - IsWindows ? 
(IImageDecoder)SystemDrawingReferenceDecoder.Instance : MagickReferenceDecoder.Instance, + + // Try SD Bitmap decoder everywhere. + // IsWindows ? (IImageDecoder)SystemDrawingReferenceDecoder.Instance : MagickReferenceDecoder.Instance, + SystemDrawingReferenceDecoder.Instance, bmpEncoder, new BmpImageFormatDetector()); From 9ab1200f4c0e00d8e9c12a5cda66439548e971bd Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 21:06:33 +0100 Subject: [PATCH 025/104] Revert "Try System.Drawing BMP Decoder everywhere." This reverts commit 94ba508fd7d07beec309af00fbd0bcbd409dcc09. --- .../TestUtilities/TestEnvironment.Formats.cs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs index 5370506dda..6e204e2d48 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs @@ -69,10 +69,7 @@ namespace SixLabors.ImageSharp.Tests cfg.ConfigureCodecs( BmpFormat.Instance, - - // Try SD Bitmap decoder everywhere. - // IsWindows ? (IImageDecoder)SystemDrawingReferenceDecoder.Instance : MagickReferenceDecoder.Instance, - SystemDrawingReferenceDecoder.Instance, + IsWindows ? 
(IImageDecoder)SystemDrawingReferenceDecoder.Instance : MagickReferenceDecoder.Instance, bmpEncoder, new BmpImageFormatDetector()); From 7c4a8a1a6fbf0364f9651c737991552468e57ce2 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 23:41:29 +0100 Subject: [PATCH 026/104] Skip tests on Linux with known Magick issue --- .runsettings | 6 ++++++ Directory.Build.props | 1 + ImageSharp.sln | 1 + tests/Directory.Build.targets | 5 +++-- tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs | 1 + tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs | 6 ++++++ tests/ImageSharp.Tests/ImageSharp.Tests.csproj | 1 + 7 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 .runsettings diff --git a/.runsettings b/.runsettings new file mode 100644 index 0000000000..7af0056c81 --- /dev/null +++ b/.runsettings @@ -0,0 +1,6 @@ + + + + category!=failing + + diff --git a/Directory.Build.props b/Directory.Build.props index c4610d0ed3..bb97810a8f 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -15,6 +15,7 @@ $(MSBuildThisFileDirectory)artifacts/ $(SixLaborsProjectCategory)/$(MSBuildProjectName) https://github.com/SixLabors/ImageSharp/ + $(MSBuildThisFileDirectory)/.runsettings diff --git a/ImageSharp.sln b/ImageSharp.sln index 509dcf96bf..b1d3176ad2 100644 --- a/ImageSharp.sln +++ b/ImageSharp.sln @@ -9,6 +9,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution .gitattributes = .gitattributes .gitignore = .gitignore .gitmodules = .gitmodules + .runsettings = .runsettings ci-build.ps1 = ci-build.ps1 ci-pack.ps1 = ci-pack.ps1 ci-test.ps1 = ci-test.ps1 diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index f9efbf79fe..1f699c9dd1 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -26,11 +26,12 @@ - + - + + diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 3f767620a6..5015faa701 
100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -339,6 +339,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv2, PixelTypes.Rgba32)] [WithFile(CoreHeader, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs index b05486e356..d6839198d7 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs @@ -152,18 +152,21 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp public void Encode_16Bit_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: true); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv5, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] [WithFile(Bit8Palette4, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] public void Encode_8Bit_WithV3Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: false); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv5, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] [WithFile(Bit8Palette4, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] public void Encode_8Bit_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: true); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit8Gs, PixelTypes.L8, BmpBitsPerPixel.Pixel8)] public void 
Encode_8BitGray_WithV3Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) @@ -173,6 +176,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp bitsPerPixel, supportTransparency: false); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit8Gs, PixelTypes.L8, BmpBitsPerPixel.Pixel8)] public void Encode_8BitGray_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) @@ -182,6 +186,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp bitsPerPixel, supportTransparency: true); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit32Rgb, PixelTypes.Rgba32)] public void Encode_8BitColor_WithWuQuantizer(TestImageProvider provider) @@ -208,6 +213,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit32Rgb, PixelTypes.Rgba32)] public void Encode_8BitColor_WithOctreeQuantizer(TestImageProvider provider) diff --git a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj index ba849ab251..07ade97d5d 100644 --- a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj +++ b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj @@ -20,6 +20,7 @@ + From 4454395cd8b2407627f542749f9f5d564a5eb3ff Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 00:13:05 +0100 Subject: [PATCH 027/104] Add rule to coverlet settings. 
--- .runsettings | 1 + tests/coverlet.runsettings | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/.runsettings b/.runsettings index 7af0056c81..ca48342bd6 100644 --- a/.runsettings +++ b/.runsettings @@ -1,3 +1,4 @@ + diff --git a/tests/coverlet.runsettings b/tests/coverlet.runsettings index ee408a5f04..cffce3540b 100644 --- a/tests/coverlet.runsettings +++ b/tests/coverlet.runsettings @@ -1,5 +1,9 @@ + + + category!=failing + From 722a014f12c1e3136cb14b1bbd30a1b7611b3ee3 Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 11:51:33 +0200 Subject: [PATCH 028/104] Upgraded Magick.NET. --- tests/Directory.Build.targets | 2 +- .../Formats/Tga/TgaTestUtils.cs | 6 +++--- .../ReferenceCodecs/MagickReferenceDecoder.cs | 19 ++++++++++++++----- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 335f3d106c..ef67b122ed 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -29,7 +29,7 @@ - + diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs index 0f76d99317..58ed31e610 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Image image, bool useExactComparer = true, float compareTolerance = 0.01f) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { string path = TestImageProvider.GetFilePathOrNull(provider); if (path == null) @@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga } public static Image DecodeWithMagick(Configuration configuration, FileInfo fileInfo) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { using (var magickImage = new MagickImage(fileInfo)) { @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga 
Assert.True(result.TryGetSinglePixelSpan(out Span resultPixels)); - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { byte[] data = pixels.ToByteArray(PixelMapping.RGBA); diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index de8278a33e..615e918900 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -7,6 +7,7 @@ using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; using ImageMagick; +using ImageMagick.Formats.Bmp; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -18,7 +19,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs public static MagickReferenceDecoder Instance { get; } = new MagickReferenceDecoder(); private static void FromRgba32Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -33,7 +34,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } private static void FromRgba64Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -48,17 +49,25 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } public Task> DecodeAsync(Configuration configuration, Stream stream, CancellationToken cancellationToken) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel => 
Task.FromResult(this.Decode(configuration, stream)); public Image Decode(Configuration configuration, Stream stream) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { + var bmpReadDefines = new BmpReadDefines + { + IgnoreFileSize = true + }; + + var settings = new MagickReadSettings(); + settings.SetDefines(bmpReadDefines); + using var magickImage = new MagickImage(stream); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { if (magickImage.Depth == 8) { From 97d6413640e062c3453c076b47f8c2d1e859ef0a Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 12:22:58 +0200 Subject: [PATCH 029/104] Use settings in the constructor. --- .../TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index 615e918900..d4c35a1ad0 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -63,7 +63,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs var settings = new MagickReadSettings(); settings.SetDefines(bmpReadDefines); - using var magickImage = new MagickImage(stream); + using var magickImage = new MagickImage(stream, settings); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; From 08771fe8abb5085ffbfddbd2ec1b4df3ca11f37e Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 13:53:32 +0000 Subject: 
[PATCH 030/104] Update tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs Co-authored-by: James Jackson-South --- .../TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index d4c35a1ad0..dda5f751c6 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -56,6 +56,12 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { var bmpReadDefines = new BmpReadDefines + { + // See https://github.com/SixLabors/ImageSharp/issues/1380 + // Validation fails on Ubuntu despite identical header generation + // on all platforms. + IgnoreFileSize = !TestEnvironment.IsWindows + }; { IgnoreFileSize = true }; From b7185acf454a88130d53a329618512e4fad0c666 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 15:50:13 +0100 Subject: [PATCH 031/104] Fix bad merge --- .../TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index dda5f751c6..8efc9a0235 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -62,9 +62,6 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs // on all platforms. 
IgnoreFileSize = !TestEnvironment.IsWindows }; - { - IgnoreFileSize = true - }; var settings = new MagickReadSettings(); settings.SetDefines(bmpReadDefines); From 4992c87323916803ad35f095a33bb2ca8aa1b9ec Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 16:56:48 +0100 Subject: [PATCH 032/104] Remove ActiveIssue filters --- tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs | 1 - tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs | 6 ------ 2 files changed, 7 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 5015faa701..3f767620a6 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -339,7 +339,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv2, PixelTypes.Rgba32)] [WithFile(CoreHeader, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs index d6839198d7..b05486e356 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs @@ -152,21 +152,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp public void Encode_16Bit_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: true); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv5, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] [WithFile(Bit8Palette4, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] public void Encode_8Bit_WithV3Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, 
bitsPerPixel, supportTransparency: false); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv5, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] [WithFile(Bit8Palette4, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] public void Encode_8Bit_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: true); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit8Gs, PixelTypes.L8, BmpBitsPerPixel.Pixel8)] public void Encode_8BitGray_WithV3Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) @@ -176,7 +173,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp bitsPerPixel, supportTransparency: false); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit8Gs, PixelTypes.L8, BmpBitsPerPixel.Pixel8)] public void Encode_8BitGray_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) @@ -186,7 +182,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp bitsPerPixel, supportTransparency: true); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit32Rgb, PixelTypes.Rgba32)] public void Encode_8BitColor_WithWuQuantizer(TestImageProvider provider) @@ -213,7 +208,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit32Rgb, PixelTypes.Rgba32)] public void Encode_8BitColor_WithOctreeQuantizer(TestImageProvider provider) From 11f2c9666734779daa99d7a43c5e6bb31b4f5846 Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 18:25:17 +0200 Subject: [PATCH 033/104] Reverted using the read defines. 
--- .../ReferenceCodecs/MagickReferenceDecoder.cs | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index 8efc9a0235..bb8407f19b 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -7,7 +7,6 @@ using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; using ImageMagick; -using ImageMagick.Formats.Bmp; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -55,18 +54,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs public Image Decode(Configuration configuration, Stream stream) where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { - var bmpReadDefines = new BmpReadDefines - { - // See https://github.com/SixLabors/ImageSharp/issues/1380 - // Validation fails on Ubuntu despite identical header generation - // on all platforms. - IgnoreFileSize = !TestEnvironment.IsWindows - }; - - var settings = new MagickReadSettings(); - settings.SetDefines(bmpReadDefines); - - using var magickImage = new MagickImage(stream, settings); + using var magickImage = new MagickImage(stream); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; From bf7c00441cac8104e648fdff70b898f64aa31ed1 Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 18:25:50 +0200 Subject: [PATCH 034/104] The colorPaletteSize is also part of the file size. 
--- src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs index eb29c44050..454440f634 100644 --- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs +++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs @@ -171,7 +171,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp var fileHeader = new BmpFileHeader( type: BmpConstants.TypeMarkers.Bitmap, - fileSize: BmpFileHeader.Size + infoHeaderSize + infoHeader.ImageSize, + fileSize: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize + infoHeader.ImageSize, reserved: 0, offset: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize); From 8d12e539a94e41eb44db11392371711809a1dc49 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 21:02:50 +0100 Subject: [PATCH 035/104] Allow selectively bypassing bmp verification. --- .../Formats/Bmp/BmpDecoderTests.cs | 4 +++- .../Formats/Bmp/BmpEncoderTests.cs | 22 +++++++++++++++--- .../ReferenceCodecs/MagickReferenceDecoder.cs | 23 ++++++++++++++++++- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 3f767620a6..0c7c9a0077 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -348,7 +348,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp using (Image image = provider.GetImage(BmpDecoder)) { image.DebugSave(provider); - image.CompareToOriginal(provider); + + // Do not validate. Reference files will fail validation. 
+ image.CompareToOriginal(provider, new MagickReferenceDecoder(false)); } } diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs index b05486e356..83b67a01af 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs @@ -10,7 +10,7 @@ using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing.Processors.Quantization; using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison; - +using SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs; using Xunit; using Xunit.Abstractions; @@ -200,10 +200,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp Quantizer = new WuQuantizer() }; string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false); + + // Use the default decoder to test our encoded image. This verifies the content. + // We do not verify the reference image though as some are invalid. IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile); using (var referenceImage = Image.Load(actualOutputFile, referenceDecoder)) { - referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false); + referenceImage.CompareToReferenceOutput( + ImageComparer.TolerantPercentage(0.01f), + provider, + extension: "bmp", + appendPixelTypeToFileName: false, + decoder: new MagickReferenceDecoder(false)); } } } @@ -226,10 +234,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp Quantizer = new OctreeQuantizer() }; string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false); + + // Use the default decoder to test our encoded image. This verifies the content. + // We do not verify the reference image though as some are invalid. 
IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile); using (var referenceImage = Image.Load(actualOutputFile, referenceDecoder)) { - referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false); + referenceImage.CompareToReferenceOutput( + ImageComparer.TolerantPercentage(0.01f), + provider, + extension: "bmp", + appendPixelTypeToFileName: false, + decoder: new MagickReferenceDecoder(false)); } } } diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index bb8407f19b..d20e7d2a7e 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -7,6 +7,7 @@ using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; using ImageMagick; +using ImageMagick.Formats.Bmp; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -15,6 +16,18 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs { public class MagickReferenceDecoder : IImageDecoder { + private readonly bool validate; + + public MagickReferenceDecoder() + : this(true) + { + } + + public MagickReferenceDecoder(bool validate) + { + this.validate = validate; + } + public static MagickReferenceDecoder Instance { get; } = new MagickReferenceDecoder(); private static void FromRgba32Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) @@ -54,7 +67,15 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs public Image Decode(Configuration configuration, Stream stream) where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { - using var magickImage = new MagickImage(stream); + var bmpReadDefines = new BmpReadDefines + { + 
IgnoreFileSize = !this.validate + }; + + var settings = new MagickReadSettings(); + settings.SetDefines(bmpReadDefines); + + using var magickImage = new MagickImage(stream, settings); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; From 7909bc1e6529c913ead01fca055193d1af21940f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 22:25:50 +0100 Subject: [PATCH 036/104] Disable failing RemoteExecutor tests on NETFX. --- Directory.Build.targets | 2 +- .../Formats/Gif/GifDecoderTests.cs | 1 + .../Jpg/JpegDecoderTests.Progressive.cs | 1 + .../Formats/Png/PngDecoderTests.cs | 1 + .../Formats/Png/PngEncoderTests.cs | 1 + .../Formats/Tga/TgaDecoderTests.cs | 1 + .../Processors/Convolution/BokehBlurTest.cs | 59 +++++-------------- 7 files changed, 20 insertions(+), 46 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 91d5a5d1fd..2a7d25b977 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -18,7 +18,7 @@ - + diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index 63aae5c559..2e2da8d6ab 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -195,6 +195,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif Assert.IsType(ex.InnerException); } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Gif.Giphy, PixelTypes.Rgba32)] [WithFile(TestImages.Gif.Kumin, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index e29d8f158b..62873a1c76 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ 
b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -13,6 +13,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { public const string DecodeProgressiveJpegOutputName = "DecodeProgressiveJpeg"; + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32, false)] [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32, true)] diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 5b6adfe1af..80ca747191 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -401,6 +401,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png Assert.IsType(ex.InnerException); } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Png.Splash, PixelTypes.Rgba32)] [WithFile(TestImages.Png.Bike, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index b9f5f16fa5..47419d47b3 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -534,6 +534,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png } } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithTestPatternImages(100, 100, PixelTypes.Rgba32)] public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs index 5fb15541ec..7514823fe3 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs @@ -744,6 +744,7 @@ namespace 
SixLabors.ImageSharp.Tests.Formats.Tga Assert.IsType(ex.InnerException); } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(Bit24BottomLeft, PixelTypes.Rgba32)] [WithFile(Bit32BottomLeft, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs b/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs index 50b8782e47..6c48cf843d 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs @@ -138,21 +138,10 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor(TestImageProvider provider, BokehBlurInfo value) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump, string infoDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - BokehBlurInfo value = BasicSerializer.Deserialize(infoDump); - - provider.RunValidatingProcessorTest( - x => x.BokehBlur(value.Radius, value.Components, value.Gamma), - testOutputDetails: value.ToString(), - appendPixelTypeToFileName: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider), BasicSerializer.Serialize(value)) - .Dispose(); + provider.RunValidatingProcessorTest( + x => x.BokehBlur(value.Radius, value.Components, value.Gamma), + testOutputDetails: value.ToString(), + appendPixelTypeToFileName: false); } [Theory] @@ -164,18 +153,9 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor_WorksWithAllPixelTypes(TestImageProvider provider) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - provider.RunValidatingProcessorTest( + provider.RunValidatingProcessorTest( x => x.BokehBlur(8, 2, 
3), appendSourceFileOrDescription: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider)) - .Dispose(); } [Theory] @@ -183,26 +163,15 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor_Bounded(TestImageProvider provider, BokehBlurInfo value) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump, string infoDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - BokehBlurInfo value = BasicSerializer.Deserialize(infoDump); - - provider.RunValidatingProcessorTest( - x => - { - Size size = x.GetCurrentSize(); - var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2); - x.BokehBlur(value.Radius, value.Components, value.Gamma, bounds); - }, - testOutputDetails: value.ToString(), - appendPixelTypeToFileName: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider), BasicSerializer.Serialize(value)) - .Dispose(); + provider.RunValidatingProcessorTest( + x => + { + Size size = x.GetCurrentSize(); + var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2); + x.BokehBlur(value.Radius, value.Components, value.Gamma, bounds); + }, + testOutputDetails: value.ToString(), + appendPixelTypeToFileName: false); } [Theory] From 21b0982327ad6a57245a310a9f8ec5a5b2f008ea Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 22:48:56 +0100 Subject: [PATCH 037/104] Split out progressive jpeg decoding tests --- .../Formats/Gif/GifDecoderTests.cs | 2 +- .../Jpg/JpegDecoderTests.Progressive.cs | 27 +++++++++++++------ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index 2e2da8d6ab..1256520e6b 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -209,7 +209,7 @@ namespace 
SixLabors.ImageSharp.Tests.Formats.Gif provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); using Image image = provider.GetImage(GifDecoder); - image.DebugSave(provider); + image.DebugSave(provider, nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index 62873a1c76..5bf80580e3 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -13,11 +13,25 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { public const string DecodeProgressiveJpegOutputName = "DecodeProgressiveJpeg"; + [Theory] + [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32)] + public void DecodeProgressiveJpeg(TestImageProvider provider) + where TPixel : unmanaged, IPixel + { + using Image image = provider.GetImage(JpegDecoder); + image.DebugSave(provider); + + provider.Utility.TestName = DecodeProgressiveJpegOutputName; + image.CompareToReferenceOutput( + GetImageComparer(provider), + provider, + appendPixelTypeToFileName: false); + } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] - [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32, false)] - [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32, true)] - public void DecodeProgressiveJpeg(TestImageProvider provider, bool enforceDiscontiguousBuffers) + [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32)] + public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { static void RunTest(string providerDump, string nonContiguousBuffersStr) @@ -25,10 +39,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); - if 
(!string.IsNullOrEmpty(nonContiguousBuffersStr)) - { - provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); - } + provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); using Image image = provider.GetImage(JpegDecoder); image.DebugSave(provider, nonContiguousBuffersStr); @@ -45,7 +56,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg RemoteExecutor.Invoke( RunTest, providerDump, - enforceDiscontiguousBuffers ? "Disco" : string.Empty) + "Disco") .Dispose(); } } From 586f2dca750037d918cfa6572ed40ef1483628f7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 23:02:33 +0100 Subject: [PATCH 038/104] Skip limited buffer bitmap test on NETFX --- .../Formats/Bmp/BmpDecoderTests.cs | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 0c7c9a0077..e56ffa143a 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -39,22 +39,34 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp }; [Theory] - [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, false)] - [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, true)] - public void BmpDecoder_CanDecode_MiscellaneousBitmaps(TestImageProvider provider, bool enforceDiscontiguousBuffers) + [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)] + public void BmpDecoder_CanDecode_MiscellaneousBitmaps(TestImageProvider provider) + where TPixel : unmanaged, IPixel + { + using Image image = provider.GetImage(BmpDecoder); + image.DebugSave(provider); + + if (TestEnvironment.IsWindows) + { + image.CompareToOriginal(provider); + } + } + + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] + [Theory] + [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)] + public void 
BmpDecoder_CanDecode_MiscellaneousBitmaps_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) where TPixel : unmanaged, IPixel { static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); - if (!string.IsNullOrEmpty(nonContiguousBuffersStr)) - { - provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - } + provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); using Image image = provider.GetImage(BmpDecoder); - image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); + image.DebugSave(provider, nonContiguousBuffersStr); if (TestEnvironment.IsWindows) { @@ -66,7 +78,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp RemoteExecutor.Invoke( RunTest, providerDump, - enforceDiscontiguousBuffers ? "Disco" : string.Empty) + "Disco") .Dispose(); } From 147aea2448ace014ff669901d653cba0af6eb412 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 23:50:04 +0100 Subject: [PATCH 039/104] Add skip for dotnet xunit --- .../ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs | 6 ++++++ .../ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs | 6 ++++++ .../Formats/Jpg/JpegDecoderTests.Progressive.cs | 6 ++++++ .../ImageSharp.Tests/Formats/Png/PngDecoderTests.cs | 12 ++++++++++++ .../ImageSharp.Tests/Formats/Png/PngEncoderTests.cs | 6 ++++++ .../ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs | 6 ++++++ 6 files changed, 42 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index e56ffa143a..68a3213d3a 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -59,6 +59,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. 
+ if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index 1256520e6b..b76ea264ce 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -202,6 +202,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index 5bf80580e3..1edbe5ba98 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -34,6 +34,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. 
+ if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 80ca747191..8511154789 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -396,6 +396,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png public void PngDecoder_DegenerateMemoryRequest_ShouldTranslateTo_ImageFormatException(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + provider.LimitAllocatorBufferCapacity().InPixelsSqrt(10); InvalidImageContentException ex = Assert.Throws(() => provider.GetImage(PngDecoder)); Assert.IsType(ex.InnerException); @@ -408,6 +414,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 47419d47b3..8c4f17f02b 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -540,6 +540,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. 
+ if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump) { TestImageProvider provider = diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs index 7514823fe3..8fff47719b 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs @@ -751,6 +751,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); From 45f303d094e60798f9ffe1650bf86fca92074b13 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 02:19:35 +0100 Subject: [PATCH 040/104] Don't use generics. Could have saved myself days of issues if i'd just thought of this first. 
--- .../Formats/Bmp/BmpDecoderTests.cs | 16 ++++----------- .../Formats/Gif/GifDecoderTests.cs | 16 +++++---------- .../Jpg/JpegDecoderTests.Progressive.cs | 20 ++++++------------- .../Formats/Png/PngDecoderTests.cs | 20 +++---------------- .../Formats/Png/PngEncoderTests.cs | 14 +++---------- .../Formats/Tga/TgaDecoderTests.cs | 14 +++---------- 6 files changed, 24 insertions(+), 76 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 68a3213d3a..f98fa3c7f3 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -52,26 +52,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)] - public void BmpDecoder_CanDecode_MiscellaneousBitmaps_WithLimitedAllocatorBufferCapacity( - TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void BmpDecoder_CanDecode_MiscellaneousBitmaps_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) { - // dotnet xunit doesn't respect filter. 
- if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(BmpDecoder); + using Image image = provider.GetImage(BmpDecoder); image.DebugSave(provider, nonContiguousBuffersStr); if (TestEnvironment.IsWindows) diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index b76ea264ce..eb2643b8cd 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -195,26 +195,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif Assert.IsType(ex.InnerException); } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Gif.Giphy, PixelTypes.Rgba32)] [WithFile(TestImages.Gif.Kumin, PixelTypes.Rgba32)] - public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) { - // dotnet xunit doesn't respect filter. 
- if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider + = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(GifDecoder); + using Image image = provider.GetImage(GifDecoder); image.DebugSave(provider, nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index 1edbe5ba98..98421ca5d4 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -28,26 +28,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg appendPixelTypeToFileName: false); } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32)] - public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { - // dotnet xunit doesn't respect filter. 
- if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = + BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); - using Image image = provider.GetImage(JpegDecoder); + using Image image = provider.GetImage(JpegDecoder); image.DebugSave(provider, nonContiguousBuffersStr); provider.Utility.TestName = DecodeProgressiveJpegOutputName; @@ -62,8 +54,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg RemoteExecutor.Invoke( RunTest, providerDump, - "Disco") - .Dispose(); + "Disco") + .Dispose(); } } } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 8511154789..2164975df0 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -396,37 +396,23 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png public void PngDecoder_DegenerateMemoryRequest_ShouldTranslateTo_ImageFormatException(TestImageProvider provider) where TPixel : unmanaged, IPixel { - // dotnet xunit doesn't respect filter. - if (TestEnvironment.IsFramework) - { - return; - } - provider.LimitAllocatorBufferCapacity().InPixelsSqrt(10); InvalidImageContentException ex = Assert.Throws(() => provider.GetImage(PngDecoder)); Assert.IsType(ex.InnerException); } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Png.Splash, PixelTypes.Rgba32)] [WithFile(TestImages.Png.Bike, PixelTypes.Rgba32)] - public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { - // dotnet xunit doesn't respect filter. 
- if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(PngDecoder); + using Image image = provider.GetImage(PngDecoder); image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 8c4f17f02b..b35e55887c 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -534,22 +534,14 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png } } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithTestPatternImages(100, 100, PixelTypes.Rgba32)] - public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) { - // dotnet xunit doesn't respect filter. 
- if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump) { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = + BasicSerializer.Deserialize>(providerDump); #if SUPPORTS_RUNTIME_INTRINSICS Assert.False(Ssse3.IsSupported); #endif diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs index 8fff47719b..edb43aa126 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs @@ -744,26 +744,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Assert.IsType(ex.InnerException); } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(Bit24BottomLeft, PixelTypes.Rgba32)] [WithFile(Bit32BottomLeft, PixelTypes.Rgba32)] - public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { - // dotnet xunit doesn't respect filter. 
- if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(TgaDecoder); + using Image image = provider.GetImage(TgaDecoder); image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); if (TestEnvironment.IsWindows) From 310710e3113223b976b638334b0290549da67bae Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 21:32:37 +0100 Subject: [PATCH 041/104] Add FeatureTestRunner --- .../FeatureTesting/FeatureTestRunner.cs | 266 ++++++++++++++++++ .../Tests/FeatureTestRunnerTests.cs | 237 ++++++++++++++++ 2 files changed, 503 insertions(+) create mode 100644 tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs create mode 100644 tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs new file mode 100644 index 0000000000..57ed85a182 --- /dev/null +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -0,0 +1,266 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using Microsoft.DotNet.RemoteExecutor; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities +{ + /// + /// Allows the testing against specific feature sets. + /// + public static class FeatureTestRunner + { + private static readonly char[] SplitChars = new[] { ',', ' ' }; + + /// + /// Allows the deserialization of parameters passed to the feature test. 
+ /// + /// + /// This is required because does not allow + /// marshalling of fields so we cannot pass a wrapped + /// allowing automatic deserialization. + /// + /// + /// + /// The type to deserialize to. + /// The string value to deserialize. + /// The value. + public static T Deserialize(string value) + where T : IXunitSerializable + => BasicSerializer.Deserialize(value); + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + + RemoteExecutor.Invoke( + action, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// + /// The test action to run. + /// The parameter passed will be a string representing the currently testing . + /// The intrinsics features. 
+ public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + + RemoteExecutor.Invoke( + action, + intrinsic.Key.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(intrinsic.Key.ToString()); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(serializable)); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. 
+ /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + intrinsic.Key.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(serializable), intrinsic.Key.ToString()); + } + } + } + + internal static Dictionary ToFeatureKeyValueCollection(this HwIntrinsics intrinsics) + { + // Loop through and translate the given values into COMPlus equivaluents + var features = new Dictionary(); + foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) + { + var key = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic); + switch (intrinsic) + { + case nameof(HwIntrinsics.DisableSIMD): + features.Add(key, "FeatureSIMD"); + break; + + case nameof(HwIntrinsics.AllowAll): + + // Not a COMPlus value. We filter in calling method. + features.Add(key, nameof(HwIntrinsics.AllowAll)); + break; + + default: + features.Add(key, intrinsic.Replace("Disable", "Enable")); + break; + } + } + + return features; + } + } + + /// + /// See + /// + /// ends up impacting all SIMD support(including System.Numerics) + /// but not things like , , and . + /// + /// + [Flags] +#pragma warning disable RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute). 
+ public enum HwIntrinsics +#pragma warning restore RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute). + { + // Use flags so we can pass multiple values without using params. + // Don't base on 0 or use inverse for All as that doesn't translate to string values. + DisableSIMD = 1 << 0, + DisableHWIntrinsic = 1 << 1, + DisableSSE = 1 << 2, + DisableSSE2 = 1 << 3, + DisableAES = 1 << 4, + DisablePCLMULQDQ = 1 << 5, + DisableSSE3 = 1 << 6, + DisableSSSE3 = 1 << 7, + DisableSSE41 = 1 << 8, + DisableSSE42 = 1 << 9, + DisablePOPCNT = 1 << 10, + DisableAVX = 1 << 11, + DisableFMA = 1 << 12, + DisableAVX2 = 1 << 13, + DisableBMI1 = 1 << 14, + DisableBMI2 = 1 << 15, + DisableLZCNT = 1 << 16, + AllowAll = 1 << 17 + } +} diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs new file mode 100644 index 0000000000..9852ba3478 --- /dev/null +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -0,0 +1,237 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using Xunit; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests +{ + public class FeatureTestRunnerTests + { + public static TheoryData Intrinsics => + new TheoryData + { + { HwIntrinsics.DisableAES | HwIntrinsics.AllowAll, new string[] { "EnableAES", "AllowAll" } }, + { HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic, new string[] { "FeatureSIMD", "EnableHWIntrinsic" } }, + { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } + }; + + [Theory] + [MemberData(nameof(Intrinsics))] + public void ToFeatureCollectionReturnsExpectedResult(HwIntrinsics expectedItrinsics, string[] expectedValues) + { + Dictionary features = expectedItrinsics.ToFeatureKeyValueCollection(); + HwIntrinsics[] keys = features.Keys.ToArray(); + + HwIntrinsics actualIntrinsics = keys[0]; + for (int i = 1; i < keys.Length; i++) + { + actualIntrinsics |= keys[i]; + } + + Assert.Equal(expectedItrinsics, actualIntrinsics); + + IEnumerable actualValues = features.Select(x => x.Value); + Assert.Equal(expectedValues, actualValues); + } + + [Fact] + public void AllowsAllHwIntrinsicFeatures() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.True(Vector.IsHardwareAccelerated), + HwIntrinsics.AllowAll); + } + + [Fact] + public void CanLimitHwIntrinsicFeatures() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.False(Vector.IsHardwareAccelerated), + HwIntrinsics.DisableSIMD); + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) + { + Assert.NotNull(intrinsic); + + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + 
break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Vector.IsHardwareAccelerated); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); + } + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + +#if SUPPORTS_RUNTIME_INTRINSICS + Assert.False(Sse.IsSupported); +#endif + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + 
AssertHwIntrinsicsFeatureDisabled, + HwIntrinsics.DisableSSE, + new FakeSerializable()); + } + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Vector.IsHardwareAccelerated); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + 
FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); + } + } + + public class FakeSerializable : IXunitSerializable + { + public void Deserialize(IXunitSerializationInfo info) + { + } + + public void Serialize(IXunitSerializationInfo info) + { + } + } + } +} From 9380dd5b1e3487837c9494247a5bf8e6cc548c66 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 21:36:51 +0100 Subject: [PATCH 042/104] Use single Block8x8F.TransponseInto test --- .../Formats/Jpg/Block8x8FTests.cs | 48 +++++++------------ 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 73a68063c0..5482380885 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -5,10 +5,9 @@ // #define BENCHMARKING using System; using System.Diagnostics; - using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -163,42 +162,29 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void TransposeIntoFallback() + public void TransposeInto() { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); - - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); - - var dest = default(Block8x8F); - source.TransposeIntoFallback(ref dest); - - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); - - Assert.Equal(expected, actual); - } + static void RunTest() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); -#if SUPPORTS_RUNTIME_INTRINSICS - [Fact] - public void TransposeIntoAvx() - { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); + var source 
= default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); + var dest = default(Block8x8F); + source.TransposeInto(ref dest); - var dest = default(Block8x8F); - source.TransposeIntoAvx(ref dest); + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); + Assert.Equal(expected, actual); + } - Assert.Equal(expected, actual); + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } -#endif private class BufferHolder { From 86198504ee6d3ea9841be70e638d8f307fca4e94 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 21:58:24 +0100 Subject: [PATCH 043/104] Ensure remoteexecutor tests run --- .github/workflows/build-and-test.yml | 32 +++++++++---------- .../Tests/FeatureTestRunnerTests.cs | 8 +++++ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c8f3997946..ecb5ceb0ef 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -17,23 +17,23 @@ jobs: - os: ubuntu-latest framework: netcoreapp3.1 runtime: -x64 - codecov: true - - os: windows-latest - framework: netcoreapp3.1 - runtime: -x64 - codecov: false - - os: windows-latest - framework: netcoreapp2.1 - runtime: -x64 - codecov: false - - os: windows-latest - framework: net472 - runtime: -x64 - codecov: false - - os: windows-latest - framework: net472 - runtime: -x86 codecov: false + # - os: windows-latest + # framework: netcoreapp3.1 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: netcoreapp2.1 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: net472 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: net472 + # runtime: -x86 + # codecov: false runs-on: ${{matrix.options.os}} if: 
"!contains(github.event.head_commit.message, '[skip ci]')" diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 9852ba3478..070f205748 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -23,6 +23,14 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } }; + [Fact] + public void TempAssertThrow() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.True(false), + HwIntrinsics.DisableAVX); + } + [Theory] [MemberData(nameof(Intrinsics))] public void ToFeatureCollectionReturnsExpectedResult(HwIntrinsics expectedItrinsics, string[] expectedValues) From d3208ad354b14a253f217c1bf79c2271beb36b5e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:12:40 +0100 Subject: [PATCH 044/104] Increment tests --- .../Tests/FeatureTestRunnerTests.cs | 340 +++++++++--------- 1 file changed, 166 insertions(+), 174 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 070f205748..023679b2e0 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -23,14 +23,6 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } }; - [Fact] - public void TempAssertThrow() - { - FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.True(false), - HwIntrinsics.DisableAVX); - } - [Theory] [MemberData(nameof(Intrinsics))] public void ToFeatureCollectionReturnsExpectedResult(HwIntrinsics expectedItrinsics, string[] 
expectedValues) @@ -54,7 +46,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void AllowsAllHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.True(Vector.IsHardwareAccelerated), + () => Assert.True(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), HwIntrinsics.AllowAll); } @@ -62,174 +54,174 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void CanLimitHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.False(Vector.IsHardwareAccelerated), + () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), HwIntrinsics.DisableSIMD); } - [Fact] - public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() - { - static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) - { - Assert.NotNull(intrinsic); - - switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) - { - case HwIntrinsics.DisableSIMD: - Assert.False(Vector.IsHardwareAccelerated); - break; -#if SUPPORTS_RUNTIME_INTRINSICS - case HwIntrinsics.DisableHWIntrinsic: - Assert.False(Vector.IsHardwareAccelerated); - break; - case HwIntrinsics.DisableSSE: - Assert.False(Sse.IsSupported); - break; - case HwIntrinsics.DisableSSE2: - Assert.False(Sse2.IsSupported); - break; - case HwIntrinsics.DisableAES: - Assert.False(Aes.IsSupported); - break; - case HwIntrinsics.DisablePCLMULQDQ: - Assert.False(Pclmulqdq.IsSupported); - break; - case HwIntrinsics.DisableSSE3: - Assert.False(Sse3.IsSupported); - break; - case HwIntrinsics.DisableSSSE3: - Assert.False(Ssse3.IsSupported); - break; - case HwIntrinsics.DisableSSE41: - Assert.False(Sse41.IsSupported); - break; - case HwIntrinsics.DisableSSE42: - Assert.False(Sse42.IsSupported); - break; - case HwIntrinsics.DisablePOPCNT: - Assert.False(Popcnt.IsSupported); - break; - case HwIntrinsics.DisableAVX: - Assert.False(Avx.IsSupported); - break; - case HwIntrinsics.DisableFMA: - Assert.False(Fma.IsSupported); - 
break; - case HwIntrinsics.DisableAVX2: - Assert.False(Avx2.IsSupported); - break; - case HwIntrinsics.DisableBMI1: - Assert.False(Bmi1.IsSupported); - break; - case HwIntrinsics.DisableBMI2: - Assert.False(Bmi2.IsSupported); - break; - case HwIntrinsics.DisableLZCNT: - Assert.False(Lzcnt.IsSupported); - break; -#endif - } - } - - foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - { - FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); - } - } - - [Fact] - public void CanLimitHwIntrinsicFeaturesWithSerializableParam() - { - static void AssertHwIntrinsicsFeatureDisabled(string serializable) - { - Assert.NotNull(serializable); - Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); - -#if SUPPORTS_RUNTIME_INTRINSICS - Assert.False(Sse.IsSupported); -#endif - } - - foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - { - FeatureTestRunner.RunWithHwIntrinsicsFeature( - AssertHwIntrinsicsFeatureDisabled, - HwIntrinsics.DisableSSE, - new FakeSerializable()); - } - } - - [Fact] - public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() - { - static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) - { - Assert.NotNull(serializable); - Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); - - switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) - { - case HwIntrinsics.DisableSIMD: - Assert.False(Vector.IsHardwareAccelerated); - break; -#if SUPPORTS_RUNTIME_INTRINSICS - case HwIntrinsics.DisableHWIntrinsic: - Assert.False(Vector.IsHardwareAccelerated); - break; - case HwIntrinsics.DisableSSE: - Assert.False(Sse.IsSupported); - break; - case HwIntrinsics.DisableSSE2: - Assert.False(Sse2.IsSupported); - break; - case HwIntrinsics.DisableAES: - Assert.False(Aes.IsSupported); - break; - case HwIntrinsics.DisablePCLMULQDQ: - Assert.False(Pclmulqdq.IsSupported); - break; - case 
HwIntrinsics.DisableSSE3: - Assert.False(Sse3.IsSupported); - break; - case HwIntrinsics.DisableSSSE3: - Assert.False(Ssse3.IsSupported); - break; - case HwIntrinsics.DisableSSE41: - Assert.False(Sse41.IsSupported); - break; - case HwIntrinsics.DisableSSE42: - Assert.False(Sse42.IsSupported); - break; - case HwIntrinsics.DisablePOPCNT: - Assert.False(Popcnt.IsSupported); - break; - case HwIntrinsics.DisableAVX: - Assert.False(Avx.IsSupported); - break; - case HwIntrinsics.DisableFMA: - Assert.False(Fma.IsSupported); - break; - case HwIntrinsics.DisableAVX2: - Assert.False(Avx2.IsSupported); - break; - case HwIntrinsics.DisableBMI1: - Assert.False(Bmi1.IsSupported); - break; - case HwIntrinsics.DisableBMI2: - Assert.False(Bmi2.IsSupported); - break; - case HwIntrinsics.DisableLZCNT: - Assert.False(Lzcnt.IsSupported); - break; -#endif - } - } - - foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - { - FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); - } - } + // [Fact] + // public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() + // { + // static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) + // { + // Assert.NotNull(intrinsic); + + // switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + // { + // case HwIntrinsics.DisableSIMD: + // Assert.False(Vector.IsHardwareAccelerated); + // break; + //#if SUPPORTS_RUNTIME_INTRINSICS + // case HwIntrinsics.DisableHWIntrinsic: + // Assert.False(Vector.IsHardwareAccelerated); + // break; + // case HwIntrinsics.DisableSSE: + // Assert.False(Sse.IsSupported); + // break; + // case HwIntrinsics.DisableSSE2: + // Assert.False(Sse2.IsSupported); + // break; + // case HwIntrinsics.DisableAES: + // Assert.False(Aes.IsSupported); + // break; + // case HwIntrinsics.DisablePCLMULQDQ: + // Assert.False(Pclmulqdq.IsSupported); + // break; + // case HwIntrinsics.DisableSSE3: + // 
Assert.False(Sse3.IsSupported); + // break; + // case HwIntrinsics.DisableSSSE3: + // Assert.False(Ssse3.IsSupported); + // break; + // case HwIntrinsics.DisableSSE41: + // Assert.False(Sse41.IsSupported); + // break; + // case HwIntrinsics.DisableSSE42: + // Assert.False(Sse42.IsSupported); + // break; + // case HwIntrinsics.DisablePOPCNT: + // Assert.False(Popcnt.IsSupported); + // break; + // case HwIntrinsics.DisableAVX: + // Assert.False(Avx.IsSupported); + // break; + // case HwIntrinsics.DisableFMA: + // Assert.False(Fma.IsSupported); + // break; + // case HwIntrinsics.DisableAVX2: + // Assert.False(Avx2.IsSupported); + // break; + // case HwIntrinsics.DisableBMI1: + // Assert.False(Bmi1.IsSupported); + // break; + // case HwIntrinsics.DisableBMI2: + // Assert.False(Bmi2.IsSupported); + // break; + // case HwIntrinsics.DisableLZCNT: + // Assert.False(Lzcnt.IsSupported); + // break; + //#endif + // } + // } + + // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + // { + // FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); + // } + // } + + // [Fact] + // public void CanLimitHwIntrinsicFeaturesWithSerializableParam() + // { + // static void AssertHwIntrinsicsFeatureDisabled(string serializable) + // { + // Assert.NotNull(serializable); + // Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + + //#if SUPPORTS_RUNTIME_INTRINSICS + // Assert.False(Sse.IsSupported); + //#endif + // } + + // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + // { + // FeatureTestRunner.RunWithHwIntrinsicsFeature( + // AssertHwIntrinsicsFeatureDisabled, + // HwIntrinsics.DisableSSE, + // new FakeSerializable()); + // } + // } + + // [Fact] + // public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() + // { + // static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) + // { + // 
Assert.NotNull(serializable); + // Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + + // switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + // { + // case HwIntrinsics.DisableSIMD: + // Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); + // break; + //#if SUPPORTS_RUNTIME_INTRINSICS + // case HwIntrinsics.DisableHWIntrinsic: + // Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); + // break; + // case HwIntrinsics.DisableSSE: + // Assert.False(Sse.IsSupported); + // break; + // case HwIntrinsics.DisableSSE2: + // Assert.False(Sse2.IsSupported); + // break; + // case HwIntrinsics.DisableAES: + // Assert.False(Aes.IsSupported); + // break; + // case HwIntrinsics.DisablePCLMULQDQ: + // Assert.False(Pclmulqdq.IsSupported); + // break; + // case HwIntrinsics.DisableSSE3: + // Assert.False(Sse3.IsSupported); + // break; + // case HwIntrinsics.DisableSSSE3: + // Assert.False(Ssse3.IsSupported); + // break; + // case HwIntrinsics.DisableSSE41: + // Assert.False(Sse41.IsSupported); + // break; + // case HwIntrinsics.DisableSSE42: + // Assert.False(Sse42.IsSupported); + // break; + // case HwIntrinsics.DisablePOPCNT: + // Assert.False(Popcnt.IsSupported); + // break; + // case HwIntrinsics.DisableAVX: + // Assert.False(Avx.IsSupported); + // break; + // case HwIntrinsics.DisableFMA: + // Assert.False(Fma.IsSupported); + // break; + // case HwIntrinsics.DisableAVX2: + // Assert.False(Avx2.IsSupported); + // break; + // case HwIntrinsics.DisableBMI1: + // Assert.False(Bmi1.IsSupported); + // break; + // case HwIntrinsics.DisableBMI2: + // Assert.False(Bmi2.IsSupported); + // break; + // case HwIntrinsics.DisableLZCNT: + // Assert.False(Lzcnt.IsSupported); + // break; + //#endif + // } + // } + + // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + // { + // FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, 
intrinsic, new FakeSerializable()); + // } + // } public class FakeSerializable : IXunitSerializable { From 5e53e8363d7d5f27bfa07ac561b5a4f63dbaf95e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:22:08 +0100 Subject: [PATCH 045/104] Fix COMPlus environmental parameters --- .../TestUtilities/FeatureTesting/FeatureTestRunner.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 57ed85a182..a053471394 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -53,7 +53,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; RemoteExecutor.Invoke( action, @@ -94,7 +94,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; RemoteExecutor.Invoke( action, @@ -137,7 +137,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; RemoteExecutor.Invoke( action, @@ -180,7 +180,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + 
processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; RemoteExecutor.Invoke( action, From e794901bf6cf802f90d9c5fd8dac9e4bf0f4272d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:30:11 +0100 Subject: [PATCH 046/104] Test another property to be sure --- .../TestUtilities/Tests/FeatureTestRunnerTests.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 023679b2e0..13a0806129 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -50,13 +50,15 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests HwIntrinsics.AllowAll); } +#if SUPPORTS_RUNTIME_INTRINSICS [Fact] public void CanLimitHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), - HwIntrinsics.DisableSIMD); + () => Assert.False(Sse.IsSupported, nameof(Sse.IsSupported)), + HwIntrinsics.DisableSSE); } +#endif // [Fact] // public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() From 54836a623bf27c2b97e8abbc159cd60fe077dbfe Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:44:35 +0100 Subject: [PATCH 047/104] Fix COMPlus value properly. 
--- .../TestUtilities/FeatureTesting/FeatureTestRunner.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index a053471394..eb1714baad 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -53,7 +53,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -94,7 +94,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -137,7 +137,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -180,7 +180,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, From 13b570dd02c57caa059fdcb169124a26d641dae6 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:50:22 +0100 Subject: [PATCH 048/104] Test FeatureSIMD --- 
.../TestUtilities/Tests/FeatureTestRunnerTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 13a0806129..eb6f82885f 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -55,8 +55,8 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void CanLimitHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.False(Sse.IsSupported, nameof(Sse.IsSupported)), - HwIntrinsics.DisableSSE); + () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), + HwIntrinsics.DisableSIMD); } #endif From b90cac23dfa8214aa2ffb91e73326d2741984f12 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:55:46 +0100 Subject: [PATCH 049/104] Now test both disable values. 
--- .../TestUtilities/Tests/FeatureTestRunnerTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index eb6f82885f..0f6cdc066a 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -56,7 +56,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests { FeatureTestRunner.RunWithHwIntrinsicsFeature( () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), - HwIntrinsics.DisableSIMD); + HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic); } #endif From 54ea7d669099b55d1421b9bf94e141958d02e6d6 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 23:08:27 +0100 Subject: [PATCH 050/104] Split base disabled checks --- .../Tests/FeatureTestRunnerTests.cs | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 0f6cdc066a..8eb5c9fd62 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -50,13 +50,40 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests HwIntrinsics.AllowAll); } -#if SUPPORTS_RUNTIME_INTRINSICS [Fact] - public void CanLimitHwIntrinsicFeatures() + public void CanLimitHwIntrinsicSIMDFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), - HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic); + HwIntrinsics.DisableSIMD); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void CanLimitHwIntrinsicBaseFeatures() + { + static void AssertDisabled() + { + 
Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + AssertDisabled, + HwIntrinsics.DisableHWIntrinsic); } #endif From 911d28993cfd3a30304afb2b9e327f941a8afc35 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 23:16:00 +0100 Subject: [PATCH 051/104] Enable additional tests --- .../Tests/FeatureTestRunnerTests.cs | 341 ++++++++++-------- 1 file changed, 183 insertions(+), 158 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 8eb5c9fd62..eea22592bc 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void AllowsAllHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.True(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), + () => Assert.True(Vector.IsHardwareAccelerated), HwIntrinsics.AllowAll); } @@ -54,7 +54,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void CanLimitHwIntrinsicSIMDFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), + () => Assert.False(Vector.IsHardwareAccelerated), HwIntrinsics.DisableSIMD); } @@ -87,170 +87,195 @@ namespace 
SixLabors.ImageSharp.Tests.TestUtilities.Tests } #endif - // [Fact] - // public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() - // { - // static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) - // { - // Assert.NotNull(intrinsic); + [Fact] + public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) + { + Assert.NotNull(intrinsic); - // switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) - // { - // case HwIntrinsics.DisableSIMD: - // Assert.False(Vector.IsHardwareAccelerated); - // break; - //#if SUPPORTS_RUNTIME_INTRINSICS - // case HwIntrinsics.DisableHWIntrinsic: - // Assert.False(Vector.IsHardwareAccelerated); - // break; - // case HwIntrinsics.DisableSSE: - // Assert.False(Sse.IsSupported); - // break; - // case HwIntrinsics.DisableSSE2: - // Assert.False(Sse2.IsSupported); - // break; - // case HwIntrinsics.DisableAES: - // Assert.False(Aes.IsSupported); - // break; - // case HwIntrinsics.DisablePCLMULQDQ: - // Assert.False(Pclmulqdq.IsSupported); - // break; - // case HwIntrinsics.DisableSSE3: - // Assert.False(Sse3.IsSupported); - // break; - // case HwIntrinsics.DisableSSSE3: - // Assert.False(Ssse3.IsSupported); - // break; - // case HwIntrinsics.DisableSSE41: - // Assert.False(Sse41.IsSupported); - // break; - // case HwIntrinsics.DisableSSE42: - // Assert.False(Sse42.IsSupported); - // break; - // case HwIntrinsics.DisablePOPCNT: - // Assert.False(Popcnt.IsSupported); - // break; - // case HwIntrinsics.DisableAVX: - // Assert.False(Avx.IsSupported); - // break; - // case HwIntrinsics.DisableFMA: - // Assert.False(Fma.IsSupported); - // break; - // case HwIntrinsics.DisableAVX2: - // Assert.False(Avx2.IsSupported); - // break; - // case HwIntrinsics.DisableBMI1: - // Assert.False(Bmi1.IsSupported); - // break; - // case HwIntrinsics.DisableBMI2: - // Assert.False(Bmi2.IsSupported); - // break; - // case HwIntrinsics.DisableLZCNT: - // 
Assert.False(Lzcnt.IsSupported); - // break; - //#endif - // } - // } + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } 
- // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - // { - // FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); - // } - // } + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); + } + } - // [Fact] - // public void CanLimitHwIntrinsicFeaturesWithSerializableParam() - // { - // static void AssertHwIntrinsicsFeatureDisabled(string serializable) - // { - // Assert.NotNull(serializable); - // Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); - //#if SUPPORTS_RUNTIME_INTRINSICS - // Assert.False(Sse.IsSupported); - //#endif - // } +#if SUPPORTS_RUNTIME_INTRINSICS + Assert.False(Sse.IsSupported); +#endif + } - // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - // { - // FeatureTestRunner.RunWithHwIntrinsicsFeature( - // AssertHwIntrinsicsFeatureDisabled, - // HwIntrinsics.DisableSSE, - // new FakeSerializable()); - // } - // } + FeatureTestRunner.RunWithHwIntrinsicsFeature( + AssertHwIntrinsicsFeatureDisabled, + HwIntrinsics.DisableSSE, + new FakeSerializable()); + } - // [Fact] - // public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() - // { - // static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) - // { - // Assert.NotNull(serializable); - // Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) + { + 
Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); - // switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) - // { - // case HwIntrinsics.DisableSIMD: - // Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); - // break; - //#if SUPPORTS_RUNTIME_INTRINSICS - // case HwIntrinsics.DisableHWIntrinsic: - // Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); - // break; - // case HwIntrinsics.DisableSSE: - // Assert.False(Sse.IsSupported); - // break; - // case HwIntrinsics.DisableSSE2: - // Assert.False(Sse2.IsSupported); - // break; - // case HwIntrinsics.DisableAES: - // Assert.False(Aes.IsSupported); - // break; - // case HwIntrinsics.DisablePCLMULQDQ: - // Assert.False(Pclmulqdq.IsSupported); - // break; - // case HwIntrinsics.DisableSSE3: - // Assert.False(Sse3.IsSupported); - // break; - // case HwIntrinsics.DisableSSSE3: - // Assert.False(Ssse3.IsSupported); - // break; - // case HwIntrinsics.DisableSSE41: - // Assert.False(Sse41.IsSupported); - // break; - // case HwIntrinsics.DisableSSE42: - // Assert.False(Sse42.IsSupported); - // break; - // case HwIntrinsics.DisablePOPCNT: - // Assert.False(Popcnt.IsSupported); - // break; - // case HwIntrinsics.DisableAVX: - // Assert.False(Avx.IsSupported); - // break; - // case HwIntrinsics.DisableFMA: - // Assert.False(Fma.IsSupported); - // break; - // case HwIntrinsics.DisableAVX2: - // Assert.False(Avx2.IsSupported); - // break; - // case HwIntrinsics.DisableBMI1: - // Assert.False(Bmi1.IsSupported); - // break; - // case HwIntrinsics.DisableBMI2: - // Assert.False(Bmi2.IsSupported); - // break; - // case HwIntrinsics.DisableLZCNT: - // Assert.False(Lzcnt.IsSupported); - // break; - //#endif - // } - // } + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); + break; 
+#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } - // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - // { - // FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); - // } - 
// } + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); + } + } public class FakeSerializable : IXunitSerializable { From e6c73e4b2b28a0e99b6e5f688d4536acd5da2be3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 23:31:56 +0100 Subject: [PATCH 052/104] Update PngEncoderTests.cs --- .../Formats/Png/PngEncoderTests.cs | 28 ++++--------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index b35e55887c..9ba956d722 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -2,13 +2,8 @@ // Licensed under the Apache License, Version 2.0. // ReSharper disable InconsistentNaming -using System.Diagnostics; using System.IO; using System.Linq; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif -using Microsoft.DotNet.RemoteExecutor; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Formats.Png; using SixLabors.ImageSharp.Metadata; @@ -16,7 +11,6 @@ using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing.Processors.Quantization; using SixLabors.ImageSharp.Tests.TestUtilities; using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison; - using Xunit; namespace SixLabors.ImageSharp.Tests.Formats.Png @@ -538,13 +532,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png [WithTestPatternImages(100, 100, PixelTypes.Rgba32)] public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) { - static void RunTest(string providerDump) + static void RunTest(string serialized) { TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); -#if SUPPORTS_RUNTIME_INTRINSICS - Assert.False(Ssse3.IsSupported); -#endif + 
FeatureTestRunner.Deserialize>(serialized); foreach (PngInterlaceMode interlaceMode in InterlaceMode) { @@ -559,19 +550,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png } } - string providerDump = BasicSerializer.Serialize(provider); - - var processStartInfo = new ProcessStartInfo(); - processStartInfo.Environment[TestEnvironment.Features.EnableSSE3] = TestEnvironment.Features.Off; - - RemoteExecutor.Invoke( + FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - providerDump, - new RemoteInvokeOptions - { - StartInfo = processStartInfo - }) - .Dispose(); + HwIntrinsics.DisableSSSE3, + provider); } private static void TestPngEncoderCore( From ec36095092e207ba51267444d3dedbee11aed76f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 00:06:31 +0100 Subject: [PATCH 053/104] Use envonment aware benchmarking for intrinsics. --- .../BlockOperations/Block8x8F_Transpose.cs | 16 +--- .../Config.HwIntrinsics.cs | 81 +++++++++++++++++++ tests/ImageSharp.Benchmarks/Config.cs | 2 +- .../TestUtilities/TestEnvironment.Features.cs | 54 ------------- 4 files changed, 86 insertions(+), 67 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs delete mode 100644 tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs index ae1b23df92..814c910383 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -6,26 +6,18 @@ using SixLabors.ImageSharp.Formats.Jpeg.Components; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations { + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Block8x8F_Transpose { private static readonly Block8x8F Source = Create8x8FloatData(); - [Benchmark(Baseline=true)] - public void 
TransposeIntoVector4() + [Benchmark(Baseline = true)] + public void TransposeInto() { var dest = default(Block8x8F); - Source.TransposeIntoFallback(ref dest); + Source.TransposeInto(ref dest); } -#if SUPPORTS_RUNTIME_INTRINSICS - [Benchmark] - public void TransposeIntoAvx() - { - var dest = default(Block8x8F); - Source.TransposeIntoAvx(ref dest); - } -#endif - private static Block8x8F Create8x8FloatData() { var result = new float[64]; diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs new file mode 100644 index 0000000000..e860c5491f --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -0,0 +1,81 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using BenchmarkDotNet.Environments; +using BenchmarkDotNet.Jobs; + +namespace SixLabors.ImageSharp.Benchmarks +{ + public partial class Config + { + private const string On = "1"; + private const string Off = "0"; + + // See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861 + // * EnableHWIntrinsic + // * EnableSSE + // * EnableSSE2 + // * EnableAES + // * EnablePCLMULQDQ + // * EnableSSE3 + // * EnableSSSE3 + // * EnableSSE41 + // * EnableSSE42 + // * EnablePOPCNT + // * EnableAVX + // * EnableFMA + // * EnableAVX2 + // * EnableBMI1 + // * EnableBMI2 + // * EnableLZCNT + // + // `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things + // like `LZCNT`, `BMI1`, or `BMI2` + // `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3` + private const string EnableAES = "COMPlus_EnableAES"; + private const string EnableAVX = "COMPlus_EnableAVX"; + private const string EnableAVX2 = "COMPlus_EnableAVX2"; + private const string EnableBMI1 = "COMPlus_EnableBMI1"; + private const string EnableBMI2 = "COMPlus_EnableBMI2"; + private 
const string EnableFMA = "COMPlus_EnableFMA"; + private const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; + private const string EnableLZCNT = "COMPlus_EnableLZCNT"; + private const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; + private const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; + private const string EnableSSE = "COMPlus_EnableSSE"; + private const string EnableSSE2 = "COMPlus_EnableSSE2"; + private const string EnableSSE3 = "COMPlus_EnableSSE3"; + private const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; + private const string EnableSSE41 = "COMPlus_EnableSSE41"; + private const string EnableSSE42 = "COMPlus_EnableSSE42"; + private const string EnableSSSE3 = "COMPlus_EnableSSSE3"; + private const string FeatureSIMD = "COMPlus_FeatureSIMD"; + + public class HwIntrinsics_SSE_AVX : Config + { + public HwIntrinsics_SSE_AVX() + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithId("AVX").AsBaseline()); + } + + if (Sse.IsSupported) + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off)) + .WithId("SSE")); + } +#endif + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables(new EnvironmentVariable(EnableHWIntrinsic, Off)) + .WithId("No HwIntrinsics")); + } + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Config.cs b/tests/ImageSharp.Benchmarks/Config.cs index f9240779b9..53271f522d 100644 --- a/tests/ImageSharp.Benchmarks/Config.cs +++ b/tests/ImageSharp.Benchmarks/Config.cs @@ -12,7 +12,7 @@ using BenchmarkDotNet.Jobs; namespace SixLabors.ImageSharp.Benchmarks { - public class Config : ManualConfig + public partial class Config : ManualConfig { public Config() { diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs deleted file mode 100644 index 3568c1e5dc..0000000000 
--- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -namespace SixLabors.ImageSharp.Tests -{ - public static partial class TestEnvironment - { - internal static class Features - { - public const string On = "1"; - public const string Off = "0"; - - // See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861 - // * EnableHWIntrinsic - // * EnableSSE - // * EnableSSE2 - // * EnableAES - // * EnablePCLMULQDQ - // * EnableSSE3 - // * EnableSSSE3 - // * EnableSSE41 - // * EnableSSE42 - // * EnablePOPCNT - // * EnableAVX - // * EnableFMA - // * EnableAVX2 - // * EnableBMI1 - // * EnableBMI2 - // * EnableLZCNT - // - // `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things - // like `LZCNT`, `BMI1`, or `BMI2` - // `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3` - public const string EnableAES = "COMPlus_EnableAES"; - public const string EnableAVX = "COMPlus_EnableAVX"; - public const string EnableAVX2 = "COMPlus_EnableAVX2"; - public const string EnableBMI1 = "COMPlus_EnableBMI1"; - public const string EnableBMI2 = "COMPlus_EnableBMI2"; - public const string EnableFMA = "COMPlus_EnableFMA"; - public const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; - public const string EnableLZCNT = "COMPlus_EnableLZCNT"; - public const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; - public const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; - public const string EnableSSE = "COMPlus_EnableSSE"; - public const string EnableSSE2 = "COMPlus_EnableSSE2"; - public const string EnableSSE3 = "COMPlus_EnableSSE3"; - public const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; - public const string EnableSSE41 = "COMPlus_EnableSSE41"; - public const string EnableSSE42 = "COMPlus_EnableSSE42"; - public const string EnableSSSE3 = 
"COMPlus_EnableSSSE3"; - public const string FeatureSIMD = "COMPlus_FeatureSIMD"; - } - } -} From 75e0ffc522bd28245384fe81ee6e2af459662dd1 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 00:14:47 +0100 Subject: [PATCH 054/104] Use a single method for Block8x8F.TransposeInto. --- .../Jpeg/Components/Block8x8F.Generated.cs | 80 ------- .../Jpeg/Components/Block8x8F.Generated.tt | 32 --- .../Formats/Jpeg/Components/Block8x8F.cs | 211 +++++++++++------- 3 files changed, 135 insertions(+), 188 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs index 10cbee5e6f..6a336ad2b4 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs @@ -10,86 +10,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Fallback method to transpose a block into the destination block on non AVX supported CPUs. 
- /// - /// The destination block - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeIntoFallback(ref Block8x8F d) - { - d.V0L.X = V0L.X; - d.V1L.X = V0L.Y; - d.V2L.X = V0L.Z; - d.V3L.X = V0L.W; - d.V4L.X = V0R.X; - d.V5L.X = V0R.Y; - d.V6L.X = V0R.Z; - d.V7L.X = V0R.W; - - d.V0L.Y = V1L.X; - d.V1L.Y = V1L.Y; - d.V2L.Y = V1L.Z; - d.V3L.Y = V1L.W; - d.V4L.Y = V1R.X; - d.V5L.Y = V1R.Y; - d.V6L.Y = V1R.Z; - d.V7L.Y = V1R.W; - - d.V0L.Z = V2L.X; - d.V1L.Z = V2L.Y; - d.V2L.Z = V2L.Z; - d.V3L.Z = V2L.W; - d.V4L.Z = V2R.X; - d.V5L.Z = V2R.Y; - d.V6L.Z = V2R.Z; - d.V7L.Z = V2R.W; - - d.V0L.W = V3L.X; - d.V1L.W = V3L.Y; - d.V2L.W = V3L.Z; - d.V3L.W = V3L.W; - d.V4L.W = V3R.X; - d.V5L.W = V3R.Y; - d.V6L.W = V3R.Z; - d.V7L.W = V3R.W; - - d.V0R.X = V4L.X; - d.V1R.X = V4L.Y; - d.V2R.X = V4L.Z; - d.V3R.X = V4L.W; - d.V4R.X = V4R.X; - d.V5R.X = V4R.Y; - d.V6R.X = V4R.Z; - d.V7R.X = V4R.W; - - d.V0R.Y = V5L.X; - d.V1R.Y = V5L.Y; - d.V2R.Y = V5L.Z; - d.V3R.Y = V5L.W; - d.V4R.Y = V5R.X; - d.V5R.Y = V5R.Y; - d.V6R.Y = V5R.Z; - d.V7R.Y = V5R.W; - - d.V0R.Z = V6L.X; - d.V1R.Z = V6L.Y; - d.V2R.Z = V6L.Z; - d.V3R.Z = V6L.W; - d.V4R.Z = V6R.X; - d.V5R.Z = V6R.Y; - d.V6R.Z = V6R.Z; - d.V7R.Z = V6R.W; - - d.V0R.W = V7L.X; - d.V1R.W = V7L.Y; - d.V2R.W = V7L.Z; - d.V3R.W = V7L.W; - d.V4R.W = V7R.X; - d.V5R.W = V7R.Y; - d.V6R.W = V7R.Z; - d.V7R.W = V7R.W; - } - /// /// Level shift by +maximum/2, clip to [0, maximum] /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt index f47d9106ee..26cd5c2ac4 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt @@ -23,38 +23,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Fallback method to transpose a block into the destination block on non AVX supported CPUs. 
- /// - /// The destination block - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeIntoFallback(ref Block8x8F d) - { - <# - PushIndent(" "); - - for (int i = 0; i < 8; i++) - { - char destCoord = coordz[i % 4]; - char destSide = (i / 4) % 2 == 0 ? 'L' : 'R'; - - for (int j = 0; j < 8; j++) - { - if(i > 0 && j == 0){ - WriteLine(""); - } - - char srcCoord = coordz[j % 4]; - char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R'; - - var expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord};\r\n"; - Write(expression); - } - } - PopIndent(); - #> - } - /// /// Level shift by +maximum/2, clip to [0, maximum] /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 547e116230..ccdba48857 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -611,87 +611,146 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components #if SUPPORTS_RUNTIME_INTRINSICS if (Avx.IsSupported) { - this.TransposeIntoAvx(ref d); + // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536 + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref this.V0L).ToVector256(), + Unsafe.As>(ref this.V4L), + 1); + + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref this.V1L).ToVector256(), + Unsafe.As>(ref this.V5L), + 1); + + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref this.V2L).ToVector256(), + Unsafe.As>(ref this.V6L), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref this.V3L).ToVector256(), + Unsafe.As>(ref this.V7L), + 1); + + Vector256 r4 = Avx.InsertVector128( + Unsafe.As>(ref this.V0R).ToVector256(), + Unsafe.As>(ref this.V4R), + 1); + + Vector256 r5 = Avx.InsertVector128( + Unsafe.As>(ref this.V1R).ToVector256(), + Unsafe.As>(ref this.V5R), + 1); + + Vector256 r6 = Avx.InsertVector128( + Unsafe.As>(ref this.V2R).ToVector256(), + Unsafe.As>(ref this.V6R), + 1); + + 
Vector256 r7 = Avx.InsertVector128( + Unsafe.As>(ref this.V3R).ToVector256(), + Unsafe.As>(ref this.V7R), + 1); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackLow(r2, r3); + Vector256 v = Avx.Shuffle(t0, t2, 0x4E); + Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); + Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); + + Vector256 t4 = Avx.UnpackLow(r4, r5); + Vector256 t6 = Avx.UnpackLow(r6, r7); + v = Avx.Shuffle(t4, t6, 0x4E); + Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); + Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); + + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t3 = Avx.UnpackHigh(r2, r3); + v = Avx.Shuffle(t1, t3, 0x4E); + Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); + Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); + + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t7 = Avx.UnpackHigh(r6, r7); + v = Avx.Shuffle(t5, t7, 0x4E); + Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); + Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); } else #endif { - this.TransposeIntoFallback(ref d); + d.V0L.X = this.V0L.X; + d.V1L.X = this.V0L.Y; + d.V2L.X = this.V0L.Z; + d.V3L.X = this.V0L.W; + d.V4L.X = this.V0R.X; + d.V5L.X = this.V0R.Y; + d.V6L.X = this.V0R.Z; + d.V7L.X = this.V0R.W; + + d.V0L.Y = this.V1L.X; + d.V1L.Y = this.V1L.Y; + d.V2L.Y = this.V1L.Z; + d.V3L.Y = this.V1L.W; + d.V4L.Y = this.V1R.X; + d.V5L.Y = this.V1R.Y; + d.V6L.Y = this.V1R.Z; + d.V7L.Y = this.V1R.W; + + d.V0L.Z = this.V2L.X; + d.V1L.Z = this.V2L.Y; + d.V2L.Z = this.V2L.Z; + d.V3L.Z = this.V2L.W; + d.V4L.Z = this.V2R.X; + d.V5L.Z = this.V2R.Y; + d.V6L.Z = this.V2R.Z; + d.V7L.Z = this.V2R.W; + + d.V0L.W = this.V3L.X; + d.V1L.W = this.V3L.Y; + d.V2L.W = this.V3L.Z; + d.V3L.W = this.V3L.W; + d.V4L.W = this.V3R.X; + d.V5L.W = this.V3R.Y; + d.V6L.W = this.V3R.Z; + d.V7L.W = this.V3R.W; + + d.V0R.X = this.V4L.X; + d.V1R.X = this.V4L.Y; + d.V2R.X = this.V4L.Z; + d.V3R.X = this.V4L.W; + d.V4R.X = this.V4R.X; + d.V5R.X = this.V4R.Y; + d.V6R.X = this.V4R.Z; + d.V7R.X = 
this.V4R.W; + + d.V0R.Y = this.V5L.X; + d.V1R.Y = this.V5L.Y; + d.V2R.Y = this.V5L.Z; + d.V3R.Y = this.V5L.W; + d.V4R.Y = this.V5R.X; + d.V5R.Y = this.V5R.Y; + d.V6R.Y = this.V5R.Z; + d.V7R.Y = this.V5R.W; + + d.V0R.Z = this.V6L.X; + d.V1R.Z = this.V6L.Y; + d.V2R.Z = this.V6L.Z; + d.V3R.Z = this.V6L.W; + d.V4R.Z = this.V6R.X; + d.V5R.Z = this.V6R.Y; + d.V6R.Z = this.V6R.Z; + d.V7R.Z = this.V6R.W; + + d.V0R.W = this.V7L.X; + d.V1R.W = this.V7L.Y; + d.V2R.W = this.V7L.Z; + d.V3R.W = this.V7L.W; + d.V4R.W = this.V7R.X; + d.V5R.W = this.V7R.Y; + d.V6R.W = this.V7R.Z; + d.V7R.W = this.V7R.W; } } - -#if SUPPORTS_RUNTIME_INTRINSICS - /// - /// AVX-only variant for executing . - /// - /// - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeIntoAvx(ref Block8x8F d) - { - Vector256 r0 = Avx.InsertVector128( - Unsafe.As>(ref this.V0L).ToVector256(), - Unsafe.As>(ref this.V4L), - 1); - - Vector256 r1 = Avx.InsertVector128( - Unsafe.As>(ref this.V1L).ToVector256(), - Unsafe.As>(ref this.V5L), - 1); - - Vector256 r2 = Avx.InsertVector128( - Unsafe.As>(ref this.V2L).ToVector256(), - Unsafe.As>(ref this.V6L), - 1); - - Vector256 r3 = Avx.InsertVector128( - Unsafe.As>(ref this.V3L).ToVector256(), - Unsafe.As>(ref this.V7L), - 1); - - Vector256 r4 = Avx.InsertVector128( - Unsafe.As>(ref this.V0R).ToVector256(), - Unsafe.As>(ref this.V4R), - 1); - - Vector256 r5 = Avx.InsertVector128( - Unsafe.As>(ref this.V1R).ToVector256(), - Unsafe.As>(ref this.V5R), - 1); - - Vector256 r6 = Avx.InsertVector128( - Unsafe.As>(ref this.V2R).ToVector256(), - Unsafe.As>(ref this.V6R), - 1); - - Vector256 r7 = Avx.InsertVector128( - Unsafe.As>(ref this.V3R).ToVector256(), - Unsafe.As>(ref this.V7R), - 1); - - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t2 = Avx.UnpackLow(r2, r3); - Vector256 v = Avx.Shuffle(t0, t2, 0x4E); - Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); - Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); - - Vector256 t4 = Avx.UnpackLow(r4, r5); - Vector256 t6 = 
Avx.UnpackLow(r6, r7); - v = Avx.Shuffle(t4, t6, 0x4E); - Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); - Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); - - Vector256 t1 = Avx.UnpackHigh(r0, r1); - Vector256 t3 = Avx.UnpackHigh(r2, r3); - v = Avx.Shuffle(t1, t3, 0x4E); - Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); - Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); - - Vector256 t5 = Avx.UnpackHigh(r4, r5); - Vector256 t7 = Avx.UnpackHigh(r6, r7); - v = Avx.Shuffle(t5, t7, 0x4E); - Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); - Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); - } -#endif } } From a79e6d3425a53a17891997b6cc3147d5c0b5f02b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 00:20:41 +0100 Subject: [PATCH 055/104] Enable all test platforms --- .github/workflows/build-and-test.yml | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index ecb5ceb0ef..c8f3997946 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -17,23 +17,23 @@ jobs: - os: ubuntu-latest framework: netcoreapp3.1 runtime: -x64 + codecov: true + - os: windows-latest + framework: netcoreapp3.1 + runtime: -x64 + codecov: false + - os: windows-latest + framework: netcoreapp2.1 + runtime: -x64 + codecov: false + - os: windows-latest + framework: net472 + runtime: -x64 + codecov: false + - os: windows-latest + framework: net472 + runtime: -x86 codecov: false - # - os: windows-latest - # framework: netcoreapp3.1 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: netcoreapp2.1 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: net472 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: net472 - # runtime: -x86 - # codecov: false runs-on: ${{matrix.options.os}} if: "!contains(github.event.head_commit.message, '[skip ci]')" From 
aad4c1e89e6372e4a0dc066aca85caa78d6127eb Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 00:36:39 +0100 Subject: [PATCH 056/104] Remove method baseline property --- .../Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs index 814c910383..1d103cd1a0 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -11,7 +11,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations { private static readonly Block8x8F Source = Create8x8FloatData(); - [Benchmark(Baseline = true)] + [Benchmark] public void TransposeInto() { var dest = default(Block8x8F); From 7b53df11c82e6500596a598b5769aa7c0f48c1ec Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 01:05:40 +0100 Subject: [PATCH 057/104] Fix incorrect test on NET 32bit --- .../TestUtilities/Tests/FeatureTestRunnerTests.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index eea22592bc..646000120f 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -45,6 +45,11 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests [Fact] public void AllowsAllHwIntrinsicFeatures() { + if (!Vector.IsHardwareAccelerated) + { + return; + } + FeatureTestRunner.RunWithHwIntrinsicsFeature( () => Assert.True(Vector.IsHardwareAccelerated), HwIntrinsics.AllowAll); From 68d2406f4ead2fd3fa53a04e40bea9c279db19e9 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 18:02:14 
+0100 Subject: [PATCH 058/104] Optimize low hanging fruit and fix naming --- .../Jpeg/Components/Block8x8F.Generated.cs | 6 +- .../Jpeg/Components/Block8x8F.Generated.tt | 6 +- .../Formats/Jpeg/Components/Block8x8F.cs | 203 ++++++++++++------ .../Decoder/JpegBlockPostProcessor.cs | 4 +- .../Jpeg/Components/FastFloatingPointDCT.cs | 6 +- .../BlockOperations/Block8x8F_AddInPlace.cs | 21 ++ .../Block8x8F_MultiplyInPlaceBlock.cs | 37 ++++ .../Block8x8F_MultiplyInPlaceScalar.cs | 21 ++ .../Formats/Jpg/Block8x8FTests.cs | 160 ++++++++------ 9 files changed, 331 insertions(+), 133 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs index 6a336ad2b4..0efefc06b5 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs @@ -13,7 +13,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Level shift by +maximum/2, clip to [0, maximum] /// - public void NormalizeColorsInplace(float maximum) + public void NormalizeColorsInPlace(float maximum) { var CMin4 = new Vector4(0F); var CMax4 = new Vector4(maximum); @@ -38,10 +38,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } /// - /// AVX2-only variant for executing and in one step. + /// AVX2-only variant for executing and in one step. 
/// [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInplaceVector8(float maximum) + public void NormalizeColorsAndRoundInPlaceVector8(float maximum) { var off = new Vector(MathF.Ceiling(maximum / 2)); var max = new Vector(maximum); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt index 26cd5c2ac4..e5a62dc075 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Level shift by +maximum/2, clip to [0, maximum] /// - public void NormalizeColorsInplace(float maximum) + public void NormalizeColorsInPlace(float maximum) { var CMin4 = new Vector4(0F); var CMax4 = new Vector4(maximum); @@ -49,10 +49,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } /// - /// AVX2-only variant for executing and in one step. + /// AVX2-only variant for executing and in one step. /// [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInplaceVector8(float maximum) + public void NormalizeColorsAndRoundInPlaceVector8(float maximum) { var off = new Vector(MathF.Ceiling(maximum / 2)); var max = new Vector(maximum); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index ccdba48857..0dbdadbeb4 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -281,73 +281,156 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// The value to multiply by. 
[MethodImpl(InliningOptions.ShortMethod)] - public void MultiplyInplace(float value) - { - this.V0L *= value; - this.V0R *= value; - this.V1L *= value; - this.V1R *= value; - this.V2L *= value; - this.V2R *= value; - this.V3L *= value; - this.V3R *= value; - this.V4L *= value; - this.V4R *= value; - this.V5L *= value; - this.V5R *= value; - this.V6L *= value; - this.V6R *= value; - this.V7L *= value; - this.V7R *= value; + public void MultiplyInPlace(float value) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + var valueVec = Vector256.Create(value); + Unsafe.As>(ref this.V0L) = Avx.Multiply(Unsafe.As>(ref this.V0L), valueVec); + Unsafe.As>(ref this.V1L) = Avx.Multiply(Unsafe.As>(ref this.V1L), valueVec); + Unsafe.As>(ref this.V2L) = Avx.Multiply(Unsafe.As>(ref this.V2L), valueVec); + Unsafe.As>(ref this.V3L) = Avx.Multiply(Unsafe.As>(ref this.V3L), valueVec); + Unsafe.As>(ref this.V4L) = Avx.Multiply(Unsafe.As>(ref this.V4L), valueVec); + Unsafe.As>(ref this.V5L) = Avx.Multiply(Unsafe.As>(ref this.V5L), valueVec); + Unsafe.As>(ref this.V6L) = Avx.Multiply(Unsafe.As>(ref this.V6L), valueVec); + Unsafe.As>(ref this.V7L) = Avx.Multiply(Unsafe.As>(ref this.V7L), valueVec); + } + else +#endif + { + var valueVec = new Vector4(value); + this.V0L *= valueVec; + this.V0R *= valueVec; + this.V1L *= valueVec; + this.V1R *= valueVec; + this.V2L *= valueVec; + this.V2R *= valueVec; + this.V3L *= valueVec; + this.V3R *= valueVec; + this.V4L *= valueVec; + this.V4R *= valueVec; + this.V5L *= valueVec; + this.V5R *= valueVec; + this.V6L *= valueVec; + this.V6R *= valueVec; + this.V7L *= valueVec; + this.V7R *= valueVec; + } } /// /// Multiply all elements of the block by the corresponding elements of 'other'. 
/// [MethodImpl(InliningOptions.ShortMethod)] - public void MultiplyInplace(ref Block8x8F other) - { - this.V0L *= other.V0L; - this.V0R *= other.V0R; - this.V1L *= other.V1L; - this.V1R *= other.V1R; - this.V2L *= other.V2L; - this.V2R *= other.V2R; - this.V3L *= other.V3L; - this.V3R *= other.V3R; - this.V4L *= other.V4L; - this.V4R *= other.V4R; - this.V5L *= other.V5L; - this.V5R *= other.V5R; - this.V6L *= other.V6L; - this.V6R *= other.V6R; - this.V7L *= other.V7L; - this.V7R *= other.V7R; + public unsafe void MultiplyInPlace(ref Block8x8F other) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + Unsafe.As>(ref this.V0L) + = Avx.Multiply( + Unsafe.As>(ref this.V0L), + Unsafe.As>(ref other.V0L)); + + Unsafe.As>(ref this.V1L) + = Avx.Multiply( + Unsafe.As>(ref this.V1L), + Unsafe.As>(ref other.V1L)); + + Unsafe.As>(ref this.V2L) + = Avx.Multiply( + Unsafe.As>(ref this.V2L), + Unsafe.As>(ref other.V2L)); + + Unsafe.As>(ref this.V3L) + = Avx.Multiply( + Unsafe.As>(ref this.V3L), + Unsafe.As>(ref other.V3L)); + + Unsafe.As>(ref this.V4L) + = Avx.Multiply( + Unsafe.As>(ref this.V4L), + Unsafe.As>(ref other.V4L)); + + Unsafe.As>(ref this.V5L) + = Avx.Multiply( + Unsafe.As>(ref this.V5L), + Unsafe.As>(ref other.V5L)); + + Unsafe.As>(ref this.V6L) + = Avx.Multiply( + Unsafe.As>(ref this.V6L), + Unsafe.As>(ref other.V6L)); + + Unsafe.As>(ref this.V7L) + = Avx.Multiply( + Unsafe.As>(ref this.V7L), + Unsafe.As>(ref other.V7L)); + } + else +#endif + { + this.V0L *= other.V0L; + this.V0R *= other.V0R; + this.V1L *= other.V1L; + this.V1R *= other.V1R; + this.V2L *= other.V2L; + this.V2R *= other.V2R; + this.V3L *= other.V3L; + this.V3R *= other.V3R; + this.V4L *= other.V4L; + this.V4R *= other.V4R; + this.V5L *= other.V5L; + this.V5R *= other.V5R; + this.V6L *= other.V6L; + this.V6R *= other.V6R; + this.V7L *= other.V7L; + this.V7R *= other.V7R; + } } /// /// Adds a vector to all elements of the block. /// - /// The added vector + /// The added vector. 
[MethodImpl(InliningOptions.ShortMethod)] - public void AddToAllInplace(Vector4 diff) - { - this.V0L += diff; - this.V0R += diff; - this.V1L += diff; - this.V1R += diff; - this.V2L += diff; - this.V2R += diff; - this.V3L += diff; - this.V3R += diff; - this.V4L += diff; - this.V4R += diff; - this.V5L += diff; - this.V5R += diff; - this.V6L += diff; - this.V6R += diff; - this.V7L += diff; - this.V7R += diff; + public void AddInPlace(float value) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + var valueVec = Vector256.Create(value); + Unsafe.As>(ref this.V0L) = Avx.Add(Unsafe.As>(ref this.V0L), valueVec); + Unsafe.As>(ref this.V1L) = Avx.Add(Unsafe.As>(ref this.V1L), valueVec); + Unsafe.As>(ref this.V2L) = Avx.Add(Unsafe.As>(ref this.V2L), valueVec); + Unsafe.As>(ref this.V3L) = Avx.Add(Unsafe.As>(ref this.V3L), valueVec); + Unsafe.As>(ref this.V4L) = Avx.Add(Unsafe.As>(ref this.V4L), valueVec); + Unsafe.As>(ref this.V5L) = Avx.Add(Unsafe.As>(ref this.V5L), valueVec); + Unsafe.As>(ref this.V6L) = Avx.Add(Unsafe.As>(ref this.V6L), valueVec); + Unsafe.As>(ref this.V7L) = Avx.Add(Unsafe.As>(ref this.V7L), valueVec); + } + else +#endif + { + var valueVec = new Vector4(value); + this.V0L += valueVec; + this.V0R += valueVec; + this.V1L += valueVec; + this.V1R += valueVec; + this.V2L += valueVec; + this.V2R += valueVec; + this.V3L += valueVec; + this.V3R += valueVec; + this.V4L += valueVec; + this.V4R += valueVec; + this.V5L += valueVec; + this.V5R += valueVec; + this.V6L += valueVec; + this.V6R += valueVec; + this.V7L += valueVec; + this.V7R += valueVec; + } } /// @@ -468,23 +551,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Level shift by +maximum/2, clip to [0..maximum], and round all the values in the block. 
/// - public void NormalizeColorsAndRoundInplace(float maximum) + public void NormalizeColorsAndRoundInPlace(float maximum) { if (SimdUtils.HasVector8) { - this.NormalizeColorsAndRoundInplaceVector8(maximum); + this.NormalizeColorsAndRoundInPlaceVector8(maximum); } else { - this.NormalizeColorsInplace(maximum); - this.RoundInplace(); + this.NormalizeColorsInPlace(maximum); + this.RoundInPlace(); } } /// /// Rounds all values in the block. /// - public void RoundInplace() + public void RoundInPlace() { for (int i = 0; i < Size; i++) { diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs index 40683e25a9..e0311dafef 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs @@ -81,14 +81,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder b.LoadFrom(ref sourceBlock); // Dequantize: - b.MultiplyInplace(ref this.DequantiazationTable); + b.MultiplyInPlace(ref this.DequantiazationTable); FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2); // To conform better to libjpeg we actually NEED TO loose precision here. // This is because they store blocks as Int16 between all the operations. // To be "more accurate", we need to emulate this by rounding! 
- this.WorkspaceBlock1.NormalizeColorsAndRoundInplace(maximumValue); + this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue); this.WorkspaceBlock1.ScaledCopyTo( ref destAreaOrigin, diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index d0b373609b..a6d0622dd8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -61,7 +61,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components IDCT8x4_RightPart(ref temp, ref dest); // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? - dest.MultiplyInplace(C_0_125); + dest.MultiplyInPlace(C_0_125); } /// @@ -324,7 +324,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components src.TransposeInto(ref temp); if (offsetSourceByNeg128) { - temp.AddToAllInplace(new Vector4(-128)); + temp.AddInPlace(-128F); } FDCT8x4_LeftPart(ref temp, ref dest); @@ -335,7 +335,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components FDCT8x4_LeftPart(ref temp, ref dest); FDCT8x4_RightPart(ref temp, ref dest); - dest.MultiplyInplace(C_0_125); + dest.MultiplyInPlace(C_0_125); } } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs new file mode 100644 index 0000000000..61fb2745b9 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs @@ -0,0 +1,21 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_AddInPlace + { + [Benchmark] + public float AddInplace() + { + float f = 42F; + Block8x8F b = default; + b.AddInPlace(f); + return f; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs new file mode 100644 index 0000000000..0d1e67112f --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs @@ -0,0 +1,37 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_MultiplyInPlaceBlock + { + private static readonly Block8x8F Source = Create8x8FloatData(); + + [Benchmark] + public void MultiplyInPlaceBlock() + { + Block8x8F dest = default; + Source.MultiplyInPlace(ref dest); + } + + private static Block8x8F Create8x8FloatData() + { + var result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[(i * 8) + j] = (i * 10) + j; + } + } + + var source = default(Block8x8F); + source.LoadFrom(result); + return source; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs new file mode 100644 index 0000000000..31a6ca713f --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs @@ -0,0 +1,21 @@ +// Copyright (c) Six Labors. 
+// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_MultiplyInPlaceScalar + { + [Benchmark] + public float MultiplyInPlaceScalar() + { + float f = 42F; + Block8x8F b = default; + b.MultiplyInPlace(f); + return f; + } + } +} diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 5482380885..927d7c2528 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -5,6 +5,7 @@ // #define BENCHMARKING using System; using System.Diagnostics; + using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -44,20 +45,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => + { + var block = default(Block8x8F); + + for (int i = 0; i < Block8x8F.Size; i++) { - var block = default(Block8x8F); - - for (int i = 0; i < Block8x8F.Size; i++) - { - block[i] = i; - } - - sum = 0; - for (int i = 0; i < Block8x8F.Size; i++) - { - sum += block[i]; - } - }); + block[i] = i; + } + + sum = 0; + for (int i = 0; i < Block8x8F.Size; i++) + { + sum += block[i]; + } + }); Assert.Equal(sum, 64f * 63f * 0.5f); } @@ -69,20 +70,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => + { + // Block8x8F block = new Block8x8F(); + float[] block = new float[64]; + for (int i = 0; i < Block8x8F.Size; i++) { - // Block8x8F block = new Block8x8F(); - float[] block = new float[64]; - for (int i = 0; i < Block8x8F.Size; i++) - { - block[i] = i; - } - - sum = 0; - for (int i = 0; i < Block8x8F.Size; i++) - { - sum += block[i]; - } - }); + block[i] = i; + } + + sum = 0; + for (int i = 0; i < 
Block8x8F.Size; i++) + { + sum += block[i]; + } + }); Assert.Equal(sum, 64f * 63f * 0.5f); } @@ -100,11 +101,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var b = default(Block8x8F); - b.LoadFrom(data); - b.ScaledCopyTo(mirror); - }); + { + var b = default(Block8x8F); + b.LoadFrom(data); + b.ScaledCopyTo(mirror); + }); Assert.Equal(data, mirror); @@ -125,11 +126,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var b = default(Block8x8F); - Block8x8F.LoadFrom(&b, data); - Block8x8F.ScaledCopyTo(&b, mirror); - }); + { + var b = default(Block8x8F); + Block8x8F.LoadFrom(&b, data); + Block8x8F.ScaledCopyTo(&b, mirror); + }); Assert.Equal(data, mirror); @@ -150,11 +151,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var v = default(Block8x8F); - v.LoadFrom(data); - v.ScaledCopyTo(mirror); - }); + { + var v = default(Block8x8F); + v.LoadFrom(data); + v.ScaledCopyTo(mirror); + }); Assert.Equal(data, mirror); @@ -234,7 +235,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.PrintLinearData(input); Block8x8F dest = block; - dest.NormalizeColorsInplace(255); + dest.NormalizeColorsInPlace(255); float[] array = new float[64]; dest.ScaledCopyTo(array); @@ -259,11 +260,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Block8x8F source = CreateRandomFloatBlock(-200, 200, seed); Block8x8F expected = source; - expected.NormalizeColorsInplace(255); - expected.RoundInplace(); + expected.NormalizeColorsInPlace(255); + expected.RoundInPlace(); Block8x8F actual = source; - actual.NormalizeColorsAndRoundInplaceVector8(255); + actual.NormalizeColorsAndRoundInPlaceVector8(255); this.Output.WriteLine(expected.ToString()); this.Output.WriteLine(actual.ToString()); @@ -324,12 +325,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(1)] [InlineData(2)] [InlineData(3)] - public void RoundInplaceSlow(int seed) + public void RoundInPlaceSlow(int seed) 
{ Block8x8F s = CreateRandomFloatBlock(-500, 500, seed); Block8x8F d = s; - d.RoundInplace(); + d.RoundInPlace(); this.Output.WriteLine(s.ToString()); this.Output.WriteLine(d.ToString()); @@ -344,19 +345,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void MultiplyInplace_ByOtherBlock() + public void MultiplyInPlace_ByOtherBlock() { - Block8x8F original = CreateRandomFloatBlock(-500, 500, 42); - Block8x8F m = CreateRandomFloatBlock(-500, 500, 42); + static void RunTest() + { + Block8x8F original = CreateRandomFloatBlock(-500, 500, 42); + Block8x8F m = CreateRandomFloatBlock(-500, 500, 42); - Block8x8F actual = original; + Block8x8F actual = original; - actual.MultiplyInplace(ref m); + actual.MultiplyInPlace(ref m); - for (int i = 0; i < Block8x8F.Size; i++) - { - Assert.Equal(original[i] * m[i], actual[i]); + for (int i = 0; i < Block8x8F.Size; i++) + { + Assert.Equal(original[i] * m[i], actual[i]); + } } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } [Theory] @@ -396,23 +404,51 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data); - actual.MultiplyInplace(ref zigQt); + actual.MultiplyInPlace(ref zigQt); this.CompareBlocks(expected, actual, 0); } [Fact] - public void MultiplyInplace_ByScalar() + public void AddToAllInPlace() { - Block8x8F original = CreateRandomFloatBlock(-500, 500); + static void RunTest() + { + Block8x8F original = CreateRandomFloatBlock(-500, 500); - Block8x8F actual = original; - actual.MultiplyInplace(42f); + Block8x8F actual = original; + actual.AddInPlace(42f); - for (int i = 0; i < 64; i++) + for (int i = 0; i < 64; i++) + { + Assert.Equal(original[i] + 42f, actual[i]); + } + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); + } + + [Fact] + public void MultiplyInPlace_ByScalar() + { + static void RunTest() { - 
Assert.Equal(original[i] * 42f, actual[i]); + Block8x8F original = CreateRandomFloatBlock(-500, 500); + + Block8x8F actual = original; + actual.MultiplyInPlace(42f); + + for (int i = 0; i < 64; i++) + { + Assert.Equal(original[i] * 42f, actual[i]); + } } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } [Fact] From 5fadafe1689a2715b0f25458590c8a3ad743ddc7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 18:22:39 +0100 Subject: [PATCH 059/104] Fix warning --- .../Formats/Jpg/JpegDecoderTests.Progressive.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index 98421ca5d4..cc23a45fcb 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -54,8 +54,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg RemoteExecutor.Invoke( RunTest, providerDump, - "Disco") - .Dispose(); + "Disco").Dispose(); } } } From 102876b60d417a1513eecbaf0d4c50f0cb856a7c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 22:18:00 +0100 Subject: [PATCH 060/104] Disable NetNative optimization on UWP. 
Fix #1204 --- ...lHistogramEqualizationProcessor{TPixel}.cs | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs index 19514c4b6f..274376671b 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs @@ -106,15 +106,23 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization } /// +#if NETSTANDARD2_0 + // https://github.com/SixLabors/ImageSharp/issues/1204 + [MethodImpl(MethodImplOptions.NoOptimization)] +#else [MethodImpl(InliningOptions.ShortMethod)] +#endif public void Invoke(int y) { ref int histogramBase = ref MemoryMarshal.GetReference(this.histogramBuffer.GetSpan()); ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + int levels = this.luminanceLevels; for (int x = 0; x < this.bounds.Width; x++) { - int luminance = GetLuminance(Unsafe.Add(ref pixelBase, x), this.luminanceLevels); + // TODO: We should bulk convert here. 
+ var vector = Unsafe.Add(ref pixelBase, x).ToVector4(); + int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); Unsafe.Add(ref histogramBase, luminance)++; } } @@ -147,18 +155,27 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization } /// +#if NETSTANDARD2_0 + // https://github.com/SixLabors/ImageSharp/issues/1204 + [MethodImpl(MethodImplOptions.NoOptimization)] +#else [MethodImpl(InliningOptions.ShortMethod)] +#endif public void Invoke(int y) { ref int cdfBase = ref MemoryMarshal.GetReference(this.cdfBuffer.GetSpan()); ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + int levels = this.luminanceLevels; + float noOfPixelsMinusCdfMin = this.numberOfPixelsMinusCdfMin; for (int x = 0; x < this.bounds.Width; x++) { + // TODO: We should bulk convert here. ref TPixel pixel = ref Unsafe.Add(ref pixelBase, x); - int luminance = GetLuminance(pixel, this.luminanceLevels); - float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / this.numberOfPixelsMinusCdfMin; - pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + var vector = pixel.ToVector4(); + int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); + float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / noOfPixelsMinusCdfMin; + pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, vector.W)); } } } From abe1263b17e2c347d0d436d1176fe2d89defb0e2 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 20 Oct 2020 18:42:30 +0100 Subject: [PATCH 061/104] Auto repair Png options to use Bit8. 
Fixes #935 --- .../Formats/Png/PngEncoderOptionsHelpers.cs | 22 +++++++++---------- .../Formats/Png/PngEncoderTests.cs | 11 ++++++++++ tests/ImageSharp.Tests/TestImages.cs | 3 +++ tests/Images/Input/Png/issues/Issue_935.png | 3 +++ 4 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 tests/Images/Input/Png/issues/Issue_935.png diff --git a/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs b/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs index b0311f0887..9342e09dfe 100644 --- a/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs +++ b/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs @@ -35,6 +35,15 @@ namespace SixLabors.ImageSharp.Formats.Png options.ColorType ??= pngMetadata.ColorType ?? SuggestColorType(); options.BitDepth ??= pngMetadata.BitDepth ?? SuggestBitDepth(); + // Ensure bit depth and color type are a supported combination. + // Bit8 is the only bit depth supported by all color types. + byte bits = (byte)options.BitDepth; + byte[] validBitDepths = PngConstants.ColorTypes[options.ColorType.Value]; + if (Array.IndexOf(validBitDepths, bits) == -1) + { + options.BitDepth = PngBitDepth.Bit8; + } + options.InterlaceMethod ??= pngMetadata.InterlaceMethod; use16Bit = options.BitDepth == PngBitDepth.Bit16; @@ -44,12 +53,6 @@ namespace SixLabors.ImageSharp.Formats.Png { options.ChunkFilter = PngChunkFilter.ExcludeAll; } - - // Ensure we are not allowing impossible combinations. - if (!PngConstants.ColorTypes.ContainsKey(options.ColorType.Value)) - { - throw new NotSupportedException("Color type is not supported or not valid."); - } } /// @@ -68,15 +71,10 @@ namespace SixLabors.ImageSharp.Formats.Png return null; } - byte bits = (byte)options.BitDepth; - if (Array.IndexOf(PngConstants.ColorTypes[options.ColorType.Value], bits) == -1) - { - throw new NotSupportedException("Bit depth is not supported or not valid."); - } - // Use the metadata to determine what quantization depth to use if no quantizer has been set. 
if (options.Quantizer is null) { + byte bits = (byte)options.BitDepth; var maxColors = ImageMaths.GetColorCountForBitDepth(bits); options.Quantizer = new WuQuantizer(new QuantizerOptions { MaxColors = maxColors }); } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 9ba956d722..465bed8a16 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -556,6 +556,17 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png provider); } + [Fact] + public void EncodeFixesInvalidOptions() + { + // https://github.com/SixLabors/ImageSharp/issues/935 + using var ms = new MemoryStream(); + var testFile = TestFile.Create(TestImages.Png.Issue935); + using Image image = testFile.CreateRgba32Image(new PngDecoder()); + + image.Save(ms, new PngEncoder { ColorType = PngColorType.RgbWithAlpha }); + } + private static void TestPngEncoderCore( TestImageProvider provider, PngColorType pngColorType, diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs index fd5296c375..dce36bb0fb 100644 --- a/tests/ImageSharp.Tests/TestImages.cs +++ b/tests/ImageSharp.Tests/TestImages.cs @@ -107,6 +107,9 @@ namespace SixLabors.ImageSharp.Tests public const string Issue1177_1 = "Png/issues/Issue_1177_1.png"; public const string Issue1177_2 = "Png/issues/Issue_1177_2.png"; + // Issue 935: https://github.com/SixLabors/ImageSharp/issues/935 + public const string Issue935 = "Png/issues/Issue_935.png"; + public static class Bad { public const string MissingDataChunk = "Png/xdtn0g01.png"; diff --git a/tests/Images/Input/Png/issues/Issue_935.png b/tests/Images/Input/Png/issues/Issue_935.png new file mode 100644 index 0000000000..9f9e84dc3c --- /dev/null +++ b/tests/Images/Input/Png/issues/Issue_935.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9c5cdacc9bedf481c883828de5bfb7902e2bec038fff08830171cf7075e4f9 
+size 870 From f493aa4efa5ed84f1cd2c659e19bb5fa9177d82b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 00:00:46 +0100 Subject: [PATCH 062/104] Implement SimdUtils.HwIntrisics --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 276 ++++++++++++++++++ src/ImageSharp/Common/Helpers/SimdUtils.cs | 7 +- .../Color/Bulk/FromVector4.cs | 4 +- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 20 +- 4 files changed, 295 insertions(+), 12 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs new file mode 100644 index 0000000000..2fe2f99ac6 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -0,0 +1,276 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + public static class HwIntrinsics + { + private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + + /// + /// as many elements as possible, slicing them down (keeping the remainder). 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void ByteToNormalizedFloatReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (Avx2.IsSupported || Sse2.IsSupported) + { + int remainder; + if (Avx2.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + ByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Implementation , which is faster on new RyuJIT runtime. + /// + /// + /// Implementation is based on MagicScaler code: + /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182 + /// + internal static unsafe void ByteToNormalizedFloat( + ReadOnlySpan source, + Span dest) + { + if (Avx2.IsSupported) + { + VerifySpanInput(source, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector256.Create(1 / (float)byte.MaxValue); + + for (int i = 0; i < n; i++) + { + int si = Vector256.Count * i; + Vector256 i0 = Avx2.ConvertToVector256Int32(sourceBase + si); + Vector256 i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256.Count); + Vector256 i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 2)); + Vector256 i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 3)); + + Vector256 f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0)); + Vector256 f1 = 
Avx.Multiply(scale, Avx.ConvertToVector256Single(i1)); + Vector256 f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2)); + Vector256 f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3)); + + ref Vector256 d = ref Unsafe.Add(ref destBase, i * 4); + + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = f3; + } + } + else + { + // Sse + VerifySpanInput(source, dest, Vector128.Count); + + int n = dest.Length / Vector128.Count; + + byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector128.Create(1 / (float)byte.MaxValue); + Vector128 zero = Vector128.Zero; + + for (int i = 0; i < n; i++) + { + int si = Vector128.Count * i; + + Vector128 i0, i1, i2, i3; + if (Sse41.IsSupported) + { + i0 = Sse41.ConvertToVector128Int32(sourceBase + si); + i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128.Count); + i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 2)); + i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 3)); + } + else + { + Vector128 b = Sse2.LoadVector128(sourceBase + si); + Vector128 s0 = Sse2.UnpackLow(b, zero).AsInt16(); + Vector128 s1 = Sse2.UnpackHigh(b, zero).AsInt16(); + + i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32(); + i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32(); + i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32(); + i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32(); + } + + Vector128 f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0)); + Vector128 f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1)); + Vector128 f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2)); + Vector128 f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3)); + + ref Vector128 d = ref Unsafe.Add(ref destBase, i * 4); + + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = 
f3; + } + } + } + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void NormalizedFloatToByteSaturateReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (Avx2.IsSupported || Sse2.IsSupported) + { + int remainder; + if (Avx2.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + NormalizedFloatToByteSaturate( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Implementation of , which is faster on new .NET runtime. + /// + /// + /// Implementation is based on MagicScaler code: + /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L541-L622 + /// + internal static void NormalizedFloatToByteSaturate( + ReadOnlySpan source, + Span dest) + { + if (Avx2.IsSupported) + { + VerifySpanInput(source, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector256.Create((float)byte.MaxValue); + ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32); + Vector256 mask = Unsafe.As>(ref maskBase); + + for (int i = 0; i < n; i++) + { + ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); + + Vector256 f0 = Avx.Multiply(scale, s); + Vector256 f1 = Avx.Multiply(scale, Unsafe.Add(ref s, 1)); + Vector256 f2 = Avx.Multiply(scale, 
Unsafe.Add(ref s, 2)); + Vector256 f3 = Avx.Multiply(scale, Unsafe.Add(ref s, 3)); + + Vector256 w0 = Avx.ConvertToVector256Int32(f0); + Vector256 w1 = Avx.ConvertToVector256Int32(f1); + Vector256 w2 = Avx.ConvertToVector256Int32(f2); + Vector256 w3 = Avx.ConvertToVector256Int32(f3); + + Vector256 u0 = Avx2.PackSignedSaturate(w0, w1); + Vector256 u1 = Avx2.PackSignedSaturate(w2, w3); + Vector256 b = Avx2.PackUnsignedSaturate(u0, u1); + b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte(); + + Unsafe.Add(ref destBase, i) = b; + } + } + else + { + // Sse + VerifySpanInput(source, dest, Vector128.Count); + + int n = dest.Length / Vector128.Count; + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector128.Create((float)byte.MaxValue); + + for (int i = 0; i < n; i++) + { + ref Vector128 s = ref Unsafe.Add(ref sourceBase, i * 4); + + Vector128 f0 = Sse.Multiply(scale, s); + Vector128 f1 = Sse.Multiply(scale, Unsafe.Add(ref s, 1)); + Vector128 f2 = Sse.Multiply(scale, Unsafe.Add(ref s, 2)); + Vector128 f3 = Sse.Multiply(scale, Unsafe.Add(ref s, 3)); + + Vector128 w0 = Sse2.ConvertToVector128Int32(f0); + Vector128 w1 = Sse2.ConvertToVector128Int32(f1); + Vector128 w2 = Sse2.ConvertToVector128Int32(f2); + Vector128 w3 = Sse2.ConvertToVector128Int32(f3); + + Vector128 u0 = Sse2.PackSignedSaturate(w0, w1); + Vector128 u1 = Sse2.PackSignedSaturate(w2, w3); + + Unsafe.Add(ref destBase, i) = Sse2.PackUnsignedSaturate(u0, u1); + } + } + } + } + } +} +#endif diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 7f917648dc..df533cedf1 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -79,8 +79,9 @@ namespace SixLabors.ImageSharp internal static void ByteToNormalizedFloat(ReadOnlySpan source, Span dest) { DebugGuard.IsTrue(source.Length 
== dest.Length, nameof(source), "Input spans must be of same length!"); - -#if SUPPORTS_EXTENDED_INTRINSICS +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest); +#elif SUPPORTS_EXTENDED_INTRINSICS ExtendedIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest); #else BasicIntrinsics256.ByteToNormalizedFloatReduce(ref source, ref dest); @@ -110,7 +111,7 @@ namespace SixLabors.ImageSharp DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); #if SUPPORTS_RUNTIME_INTRINSICS - Avx2Intrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); + HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); #elif SUPPORTS_EXTENDED_INTRINSICS ExtendedIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); #else diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index da15da24c7..267bca4ad0 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -104,12 +104,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk #if SUPPORTS_RUNTIME_INTRINSICS [Benchmark] - public void UseAvx2() + public void UseHwIntrinsics() { Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); - SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats); + SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats); } private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 6dce489353..eca4e72cba 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ 
b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -204,6 +204,17 @@ namespace SixLabors.ImageSharp.Tests.Common (s, d) => SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy32))] + public void HwIntrinsics_BulkConvertByteToNormalizedFloat(int count) + { + TestImpl_BulkConvertByteToNormalizedFloat( + count, + (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); + } +#endif + [Theory] [MemberData(nameof(ArbitraryArraySizes))] public void BulkConvertByteToNormalizedFloat(int count) @@ -281,16 +292,11 @@ namespace SixLabors.ImageSharp.Tests.Common [Theory] [MemberData(nameof(ArraySizesDivisibleBy32))] - public void Avx2_BulkConvertNormalizedFloatToByteClampOverflows(int count) + public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count) { - if (!System.Runtime.Intrinsics.X86.Avx2.IsSupported) - { - return; - } - TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( count, - (s, d) => SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); + (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); } #endif From 8e993394b44eb7a436311aa88c541fd30144cbdd Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 14:49:05 +0100 Subject: [PATCH 063/104] Benchmarks, tests, and cleanup. 
--- .../Helpers/SimdUtils.Avx2Intrinsics.cs | 103 ------------------ .../Color/Bulk/FromVector4.cs | 4 +- .../Color/Bulk/ToVector4_Rgba32.cs | 13 ++- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 30 +++-- .../FeatureTesting/FeatureTestRunner.cs | 50 +++++++++ 5 files changed, 86 insertions(+), 114 deletions(-) delete mode 100644 src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs deleted file mode 100644 index b56c92dab7..0000000000 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -#if SUPPORTS_RUNTIME_INTRINSICS - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; - -namespace SixLabors.ImageSharp -{ - internal static partial class SimdUtils - { - public static class Avx2Intrinsics - { - private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; - - /// - /// as many elements as possible, slicing them down (keeping the remainder). 
- /// - [MethodImpl(InliningOptions.ShortMethod)] - internal static void NormalizedFloatToByteSaturateReduce( - ref ReadOnlySpan source, - ref Span dest) - { - DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); - - if (Avx2.IsSupported) - { - int remainder = ImageMaths.ModuloP2(source.Length, Vector.Count); - int adjustedCount = source.Length - remainder; - - if (adjustedCount > 0) - { - NormalizedFloatToByteSaturate( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount)); - - source = source.Slice(adjustedCount); - dest = dest.Slice(adjustedCount); - } - } - } - - /// - /// Implementation of , which is faster on new .NET runtime. - /// - /// - /// Implementation is based on MagicScaler code: - /// https://github.com/saucecontrol/PhotoSauce/blob/a9bd6e5162d2160419f0cf743fd4f536c079170b/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L453-L477 - /// - internal static void NormalizedFloatToByteSaturate( - ReadOnlySpan source, - Span dest) - { - VerifySpanInput(source, dest, Vector256.Count); - - int n = dest.Length / Vector256.Count; - - ref Vector256 sourceBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - - var maxBytes = Vector256.Create(255f); - ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32); - Vector256 mask = Unsafe.As>(ref maskBase); - - for (int i = 0; i < n; i++) - { - ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); - - Vector256 f0 = s; - Vector256 f1 = Unsafe.Add(ref s, 1); - Vector256 f2 = Unsafe.Add(ref s, 2); - Vector256 f3 = Unsafe.Add(ref s, 3); - - Vector256 w0 = ConvertToInt32(f0, maxBytes); - Vector256 w1 = ConvertToInt32(f1, maxBytes); - Vector256 w2 = ConvertToInt32(f2, maxBytes); - Vector256 w3 = ConvertToInt32(f3, maxBytes); - - Vector256 u0 = Avx2.PackSignedSaturate(w0, w1); - Vector256 u1 = Avx2.PackSignedSaturate(w2, w3); - Vector256 b = 
Avx2.PackUnsignedSaturate(u0, u1); - b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte(); - - Unsafe.Add(ref destBase, i) = b; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 ConvertToInt32(Vector256 vf, Vector256 scale) - { - vf = Avx.Multiply(vf, scale); - return Avx.ConvertToVector256Int32(vf); - } - } - } -} -#endif diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index 267bca4ad0..dc030e07a7 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -13,15 +13,13 @@ using System.Runtime.Intrinsics.X86; #endif using BenchmarkDotNet.Attributes; -using BenchmarkDotNet.Environments; -using BenchmarkDotNet.Jobs; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.ShortCore31))] public abstract class FromVector4 where TPixel : unmanaged, IPixel { diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs index 145bf9889b..9ae3b073d4 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs @@ -13,7 +13,7 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.ShortCore31))] public class ToVector4_Rgba32 : ToVector4 { [Benchmark] @@ -52,6 +52,17 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(sBytes, dFloats); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Benchmark] + public void HwIntrinsics() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = 
MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.HwIntrinsics.ByteToNormalizedFloat(sBytes, dFloats); + } +#endif + // [Benchmark] public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops() { diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index eca4e72cba..838db742a1 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -7,7 +7,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Common.Tuples; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -209,9 +209,17 @@ namespace SixLabors.ImageSharp.Tests.Common [MemberData(nameof(ArraySizesDivisibleBy32))] public void HwIntrinsics_BulkConvertByteToNormalizedFloat(int count) { - TestImpl_BulkConvertByteToNormalizedFloat( - count, - (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); + static void RunTest(string serialized) + { + TestImpl_BulkConvertByteToNormalizedFloat( + FeatureTestRunner.Deserialize(serialized), + (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41, + count); } #endif @@ -294,9 +302,17 @@ namespace SixLabors.ImageSharp.Tests.Common [MemberData(nameof(ArraySizesDivisibleBy32))] public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count) { - TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( - count, - (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); + static void RunTest(string serialized) + { + TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + FeatureTestRunner.Deserialize(serialized), + (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); + } + + 
FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2, + count); } #endif diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index eb1714baad..fdba9ce982 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -33,6 +33,14 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities where T : IXunitSerializable => BasicSerializer.Deserialize(value); + /// + /// Allows the deserialization of integers passed to the feature test. + /// + /// The string value to deserialize. + /// The value. + public static int Deserialize(string value) + => Convert.ToInt32(value); + /// /// Runs the given test within an environment /// where the given features. @@ -201,6 +209,48 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities } } + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + int serializable) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + serializable.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. 
+ action(serializable.ToString()); + } + } + } + internal static Dictionary ToFeatureKeyValueCollection(this HwIntrinsics intrinsics) { // Loop through and translate the given values into COMPlus equivaluents From aecf80388cd4f8a33d709d2ff1f359de9cfa8319 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 17:59:57 +0100 Subject: [PATCH 064/104] Add Avx2 Span Premultiplication and Reverse --- src/ImageSharp/Common/Helpers/ImageMaths.cs | 6 ++ .../Common/Helpers/Vector4Utilities.cs | 80 ++++++++++++++++--- .../Color/Bulk/PremultiplyVector4.cs | 68 ++++++++++++++++ .../Color/Bulk/UnPremultiplyVector4.cs | 68 ++++++++++++++++ .../Helpers/ImageMathsTests.cs | 15 ++++ .../Helpers/Vector4UtilsTests.cs | 2 + 6 files changed, 229 insertions(+), 10 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs index 977432f8bb..d24230fe18 100644 --- a/src/ImageSharp/Common/Helpers/ImageMaths.cs +++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs @@ -132,6 +132,12 @@ namespace SixLabors.ImageSharp return (a / GreatestCommonDivisor(a, b)) * b; } + /// + /// Calculates % 2 + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static int Modulo2(int x) => x & 1; + /// /// Calculates % 4 /// diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index fccc50755d..848a917912 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -5,6 +5,10 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp { @@ -13,6 +17,10 @@ 
namespace SixLabors.ImageSharp /// internal static class Vector4Utilities { + private const int BlendAlphaControl = 0b10001000; + + private static ReadOnlySpan PermuteAlphaMask8x32 => new byte[] { 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0 }; + /// /// Restricts a vector between a minimum and a maximum value. /// 5x Faster then . @@ -56,13 +64,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + Vector256 mask = + Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); + + int n = (vectors.Length * 4) / Vector256.Count; + for (int i = 0; i < n; i++) + { + ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 multiply = Avx2.PermuteVar8x32(source, mask); + source = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ Premultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Premultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } } } @@ -73,13 +107,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + Vector256 mask = + Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); + + int n = (vectors.Length * 4) / Vector256.Count; + for (int i = 0; i < n; i++) + { + ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 multiply = Avx2.PermuteVar8x32(source, mask); + source = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
+ UnPremultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - UnPremultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs new file mode 100644 index 0000000000..2a886c6879 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class PremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void PremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } + } + + [Benchmark] + public void Premultiply() + { + Vector4Utilities.Premultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Premultiply(ref Vector4 source) + { + float w = source.W; + source *= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + } + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = 
GetRandomFloat(rnd, minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs new file mode 100644 index 0000000000..89e055da46 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class UnPremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void UnPremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } + } + + [Benchmark] + public void UnPremultiply() + { + Vector4Utilities.UnPremultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void UnPremultiply(ref Vector4 source) + { + float w = source.W; + source *= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + } + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = GetRandomFloat(rnd, 
minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs index 27689f6813..7d16623877 100644 --- a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs @@ -10,6 +10,21 @@ namespace SixLabors.ImageSharp.Tests.Helpers { public class ImageMathsTests { + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + [InlineData(4)] + [InlineData(100)] + [InlineData(123)] + [InlineData(53436353)] + public void Modulo2(int x) + { + int actual = ImageMaths.Modulo2(x); + Assert.Equal(x % 2, actual); + } + [Theory] [InlineData(0)] [InlineData(1)] diff --git a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs index c3b8e79ee2..2bb43c440b 100644 --- a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs @@ -17,6 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void Premultiply_VectorSpan(int length) { var rnd = new Random(42); @@ -36,6 +37,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void UnPremultiply_VectorSpan(int length) { var rnd = new Random(42); From 1067acbe4c57ba7fc601186ad58b1087380a8a69 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 22:22:47 +0100 Subject: [PATCH 065/104] Use Tanner's updated code. 
--- .../Common/Helpers/Vector4Utilities.cs | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index 848a917912..5ae7ac1b71 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -61,7 +61,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS @@ -73,12 +73,15 @@ namespace SixLabors.ImageSharp Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - int n = (vectors.Length * 4) / Vector256.Count; - for (int i = 0; i < n; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { - ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 source = vectorsBase; Vector256 multiply = Avx2.PermuteVar8x32(source, mask); - source = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } if (ImageMaths.Modulo2(vectors.Length) != 0) @@ -104,7 +107,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS @@ -116,12 +119,15 @@ namespace SixLabors.ImageSharp Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - int n = (vectors.Length * 4) 
/ Vector256.Count; - for (int i = 0; i < n; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { - ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 source = vectorsBase; Vector256 multiply = Avx2.PermuteVar8x32(source, mask); - source = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } if (ImageMaths.Modulo2(vectors.Length) != 0) From d4e0bdd7b7949072c6bc47e07301fce8ab5a96af Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 23:59:37 +0100 Subject: [PATCH 066/104] Remove hotpath attr --- src/ImageSharp/Common/Helpers/Vector4Utilities.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index 5ae7ac1b71..0137d02568 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -61,7 +61,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS @@ -107,7 +107,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS From e3faadbf2edac8a51d09bf593088f42a073bd60b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 22 Oct 2020 10:34:42 +0100 Subject: [PATCH 067/104] Use 
Avx.Shuffle for lower latency --- src/ImageSharp/Common/Helpers/Vector4Utilities.cs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index 0137d02568..f617e9a3ea 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -17,9 +17,8 @@ namespace SixLabors.ImageSharp /// internal static class Vector4Utilities { - private const int BlendAlphaControl = 0b10001000; - - private static ReadOnlySpan PermuteAlphaMask8x32 => new byte[] { 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0 }; + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; /// /// Restricts a vector between a minimum and a maximum value. @@ -70,16 +69,13 @@ namespace SixLabors.ImageSharp ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - Vector256 mask = - Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { Vector256 source = vectorsBase; - Vector256 multiply = Avx2.PermuteVar8x32(source, mask); + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } @@ -116,16 +112,13 @@ namespace SixLabors.ImageSharp ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - Vector256 mask = - Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 vectorsLast = ref Unsafe.Add(ref 
vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { Vector256 source = vectorsBase; - Vector256 multiply = Avx2.PermuteVar8x32(source, mask); + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } From 05b66da9f79a8faba536d3614469d7b477e93eaa Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 22 Oct 2020 10:53:14 +0100 Subject: [PATCH 068/104] Fix base unpremultiply benchmark --- tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs index 89e055da46..1312c767be 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs @@ -36,7 +36,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk private static void UnPremultiply(ref Vector4 source) { float w = source.W; - source *= w; + source /= w; source.W = w; } From 9629f1c16e87e1a960e9e635f1595ba2af7dae02 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 23 Oct 2020 14:03:33 +0100 Subject: [PATCH 069/104] Add AVX2 implementation --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 2 +- .../ColorConverters/JpegColorConverter.cs | 130 ++++++++++++------ .../Codecs/Jpeg/Vector4OctetPack.cs | 40 ++++++ .../Config.HwIntrinsics.cs | 4 +- 4 files changed, 134 insertions(+), 42 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 2fe2f99ac6..a51c21b37f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ 
b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -14,7 +14,7 @@ namespace SixLabors.ImageSharp { public static class HwIntrinsics { - private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + public static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; /// /// as many elements as possible, slicing them down (keeping the remainder). diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index f68bca0412..f2a1c1e91e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -4,7 +4,12 @@ using System; using System.Collections.Generic; using System.Numerics; - +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; @@ -190,45 +195,90 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b) { - this.V0.X = r.A.X; - this.V0.Y = g.A.X; - this.V0.Z = b.A.X; - this.V0.W = 1f; - - this.V1.X = r.A.Y; - this.V1.Y = g.A.Y; - this.V1.Z = b.A.Y; - this.V1.W = 1f; - - this.V2.X = r.A.Z; - this.V2.Y = g.A.Z; - this.V2.Z = b.A.Z; - this.V2.W = 1f; - - this.V3.X = r.A.W; - this.V3.Y = g.A.W; - this.V3.Z = b.A.W; - this.V3.W = 1f; - - this.V4.X = r.B.X; - this.V4.Y = g.B.X; - this.V4.Z = b.B.X; - this.V4.W = 1f; - - this.V5.X = r.B.Y; - this.V5.Y = g.B.Y; - this.V5.Z = b.B.Y; - this.V5.W = 1f; 
- - this.V6.X = r.B.Z; - this.V6.Y = g.B.Z; - this.V6.Z = b.B.Z; - this.V6.W = 1f; - - this.V7.X = r.B.W; - this.V7.Y = g.B.W; - this.V7.Z = b.B.W; - this.V7.W = 1f; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + Vector4 vo = Vector4.One; + Vector128 valpha = Unsafe.As>(ref vo); + + ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskDeinterleave8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref r.A).ToVector256(), + Unsafe.As>(ref g.A), + 1); + + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref b.A).ToVector256(), + valpha, + 1); + + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref r.B).ToVector256(), + Unsafe.As>(ref g.B), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref b.B).ToVector256(), + valpha, + 1); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackHigh(r0, r1); + + Unsafe.As>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol); + Unsafe.As>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol); + + Vector256 t4 = Avx.UnpackLow(r2, r3); + Vector256 t6 = Avx.UnpackHigh(r2, r3); + + Unsafe.As>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol); + Unsafe.As>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol); + } + else +#endif + { + this.V0.X = r.A.X; + this.V0.Y = g.A.X; + this.V0.Z = b.A.X; + this.V0.W = 1f; + + this.V1.X = r.A.Y; + this.V1.Y = g.A.Y; + this.V1.Z = b.A.Y; + this.V1.W = 1f; + + this.V2.X = r.A.Z; + this.V2.Y = g.A.Z; + this.V2.Z = b.A.Z; + this.V2.W = 1f; + + this.V3.X = r.A.W; + this.V3.Y = g.A.W; + this.V3.Z = b.A.W; + this.V3.W = 1f; + + this.V4.X = r.B.X; + this.V4.Y = g.B.X; + this.V4.Z = b.B.X; + this.V4.W = 1f; + + this.V5.X = r.B.Y; + this.V5.Y = g.B.Y; + this.V5.Z = b.B.Y; + this.V5.W = 1f; + + this.V6.X = r.B.Z; + this.V6.Y = g.B.Z; + this.V6.Z = b.B.Z; + this.V6.W = 1f; + + this.V7.X = r.B.W; + this.V7.Y = g.B.W; + this.V7.Z = b.B.W; + this.V7.W = 1f; + } } } } diff --git 
a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs new file mode 100644 index 0000000000..a7ea771988 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs @@ -0,0 +1,40 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System.Numerics; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Tuples; +using static SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters.JpegColorConverter; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Vector4OctetPack + { + private static Vector4Pair r = new Vector4Pair + { + A = new Vector4(1, 2, 3, 4), + B = new Vector4(5, 6, 7, 8) + }; + + private static Vector4Pair g = new Vector4Pair + { + A = new Vector4(9, 10, 11, 12), + B = new Vector4(13, 14, 15, 16) + }; + + private static Vector4Pair b = new Vector4Pair + { + A = new Vector4(17, 18, 19, 20), + B = new Vector4(21, 22, 23, 24) + }; + + [Benchmark] + public void Pack() + { + Vector4Octet v = default; + + v.Pack(ref r, ref g, ref b); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs index e860c5491f..e8a06bf24e 100644 --- a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -73,7 +73,9 @@ namespace SixLabors.ImageSharp.Benchmarks } #endif this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) - .WithEnvironmentVariables(new EnvironmentVariable(EnableHWIntrinsic, Off)) + .WithEnvironmentVariables( + new EnvironmentVariable(EnableHWIntrinsic, Off), + new EnvironmentVariable(FeatureSIMD, Off)) .WithId("No HwIntrinsics")); } } From b8081fd3e94e2b338a9e12e9a0d859f0d9f785d6 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 23 Oct 2020 16:54:10 +0100 Subject: [PATCH 070/104] Use HW color conversion 
--- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 23 +++ .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 74 ++++++++- .../ColorConverters/JpegColorConverter.cs | 156 +++++++++--------- .../Codecs/Jpeg/Vector4OctetPack.cs | 40 ----- 4 files changed, 174 insertions(+), 119 deletions(-) delete mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index a51c21b37f..c5a7f5e909 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -16,6 +16,29 @@ namespace SixLabors.ImageSharp { public static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + /// + /// Performs a multiplication and an addition of the . + /// + /// The vector to add to the intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAdd( + in Vector256 va, + in Vector256 vm0, + in Vector256 vm1) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAdd(vm1, vm0, va); + } + else + { + return Avx.Add(Avx.Multiply(vm0, vm1), va); + } + } + /// /// as many elements as possible, slicing them down (keeping the remainder). /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index c4d1408a2e..8c34baa1dc 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -1,11 +1,15 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. 
// Licensed under the Apache License, Version 2.0. using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; - +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif using SixLabors.ImageSharp.Tuples; // ReSharper disable ImpureMethodCallOnReadonlyValueField @@ -47,6 +51,71 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); } +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-halfValue); + var scale = Vector256.Create(1 / maxValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(0.344136F); + var gCrMult = Vector256.Create(0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. 
+ Vector4 vo = Vector4.One; + Vector128 valpha = Unsafe.As>(ref vo); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskDeinterleave8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector256 rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector256 ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector256 bbRefAsVector = ref Unsafe.As>(ref bb); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = Avx.Subtract(Avx.Subtract(y, Avx.Multiply(cb, gCbMult)), Avx.Multiply(cr, gCrMult)); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); + g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); + b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... 
order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.PackAvx2(ref rr, ref gg, ref bb, in valpha, in vcontrol); + } +#else ref Vector yBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); ref Vector cbBase = @@ -104,6 +173,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); destination.Pack(ref rr, ref gg, ref bb); } +#endif } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index f2a1c1e91e..4e96f3471d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -190,95 +190,97 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters #pragma warning disable SA1132 // Do not combine fields public Vector4 V0, V1, V2, V3, V4, V5, V6, V7; +#if SUPPORTS_RUNTIME_INTRINSICS + /// /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... 
/// - public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b) + [MethodImpl(InliningOptions.ShortMethod)] + public void PackAvx2( + ref Vector4Pair r, + ref Vector4Pair g, + ref Vector4Pair b, + in Vector128 a, + in Vector256 vcontrol) { -#if SUPPORTS_RUNTIME_INTRINSICS - if (Avx2.IsSupported) - { - Vector4 vo = Vector4.One; - Vector128 valpha = Unsafe.As>(ref vo); + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref r.A), + Unsafe.As>(ref g.A), + 1); - ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskDeinterleave8x32); - Vector256 vcontrol = Unsafe.As>(ref control); + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref b.A), + a, + 1); - Vector256 r0 = Avx.InsertVector128( - Unsafe.As>(ref r.A).ToVector256(), - Unsafe.As>(ref g.A), - 1); + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref r.B).ToVector256(), + Unsafe.As>(ref g.B), + 1); - Vector256 r1 = Avx.InsertVector128( - Unsafe.As>(ref b.A).ToVector256(), - valpha, - 1); + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref b.B).ToVector256(), + a, + 1); - Vector256 r2 = Avx.InsertVector128( - Unsafe.As>(ref r.B).ToVector256(), - Unsafe.As>(ref g.B), - 1); + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackHigh(r0, r1); - Vector256 r3 = Avx.InsertVector128( - Unsafe.As>(ref b.B).ToVector256(), - valpha, - 1); + Unsafe.As>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol); + Unsafe.As>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol); - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t2 = Avx.UnpackHigh(r0, r1); + Vector256 t4 = Avx.UnpackLow(r2, r3); + Vector256 t6 = Avx.UnpackHigh(r2, r3); - Unsafe.As>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol); - Unsafe.As>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol); - - Vector256 t4 = Avx.UnpackLow(r2, r3); - Vector256 t6 = Avx.UnpackHigh(r2, r3); - - Unsafe.As>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol); - Unsafe.As>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol); - } - else + 
Unsafe.As>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol); + Unsafe.As>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol); + } #endif - { - this.V0.X = r.A.X; - this.V0.Y = g.A.X; - this.V0.Z = b.A.X; - this.V0.W = 1f; - - this.V1.X = r.A.Y; - this.V1.Y = g.A.Y; - this.V1.Z = b.A.Y; - this.V1.W = 1f; - - this.V2.X = r.A.Z; - this.V2.Y = g.A.Z; - this.V2.Z = b.A.Z; - this.V2.W = 1f; - - this.V3.X = r.A.W; - this.V3.Y = g.A.W; - this.V3.Z = b.A.W; - this.V3.W = 1f; - - this.V4.X = r.B.X; - this.V4.Y = g.B.X; - this.V4.Z = b.B.X; - this.V4.W = 1f; - - this.V5.X = r.B.Y; - this.V5.Y = g.B.Y; - this.V5.Z = b.B.Y; - this.V5.W = 1f; - - this.V6.X = r.B.Z; - this.V6.Y = g.B.Z; - this.V6.Z = b.B.Z; - this.V6.W = 1f; - - this.V7.X = r.B.W; - this.V7.Y = g.B.W; - this.V7.Z = b.B.W; - this.V7.W = 1f; - } + + /// + /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... + /// + public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b) + { + this.V0.X = r.A.X; + this.V0.Y = g.A.X; + this.V0.Z = b.A.X; + this.V0.W = 1f; + + this.V1.X = r.A.Y; + this.V1.Y = g.A.Y; + this.V1.Z = b.A.Y; + this.V1.W = 1f; + + this.V2.X = r.A.Z; + this.V2.Y = g.A.Z; + this.V2.Z = b.A.Z; + this.V2.W = 1f; + + this.V3.X = r.A.W; + this.V3.Y = g.A.W; + this.V3.Z = b.A.W; + this.V3.W = 1f; + + this.V4.X = r.B.X; + this.V4.Y = g.B.X; + this.V4.Z = b.B.X; + this.V4.W = 1f; + + this.V5.X = r.B.Y; + this.V5.Y = g.B.Y; + this.V5.Z = b.B.Y; + this.V5.W = 1f; + + this.V6.X = r.B.Z; + this.V6.Y = g.B.Z; + this.V6.Z = b.B.Z; + this.V6.W = 1f; + + this.V7.X = r.B.W; + this.V7.Y = g.B.W; + this.V7.Z = b.B.W; + this.V7.W = 1f; } } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs deleted file mode 100644 index a7ea771988..0000000000 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) Six Labors. 
-// Licensed under the Apache License, Version 2.0. - -using System.Numerics; -using BenchmarkDotNet.Attributes; -using SixLabors.ImageSharp.Tuples; -using static SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters.JpegColorConverter; - -namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg -{ - [Config(typeof(Config.HwIntrinsics_SSE_AVX))] - public class Vector4OctetPack - { - private static Vector4Pair r = new Vector4Pair - { - A = new Vector4(1, 2, 3, 4), - B = new Vector4(5, 6, 7, 8) - }; - - private static Vector4Pair g = new Vector4Pair - { - A = new Vector4(9, 10, 11, 12), - B = new Vector4(13, 14, 15, 16) - }; - - private static Vector4Pair b = new Vector4Pair - { - A = new Vector4(17, 18, 19, 20), - B = new Vector4(21, 22, 23, 24) - }; - - [Benchmark] - public void Pack() - { - Vector4Octet v = default; - - v.Pack(ref r, ref g, ref b); - } - } -} From 50bc02764398f78ae862ec2b30363cdf3d71f52e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 23 Oct 2020 18:09:06 +0100 Subject: [PATCH 071/104] Fix access violation --- .../Components/Decoder/ColorConverters/JpegColorConverter.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 4e96f3471d..b40d9b9e6e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -204,12 +204,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters in Vector256 vcontrol) { Vector256 r0 = Avx.InsertVector128( - Unsafe.As>(ref r.A), + Unsafe.As>(ref r.A).ToVector256(), Unsafe.As>(ref g.A), 1); Vector256 r1 = Avx.InsertVector128( - Unsafe.As>(ref b.A), + Unsafe.As>(ref b.A).ToVector256(), a, 1); From 40442c24424ca43583700b5577c557f2ba21bd75 Mon Sep 17 
00:00:00 2001 From: James Jackson-South Date: Fri, 23 Oct 2020 18:46:00 +0100 Subject: [PATCH 072/104] Inline the packing. --- .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 59 +++++++++++++------ .../ColorConverters/JpegColorConverter.cs | 53 ----------------- 2 files changed, 42 insertions(+), 70 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index 8c34baa1dc..ca7971a074 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -59,8 +59,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector256 crBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); // Used for the color conversion var chromaOffset = Vector256.Create(-halfValue); @@ -76,14 +76,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskDeinterleave8x32); Vector256 vcontrol = Unsafe.As>(ref control); - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - - ref Vector256 rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector256 ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector256 bbRefAsVector = ref Unsafe.As>(ref bb); - // Walking 8 elements at one step: int n = result.Length / 8; for (int i = 0; i < n; i++) @@ -107,13 +99,46 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); b = 
Avx.Multiply(Avx.RoundToNearestInteger(b), scale); - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.PackAvx2(ref rr, ref gg, ref bb, in valpha, in vcontrol); + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the + // expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + // + // Left side. + Vector256 r0 = Avx.InsertVector128( + r, + Unsafe.As, Vector128>(ref g), + 1); + + Vector256 r1 = Avx.InsertVector128( + b, + valpha, + 1); + + // Right side + Vector256 r2 = Avx.InsertVector128( + Unsafe.Add(ref Unsafe.As, Vector128>(ref r), 1).ToVector256(), + Unsafe.Add(ref Unsafe.As, Vector128>(ref g), 1), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.Add(ref Unsafe.As, Vector128>(ref b), 1).ToVector256(), + valpha, + 1); + + // Split into separate rows + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackHigh(r0, r1); + + // Deinterleave and set + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + destination = Avx2.PermuteVar8x32(t0, vcontrol); + Unsafe.Add(ref destination, 1) = Avx2.PermuteVar8x32(t2, vcontrol); + + // Repeat for right side. 
+ Vector256 t4 = Avx.UnpackLow(r2, r3); + Vector256 t6 = Avx.UnpackHigh(r2, r3); + + Unsafe.Add(ref destination, 2) = Avx2.PermuteVar8x32(t4, vcontrol); + Unsafe.Add(ref destination, 3) = Avx2.PermuteVar8x32(t6, vcontrol); } #else ref Vector yBase = diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index b40d9b9e6e..7c780700c9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -4,12 +4,6 @@ using System; using System.Collections.Generic; using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -#endif using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; @@ -190,53 +184,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters #pragma warning disable SA1132 // Do not combine fields public Vector4 V0, V1, V2, V3, V4, V5, V6, V7; -#if SUPPORTS_RUNTIME_INTRINSICS - - /// - /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... 
- /// - [MethodImpl(InliningOptions.ShortMethod)] - public void PackAvx2( - ref Vector4Pair r, - ref Vector4Pair g, - ref Vector4Pair b, - in Vector128 a, - in Vector256 vcontrol) - { - Vector256 r0 = Avx.InsertVector128( - Unsafe.As>(ref r.A).ToVector256(), - Unsafe.As>(ref g.A), - 1); - - Vector256 r1 = Avx.InsertVector128( - Unsafe.As>(ref b.A).ToVector256(), - a, - 1); - - Vector256 r2 = Avx.InsertVector128( - Unsafe.As>(ref r.B).ToVector256(), - Unsafe.As>(ref g.B), - 1); - - Vector256 r3 = Avx.InsertVector128( - Unsafe.As>(ref b.B).ToVector256(), - a, - 1); - - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t2 = Avx.UnpackHigh(r0, r1); - - Unsafe.As>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol); - Unsafe.As>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol); - - Vector256 t4 = Avx.UnpackLow(r2, r3); - Vector256 t6 = Avx.UnpackHigh(r2, r3); - - Unsafe.As>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol); - Unsafe.As>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol); - } -#endif - /// /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... 
/// From 238564b6096f540e247cfa725e334b21a79da5ab Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 24 Oct 2020 00:55:22 +0100 Subject: [PATCH 073/104] Use less permutes and more multiply/add --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 2 + .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 63 ++++++------------- 2 files changed, 22 insertions(+), 43 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index c5a7f5e909..2d788992ee 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -16,6 +16,8 @@ namespace SixLabors.ImageSharp { public static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + public static ReadOnlySpan PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; + /// /// Performs a multiplication and an addition of the . 
/// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index ca7971a074..1319b56ee0 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -66,14 +66,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters var chromaOffset = Vector256.Create(-halfValue); var scale = Vector256.Create(1 / maxValue); var rCrMult = Vector256.Create(1.402F); - var gCbMult = Vector256.Create(0.344136F); - var gCrMult = Vector256.Create(0.714136F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); var bCbMult = Vector256.Create(1.772F); // Used for packing. - Vector4 vo = Vector4.One; - Vector128 valpha = Unsafe.As>(ref vo); - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskDeinterleave8x32); + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); Vector256 vcontrol = Unsafe.As>(ref control); // Walking 8 elements at one step: @@ -87,58 +86,36 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + // r = y + (1.402F * cr); // g = y - (0.344136F * cb) - (0.714136F * cr); // b = y + (1.772F * cb); // Adding & multiplying 8 elements at one time: Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); - Vector256 g = Avx.Subtract(Avx.Subtract(y, Avx.Multiply(cb, gCbMult)), Avx.Multiply(cr, gCrMult)); + Vector256 g = 
HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + // TODO: We should be savving to RGBA not Vector4 r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the - // expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - // - // Left side. - Vector256 r0 = Avx.InsertVector128( - r, - Unsafe.As, Vector128>(ref g), - 1); - - Vector256 r1 = Avx.InsertVector128( - b, - valpha, - 1); - - // Right side - Vector256 r2 = Avx.InsertVector128( - Unsafe.Add(ref Unsafe.As, Vector128>(ref r), 1).ToVector256(), - Unsafe.Add(ref Unsafe.As, Vector128>(ref g), 1), - 1); - - Vector256 r3 = Avx.InsertVector128( - Unsafe.Add(ref Unsafe.As, Vector128>(ref b), 1).ToVector256(), - valpha, - 1); - - // Split into separate rows - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t2 = Avx.UnpackHigh(r0, r1); - - // Deinterleave and set + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - destination = Avx2.PermuteVar8x32(t0, vcontrol); - Unsafe.Add(ref destination, 1) = Avx2.PermuteVar8x32(t2, vcontrol); - // Repeat for right side. 
- Vector256 t4 = Avx.UnpackLow(r2, r3); - Vector256 t6 = Avx.UnpackHigh(r2, r3); + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); - Unsafe.Add(ref destination, 2) = Avx2.PermuteVar8x32(t4, vcontrol); - Unsafe.Add(ref destination, 3) = Avx2.PermuteVar8x32(t6, vcontrol); + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); } #else ref Vector yBase = From 893247bd882674a6cce48d505b1b34e68a3e27da Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 26 Oct 2020 17:05:50 +0000 Subject: [PATCH 074/104] Add 4 channel float shuffling. --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 80 +++++++++++ .../Common/Helpers/SimdUtils.Shuffle.cs | 131 ++++++++++++++++++ .../Color/Bulk/ShuffleFloat4Channel.cs | 68 +++++++++ .../ImageSharp.Benchmarks.csproj | 1 + .../Common/SimdUtilsTests.Shuffle.cs | 75 ++++++++++ .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 14 +- .../Formats/Png/PngEncoderTests.cs | 2 +- .../FeatureTesting/FeatureTestRunner.cs | 21 +-- .../Tests/FeatureTestRunnerTests.cs | 4 +- 9 files changed, 377 insertions(+), 19 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs create mode 100644 tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 2d788992ee..aea04737d8 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -18,6 +18,86 @@ namespace SixLabors.ImageSharp public static ReadOnlySpan PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; + /// + /// Shuffle 
single-precision (32-bit) floating-point elements in + /// using the control and store the results in . + /// + /// The source span of floats + /// The destination span of float + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4ChannelReduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Avx.IsSupported || Sse.IsSupported) + { + int remainder; + if (Avx.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle4Channel( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Avx.IsSupported) + { + int n = dest.Length / Vector256.Count; + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); + } + } + else + { + // Sse + int n = dest.Length / Vector128.Count; + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + Vector128 vs = Unsafe.Add(ref sourceBase, i); + Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); + } + } + } + /// /// Performs a multiplication and an addition of the . 
/// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs new file mode 100644 index 0000000000..fe7cbb72a5 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -0,0 +1,131 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + /// + /// Shuffle single-precision (32-bit) floating-point elements in + /// using the control and store the results in . + /// + /// The source span of floats + /// The destination span of float + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyShuffleSpanInput(source, dest); + + // TODO: There doesn't seem to be any APIs for + // System.Numerics that allow shuffling. 
+#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + ShuffleRemainder4Channel(source, dest, control); + } + } + + [MethodImpl(InliningOptions.ColdPath)] + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + ref float sBase = ref MemoryMarshal.GetReference(source); + ref float dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + + [Conditional("DEBUG")] + private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue( + source.Length == dest.Length, + nameof(source), + "Input spans must be of same length!"); + + DebugGuard.IsTrue( + source.Length % 4 == 0, + nameof(source), + "Input spans must be divisiable by 4!"); + } + + public static class Shuffle + { + public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3; + public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; + public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; + + public static ReadOnlySpan WXYZ_128 => MmShuffleByte128(2, 1, 0, 3); + + public static ReadOnlySpan XYZW_128 => MmShuffleByte128(3, 2, 1, 0); + + public static ReadOnlySpan ZYXW_128 => MmShuffleByte128(3, 0, 1, 2); + + public static ReadOnlySpan WXYZ_256 => MmShuffleByte256(2, 1, 0, 3); + + public static ReadOnlySpan XYZW_256 => MmShuffleByte256(3, 2, 1, 0); + + public static ReadOnlySpan ZYXW_256 => MmShuffleByte256(3, 0, 1, 2); + + private static byte[] MmShuffleByte128(int p3, int p2, int p1, int p0) + { + byte[] result = new byte[16]; 
+ + for (int i = 0; i < result.Length; i += 4) + { + result[i] = (byte)(p0 + i); + result[i + 1] = (byte)(p1 + i); + result[i + 2] = (byte)(p2 + i); + result[i + 3] = (byte)(p3 + i); + } + + return result; + } + + private static byte[] MmShuffleByte256(int p3, int p2, int p1, int p0) + { + byte[] result = new byte[32]; + + for (int i = 0; i < result.Length; i += 4) + { + result[i] = (byte)(p0 + i); + result[i + 1] = (byte)(p1 + i); + result[i + 2] = (byte)(p2 + i); + result[i + 3] = (byte)(p3 + i); + } + + return result; + } + + public static void InverseMmShuffle(byte control, out int p3, out int p2, out int p1, out int p0) + { + p3 = control >> 6 & 0x3; + p2 = control >> 4 & 0x3; + p1 = control >> 2 & 0x3; + p0 = control >> 0 & 0x3; + } + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs new file mode 100644 index 0000000000..36b9591d9d --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Tests; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class ShuffleFloat4Channel + { + private float[] source; + private float[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new Random(this.Count).GenerateRandomFloatArray(this.Count, 0, 256); + this.destination = new float[this.Count]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + } + } + + // 2020-10-26 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK = 5.0.100-rc.2.20479.15 + // + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |---------------- |-------------------------------------------------- |------ |------------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | AVX | Empty | 128 | 14.49 ns | 0.244 ns | 0.217 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 87.74 ns | 0.524 ns | 0.490 ns | 6.06 | 0.09 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 23.65 
ns | 0.101 ns | 0.094 ns | 1.63 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 256 | 25.87 ns | 0.492 ns | 0.673 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 159.52 ns | 0.901 ns | 0.843 ns | 6.12 | 0.12 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 45.47 ns | 0.404 ns | 0.378 ns | 1.75 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 512 | 49.51 ns | 0.088 ns | 0.083 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 297.96 ns | 0.926 ns | 0.821 ns | 6.02 | 0.02 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 90.77 ns | 0.191 ns | 0.169 ns | 1.83 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 1024 | 113.09 ns | 1.913 ns | 3.090 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 604.58 ns | 1.464 ns | 1.298 ns | 5.29 | 0.18 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 179.44 ns | 0.208 ns | 0.184 ns | 1.57 | 0.05 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 2048 | 217.95 ns | 1.314 ns | 1.165 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 1,152.04 ns | 3.941 ns | 3.494 ns | 5.29 | 0.03 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 349.52 ns | 0.587 ns | 0.520 ns | 1.60 | 0.01 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj index eaab162ff2..4784a219b2 100644 --- 
a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj +++ b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj @@ -17,6 +17,7 @@ + diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs new file mode 100644 index 0000000000..04aab18e4e --- /dev/null +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -0,0 +1,75 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Common +{ + public partial class SimdUtilsTests + { + public static readonly TheoryData ShuffleControls = + new TheoryData + { + SimdUtils.Shuffle.WXYZ, + SimdUtils.Shuffle.XYZW, + SimdUtils.Shuffle.ZYXW + }; + + [Theory] + [MemberData(nameof(ShuffleControls))] + public void BulkShuffleFloat4Channel(byte control) + { + static void RunTest(string serialized) + { + byte ctrl = FeatureTestRunner.Deserialize(serialized); + foreach (var item in ArraySizesDivisibleBy4) + { + foreach (var count in item) + { + TestShuffle( + (int)count, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), + ctrl); + } + } + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + control, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + private static void TestShuffle( + int count, + Action, Memory> convert, + byte control) + { + float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256); + var result = new float[count]; + + float[] expected = new float[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 4) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + expected[i + 3] = source[p3 + i]; + } + + convert(source, result); + + Assert.Equal(expected, 
result, new ApproximateFloatComparer(1e-5F)); + } + } +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 838db742a1..bddadff4da 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -13,7 +13,7 @@ using Xunit.Abstractions; namespace SixLabors.ImageSharp.Tests.Common { - public class SimdUtilsTests + public partial class SimdUtilsTests { private ITestOutputHelper Output { get; } @@ -212,14 +212,14 @@ namespace SixLabors.ImageSharp.Tests.Common static void RunTest(string serialized) { TestImpl_BulkConvertByteToNormalizedFloat( - FeatureTestRunner.Deserialize(serialized), + FeatureTestRunner.Deserialize(serialized), (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41, - count); + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41); } #endif @@ -305,14 +305,14 @@ namespace SixLabors.ImageSharp.Tests.Common static void RunTest(string serialized) { TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( - FeatureTestRunner.Deserialize(serialized), + FeatureTestRunner.Deserialize(serialized), (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2, - count); + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2); } #endif diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 465bed8a16..b4670cb5d4 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -535,7 +535,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png static void RunTest(string serialized) { 
TestImageProvider provider = - FeatureTestRunner.Deserialize>(serialized); + FeatureTestRunner.DeserializeForXunit>(serialized); foreach (PngInterlaceMode interlaceMode in InterlaceMode) { diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index fdba9ce982..4720ea78ac 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -29,17 +29,19 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities /// The type to deserialize to. /// The string value to deserialize. /// The value. - public static T Deserialize(string value) + public static T DeserializeForXunit(string value) where T : IXunitSerializable => BasicSerializer.Deserialize(value); /// - /// Allows the deserialization of integers passed to the feature test. + /// Allows the deserialization of types implementing + /// passed to the feature test. /// /// The string value to deserialize. - /// The value. - public static int Deserialize(string value) - => Convert.ToInt32(value); + /// The value. + public static T Deserialize(string value) + where T : IConvertible + => (T)Convert.ChangeType(value, typeof(T)); /// /// Runs the given test within an environment @@ -214,12 +216,13 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities /// where the given features. /// /// The test action to run. - /// The intrinsics features. /// The value to pass as a parameter to the test action. - public static void RunWithHwIntrinsicsFeature( + /// The intrinsics features. 
+ public static void RunWithHwIntrinsicsFeature( Action action, - HwIntrinsics intrinsics, - int serializable) + T serializable, + HwIntrinsics intrinsics) + where T : IConvertible { if (!RemoteExecutor.IsSupported) { diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 646000120f..4cbbefe686 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests static void AssertHwIntrinsicsFeatureDisabled(string serializable) { Assert.NotNull(serializable); - Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + Assert.NotNull(FeatureTestRunner.DeserializeForXunit(serializable)); #if SUPPORTS_RUNTIME_INTRINSICS Assert.False(Sse.IsSupported); @@ -202,7 +202,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) { Assert.NotNull(serializable); - Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + Assert.NotNull(FeatureTestRunner.DeserializeForXunit(serializable)); switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) { From 99d0a3111d42eb975a7a253c3c0be8c35d1ba125 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 26 Oct 2020 20:04:32 +0000 Subject: [PATCH 075/104] Add 4 channel byte shuffling --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 122 +++++++++++++++++- .../Common/Helpers/SimdUtils.Shuffle.cs | 115 ++++++++++++----- .../Color/Bulk/ShuffleByte4Channel.cs | 68 ++++++++++ .../Common/SimdUtilsTests.Shuffle.cs | 65 +++++++++- 4 files changed, 330 insertions(+), 40 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs 
b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index aea04737d8..899ab7130b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -22,8 +22,8 @@ namespace SixLabors.ImageSharp /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . /// - /// The source span of floats - /// The destination span of float + /// The source span of floats. + /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle4ChannelReduce( @@ -58,6 +58,46 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffle 8-bit integers in a within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4ChannelReduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Avx2.IsSupported || Ssse3.IsSupported) + { + int remainder; + if (Avx.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle4Channel( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + [MethodImpl(InliningOptions.ShortMethod)] private static void Shuffle4Channel( ReadOnlySpan source, @@ -98,6 +138,84 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Avx2.IsSupported) + { + int n = dest.Length / Vector256.Count; + + Vector256 vcm; + switch (control) + { + case Shuffle.WXYZ: + vcm 
= Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.WXYZ_256)); + break; + case Shuffle.XYZW: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.XYZW_256)); + break; + case Shuffle.ZYXW: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.ZYXW_256)); + break; + default: + Span bytes = stackalloc byte[Vector256.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + break; + } + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + } + } + else + { + // Ssse3 + int n = dest.Length / Vector128.Count; + + Vector128 vcm; + switch (control) + { + case Shuffle.WXYZ: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.WXYZ_128)); + break; + case Shuffle.XYZW: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.XYZW_128)); + break; + case Shuffle.ZYXW: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.ZYXW_128)); + break; + default: + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + break; + } + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + Vector128 vs = Unsafe.Add(ref sourceBase, i); + Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(vs, vcm); + } + } + } + /// /// Performs a multiplication and an addition of the . 
/// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index fe7cbb72a5..76746e4d25 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -14,8 +14,8 @@ namespace SixLabors.ImageSharp /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . /// - /// The source span of floats - /// The destination span of float + /// The source span of floats. + /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle4Channel( @@ -38,14 +38,43 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffle 8-bit integers in a within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyShuffleSpanInput(source, dest); + + // TODO: There doesn't seem to be any APIs for + // System.Numerics that allow shuffling. 
+#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + ShuffleRemainder4Channel(source, dest, control); + } + } + [MethodImpl(InliningOptions.ColdPath)] - public static void ShuffleRemainder4Channel( - ReadOnlySpan source, - Span dest, + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, byte control) + where T : struct { - ref float sBase = ref MemoryMarshal.GetReference(source); - ref float dBase = ref MemoryMarshal.GetReference(dest); + ref T sBase = ref MemoryMarshal.GetReference(source); + ref T dBase = ref MemoryMarshal.GetReference(dest); Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) @@ -58,7 +87,8 @@ namespace SixLabors.ImageSharp } [Conditional("DEBUG")] - private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) + private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) + where T : struct { DebugGuard.IsTrue( source.Length == dest.Length, @@ -77,49 +107,64 @@ namespace SixLabors.ImageSharp public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; - public static ReadOnlySpan WXYZ_128 => MmShuffleByte128(2, 1, 0, 3); + public static ReadOnlySpan WXYZ_128 => MmShuffleSpan128(WXYZ); - public static ReadOnlySpan XYZW_128 => MmShuffleByte128(3, 2, 1, 0); + public static ReadOnlySpan XYZW_128 => MmShuffleSpan128(XYZW); - public static ReadOnlySpan ZYXW_128 => MmShuffleByte128(3, 0, 1, 2); + public static ReadOnlySpan ZYXW_128 => MmShuffleSpan128(ZYXW); - public static ReadOnlySpan WXYZ_256 => MmShuffleByte256(2, 1, 0, 3); + public static ReadOnlySpan WXYZ_256 => MmShuffleSpan256(WXYZ); - public static ReadOnlySpan XYZW_256 => MmShuffleByte256(3, 2, 1, 0); + public static ReadOnlySpan XYZW_256 => MmShuffleSpan256(XYZW); - public static 
ReadOnlySpan ZYXW_256 => MmShuffleByte256(3, 0, 1, 2); + public static ReadOnlySpan ZYXW_256 => MmShuffleSpan256(ZYXW); - private static byte[] MmShuffleByte128(int p3, int p2, int p1, int p0) + private static ReadOnlySpan MmShuffleSpan128(byte control) { - byte[] result = new byte[16]; - - for (int i = 0; i < result.Length; i += 4) - { - result[i] = (byte)(p0 + i); - result[i + 1] = (byte)(p1 + i); - result[i + 2] = (byte)(p2 + i); - result[i + 3] = (byte)(p3 + i); - } + Span buffer = new byte[16]; + MmShuffleSpan(ref buffer, control); + return buffer; + } - return result; + private static ReadOnlySpan MmShuffleSpan256(byte control) + { + Span buffer = new byte[32]; + MmShuffleSpan(ref buffer, control); + return buffer; } - private static byte[] MmShuffleByte256(int p3, int p2, int p1, int p0) + [MethodImpl(InliningOptions.ShortMethod)] + public static byte MmShuffle(int p3, int p2, int p1, int p0) + => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); + + [MethodImpl(InliningOptions.ShortMethod)] + public static void MmShuffleSpan(ref Span span, byte control) { - byte[] result = new byte[32]; + InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); - for (int i = 0; i < result.Length; i += 4) + ref byte spanBase = ref MemoryMarshal.GetReference(span); + + for (int i = 0; i < span.Length; i += 4) { - result[i] = (byte)(p0 + i); - result[i + 1] = (byte)(p1 + i); - result[i + 2] = (byte)(p2 + i); - result[i + 3] = (byte)(p3 + i); + Unsafe.Add(ref spanBase, i) = (byte)(p0 + i); + Unsafe.Add(ref spanBase, i + 1) = (byte)(p1 + i); + Unsafe.Add(ref spanBase, i + 2) = (byte)(p2 + i); + Unsafe.Add(ref spanBase, i + 3) = (byte)(p3 + i); } - - return result; } - public static void InverseMmShuffle(byte control, out int p3, out int p2, out int p1, out int p0) + [MethodImpl(InliningOptions.ShortMethod)] + public static void InverseMmShuffle( + byte control, + out int p3, + out int p2, + out int p1, + out int p0) { p3 = control >> 6 & 0x3; p2 = 
control >> 4 & 0x3; diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs new file mode 100644 index 0000000000..baef86099b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class ShuffleByte4Channel + { + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + } + } + + // 2020-10-26 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK = 5.0.100-rc.2.20479.15 + // + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |---------------- |-------------------------------------------------- |------ 
|----------:|----------:|----------:|------:|--------:|-------:|------:|------:|----------:| + // | Shuffle4Channel | AVX | Empty | 128 | 33.57 ns | 0.694 ns | 1.268 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.97 ns | 0.940 ns | 1.045 ns | 1.94 | 0.10 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 27.23 ns | 0.338 ns | 0.300 ns | 0.84 | 0.04 | 0.0095 | - | - | 40 B | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 256 | 34.57 ns | 0.295 ns | 0.276 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.62 ns | 0.257 ns | 0.228 ns | 3.60 | 0.03 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 32.22 ns | 0.106 ns | 0.099 ns | 0.93 | 0.01 | 0.0095 | - | - | 40 B | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 512 | 40.41 ns | 0.826 ns | 0.848 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 251.65 ns | 0.440 ns | 0.412 ns | 6.23 | 0.13 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 41.54 ns | 0.128 ns | 0.114 ns | 1.03 | 0.02 | 0.0095 | - | - | 40 B | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 1024 | 51.54 ns | 0.156 ns | 0.121 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 493.66 ns | 1.316 ns | 1.231 ns | 9.58 | 0.04 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 61.45 ns | 0.216 ns | 0.181 ns | 1.19 | 0.00 | 0.0095 | - | - | 40 B | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 2048 | 76.85 ns | 0.176 ns | 0.138 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | 
Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 985.64 ns | 11.396 ns | 10.103 ns | 12.84 | 0.15 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 106.13 ns | 0.335 ns | 0.297 ns | 1.38 | 0.01 | 0.0095 | - | - | 40 B | +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 04aab18e4e..e07bcf257f 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -14,7 +14,10 @@ namespace SixLabors.ImageSharp.Tests.Common { SimdUtils.Shuffle.WXYZ, SimdUtils.Shuffle.XYZW, - SimdUtils.Shuffle.ZYXW + SimdUtils.Shuffle.ZYXW, + SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0), + SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1), + SimdUtils.Shuffle.MmShuffle(3, 3, 3, 3) }; [Theory] @@ -28,7 +31,7 @@ namespace SixLabors.ImageSharp.Tests.Common { foreach (var count in item) { - TestShuffle( + TestShuffleFloat4Channel( (int)count, (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), ctrl); @@ -42,7 +45,32 @@ namespace SixLabors.ImageSharp.Tests.Common HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } - private static void TestShuffle( + [Theory] + [MemberData(nameof(ShuffleControls))] + public void BulkShuffleByte4Channel(byte control) + { + static void RunTest(string serialized) + { + byte ctrl = FeatureTestRunner.Deserialize(serialized); + foreach (var item in ArraySizesDivisibleBy4) + { + foreach (var count in item) + { + TestShuffleByte4Channel( + (int)count, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), + ctrl); + } + } + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + control, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + private static void TestShuffleFloat4Channel( int count, Action, Memory> convert, byte control) @@ -71,5 +99,36 @@ namespace 
SixLabors.ImageSharp.Tests.Common Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5F)); } + + private static void TestShuffleByte4Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 4) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + expected[i + 3] = source[p3 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } } } From 34963a7f7a40a1950376672e09deba5e794eeb7a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 26 Oct 2020 22:13:21 +0000 Subject: [PATCH 076/104] Don't use static spans for now. --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 45 ++++--------------- .../Common/Helpers/SimdUtils.Shuffle.cs | 26 ----------- .../Color/Bulk/ShuffleByte4Channel.cs | 42 ++++++++--------- 3 files changed, 30 insertions(+), 83 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 899ab7130b..d68e16e23b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -148,24 +148,12 @@ namespace SixLabors.ImageSharp { int n = dest.Length / Vector256.Count; - Vector256 vcm; - switch (control) - { - case Shuffle.WXYZ: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.WXYZ_256)); - break; - case Shuffle.XYZW: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.XYZW_256)); - break; - case Shuffle.ZYXW: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.ZYXW_256)); - break; - default: - Span bytes = stackalloc byte[Vector256.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); - 
vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - break; - } + // I've chosen to do this for convenience while we determine what + // shuffle controls to add to the library. + // We can add static ROS instances if need be in the future. + Span bytes = stackalloc byte[Vector256.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector256 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -183,24 +171,9 @@ namespace SixLabors.ImageSharp // Ssse3 int n = dest.Length / Vector128.Count; - Vector128 vcm; - switch (control) - { - case Shuffle.WXYZ: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.WXYZ_128)); - break; - case Shuffle.XYZW: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.XYZW_128)); - break; - case Shuffle.ZYXW: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.ZYXW_128)); - break; - default: - Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - break; - } + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 76746e4d25..6b766b88de 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -107,32 +107,6 @@ namespace SixLabors.ImageSharp public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; - public static ReadOnlySpan WXYZ_128 => MmShuffleSpan128(WXYZ); - - public static ReadOnlySpan XYZW_128 => MmShuffleSpan128(XYZW); - - public static ReadOnlySpan ZYXW_128 => MmShuffleSpan128(ZYXW); - - 
public static ReadOnlySpan WXYZ_256 => MmShuffleSpan256(WXYZ); - - public static ReadOnlySpan XYZW_256 => MmShuffleSpan256(XYZW); - - public static ReadOnlySpan ZYXW_256 => MmShuffleSpan256(ZYXW); - - private static ReadOnlySpan MmShuffleSpan128(byte control) - { - Span buffer = new byte[16]; - MmShuffleSpan(ref buffer, control); - return buffer; - } - - private static ReadOnlySpan MmShuffleSpan256(byte control) - { - Span buffer = new byte[32]; - MmShuffleSpan(ref buffer, control); - return buffer; - } - [MethodImpl(InliningOptions.ShortMethod)] public static byte MmShuffle(int p3, int p2, int p1, int p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index baef86099b..c45b103e38 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -44,25 +44,25 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |---------------- |-------------------------------------------------- |------ |----------:|----------:|----------:|------:|--------:|-------:|------:|------:|----------:| - // | Shuffle4Channel | AVX | Empty | 128 | 33.57 ns | 0.694 ns | 1.268 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.97 ns | 0.940 ns | 1.045 ns | 1.94 | 0.10 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 27.23 ns | 0.338 ns | 0.300 ns | 0.84 | 0.04 | 0.0095 | - | - | 40 B | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 256 | 34.57 ns | 0.295 ns | 0.276 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No 
HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.62 ns | 0.257 ns | 0.228 ns | 3.60 | 0.03 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 32.22 ns | 0.106 ns | 0.099 ns | 0.93 | 0.01 | 0.0095 | - | - | 40 B | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 512 | 40.41 ns | 0.826 ns | 0.848 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 251.65 ns | 0.440 ns | 0.412 ns | 6.23 | 0.13 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 41.54 ns | 0.128 ns | 0.114 ns | 1.03 | 0.02 | 0.0095 | - | - | 40 B | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 1024 | 51.54 ns | 0.156 ns | 0.121 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 493.66 ns | 1.316 ns | 1.231 ns | 9.58 | 0.04 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 61.45 ns | 0.216 ns | 0.181 ns | 1.19 | 0.00 | 0.0095 | - | - | 40 B | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 2048 | 76.85 ns | 0.176 ns | 0.138 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 985.64 ns | 11.396 ns | 10.103 ns | 12.84 | 0.15 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 106.13 ns | 0.335 ns | 0.297 ns | 1.38 | 0.01 | 0.0095 | - | - | 40 B | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |---------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | AVX | Empty | 128 | 20.51 ns | 0.270 ns | 
0.211 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.00 ns | 0.991 ns | 0.927 ns | 3.08 | 0.06 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 17.25 ns | 0.066 ns | 0.058 ns | 0.84 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 256 | 24.57 ns | 0.248 ns | 0.219 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.55 ns | 2.501 ns | 2.456 ns | 5.06 | 0.10 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 21.80 ns | 0.094 ns | 0.088 ns | 0.89 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 512 | 28.51 ns | 0.130 ns | 0.115 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 256.52 ns | 1.424 ns | 1.332 ns | 9.00 | 0.07 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 29.72 ns | 0.217 ns | 0.203 ns | 1.04 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 1024 | 36.40 ns | 0.357 ns | 0.334 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 492.71 ns | 1.498 ns | 1.251 ns | 13.52 | 0.12 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 44.71 ns | 0.264 ns | 0.234 ns | 1.23 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 2048 | 59.38 ns | 0.180 ns | 0.159 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 975.05 ns | 2.043 ns | 1.811 ns | 16.42 | 0.05 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 81.83 ns | 
0.212 ns | 0.198 ns | 1.38 | 0.01 | - | - | - | - | } From 84a1d1a28bbc5fff9da861b129ddbcccadb33b8b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 27 Oct 2020 19:32:32 +0000 Subject: [PATCH 077/104] Add optimized fallback for existing shuffles. --- .../Common/Helpers/SimdUtils.Shuffle.cs | 133 +++++++++++++++++- .../Common/SimdUtilsTests.Shuffle.cs | 2 + 2 files changed, 129 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 6b766b88de..4d2678320b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Buffers.Binary; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -67,14 +68,53 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ColdPath)] - public static void ShuffleRemainder4Channel( - ReadOnlySpan source, - Span dest, + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + ref float sBase = ref MemoryMarshal.GetReference(source); + ref float dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + + [MethodImpl(InliningOptions.ColdPath)] + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, byte control) - where T : struct { - ref T sBase = ref MemoryMarshal.GetReference(source); - ref T dBase = ref MemoryMarshal.GetReference(dest); +#if NETCOREAPP + // The JIT can 
detect and optimize rotation idioms ROTL (Rotate Left) + // and ROTR (Rotate Right) emitting efficient CPU instructions: + // https://github.com/dotnet/coreclr/pull/1830 + switch (control) + { + case Shuffle.WXYZ: + WXYZ(source, dest); + return; + case Shuffle.WZYX: + WZYX(source, dest); + return; + case Shuffle.YZWX: + YZWX(source, dest); + return; + case Shuffle.ZYXW: + ZYXW(source, dest); + return; + } +#endif + + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) @@ -86,6 +126,85 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + private static void WXYZ(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTL(8, packed) = [Z Y X W] + Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void ZYXW(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed & 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + + Unsafe.Add(ref dBase, i) = tmp1 + tmp3; + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void 
WZYX(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // REVERSE(packedArgb) = [X Y Z W] + Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void YZWX(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTR(8, packedArgb) = [Y Z W X] + Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); + } + } + [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct @@ -104,7 +223,9 @@ namespace SixLabors.ImageSharp public static class Shuffle { public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3; + public const byte WZYX = (0 << 6) | (1 << 4) | (2 << 2) | 3; public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; + public const byte YZWX = (0 << 6) | (3 << 4) | (2 << 2) | 1; public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; [MethodImpl(InliningOptions.ShortMethod)] diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index e07bcf257f..cdb6a86dfc 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -13,7 +13,9 @@ namespace SixLabors.ImageSharp.Tests.Common new TheoryData { SimdUtils.Shuffle.WXYZ, + SimdUtils.Shuffle.WZYX, SimdUtils.Shuffle.XYZW, + SimdUtils.Shuffle.YZWX, 
SimdUtils.Shuffle.ZYXW, SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0), SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1), From 956d1a3c77c2a898f67dac2cccc04f526aa17f40 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 28 Oct 2020 00:35:10 +0000 Subject: [PATCH 078/104] Unroll loops --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 105 +++++++++++++++--- .../Common/Helpers/SimdUtils.Shuffle.cs | 2 +- 2 files changed, 88 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index d68e16e23b..0ea17c7706 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -106,34 +106,72 @@ namespace SixLabors.ImageSharp { if (Avx.IsSupported) { - int n = dest.Length / Vector256.Count; - ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + int n = dest.Length / Vector256.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + + vd0 = Avx.Permute(vs0, control); + Unsafe.Add(ref vd0, 1) = Avx.Permute(Unsafe.Add(ref vs0, 1), control); + Unsafe.Add(ref vd0, 2) = Avx.Permute(Unsafe.Add(ref vs0, 2), control); + Unsafe.Add(ref vd0, 3) = Avx.Permute(Unsafe.Add(ref vs0, 3), control); + } + + if (m > 0) { - Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); + } } } else { // Sse - int n = dest.Length / Vector128.Count; - ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref Vector128 destBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + int n = dest.Length / Vector128.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) { - Vector128 vs = Unsafe.Add(ref sourceBase, i); - Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); + ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + + vd0 = Sse.Shuffle(vs0, vs0, control); + + Vector128 vs1 = Unsafe.Add(ref vs0, 1); + Unsafe.Add(ref vd0, 1) = Sse.Shuffle(vs1, vs1, control); + + Vector128 vs2 = Unsafe.Add(ref vs0, 2); + Unsafe.Add(ref vd0, 2) = Sse.Shuffle(vs2, vs2, control); + + Vector128 vs3 = Unsafe.Add(ref vs0, 3); + Unsafe.Add(ref vd0, 3) = Sse.Shuffle(vs3, vs3, control); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Vector128 vs = Unsafe.Add(ref sourceBase, i); + Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); + } } } } @@ -146,8 +184,6 @@ namespace SixLabors.ImageSharp { if (Avx2.IsSupported) { - int n = dest.Length / Vector256.Count; - // I've chosen to do this for convenience while we determine what // shuffle controls to add to the library. // We can add static ROS instances if need be in the future. 
@@ -161,16 +197,32 @@ namespace SixLabors.ImageSharp ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + int n = dest.Length / Vector256.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) { - Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); + + vd0 = Avx2.Shuffle(vs0, vcm); + Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vcm); + Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vcm); + Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + } } } else { // Ssse3 - int n = dest.Length / Vector128.Count; - Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); @@ -181,10 +233,27 @@ namespace SixLabors.ImageSharp ref Vector128 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + int n = dest.Length / Vector128.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) { - Vector128 vs = Unsafe.Add(ref sourceBase, i); - Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(vs, vcm); + ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); + + vd0 = Ssse3.Shuffle(vs0, vcm); + Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vcm); + Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vcm); + Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + } } } } diff 
--git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 4d2678320b..59b6254192 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -229,7 +229,7 @@ namespace SixLabors.ImageSharp public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; [MethodImpl(InliningOptions.ShortMethod)] - public static byte MmShuffle(int p3, int p2, int p1, int p0) + public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); [MethodImpl(InliningOptions.ShortMethod)] From 28dc056d831adcec0d47a332bf638abf5f3ff1f3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 28 Oct 2020 01:08:53 +0000 Subject: [PATCH 079/104] Fix coverage --- src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs | 2 +- tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 0ea17c7706..367df03ec3 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -74,7 +74,7 @@ namespace SixLabors.ImageSharp if (Avx2.IsSupported || Ssse3.IsSupported) { int remainder; - if (Avx.IsSupported) + if (Avx2.IsSupported) { remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index cdb6a86dfc..94298f94ca 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp.Tests.Common FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, control, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + 
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); } [Theory] From 0f950a1e508b1db74cd8f757e164722ac1e0796a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 28 Oct 2020 19:33:49 +0000 Subject: [PATCH 080/104] Implement new optimized 4 channel shuffle methods. --- .../Argb32.PixelOperations.Generated.cs | 64 ++++++------- .../Bgra32.PixelOperations.Generated.cs | 64 ++++++------- .../Rgba32.PixelOperations.Generated.cs | 64 ++++++------- .../Generated/_Common.ttinclude | 32 +++---- .../PixelFormats/Utils/PixelConverter.cs | 89 +++++++------------ .../PixelConversion_ConvertFromRgba32.cs | 56 ++++-------- ...ConverterTests.ReferenceImplementations.cs | 60 ++++++++----- .../PixelFormats/PixelConverterTests.cs | 67 +++++++------- 8 files changed, 216 insertions(+), 280 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs index 0b1292b641..3f48d2acca 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs @@ -53,66 +53,58 @@ namespace SixLabors.ImageSharp.PixelFormats Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale)); } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref 
MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgba32(source, dest); } /// - public override void FromRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToArgb32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = 
PixelConverter.FromArgb32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgra32(source, dest); } /// - public override void FromBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToArgb32(source, dest); } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs index 5bdd10404d..8cf2d5850a 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs @@ -53,66 +53,58 @@ namespace SixLabors.ImageSharp.PixelFormats Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale)); } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { 
Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgba32(source, dest); } /// - public override void FromRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgra32(source, dest); } /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref 
MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToArgb32(source, dest); } /// - public override void FromArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgra32(source, dest); } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs index b05c62f1f7..9a36ec29a4 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs @@ -42,66 +42,58 @@ namespace SixLabors.ImageSharp.PixelFormats } /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + 
public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToArgb32(source, dest); } /// - public override void FromArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgba32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); 
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgra32(source, dest); } /// - public override void FromBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgba32(source, dest); } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude index 5d56731ba6..d8b5286cd7 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude @@ -88,35 +88,31 @@ using System.Runtime.InteropServices; { #> /// - public override void 
To<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=thisPixelType#>> sourcePixels, Span<<#=otherPixelType#>> destinationPixels) + public override void To<#=otherPixelType#>( + Configuration configuration, + ReadOnlySpan<<#=thisPixelType#>> sourcePixels, + Span<<#=otherPixelType#>> destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As<<#=thisPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As<<#=otherPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(sourcePixels); + Span dest = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(destinationPixels); + PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(source, dest); } /// - public override void From<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=otherPixelType#>> sourcePixels, Span<<#=thisPixelType#>> destinationPixels) + public override void From<#=otherPixelType#>( + Configuration configuration, + ReadOnlySpan<<#=otherPixelType#>> sourcePixels, + Span<<#=thisPixelType#>> destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As<<#=otherPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As<<#=thisPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = 
PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(sourcePixels); + Span dest = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(destinationPixels); + PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(source, dest); } <#+ } diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index 8142640848..bc24258c91 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -1,6 +1,7 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; @@ -21,88 +22,64 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils public static class FromRgba32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToArgb32(uint packedRgba) - { - // packedRgba = [aa bb gg rr] - // ROTL(8, packedRgba) = [bb gg rr aa] - return (packedRgba << 8) | (packedRgba >> 24); - } + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WXYZ); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
/// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToBgra32(uint packedRgba) - { - // packedRgba = [aa bb gg rr] - // tmp1 = [aa 00 gg 00] - // tmp2 = [00 bb 00 rr] - // tmp3=ROTL(16, tmp2) = [00 rr 00 bb] - // tmp1 + tmp3 = [aa rr gg bb] - uint tmp1 = packedRgba & 0xFF00FF00; - uint tmp2 = packedRgba & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - return tmp1 + tmp3; - } + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); } public static class FromArgb32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToRgba32(uint packedArgb) - { - // packedArgb = [bb gg rr aa] - // ROTR(8, packedArgb) = [aa bb gg rr] - return (packedArgb >> 8) | (packedArgb << 24); - } + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.YZWX); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToBgra32(uint packedArgb) - { - // packedArgb = [bb gg rr aa] - // REVERSE(packedArgb) = [aa rr gg bb] - return BinaryPrimitives.ReverseEndianness(packedArgb); - } + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); } public static class FromBgra32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
/// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToArgb32(uint packedBgra) - { - // packedBgra = [aa rr gg bb] - // REVERSE(packedBgra) = [bb gg rr aa] - return BinaryPrimitives.ReverseEndianness(packedBgra); - } + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToRgba32(uint packedBgra) - { - // packedRgba = [aa rr gg bb] - // tmp1 = [aa 00 gg 00] - // tmp2 = [00 rr 00 bb] - // tmp3=ROTL(16, tmp2) = [00 bb 00 rr] - // tmp1 + tmp3 = [aa bb gg rr] - uint tmp1 = packedBgra & 0xFF00FF00; - uint tmp2 = packedBgra & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - return tmp1 + tmp3; - } + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); } } -} \ No newline at end of file +} diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs index 7d6c2efedf..a933f890fc 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs @@ -168,49 +168,27 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion [Benchmark] public void PixelConverter_Rgba32_ToArgb32() { - ref uint sBase = ref Unsafe.As(ref this.PermutedRunnerRgbaToArgb.Source[0]); - ref uint dBase = ref Unsafe.As(ref this.PermutedRunnerRgbaToArgb.Dest[0]); + Span source = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Source); + Span dest = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Dest); - for (int i = 0; i < this.Count; i++) - { - uint s = 
Unsafe.Add(ref sBase, i); - Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s); - } - } - - [Benchmark] - public void PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer() - { - Span source = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Source); - Span dest = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Dest); - source.CopyTo(dest); - - ref uint dBase = ref MemoryMarshal.GetReference(dest); - - for (int i = 0; i < this.Count; i++) - { - uint s = Unsafe.Add(ref dBase, i); - Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s); - } + PixelConverter.FromRgba32.ToArgb32(source, dest); } /* RESULTS: - Method | Count | Mean | Error | StdDev | Scaled | ScaledSD | - ---------------------------------------------------------- |------ |-----------:|-----------:|-----------:|-------:|---------:| - ByRef | 256 | 328.7 ns | 6.6141 ns | 6.1868 ns | 1.00 | 0.00 | - ByVal | 256 | 322.0 ns | 4.3541 ns | 4.0728 ns | 0.98 | 0.02 | - FromBytes | 256 | 321.5 ns | 3.3499 ns | 3.1335 ns | 0.98 | 0.02 | - InlineShuffle | 256 | 330.7 ns | 4.2525 ns | 3.9778 ns | 1.01 | 0.02 | - PixelConverter_Rgba32_ToArgb32 | 256 | 167.4 ns | 0.6357 ns | 0.5309 ns | 0.51 | 0.01 | - PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 256 | 196.6 ns | 0.8929 ns | 0.7915 ns | 0.60 | 0.01 | - | | | | | | | - ByRef | 2048 | 2,534.4 ns | 8.2947 ns | 6.9265 ns | 1.00 | 0.00 | - ByVal | 2048 | 2,638.5 ns | 52.6843 ns | 70.3320 ns | 1.04 | 0.03 | - FromBytes | 2048 | 2,517.2 ns | 40.8055 ns | 38.1695 ns | 0.99 | 0.01 | - InlineShuffle | 2048 | 2,546.5 ns | 21.2506 ns | 19.8778 ns | 1.00 | 0.01 | - PixelConverter_Rgba32_ToArgb32 | 2048 | 1,265.7 ns | 5.1397 ns | 4.5562 ns | 0.50 | 0.00 | - PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 2048 | 1,410.3 ns | 11.1939 ns | 9.9231 ns | 0.56 | 0.00 | - */ + | Method | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | + |------------------------------- |------ 
|------------:|----------:|----------:|------------:|------:|--------:| + | ByRef | 256 | 288.84 ns | 19.601 ns | 52.319 ns | 268.10 ns | 1.00 | 0.00 | + | ByVal | 256 | 267.97 ns | 1.831 ns | 1.713 ns | 267.85 ns | 0.77 | 0.18 | + | FromBytes | 256 | 266.81 ns | 2.427 ns | 2.270 ns | 266.47 ns | 0.76 | 0.18 | + | InlineShuffle | 256 | 291.41 ns | 5.820 ns | 5.444 ns | 290.17 ns | 0.83 | 0.19 | + | PixelConverter_Rgba32_ToArgb32 | 256 | 38.62 ns | 0.431 ns | 0.403 ns | 38.68 ns | 0.11 | 0.03 | + | | | | | | | | | + | ByRef | 2048 | 2,197.69 ns | 15.826 ns | 14.804 ns | 2,197.25 ns | 1.00 | 0.00 | + | ByVal | 2048 | 2,226.81 ns | 44.266 ns | 62.054 ns | 2,197.17 ns | 1.03 | 0.04 | + | FromBytes | 2048 | 2,181.35 ns | 18.033 ns | 16.868 ns | 2,185.97 ns | 0.99 | 0.01 | + | InlineShuffle | 2048 | 2,233.10 ns | 27.673 ns | 24.531 ns | 2,229.78 ns | 1.02 | 0.01 | + | PixelConverter_Rgba32_ToArgb32 | 2048 | 139.90 ns | 2.152 ns | 3.825 ns | 138.70 ns | 0.06 | 0.00 | + */ } } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs index 6fda9dbbad..9d0d09a983 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs @@ -13,34 +13,49 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats { public static class ReferenceImplementations { - public static Rgba32 MakeRgba32(byte r, byte g, byte b, byte a) + public static byte[] MakeRgba32ByteArray(byte r, byte g, byte b, byte a) { - Rgba32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = r; + buffer[i + 1] = g; + buffer[i + 2] = b; + buffer[i + 3] = a; + } + + return buffer; } - public static Argb32 MakeArgb32(byte r, byte g, byte b, byte a) + public static byte[] 
MakeArgb32ByteArray(byte r, byte g, byte b, byte a) { - Argb32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = a; + buffer[i + 1] = r; + buffer[i + 2] = g; + buffer[i + 3] = b; + } + + return buffer; } - public static Bgra32 MakeBgra32(byte r, byte g, byte b, byte a) + public static byte[] MakeBgra32ByteArray(byte r, byte g, byte b, byte a) { - Bgra32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = b; + buffer[i + 1] = g; + buffer[i + 2] = r; + buffer[i + 3] = a; + } + + return buffer; } internal static void To( @@ -83,8 +98,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats if (typeof(TDestinationPixel) == typeof(L8)) { - ref L8 l8Ref = ref MemoryMarshal.GetReference( - MemoryMarshal.Cast(destinationPixels)); + ref L8 l8Ref = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(destinationPixels)); for (int i = 0; i < count; i++) { ref TSourcePixel sp = ref Unsafe.Add(ref sourceRef, i); diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs index 3de6804dcf..6eed875f38 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. 
+using System; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats.Utils; @@ -33,30 +34,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToArgb32(byte r, byte g, byte b, byte a) { - Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromRgba32.ToArgb32(s.PackedValue); + PixelConverter.FromRgba32.ToArgb32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToBgra32(byte r, byte g, byte b, byte a) { - Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromRgba32.ToBgra32(s.PackedValue); + PixelConverter.FromRgba32.ToBgra32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } @@ -66,30 +65,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToRgba32(byte r, byte g, byte b, byte a) { - Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromArgb32.ToRgba32(s.PackedValue); + PixelConverter.FromArgb32.ToRgba32(source, actual); - // Assert: - uint 
expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToBgra32(byte r, byte g, byte b, byte a) { - Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromArgb32.ToBgra32(s.PackedValue); + PixelConverter.FromArgb32.ToBgra32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } @@ -99,30 +96,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToArgb32(byte r, byte g, byte b, byte a) { - Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromBgra32.ToArgb32(s.PackedValue); + PixelConverter.FromBgra32.ToArgb32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToRgba32(byte r, byte g, byte b, byte a) { - Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromBgra32.ToRgba32(s.PackedValue); + 
PixelConverter.FromBgra32.ToRgba32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } } From aa20c09c4896738ba1df05ace362c2d08f64854d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 28 Oct 2020 22:07:54 +0000 Subject: [PATCH 081/104] Update based on feedback --- .../Common/Helpers/IComponentShuffle.cs | 165 ++++++++++++++++++ .../Common/Helpers/SimdUtils.Shuffle.cs | 142 +-------------- .../PixelFormats/Utils/PixelConverter.cs | 13 +- .../Color/Bulk/ShuffleByte4Channel.cs | 2 +- .../Color/Bulk/ShuffleFloat4Channel.cs | 2 +- .../Config.HwIntrinsics.cs | 11 +- .../Common/SimdUtilsTests.Shuffle.cs | 93 ++++++---- 7 files changed, 245 insertions(+), 183 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/IComponentShuffle.cs diff --git a/src/ImageSharp/Common/Helpers/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/IComponentShuffle.cs new file mode 100644 index 0000000000..e354a57b00 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/IComponentShuffle.cs @@ -0,0 +1,165 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers.Binary; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + /// Defines the contract for methods that allow the shuffling of pixel components. + /// Used for shuffling on platforms that do not support Hardware Intrinsics. + /// + internal interface IComponentShuffle + { + /// + /// Gets the shuffle control. + /// + byte Control { get; } + + /// + /// Shuffle 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. 
+ void RunFallbackShuffle(ReadOnlySpan source, Span dest); + } + + internal readonly struct DefaultShuffle4 : IComponentShuffle + { + public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) + : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0)) + { + } + + public DefaultShuffle4(byte control) => this.Control = control; + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + SimdUtils.Shuffle.InverseMmShuffle( + this.Control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + } + + internal readonly struct WXYZShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) + // and ROTR (Rotate Right) emitting efficient CPU instructions: + // https://github.com/dotnet/coreclr/pull/1830 + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTL(8, packed) = [Z Y X W] + Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); + } + } + } + + internal readonly struct WZYXShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + + 
[MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // REVERSE(packed) = [X Y Z W] + Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); + } + } + } + + internal readonly struct YZWXShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTR(8, packed) = [X W Z Y] + Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); + } + } + } + + internal readonly struct ZYXWShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed & 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + + Unsafe.Add(ref dBase, i) =
tmp1 + tmp3; + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 59b6254192..febb31c2f8 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Buffers.Binary; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -40,34 +39,32 @@ namespace SixLabors.ImageSharp } /// - /// Shuffle 8-bit integers in a within 128-bit lanes in + /// Shuffle 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// /// The source span of bytes. /// The destination span of bytes. - /// The byte control. + /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Channel( + public static void Shuffle4Channel( ReadOnlySpan source, Span dest, - byte control) + TShuffle shuffle) + where TShuffle : struct, IComponentShuffle { VerifyShuffleSpanInput(source, dest); - // TODO: There doesn't seem to be any APIs for - // System.Numerics that allow shuffling. 
#if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); + HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: if (source.Length > 0) { - ShuffleRemainder4Channel(source, dest, control); + shuffle.RunFallbackShuffle(source, dest); } } - [MethodImpl(InliningOptions.ColdPath)] public static void ShuffleRemainder4Channel( ReadOnlySpan source, Span dest, @@ -86,125 +83,6 @@ namespace SixLabors.ImageSharp } } - [MethodImpl(InliningOptions.ColdPath)] - public static void ShuffleRemainder4Channel( - ReadOnlySpan source, - Span dest, - byte control) - { -#if NETCOREAPP - // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) - // and ROTR (Rotate Right) emitting efficient CPU instructions: - // https://github.com/dotnet/coreclr/pull/1830 - switch (control) - { - case Shuffle.WXYZ: - WXYZ(source, dest); - return; - case Shuffle.WZYX: - WZYX(source, dest); - return; - case Shuffle.YZWX: - YZWX(source, dest); - return; - case Shuffle.ZYXW: - ZYXW(source, dest); - return; - } -#endif - - ref byte sBase = ref MemoryMarshal.GetReference(source); - ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); - - for (int i = 0; i < source.Length; i += 4) - { - Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); - Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); - Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); - Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void WXYZ(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, 
i); - - // packed = [W Z Y X] - // ROTL(8, packed) = [Z Y X W] - Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void ZYXW(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // tmp1 = [W 0 Y 0] - // tmp2 = [0 Z 0 X] - // tmp3=ROTL(16, tmp2) = [0 X 0 Z] - // tmp1 + tmp3 = [W X Y Z] - uint tmp1 = packed & 0xFF00FF00; - uint tmp2 = packed & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - - Unsafe.Add(ref dBase, i) = tmp1 + tmp3; - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void WZYX(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // REVERSE(packedArgb) = [X Y Z W] - Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void YZWX(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // ROTR(8, packedArgb) = [Y Z W X] - Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); - } - } - [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct @@ -222,12 
+100,6 @@ namespace SixLabors.ImageSharp public static class Shuffle { - public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3; - public const byte WZYX = (0 << 6) | (1 << 4) | (2 << 2) | 3; - public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; - public const byte YZWX = (0 << 6) | (3 << 4) | (2 << 2) | 1; - public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; - [MethodImpl(InliningOptions.ShortMethod)] public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index bc24258c91..ab9011a5c7 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Buffers.Binary; using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.PixelFormats.Utils @@ -28,7 +27,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WXYZ); + => SimdUtils.Shuffle4Channel(source, dest, default); /// /// Converts a representing a collection of @@ -37,7 +36,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); + => SimdUtils.Shuffle4Channel(source, dest, default); } public static class FromArgb32 @@ -49,7 +48,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.YZWX); + => SimdUtils.Shuffle4Channel(source, dest, default); /// 
/// Converts a representing a collection of @@ -58,7 +57,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); + => SimdUtils.Shuffle4Channel(source, dest, default); } public static class FromBgra32 @@ -70,7 +69,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); + => SimdUtils.Shuffle4Channel(source, dest, default); /// /// Converts a representing a collection of @@ -79,7 +78,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); + => SimdUtils.Shuffle4Channel(source, dest, default); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index c45b103e38..bd4a8d534b 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + SimdUtils.Shuffle4Channel(this.source, this.destination, default); } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 36b9591d9d..04c6dbf21d 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -26,7 +26,7 @@ namespace 
SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + SimdUtils.Shuffle4Channel(this.source, this.destination, default(WXYZShuffle4).Control); } } diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs index e8a06bf24e..eacd36799d 100644 --- a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -58,6 +58,12 @@ namespace SixLabors.ImageSharp.Benchmarks { public HwIntrinsics_SSE_AVX() { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables( + new EnvironmentVariable(EnableHWIntrinsic, Off), + new EnvironmentVariable(FeatureSIMD, Off)) + .WithId("No HwIntrinsics")); + #if SUPPORTS_RUNTIME_INTRINSICS if (Avx.IsSupported) { @@ -72,11 +78,6 @@ namespace SixLabors.ImageSharp.Benchmarks .WithId("SSE")); } #endif - this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) - .WithEnvironmentVariables( - new EnvironmentVariable(EnableHWIntrinsic, Off), - new EnvironmentVariable(FeatureSIMD, Off)) - .WithId("No HwIntrinsics")); } } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 94298f94ca..06f61e617d 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -9,66 +9,91 @@ namespace SixLabors.ImageSharp.Tests.Common { public partial class SimdUtilsTests { - public static readonly TheoryData ShuffleControls = - new TheoryData - { - SimdUtils.Shuffle.WXYZ, - SimdUtils.Shuffle.WZYX, - SimdUtils.Shuffle.XYZW, - SimdUtils.Shuffle.YZWX, - SimdUtils.Shuffle.ZYXW, - SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0), - SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1), - SimdUtils.Shuffle.MmShuffle(3, 3, 3, 3) - }; - [Theory] - [MemberData(nameof(ShuffleControls))] - public 
void BulkShuffleFloat4Channel(byte control) + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleFloat4Channel(int count) { static void RunTest(string serialized) { - byte ctrl = FeatureTestRunner.Deserialize(serialized); - foreach (var item in ArraySizesDivisibleBy4) - { - foreach (var count in item) - { - TestShuffleFloat4Channel( - (int)count, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), - ctrl); - } - } + // No need to test multiple shuffle controls as the + // pipeline is always the same. + int size = FeatureTestRunner.Deserialize(serialized); + byte control = default(WZYXShuffle4).Control; + + TestShuffleFloat4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control), + control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - control, + count, HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); } [Theory] - [MemberData(nameof(ShuffleControls))] - public void BulkShuffleByte4Channel(byte control) + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleByte4Channel(int count) { static void RunTest(string serialized) { - byte ctrl = FeatureTestRunner.Deserialize(serialized); + int size = FeatureTestRunner.Deserialize(serialized); foreach (var item in ArraySizesDivisibleBy4) { + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IComponentShuffle to the generic utils method. 
foreach (var count in item) { + WXYZShuffle4 wxyz = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz), + wxyz.Control); + + WZYXShuffle4 wzyx = default; TestShuffleByte4Channel( - (int)count, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), - ctrl); + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx), + wzyx.Control); + + YZWXShuffle4 yzwx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx), + yzwx.Control); + + ZYXWShuffle4 zyxw = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw), + zyxw.Control); + + var xwyz = new DefaultShuffle4(2, 1, 3, 0); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4(1, 1, 1, 1); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4(3, 3, 3, 3); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww), + wwww.Control); } } } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - control, + count, HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } From cdc1c0fce57544bae85a4f9766fcb3403976ed1a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 29 Oct 2020 01:48:46 +0000 Subject: [PATCH 082/104] Fix benchmarks, cleanup. 
--- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 24 ++------ .../Common/Helpers/SimdUtils.Shuffle.cs | 2 - .../Color/Bulk/ShuffleByte4Channel.cs | 57 +++++++++--------- .../Color/Bulk/ShuffleFloat4Channel.cs | 60 +++++++++---------- .../Config.HwIntrinsics.cs | 6 +- 5 files changed, 67 insertions(+), 82 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 367df03ec3..782328eddf 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -33,15 +33,9 @@ namespace SixLabors.ImageSharp { if (Avx.IsSupported || Sse.IsSupported) { - int remainder; - if (Avx.IsSupported) - { - remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); - } - else - { - remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); - } + int remainder = Avx.IsSupported + ? ImageMaths.ModuloP2(source.Length, Vector256.Count) + : ImageMaths.ModuloP2(source.Length, Vector128.Count); int adjustedCount = source.Length - remainder; @@ -73,15 +67,9 @@ namespace SixLabors.ImageSharp { if (Avx2.IsSupported || Ssse3.IsSupported) { - int remainder; - if (Avx2.IsSupported) - { - remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); - } - else - { - remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); - } + int remainder = Avx2.IsSupported + ? 
ImageMaths.ModuloP2(source.Length, Vector256.Count) + : ImageMaths.ModuloP2(source.Length, Vector128.Count); int adjustedCount = source.Length - remainder; diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index febb31c2f8..a4a40fb4fa 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -25,8 +25,6 @@ namespace SixLabors.ImageSharp { VerifyShuffleSpanInput(source, dest); - // TODO: There doesn't seem to be any APIs for - // System.Numerics that allow shuffling. #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); #endif diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index bd4a8d534b..749859eac9 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -30,39 +30,38 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk } } - // 2020-10-26 + // 2020-10-29 // ########## // // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) - // Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores - // .NET Core SDK = 5.0.100-rc.2.20479.15 - // - // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. 
No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |---------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Shuffle4Channel | AVX | Empty | 128 | 20.51 ns | 0.270 ns | 0.211 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.00 ns | 0.991 ns | 0.927 ns | 3.08 | 0.06 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 17.25 ns | 0.066 ns | 0.058 ns | 0.84 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 256 | 24.57 ns | 0.248 ns | 0.219 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.55 ns | 2.501 ns | 2.456 ns | 5.06 | 0.10 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 21.80 ns | 0.094 ns | 0.088 ns | 0.89 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 512 | 28.51 ns | 0.130 ns | 0.115 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 256.52 ns | 1.424 ns | 1.332 ns | 9.00 | 0.07 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 29.72 ns | 0.217 ns | 0.203 ns | 1.04 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 1024 | 36.40 ns | 0.357 ns | 0.334 ns | 1.00 | 
0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 492.71 ns | 1.498 ns | 1.251 ns | 13.52 | 0.12 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 44.71 ns | 0.264 ns | 0.234 ns | 1.23 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 2048 | 59.38 ns | 0.180 ns | 0.159 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 975.05 ns | 2.043 ns | 1.811 ns | 16.42 | 0.05 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 81.83 ns | 0.212 ns | 0.198 ns | 1.38 | 0.01 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. 
SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 04c6dbf21d..6f5b5001be 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -10,6 +10,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class ShuffleFloat4Channel { + private static readonly byte control = default(WXYZShuffle4).Control; private float[] source; private float[] destination; @@ -26,43 +27,42 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, default(WXYZShuffle4).Control); + SimdUtils.Shuffle4Channel(this.source, this.destination, control); } } - // 2020-10-26 + // 2020-10-29 // ########## // // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) - // Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores - // .NET Core SDK = 5.0.100-rc.2.20479.15 - // - // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. 
AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |---------------- |-------------------------------------------------- |------ |------------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Shuffle4Channel | AVX | Empty | 128 | 14.49 ns | 0.244 ns | 0.217 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 87.74 ns | 0.524 ns | 0.490 ns | 6.06 | 0.09 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 23.65 ns | 0.101 ns | 0.094 ns | 1.63 | 0.03 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 256 | 25.87 ns | 0.492 ns | 0.673 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 159.52 ns | 0.901 ns | 0.843 ns | 6.12 | 0.12 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 45.47 ns | 0.404 ns | 0.378 ns | 1.75 | 0.03 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 512 | 49.51 ns | 0.088 ns | 0.083 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 297.96 ns | 0.926 ns | 0.821 ns | 6.02 | 0.02 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 90.77 ns | 0.191 ns | 0.169 ns | 1.83 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 1024 | 113.09 ns | 1.913 ns | 3.090 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | 
COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 604.58 ns | 1.464 ns | 1.298 ns | 5.29 | 0.18 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 179.44 ns | 0.208 ns | 0.184 ns | 1.57 | 0.05 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 2048 | 217.95 ns | 1.314 ns | 1.165 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 1,152.04 ns | 3.941 ns | 3.494 ns | 5.29 | 0.03 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 349.52 ns | 0.587 ns | 0.520 ns | 1.60 | 0.01 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 9.818 ns | 0.1457 ns | 0.1292 ns | 0.15 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 15.878 ns | 0.1983 ns | 0.1758 ns | 0.13 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 29.452 ns | 0.3334 ns | 0.3118 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 53.757 ns | 0.3212 ns | 0.2847 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - | } diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs index eacd36799d..5ceb4c8a00 100644 --- a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -62,20 +62,20 @@ namespace SixLabors.ImageSharp.Benchmarks .WithEnvironmentVariables( new EnvironmentVariable(EnableHWIntrinsic, Off), new EnvironmentVariable(FeatureSIMD, Off)) - .WithId("No HwIntrinsics")); + .WithId("1. No HwIntrinsics").AsBaseline()); #if SUPPORTS_RUNTIME_INTRINSICS if (Avx.IsSupported) { this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) - .WithId("AVX").AsBaseline()); + .WithId("2. 
AVX")); } if (Sse.IsSupported) { this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) .WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off)) - .WithId("SSE")); + .WithId("3. SSE")); } #endif } From 33df55aef25562c576568992a1ef439f81acfb4f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 29 Oct 2020 15:21:52 +0000 Subject: [PATCH 083/104] Don't use Linq and test for common path first. --- src/ImageSharp/Image{TPixel}.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Image{TPixel}.cs b/src/ImageSharp/Image{TPixel}.cs index 255193c8ea..83ecc37530 100644 --- a/src/ImageSharp/Image{TPixel}.cs +++ b/src/ImageSharp/Image{TPixel}.cs @@ -201,14 +201,14 @@ namespace SixLabors.ImageSharp public bool TryGetSinglePixelSpan(out Span span) { IMemoryGroup mg = this.GetPixelMemoryGroup(); - if (mg.Count > 1) + if (mg.Count == 1) { - span = default; - return false; + span = mg[0].Span; + return true; } - span = mg.Single().Span; - return true; + span = default; + return false; } /// From c4f7f67fd5d18ccb8699392a6cdbf192990fe294 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 30 Oct 2020 20:38:46 +0000 Subject: [PATCH 084/104] Initial 3padshuffle4 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 84 +++++++++++++++++-- .../Common/Helpers/SimdUtils.Shuffle.cs | 63 ++++++++++++-- .../PixelFormats/Utils/PixelConverter.cs | 12 +-- .../Color/Bulk/Pad3Shuffle4Channel.cs | 67 +++++++++++++++ .../Color/Bulk/ShuffleByte4Channel.cs | 2 +- .../Color/Bulk/ShuffleFloat4Channel.cs | 2 +- .../Common/SimdUtilsTests.Shuffle.cs | 71 ++++++++++++++-- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 2 + 8 files changed, 274 insertions(+), 29 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 782328eddf..8a0b5460c5 100644 --- 
a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4ChannelReduce( + public static void Shuffle4Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -41,7 +41,7 @@ namespace SixLabors.ImageSharp if (adjustedCount > 0) { - Shuffle4Channel( + Shuffle4( source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount), control); @@ -53,14 +53,14 @@ namespace SixLabors.ImageSharp } /// - /// Shuffle 8-bit integers in a within 128-bit lanes in + /// Shuffle 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// /// The source span of bytes. /// The destination span of bytes. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4ChannelReduce( + public static void Shuffle4Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -75,7 +75,7 @@ namespace SixLabors.ImageSharp if (adjustedCount > 0) { - Shuffle4Channel( + Shuffle4( source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount), control); @@ -86,8 +86,41 @@ namespace SixLabors.ImageSharp } } + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static unsafe void Pad3Shuffle4Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + + int adjustedCount = source.Length - remainder; + int sourceSlice = (int)(adjustedCount * (3 / 4F)); + + if (adjustedCount > 0) + { + Pad3Shuffle4( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(sourceSlice); + dest = dest.Slice(adjustedCount); + } + } + } + [MethodImpl(InliningOptions.ShortMethod)] - private static void Shuffle4Channel( + private static void Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -165,7 +198,7 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ShortMethod)] - private static void Shuffle4Channel( + private static void Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -246,6 +279,43 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + private static unsafe void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + Vector128 wMask = Vector128.Create(0xff000000u).AsByte(); + Vector128 padMask = Vector128.Create(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + fixed (byte* sBase = &source.GetPinnableReference()) + fixed (byte* dBase = &dest.GetPinnableReference()) + { + byte* s = sBase; + byte* d = dBase; + + // TODO: Consider unrolling and shuffling 4 at a time using Ssse3.AlignRight + // See https://stackoverflow.com/questions/2973708/fast-24-bit-array-32-bit-array-conversion + for (int i = 0; i < source.Length; i += 16) + { + Vector128 vs0 = Sse2.LoadVector128(s); + Vector128 val = Sse2.Or(wMask, Ssse3.Shuffle(vs0, padMask)); + val = 
Ssse3.Shuffle(val, vcm); + Sse2.Store(d, val); + + s += 12; + d += 16; + } + } + } + } + /// /// Performs a multiplication and an addition of the . /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index a4a40fb4fa..81d77d6551 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Channel( + public static void Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -26,13 +26,13 @@ namespace SixLabors.ImageSharp VerifyShuffleSpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control); #endif // Deal with the remainder: if (source.Length > 0) { - ShuffleRemainder4Channel(source, dest, control); + Shuffle4Remainder(source, dest, control); } } @@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp /// The destination span of bytes. /// The type of shuffle to perform. 
[MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Channel( + public static void Shuffle4( ReadOnlySpan source, Span dest, TShuffle shuffle) @@ -53,7 +53,7 @@ namespace SixLabors.ImageSharp VerifyShuffleSpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control); + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: @@ -63,7 +63,26 @@ namespace SixLabors.ImageSharp } } - public static void ShuffleRemainder4Channel( + [MethodImpl(InliningOptions.ShortMethod)] + public static void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyPadShuffleSpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + Pad3Shuffle4Remainder(source, dest, control); + } + } + + public static void Shuffle4Remainder( ReadOnlySpan source, Span dest, byte control) @@ -81,6 +100,24 @@ namespace SixLabors.ImageSharp } } + public static void Pad3Shuffle4Remainder( + ReadOnlySpan source, + Span dest, + byte control) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + + for (int i = 0, j = 0; i < dest.Length; i += 4, j += 3) + { + Unsafe.Add(ref dBase, p0 + i) = Unsafe.Add(ref sBase, j); + Unsafe.Add(ref dBase, p1 + i) = Unsafe.Add(ref sBase, j + 1); + Unsafe.Add(ref dBase, p2 + i) = Unsafe.Add(ref sBase, j + 2); + Unsafe.Add(ref dBase, p3 + i) = byte.MaxValue; + } + } + [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct @@ -96,6 +133,20 @@ namespace SixLabors.ImageSharp "Input spans must be divisiable by 4!"); } + [Conditional("DEBUG")] + private static void 
VerifyPadShuffleSpanInput(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue( + source.Length == (int)(dest.Length * 3 / 4F), + nameof(source), + "Input spans must be 3/4 the length of the output span!"); + + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input spans must be divisiable by 3!"); + } + public static class Shuffle { [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index ab9011a5c7..5afd369be3 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); /// /// Converts a representing a collection of @@ -36,7 +36,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); } public static class FromArgb32 @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); /// /// Converts a representing a collection of @@ -57,7 +57,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); } public static class FromBgra32 @@ -69,7 +69,7 @@ namespace 
SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); /// /// Converts a representing a collection of @@ -78,7 +78,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs new file mode 100644 index 0000000000..c529b2af1e --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -0,0 +1,67 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Pad3Shuffle4Channel + { + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4(this.source, this.destination, default); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. 
No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index 749859eac9..db49470011 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, default); + SimdUtils.Shuffle4(this.source, this.destination, default); } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 6f5b5001be..4a2512fea2 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, control); + SimdUtils.Shuffle4(this.source, this.destination, control); } } diff --git 
a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 06f61e617d..1c456e5a22 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Common TestShuffleFloat4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, control), control); } @@ -49,43 +49,43 @@ namespace SixLabors.ImageSharp.Tests.Common WXYZShuffle4 wxyz = default; TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), wxyz.Control); WZYXShuffle4 wzyx = default; TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), wzyx.Control); YZWXShuffle4 yzwx = default; TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), yzwx.Control); ZYXWShuffle4 zyxw = default; TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), zyxw.Control); var xwyz = new DefaultShuffle4(2, 1, 3, 0); TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); var yyyy = new DefaultShuffle4(1, 1, 1, 1); TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); var wwww = new DefaultShuffle4(3, 3, 3, 3); TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), wwww.Control); } } @@ -97,6 +97,29 @@ namespace 
SixLabors.ImageSharp.Tests.Common HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkPad3Shuffle4Channel(int count) + { + static void RunTest(string serialized) + { + // No need to test multiple shuffle controls as the + // pipeline is always the same. + int size = FeatureTestRunner.Deserialize(serialized); + byte control = default(WZYXShuffle4).Control; + + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, control), + control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + } + private static void TestShuffleFloat4Channel( int count, Action, Memory> convert, @@ -157,5 +180,37 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(expected, result); } + + private static void TestPad3Shuffle4Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new byte[(int)(count * (4 / 3F))]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3) + { + expected[p0 + i] = source[j]; + expected[p1 + i] = source[j + 1]; + expected[p2 + i] = source[j + 2]; + expected[p3 + i] = byte.MaxValue; + } + + convert(source, result); + + Assert.Equal(expected, result); + } } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index bddadff4da..fe432107a2 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -163,6 +163,8 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArraySizesDivisibleBy8 = new TheoryData { 0, 8, 16, 1024 }; 
public static readonly TheoryData ArraySizesDivisibleBy4 = new TheoryData { 0, 4, 8, 28, 1020 }; + public static readonly TheoryData ArraySizesDivisibleBy3 = new TheoryData { 0, 3, 9, 36, 957 }; + public static readonly TheoryData ArraySizesDivisibleBy32 = new TheoryData { 0, 32, 512 }; From aec8e5d690fa7ccb38fce72acc8d0bd9b26d9758 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 30 Oct 2020 23:03:47 +0000 Subject: [PATCH 085/104] Add Shuffle4Slice3 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 77 +++++++++++++++++++ .../Common/Helpers/SimdUtils.Shuffle.cs | 72 +++++++++++++++-- .../Color/Bulk/Pad3Shuffle4Channel.cs | 47 ++++++----- .../Color/Bulk/Shuffle4Slice3Channel.cs | 68 ++++++++++++++++ .../Color/Bulk/ShuffleFloat4Channel.cs | 4 +- .../Common/SimdUtilsTests.Shuffle.cs | 64 +++++++++++++++ 6 files changed, 299 insertions(+), 33 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 8a0b5460c5..29b569a809 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -119,6 +119,39 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static unsafe void Shuffle4Slice3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = ImageMaths.ModuloP2(dest.Length, Vector128.Count); + + int adjustedCount = dest.Length - remainder; + int destSlice = (int)(adjustedCount * (3 / 4F)); + + if (adjustedCount > 0) + { + Shuffle4Slice3( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(destSlice); + } + } + } + [MethodImpl(InliningOptions.ShortMethod)] private static void Shuffle4( ReadOnlySpan source, @@ -316,6 +349,50 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + private static unsafe void Shuffle4Slice3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + Vector128 sliceMask = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, -1, -1, -1, -1).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + // var control = MmShuffle(3, 0, 1, 2); + // Span bytes = stackalloc byte[Vector128.Count]; + // MmShuffleSpan(ref bytes, control); + // Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + // + // Vector128 s0 = Vector128.Create((byte)1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1).Dump("s0"); + // Vector128 padded = Ssse3.Shuffle(s0, padMask).Dump("padded"); + // + // padded = Sse3.Or(Vector128.Create(0xff000000u).AsByte(), padded).Dump("0r"); + // + // var shuffled = Ssse3.Shuffle(padded, vcm).Dump("shuffled"); + // var d0 = Ssse3.Shuffle(shuffled, sliceMask).Dump("d0"); + fixed (byte* sBase = &source.GetPinnableReference()) + fixed (byte* dBase = &dest.GetPinnableReference()) + { + byte* s = sBase; + byte* d = dBase; + + for (int i = 0; i < source.Length; i += 16) + { + Vector128 vs0 = 
Ssse3.Shuffle(Sse2.LoadVector128(s), vcm); + Sse2.Store(d, Ssse3.Shuffle(vs0, sliceMask)); + + s += 16; + d += 12; + } + } + } + } + /// /// Performs a multiplication and an addition of the . /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 81d77d6551..f3946361b1 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -69,7 +69,7 @@ namespace SixLabors.ImageSharp Span dest, byte control) { - VerifyPadShuffleSpanInput(source, dest); + VerifyPad3Shuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, control); @@ -82,6 +82,25 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Slice3( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyShuffle4Slice3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + Shuffle4Slice3Remainder(source, dest, control); + } + } + public static void Shuffle4Remainder( ReadOnlySpan source, Span dest, @@ -118,6 +137,23 @@ namespace SixLabors.ImageSharp } } + public static void Shuffle4Slice3Remainder( + ReadOnlySpan source, + Span dest, + byte control) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int _, out int p2, out int p1, out int p0); + + for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j); + } + } + [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct 
@@ -130,21 +166,45 @@ namespace SixLabors.ImageSharp DebugGuard.IsTrue( source.Length % 4 == 0, nameof(source), - "Input spans must be divisiable by 4!"); + "Input spans must be divisable by 4!"); } [Conditional("DEBUG")] - private static void VerifyPadShuffleSpanInput(ReadOnlySpan source, Span dest) + private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan source, Span dest) { + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input span must be divisable by 3!"); + + DebugGuard.IsTrue( + dest.Length % 4 == 0, + nameof(dest), + "Output span must be divisable by 4!"); + DebugGuard.IsTrue( source.Length == (int)(dest.Length * 3 / 4F), nameof(source), - "Input spans must be 3/4 the length of the output span!"); + "Input span must be 3/4 the length of the output span!"); + } + [Conditional("DEBUG")] + private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan source, Span dest) + { DebugGuard.IsTrue( - source.Length % 3 == 0, + source.Length % 4 == 0, + nameof(source), + "Input span must be divisable by 4!"); + + DebugGuard.IsTrue( + dest.Length % 3 == 0, + nameof(dest), + "Output span must be divisable by 3!"); + + DebugGuard.IsTrue( + source.Length == (int)(dest.Length * 4 / 3F), nameof(source), - "Input spans must be divisiable by 3!"); + "Output span must be 3/4 the length of the input span!"); } public static class Shuffle diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs index c529b2af1e..8286fea0e5 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -9,6 +9,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Pad3Shuffle4Channel { + private static readonly byte Control = default(WXYZShuffle4).Control; private byte[] source; private byte[] destination; @@ -17,20 +18,20 @@ namespace 
SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { this.source = new byte[this.Count]; new Random(this.Count).NextBytes(this.source); - this.destination = new byte[this.Count]; + this.destination = new byte[(int)(this.Count * (4 / 3F))]; } [Params(96, 384, 768, 1536)] public int Count { get; set; } [Benchmark] - public void Shuffle4Channel() + public void Pad3Shuffle4() { - SimdUtils.Shuffle4(this.source, this.destination, default); + SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); } } - // 2020-10-29 + // 2020-10-30 // ########## // // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) @@ -43,25 +44,21 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 62.91 ns | 1.240 ns | 1.569 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 44.34 ns | 0.371 ns | 0.329 ns | 0.70 | 0.02 | - | - | - | - | + // | Pad3Shuffle4 | 3. 
SSE | COMPlus_EnableAVX=0 | 96 | 44.46 ns | 0.617 ns | 0.515 ns | 0.70 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 247.93 ns | 2.640 ns | 2.470 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 92.91 ns | 1.204 ns | 1.127 ns | 0.37 | 0.01 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 91.42 ns | 1.234 ns | 1.094 ns | 0.37 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 444.79 ns | 5.094 ns | 4.254 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 162.92 ns | 1.046 ns | 0.873 ns | 0.37 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 166.22 ns | 1.728 ns | 1.443 ns | 0.37 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 882.51 ns | 6.936 ns | 5.792 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 309.72 ns | 3.777 ns | 3.533 ns | 0.35 | 0.01 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 323.18 ns | 4.079 ns | 3.816 ns | 0.37 | 0.00 | - | - | - | - | } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs new file mode 100644 index 0000000000..b64379959d --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle4Slice3Channel + { + private static readonly byte Control = default(WXYZShuffle4).Control; + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[(int)(this.Count * (3 / 4F))]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Slice3() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 50.09 ns | 1.018 ns | 1.460 ns | 49.16 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. 
AVX | Empty | 128 | 35.28 ns | 0.106 ns | 0.089 ns | 35.30 ns | 0.69 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 35.13 ns | 0.247 ns | 0.231 ns | 35.22 ns | 0.69 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 101.48 ns | 0.875 ns | 0.819 ns | 101.60 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 53.25 ns | 0.518 ns | 0.433 ns | 53.21 ns | 0.52 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 57.21 ns | 0.508 ns | 0.451 ns | 57.38 ns | 0.56 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 202.53 ns | 0.884 ns | 0.827 ns | 202.40 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 82.55 ns | 0.418 ns | 0.391 ns | 82.59 ns | 0.41 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 82.89 ns | 1.057 ns | 0.989 ns | 82.48 ns | 0.41 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 398.79 ns | 7.807 ns | 6.921 ns | 395.67 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 144.51 ns | 1.033 ns | 0.966 ns | 144.42 ns | 0.36 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 143.77 ns | 0.820 ns | 0.684 ns | 143.62 ns | 0.36 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 798.44 ns | 4.447 ns | 3.472 ns | 799.39 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. 
AVX | Empty | 2048 | 277.12 ns | 1.723 ns | 1.612 ns | 276.93 ns | 0.35 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 275.70 ns | 1.796 ns | 1.500 ns | 275.51 ns | 0.35 | 0.00 | - | - | - | - || +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 4a2512fea2..86b1f766e1 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class ShuffleFloat4Channel { - private static readonly byte control = default(WXYZShuffle4).Control; + private static readonly byte Control = default(WXYZShuffle4).Control; private float[] source; private float[] destination; @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4(this.source, this.destination, control); + SimdUtils.Shuffle4(this.source, this.destination, Control); } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 1c456e5a22..f801cd28b5 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -120,6 +120,29 @@ namespace SixLabors.ImageSharp.Tests.Common HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); } + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffle4Slice3Channel(int count) + { + static void RunTest(string serialized) + { + // No need to test multiple shuffle controls as the + // pipeline is always the same. 
+ int size = FeatureTestRunner.Deserialize(serialized); + byte control = default(WZYXShuffle4).Control; + + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, control), + control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + } + private static void TestShuffleFloat4Channel( int count, Action, Memory> convert, @@ -210,6 +233,47 @@ namespace SixLabors.ImageSharp.Tests.Common convert(source, result); + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + + Assert.Equal(expected, result); + } + + private static void TestShuffle4Slice3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new byte[(int)(count * (3 / 4F))]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < expected.Length; i += 3, j += 4) + { + expected[i] = source[p0 + j]; + expected[i + 1] = source[p1 + j]; + expected[i + 2] = source[p2 + j]; + } + + convert(source, result); + + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + Assert.Equal(expected, result); } } From 5d401ac902ef753316d0d404d2af69de84092d81 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 30 Oct 2020 23:22:17 +0000 Subject: [PATCH 086/104] Cleanup --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 29b569a809..5cba631ec9 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -363,18 +363,6 @@ namespace 
SixLabors.ImageSharp Shuffle.MmShuffleSpan(ref bytes, control); Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - // var control = MmShuffle(3, 0, 1, 2); - // Span bytes = stackalloc byte[Vector128.Count]; - // MmShuffleSpan(ref bytes, control); - // Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - // - // Vector128 s0 = Vector128.Create((byte)1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1).Dump("s0"); - // Vector128 padded = Ssse3.Shuffle(s0, padMask).Dump("padded"); - // - // padded = Sse3.Or(Vector128.Create(0xff000000u).AsByte(), padded).Dump("0r"); - // - // var shuffled = Ssse3.Shuffle(padded, vcm).Dump("shuffled"); - // var d0 = Ssse3.Shuffle(shuffled, sliceMask).Dump("d0"); fixed (byte* sBase = &source.GetPinnableReference()) fixed (byte* dBase = &dest.GetPinnableReference()) { From bc647a402ace71e64a4c9b32944cafdd7359950f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 31 Oct 2020 00:23:38 +0000 Subject: [PATCH 087/104] fix spans directly --- src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 5cba631ec9..d6e45026b7 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -327,8 +327,8 @@ namespace SixLabors.ImageSharp Shuffle.MmShuffleSpan(ref bytes, control); Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - fixed (byte* sBase = &source.GetPinnableReference()) - fixed (byte* dBase = &dest.GetPinnableReference()) + fixed (byte* sBase = source) + fixed (byte* dBase = dest) { byte* s = sBase; byte* d = dBase; @@ -363,8 +363,8 @@ namespace SixLabors.ImageSharp Shuffle.MmShuffleSpan(ref bytes, control); Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - fixed (byte* sBase = 
&source.GetPinnableReference()) - fixed (byte* dBase = &dest.GetPinnableReference()) + fixed (byte* sBase = source) + fixed (byte* dBase = dest) { byte* s = sBase; byte* d = dBase; From d933ed6480992a81744fdd8322f57adf0e0d173c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 31 Oct 2020 18:58:59 +0000 Subject: [PATCH 088/104] Faster Pad3Shuffle4 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 58 ++++++++++--------- .../Common/Helpers/SimdUtils.Shuffle.cs | 2 +- .../Color/Bulk/Pad3Shuffle4Channel.cs | 34 +++++------ .../Common/SimdUtilsTests.Shuffle.cs | 4 +- 4 files changed, 52 insertions(+), 46 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index d6e45026b7..5083a3c03d 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -101,20 +101,20 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - int remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + int remainder = source.Length % (Vector128.Count * 3); - int adjustedCount = source.Length - remainder; - int sourceSlice = (int)(adjustedCount * (3 / 4F)); + int sourceCount = source.Length - remainder; + int destCount = (int)(sourceCount * (4 / 3D)); - if (adjustedCount > 0) + if (sourceCount > 0) { Pad3Shuffle4( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount), + source.Slice(0, sourceCount), + dest.Slice(0, destCount), control); - source = source.Slice(sourceSlice); - dest = dest.Slice(adjustedCount); + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); } } } @@ -320,31 +320,37 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 wMask = Vector128.Create(0xff000000u).AsByte(); - Vector128 padMask = Vector128.Create(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1).AsByte(); + Vector128 vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 
9, 10, 11, 0x80).AsByte(); + Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - fixed (byte* sBase = source) - fixed (byte* dBase = dest) + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 3, j += 4) { - byte* s = sBase; - byte* d = dBase; + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); - // TODO: Consider unrolling and shuffling 4 at a time using Ssse3.AlignRight - // See https://stackoverflow.com/questions/2973708/fast-24-bit-array-32-bit-array-conversion - for (int i = 0; i < source.Length; i += 16) - { - Vector128 vs0 = Sse2.LoadVector128(s); - Vector128 val = Sse2.Or(wMask, Ssse3.Shuffle(vs0, padMask)); - val = Ssse3.Shuffle(val, vcm); - Sse2.Store(d, val); + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); - s += 12; - d += 16; - } + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 1) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 2) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 3) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index f3946361b1..54ca2a73ec 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs 
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp "Output span must be divisable by 4!"); DebugGuard.IsTrue( - source.Length == (int)(dest.Length * 3 / 4F), + source.Length == (int)(dest.Length * 3 / 4D), nameof(source), "Input span must be 3/4 the length of the output span!"); } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs index 8286fea0e5..9eb1e109be 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -44,21 +44,21 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 62.91 ns | 1.240 ns | 1.569 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 44.34 ns | 0.371 ns | 0.329 ns | 0.70 | 0.02 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 44.46 ns | 0.617 ns | 0.515 ns | 0.70 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 247.93 ns | 2.640 ns | 2.470 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 92.91 ns | 1.204 ns | 1.127 ns | 0.37 | 0.01 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 91.42 ns | 1.234 ns | 1.094 ns | 0.37 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 444.79 ns | 5.094 ns | 4.254 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 162.92 ns | 1.046 ns | 0.873 ns | 0.37 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 166.22 ns | 1.728 ns | 1.443 ns | 0.37 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 882.51 ns | 6.936 ns | 5.792 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 309.72 ns | 3.777 ns | 3.533 ns | 0.35 | 0.01 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 323.18 ns | 4.079 ns | 3.816 ns | 0.37 | 0.00 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |------------- |------------------- |-------------------------------------------------- |------ |----------:|----------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 71.84 ns | 1.491 ns | 3.146 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.14 ns | 0.186 ns | 0.165 ns | 0.33 | 0.02 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 22.94 ns | 0.127 ns | 0.112 ns | 0.33 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 230.90 ns | 0.877 ns | 0.777 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 39.54 ns | 0.601 ns | 0.533 ns | 0.17 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. 
SSE | COMPlus_EnableAVX=0 | 384 | 39.88 ns | 0.657 ns | 0.548 ns | 0.17 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 451.67 ns | 2.932 ns | 2.448 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 60.79 ns | 1.200 ns | 1.561 ns | 0.13 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.30 ns | 0.469 ns | 0.416 ns | 0.14 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 896.11 ns | 10.358 ns | 9.689 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 109.72 ns | 2.149 ns | 2.300 ns | 0.12 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 108.70 ns | 0.994 ns | 0.881 ns | 0.12 | 0.00 | - | - | - | - | } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index f801cd28b5..26f85dd76c 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -212,7 +212,7 @@ namespace SixLabors.ImageSharp.Tests.Common byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[(int)(count * (4 / 3F))]; + var result = new byte[(int)(count * (4 / 3D))]; byte[] expected = new byte[result.Length]; @@ -249,7 +249,7 @@ namespace SixLabors.ImageSharp.Tests.Common byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[(int)(count * (3 / 4F))]; + var result = new byte[(int)(count * (3 / 4D))]; byte[] expected = new byte[result.Length]; From 0392e082ab8345614e8c39482f44e29130eff0e9 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 31 Oct 2020 21:35:23 +0000 Subject: [PATCH 089/104] Faster Shuffle4Slice3 --- 
.../Common/Helpers/SimdUtils.HwIntrinsics.cs | 88 ++++++++++++------- .../Common/Helpers/SimdUtils.Shuffle.cs | 4 +- 2 files changed, 56 insertions(+), 36 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 5083a3c03d..abda6c4df6 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -104,7 +104,7 @@ namespace SixLabors.ImageSharp int remainder = source.Length % (Vector128.Count * 3); int sourceCount = source.Length - remainder; - int destCount = (int)(sourceCount * (4 / 3D)); + int destCount = sourceCount * 4 / 3; if (sourceCount > 0) { @@ -134,20 +134,20 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - int remainder = ImageMaths.ModuloP2(dest.Length, Vector128.Count); + int remainder = source.Length % (Vector128.Count * 4); - int adjustedCount = dest.Length - remainder; - int destSlice = (int)(adjustedCount * (3 / 4F)); + int sourceCount = source.Length - remainder; + int destCount = sourceCount * 3 / 4; - if (adjustedCount > 0) + if (sourceCount > 0) { Shuffle4Slice3( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount), + source.Slice(0, sourceCount), + dest.Slice(0, destCount), control); - source = source.Slice(adjustedCount); - dest = dest.Slice(destSlice); + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); } } } @@ -243,7 +243,7 @@ namespace SixLabors.ImageSharp // We can add static ROS instances if need be in the future. 
Span bytes = stackalloc byte[Vector256.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector256 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -260,17 +260,17 @@ namespace SixLabors.ImageSharp ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); - vd0 = Avx2.Shuffle(vs0, vcm); - Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vcm); - Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vcm); - Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + vd0 = Avx2.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); } if (m > 0) { for (int i = u; i < n; i++) { - Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); } } } @@ -279,7 +279,7 @@ namespace SixLabors.ImageSharp // Ssse3 Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -296,17 +296,17 @@ namespace SixLabors.ImageSharp ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); - vd0 = Ssse3.Shuffle(vs0, vcm); - Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vcm); - Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vcm); - Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + vd0 = 
Ssse3.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); } if (m > 0) { for (int i = u; i < n; i++) { - Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); } } } @@ -363,26 +363,46 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 sliceMask = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, -1, -1, -1, -1).AsByte(); + Vector128 vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte(); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte(); Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - fixed (byte* sBase = source) - fixed (byte* dBase = dest) + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 4, j += 3) { - byte* s = sBase; - byte* d = dBase; + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Unsafe.Add(ref v0, 3); - for (int i = 0; i < source.Length; i += 16) - { - Vector128 vs0 = Ssse3.Shuffle(Sse2.LoadVector128(s), vcm); - Sse2.Store(d, Ssse3.Shuffle(vs0, sliceMask)); + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vshuffle), vmaske); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), 
vmasko); - s += 16; - d += 12; - } + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 54ca2a73ec..61c1ce48e2 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp "Output span must be divisable by 4!"); DebugGuard.IsTrue( - source.Length == (int)(dest.Length * 3 / 4D), + source.Length == dest.Length * 3 / 4, nameof(source), "Input span must be 3/4 the length of the output span!"); } @@ -202,7 +202,7 @@ namespace SixLabors.ImageSharp "Output span must be divisable by 3!"); DebugGuard.IsTrue( - source.Length == (int)(dest.Length * 4 / 3F), + source.Length == dest.Length * 4 / 3, nameof(source), "Output span must be 3/4 the length of the input span!"); } From becb9f11379bf54776341b3651a0f750ac4cfa8f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 31 Oct 2020 21:38:45 +0000 Subject: [PATCH 090/104] Update benchmark --- .../Color/Bulk/Shuffle4Slice3Channel.cs | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs index b64379959d..e0fbe1c0b3 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -44,25 +44,25 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | 
Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 50.09 ns | 1.018 ns | 1.460 ns | 49.16 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 128 | 35.28 ns | 0.106 ns | 0.089 ns | 35.30 ns | 0.69 | 0.02 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 35.13 ns | 0.247 ns | 0.231 ns | 35.22 ns | 0.69 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 101.48 ns | 0.875 ns | 0.819 ns | 101.60 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 256 | 53.25 ns | 0.518 ns | 0.433 ns | 53.21 ns | 0.52 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 57.21 ns | 0.508 ns | 0.451 ns | 57.38 ns | 0.56 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 202.53 ns | 0.884 ns | 0.827 ns | 202.40 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 512 | 82.55 ns | 0.418 ns | 0.391 ns | 82.59 ns | 0.41 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 82.89 ns | 1.057 ns | 0.989 ns | 82.48 ns | 0.41 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 398.79 ns | 7.807 ns | 6.921 ns | 395.67 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. 
AVX | Empty | 1024 | 144.51 ns | 1.033 ns | 0.966 ns | 144.42 ns | 0.36 | 0.01 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 143.77 ns | 0.820 ns | 0.684 ns | 143.62 ns | 0.36 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 798.44 ns | 4.447 ns | 3.472 ns | 799.39 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 2048 | 277.12 ns | 1.723 ns | 1.612 ns | 276.93 ns | 0.35 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 275.70 ns | 1.796 ns | 1.500 ns | 275.51 ns | 0.35 | 0.00 | - | - | - | - || + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 52.24 ns | 1.081 ns | 1.062 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 25.52 ns | 0.189 ns | 0.158 ns | 0.49 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.11 ns | 0.524 ns | 0.644 ns | 0.50 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 101.09 ns | 0.733 ns | 0.612 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.65 ns | 0.674 ns | 1.198 ns | 0.33 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 32.76 ns | 0.656 ns | 0.853 ns | 0.32 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 209.58 ns | 3.826 ns | 5.957 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 46.32 ns | 0.729 ns | 1.296 ns | 0.22 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 46.97 ns | 0.196 ns | 0.183 ns | 0.22 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 406.39 ns | 7.493 ns | 6.257 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 74.53 ns | 1.509 ns | 1.678 ns | 0.18 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 74.04 ns | 0.703 ns | 0.657 ns | 0.18 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 796.80 ns | 6.476 ns | 5.741 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 130.70 ns | 2.512 ns | 2.227 ns | 0.16 | - | - | - | - | + // | Shuffle4Slice3 | 3. 
SSE | COMPlus_EnableAVX=0 | 2048 | 129.42 ns | 2.555 ns | 2.133 ns | 0.16 | - | - | - | - | } From 50e30c3c42dc9a24a37d4c07e341c633f3602352 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 1 Nov 2020 23:36:12 +0000 Subject: [PATCH 091/104] Fast fallbacks --- .../{ => Shuffle}/IComponentShuffle.cs | 40 ++-- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 96 ++++++++++ .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 74 ++++++++ .../Common/Helpers/SimdUtils.Shuffle.cs | 69 +++---- .../PixelFormats/Utils/PixelConverter.cs | 118 ++++++++++++ .../Color/Bulk/Pad3Shuffle4Channel.cs | 61 ++++-- .../Color/Bulk/Shuffle4Slice3Channel.cs | 71 ++++--- .../Common/SimdUtilsTests.Shuffle.cs | 173 +++++++++++------- 8 files changed, 541 insertions(+), 161 deletions(-) rename src/ImageSharp/Common/Helpers/{ => Shuffle}/IComponentShuffle.cs (85%) create mode 100644 src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs create mode 100644 src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs diff --git a/src/ImageSharp/Common/Helpers/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs similarity index 85% rename from src/ImageSharp/Common/Helpers/IComponentShuffle.cs rename to src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index e354a57b00..803321d06d 100644 --- a/src/ImageSharp/Common/Helpers/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -30,13 +30,20 @@ namespace SixLabors.ImageSharp internal readonly struct DefaultShuffle4 : IComponentShuffle { + private readonly byte p3; + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) - : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0)) { + this.p3 = p3; + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); } - public DefaultShuffle4(byte control) => this.Control = control; - public byte Control { 
get; } [MethodImpl(InliningOptions.ShortMethod)] @@ -44,12 +51,11 @@ namespace SixLabors.ImageSharp { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - SimdUtils.Shuffle.InverseMmShuffle( - this.Control, - out int p3, - out int p2, - out int p1, - out int p0); + + int p3 = this.p3; + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; for (int i = 0; i < source.Length; i += 4) { @@ -63,7 +69,9 @@ namespace SixLabors.ImageSharp internal readonly struct WXYZShuffle4 : IComponentShuffle { - public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + private static readonly byte WXYZ = SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + + public byte Control => WXYZ; [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -89,7 +97,9 @@ namespace SixLabors.ImageSharp internal readonly struct WZYXShuffle4 : IComponentShuffle { - public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + private static readonly byte WZYX = SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + + public byte Control => WZYX; [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -112,7 +122,9 @@ namespace SixLabors.ImageSharp internal readonly struct YZWXShuffle4 : IComponentShuffle { - public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + private static readonly byte YZWX = SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + + public byte Control => YZWX; [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -135,7 +147,9 @@ namespace SixLabors.ImageSharp internal readonly struct ZYXWShuffle4 : IComponentShuffle { - public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + private static readonly byte ZYXW = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + + public byte Control => ZYXW; [MethodImpl(InliningOptions.ShortMethod)] public void 
RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs new file mode 100644 index 0000000000..97bd5aa725 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -0,0 +1,96 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IPad3Shuffle4 : IComponentShuffle + { + } + + internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 + { + private readonly byte p3; + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) + { + this.p3 = p3; + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p3 = this.p3; + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + Span temp = stackalloc byte[4]; + ref byte t = ref MemoryMarshal.GetReference(temp); + ref uint tu = ref Unsafe.As(ref t); + + for (int i = 0, j = 0; i < source.Length; i += 3, j += 4) + { + ref var s = ref Unsafe.Add(ref sBase, i); + tu = Unsafe.As(ref s) | 0xFF000000; + + Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0); + Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1); + Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2); + Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3); + } + } + } + + internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 + { + private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + + public byte 
Control => XYZW; + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte rs = ref MemoryMarshal.GetReference(source); + ref byte rd = ref MemoryMarshal.GetReference(dest); + + ref byte rsEnd = ref Unsafe.Add(ref rs, source.Length); + ref byte rsLoopEnd = ref Unsafe.Subtract(ref rsEnd, 4); + + while (Unsafe.IsAddressLessThan(ref rs, ref rsLoopEnd)) + { + Unsafe.As(ref rd) = Unsafe.As(ref rs) | 0xFF000000; + + rs = ref Unsafe.Add(ref rs, 3); + rd = ref Unsafe.Add(ref rd, 4); + } + + while (Unsafe.IsAddressLessThan(ref rs, ref rsEnd)) + { + Unsafe.Add(ref rd, 0) = Unsafe.Add(ref rs, 0); + Unsafe.Add(ref rd, 1) = Unsafe.Add(ref rs, 1); + Unsafe.Add(ref rd, 2) = Unsafe.Add(ref rs, 2); + Unsafe.Add(ref rd, 3) = byte.MaxValue; + + rs = ref Unsafe.Add(ref rs, 3); + rd = ref Unsafe.Add(ref rd, 4); + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs new file mode 100644 index 0000000000..c65c50f684 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -0,0 +1,74 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IShuffle4Slice3 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 + { + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) + { + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j); + } + } + } + + internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 + { + private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + + public byte Control => XYZW; + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int n = source.Length / 4; + for (int i = 0, j = 0; i < n; i++, j += 3) + { + Unsafe.As(ref Unsafe.Add(ref dBase, j)) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + } + } + } + + [StructLayout(LayoutKind.Explicit, Size = 3)] + internal readonly struct Xyz24 + { + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 61c1ce48e2..7ef3be6fe3 100644 --- 
a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -63,45 +63,61 @@ namespace SixLabors.ImageSharp } } + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] - public static void Pad3Shuffle4( + public static void Pad3Shuffle4( ReadOnlySpan source, Span dest, - byte control) + TShuffle shuffle) + where TShuffle : struct, IPad3Shuffle4 { VerifyPad3Shuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, control); + HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: if (source.Length > 0) { - Pad3Shuffle4Remainder(source, dest, control); + shuffle.RunFallbackShuffle(source, dest); } } + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. 
[MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Slice3( + public static void Shuffle4Slice3( ReadOnlySpan source, Span dest, - byte control) + TShuffle shuffle) + where TShuffle : struct, IShuffle4Slice3 { VerifyShuffle4Slice3SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, control); + HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: if (source.Length > 0) { - Shuffle4Slice3Remainder(source, dest, control); + shuffle.RunFallbackShuffle(source, dest); } } - public static void Shuffle4Remainder( + private static void Shuffle4Remainder( ReadOnlySpan source, Span dest, byte control) @@ -119,41 +135,6 @@ namespace SixLabors.ImageSharp } } - public static void Pad3Shuffle4Remainder( - ReadOnlySpan source, - Span dest, - byte control) - { - ref byte sBase = ref MemoryMarshal.GetReference(source); - ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); - - for (int i = 0, j = 0; i < dest.Length; i += 4, j += 3) - { - Unsafe.Add(ref dBase, p0 + i) = Unsafe.Add(ref sBase, j); - Unsafe.Add(ref dBase, p1 + i) = Unsafe.Add(ref sBase, j + 1); - Unsafe.Add(ref dBase, p2 + i) = Unsafe.Add(ref sBase, j + 2); - Unsafe.Add(ref dBase, p3 + i) = byte.MaxValue; - } - } - - public static void Shuffle4Slice3Remainder( - ReadOnlySpan source, - Span dest, - byte control) - { - ref byte sBase = ref MemoryMarshal.GetReference(source); - ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int _, out int p2, out int p1, out int p0); - - for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) - { - Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j); - Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j); - Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j); - } - } - [Conditional("DEBUG")] private 
static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index 5afd369be3..c5f92648c0 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -37,6 +37,24 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); } public static class FromArgb32 @@ -58,6 +76,24 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); } public static class FromBgra32 @@ -79,6 +115,88 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + } + + public static class FromRgb24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + // TODO: Bgr24 + } + + public static class FromBgr24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); + + // TODO: Rgb24 } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs index 9eb1e109be..4af0286054 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -9,7 +9,8 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Pad3Shuffle4Channel { - private static readonly byte Control = default(WXYZShuffle4).Control; + private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2); + private static readonly XYZWPad3Shuffle4 ControlFast = default; private byte[] source; private byte[] destination; @@ -18,7 +19,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { 
this.source = new byte[this.Count]; new Random(this.Count).NextBytes(this.source); - this.destination = new byte[(int)(this.Count * (4 / 3F))]; + this.destination = new byte[this.Count * 4 / 3]; } [Params(96, 384, 768, 1536)] @@ -29,6 +30,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); } + + [Benchmark] + public void Pad3Shuffle4FastFallback() + { + SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast); + } } // 2020-10-30 @@ -44,21 +51,37 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |------------- |------------------- |-------------------------------------------------- |------ |----------:|----------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 71.84 ns | 1.491 ns | 3.146 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.14 ns | 0.186 ns | 0.165 ns | 0.33 | 0.02 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 22.94 ns | 0.127 ns | 0.112 ns | 0.33 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 230.90 ns | 0.877 ns | 0.777 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 39.54 ns | 0.601 ns | 0.533 ns | 0.17 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 39.88 ns | 0.657 ns | 0.548 ns | 0.17 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 451.67 ns | 2.932 ns | 2.448 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. 
AVX | Empty | 768 | 60.79 ns | 1.200 ns | 1.561 ns | 0.13 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.30 ns | 0.469 ns | 0.416 ns | 0.14 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 896.11 ns | 10.358 ns | 9.689 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 109.72 ns | 2.149 ns | 2.300 ns | 0.12 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 108.70 ns | 0.994 ns | 0.881 ns | 0.12 | 0.00 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |------------------------- |------------------- |-------------------------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|------:|------:|------:|----------:| + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.63 ns | 0.175 ns | 0.155 ns | 23.65 ns | 0.15 | 0.01 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 96 | 24.84 ns | 0.376 ns | 0.333 ns | 24.74 ns | 1.57 | 0.06 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 41.41 ns | 0.859 ns | 1.204 ns | 41.33 ns | 0.16 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 384 | 40.74 ns | 0.624 ns | 0.584 ns | 40.72 ns | 0.55 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 62.86 ns | 0.332 ns | 0.277 ns | 62.80 ns | 0.12 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 768 | 64.72 ns | 1.306 ns | 1.090 ns | 64.51 ns | 0.59 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 110.05 ns | 0.256 ns | 0.214 ns | 110.04 ns | 0.11 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs index e0fbe1c0b3..9cf24ccd69 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -9,7 +9,8 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Shuffle4Slice3Channel { - private static readonly byte Control = default(WXYZShuffle4).Control; + private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2); + private static readonly XYZWShuffle4Slice3 ControlFast = default; private byte[] source; private byte[] destination; @@ -29,6 +30,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); } + + [Benchmark] + public void Shuffle4Slice3FastFallback() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast); + } } // 2020-10-29 @@ 
-44,25 +51,45 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |--------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 52.24 ns | 1.081 ns | 1.062 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 25.52 ns | 0.189 ns | 0.158 ns | 0.49 | - | - | - | - | - // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.11 ns | 0.524 ns | 0.644 ns | 0.50 | - | - | - | - | - // | | | | | | | | | | | | | - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 101.09 ns | 0.733 ns | 0.612 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.65 ns | 0.674 ns | 1.198 ns | 0.33 | - | - | - | - | - // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 32.76 ns | 0.656 ns | 0.853 ns | 0.32 | - | - | - | - | - // | | | | | | | | | | | | | - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 209.58 ns | 3.826 ns | 5.957 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 46.32 ns | 0.729 ns | 1.296 ns | 0.22 | - | - | - | - | - // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 46.97 ns | 0.196 ns | 0.183 ns | 0.22 | - | - | - | - | - // | | | | | | | | | | | | | - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 406.39 ns | 7.493 ns | 6.257 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 74.53 ns | 1.509 ns | 1.678 ns | 0.18 | - | - | - | - | - // | Shuffle4Slice3 | 3. 
SSE | COMPlus_EnableAVX=0 | 1024 | 74.04 ns | 0.703 ns | 0.657 ns | 0.18 | - | - | - | - | - // | | | | | | | | | | | | | - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 796.80 ns | 6.476 ns | 5.741 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 130.70 ns | 2.512 ns | 2.227 ns | 0.16 | - | - | - | - | - // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 129.42 ns | 2.555 ns | 2.133 ns | 0.16 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 27.15 ns | 0.556 ns | 0.762 ns | 27.34 ns | 0.41 | 0.03 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 128 | 26.15 ns | 0.113 ns | 0.106 ns | 26.16 ns | 1.01 | 0.02 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.61 ns | 0.107 ns | 0.095 ns | 32.62 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 256 | 32.16 ns | 0.111 ns | 0.104 ns | 32.16 ns | 0.61 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 51.03 ns | 0.535 ns | 0.501 ns | 51.18 ns | 0.24 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 512 | 50.33 ns | 0.382 ns | 0.339 ns | 50.41 ns | 0.42 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 77.13 ns | 1.355 ns | 2.264 ns | 76.19 ns | 0.19 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 1024 | 80.25 ns | 1.647 ns | 2.082 ns | 80.98 ns | 0.35 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 128.41 ns | 0.417 ns | 0.390 ns | 128.24 ns | 0.16 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. 
SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 26f85dd76c..29f3925fc9 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -39,56 +39,51 @@ namespace SixLabors.ImageSharp.Tests.Common static void RunTest(string serialized) { int size = FeatureTestRunner.Deserialize(serialized); - foreach (var item in ArraySizesDivisibleBy4) - { - // These cannot be expressed as a theory as you cannot - // use RemoteExecutor within generic methods nor pass - // IComponentShuffle to the generic utils method. - foreach (var count in item) - { - WXYZShuffle4 wxyz = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), - wxyz.Control); - - WZYXShuffle4 wzyx = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), - wzyx.Control); - - YZWXShuffle4 yzwx = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), - yzwx.Control); - - ZYXWShuffle4 zyxw = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), - zyxw.Control); - - var xwyz = new DefaultShuffle4(2, 1, 3, 0); - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), - xwyz.Control); - - var yyyy = new DefaultShuffle4(1, 1, 1, 1); - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), - yyyy.Control); - - var wwww = new DefaultShuffle4(3, 3, 3, 3); - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), - wwww.Control); - } - } + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IComponentShuffle to the generic utils method. 
+ WXYZShuffle4 wxyz = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), + wxyz.Control); + + WZYXShuffle4 wzyx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), + wzyx.Control); + + YZWXShuffle4 yzwx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), + yzwx.Control); + + ZYXWShuffle4 zyxw = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), + zyxw.Control); + + var xwyz = new DefaultShuffle4(2, 1, 3, 0); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4(1, 1, 1, 1); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4(3, 3, 3, 3); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), + wwww.Control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( @@ -103,21 +98,40 @@ namespace SixLabors.ImageSharp.Tests.Common { static void RunTest(string serialized) { - // No need to test multiple shuffle controls as the - // pipeline is always the same. int size = FeatureTestRunner.Deserialize(serialized); - byte control = default(WZYXShuffle4).Control; + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IComponentShuffle to the generic utils method. 
+ XYZWPad3Shuffle4 xyzw = default; TestPad3Shuffle4Channel( size, - (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, control), - control); + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), + wwww.Control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, count, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } [Theory] @@ -126,15 +140,34 @@ namespace SixLabors.ImageSharp.Tests.Common { static void RunTest(string serialized) { - // No need to test multiple shuffle controls as the - // pipeline is always the same. int size = FeatureTestRunner.Deserialize(serialized); - byte control = default(WZYXShuffle4).Control; + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IComponentShuffle to the generic utils method. 
+ XYZWShuffle4Slice3 xyzw = default; TestShuffle4Slice3Channel( size, - (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, control), - control); + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), + wwww.Control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( @@ -212,7 +245,7 @@ namespace SixLabors.ImageSharp.Tests.Common byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[(int)(count * (4 / 3D))]; + var result = new byte[count * 4 / 3]; byte[] expected = new byte[result.Length]; @@ -231,6 +264,20 @@ namespace SixLabors.ImageSharp.Tests.Common expected[p3 + i] = byte.MaxValue; } + Span temp = stackalloc byte[4]; + for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3) + { + temp[0] = source[j]; + temp[1] = source[j + 1]; + temp[2] = source[j + 2]; + temp[3] = byte.MaxValue; + + expected[i] = temp[p0]; + expected[i + 1] = temp[p1]; + expected[i + 2] = temp[p2]; + expected[i + 3] = temp[p3]; + } + convert(source, result); for (int i = 0; i < expected.Length; i++) @@ -249,7 +296,7 @@ namespace SixLabors.ImageSharp.Tests.Common byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[(int)(count * (3 / 4D))]; + var result = new byte[count * 3 / 4]; byte[] expected = new byte[result.Length]; From 6209c3c8cfb846a91105242253e5702acca0cc07 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 1 Nov 2020 23:51:38 +0000 Subject: [PATCH 092/104] Don't cast full spans --- 
.../Helpers/Shuffle/IComponentShuffle.cs | 44 +++++++++---------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 803321d06d..3f045a5799 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -6,6 +6,9 @@ using System.Buffers.Binary; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +// The JIT can detect and optimize rotation idioms ROTL (Rotate Left) +// and ROTR (Rotate Right) emitting efficient CPU instructions: +// https://github.com/dotnet/coreclr/pull/1830 namespace SixLabors.ImageSharp { /// @@ -76,15 +79,11 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) - // and ROTR (Rotate Right) emitting efficient CPU instructions: - // https://github.com/dotnet/coreclr/pull/1830 - for (int i = 0; i < s.Length; i++) + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); @@ -104,12 +103,11 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); + ref uint sBase = ref Unsafe.As(ref 
MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); @@ -129,12 +127,11 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); @@ -154,12 +151,11 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); From b010a15012c267c7bb7c5f9b75d3c5844751e21b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 2 Nov 2020 01:52:06 +0000 Subject: [PATCH 093/104] Shuffle3 + Tests --- .../Helpers/Shuffle/IComponentShuffle.cs | 5 + .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 5 + .../Common/Helpers/Shuffle/IShuffle3.cs | 90 ++++++++++++++++ .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 12 +-- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 102 +++++++++++++++++- 
.../Common/Helpers/SimdUtils.Shuffle.cs | 48 ++++++++- .../Common/SimdUtilsTests.Shuffle.cs | 76 ++++++++++++- 7 files changed, 323 insertions(+), 15 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 3f045a5799..1a4c6ab446 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -40,6 +40,11 @@ namespace SixLabors.ImageSharp public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) { + Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + this.p3 = p3; this.p2 = p2; this.p1 = p1; diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 97bd5aa725..b223a6bc27 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -21,6 +21,11 @@ namespace SixLabors.ImageSharp public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) { + Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + this.p3 = p3; this.p2 = p2; this.p1 = p1; diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs new file mode 100644 index 0000000000..fa4260e63d --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -0,0 +1,90 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IShuffle3 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle3 : IShuffle3 + { + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle3(byte p2, byte p1, byte p0) + { + Guard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); + + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0; i < source.Length; i += 3) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + } + } + } + + internal readonly struct ZYXShuffle3 : IShuffle3 + { + private static readonly byte ZYX = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + + public byte Control => ZYX; + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref Byte3 sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 3; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed 
& 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + packed = tmp1 + tmp3; + + Unsafe.Add(ref dBase, i) = Unsafe.As(ref packed); + } + } + } + + [StructLayout(LayoutKind.Explicit, Size = 3)] + internal readonly struct Byte3 + { + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index c65c50f684..1ceb38f1a4 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -20,6 +20,11 @@ namespace SixLabors.ImageSharp public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) { + Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + this.p2 = p2; this.p1 = p1; this.p0 = p0; @@ -62,13 +67,8 @@ namespace SixLabors.ImageSharp int n = source.Length / 4; for (int i = 0, j = 0; i < n; i++, j += 3) { - Unsafe.As(ref Unsafe.Add(ref dBase, j)) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + Unsafe.As(ref Unsafe.Add(ref dBase, j)) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); } } } - - [StructLayout(LayoutKind.Explicit, Size = 3)] - internal readonly struct Xyz24 - { - } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index abda6c4df6..974516c3e5 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -86,6 +86,38 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffles 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 3); + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle3( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + /// /// Pads then shuffles 8-bit integers within 128-bit lanes in /// using the control and store the results in . @@ -94,7 +126,7 @@ namespace SixLabors.ImageSharp /// The destination span of bytes. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static unsafe void Pad3Shuffle4Reduce( + public static void Pad3Shuffle4Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -127,7 +159,7 @@ namespace SixLabors.ImageSharp /// The destination span of bytes. /// The byte control. 
[MethodImpl(InliningOptions.ShortMethod)] - public static unsafe void Shuffle4Slice3Reduce( + public static void Shuffle4Slice3Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -313,7 +345,69 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ShortMethod)] - private static unsafe void Pad3Shuffle4( + private static void Shuffle3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + Vector128 vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80).AsByte(); + Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); + Vector128 vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte(); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0; i < n; i += 3) + { + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); + + v0 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle); + v1 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); + v2 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); + v3 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); + + v0 = Ssse3.Shuffle(v0, vmaske); + v1 = Ssse3.Shuffle(v1, vmasko); + v2 = Ssse3.Shuffle(v2, vmaske); + v3 = Ssse3.Shuffle(v3, vmasko); + + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 
12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, i); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Pad3Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -356,7 +450,7 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ShortMethod)] - private static unsafe void Shuffle4Slice3( + private static void Shuffle4Slice3( ReadOnlySpan source, Span dest, byte control) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 7ef3be6fe3..79cb0da372 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -23,7 +23,7 @@ namespace SixLabors.ImageSharp Span dest, byte control) { - VerifyShuffleSpanInput(source, dest); + VerifyShuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control); @@ -50,7 +50,7 @@ namespace SixLabors.ImageSharp TShuffle shuffle) where TShuffle : struct, IComponentShuffle { - VerifyShuffleSpanInput(source, dest); + VerifyShuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); @@ -63,6 +63,33 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffle 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle3 + { + VerifyShuffle3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + /// /// Pads then shuffles 8-bit integers within 128-bit lanes in /// using the control and store the results in . @@ -136,7 +163,7 @@ namespace SixLabors.ImageSharp } [Conditional("DEBUG")] - private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) + private static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) where T : struct { DebugGuard.IsTrue( @@ -150,6 +177,21 @@ namespace SixLabors.ImageSharp "Input spans must be divisable by 4!"); } + [Conditional("DEBUG")] + private static void VerifyShuffle3SpanInput(ReadOnlySpan source, Span dest) + where T : struct + { + DebugGuard.IsTrue( + source.Length == dest.Length, + nameof(source), + "Input spans must be of same length!"); + + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input spans must be divisable by 3!"); + } + [Conditional("DEBUG")] private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan source, Span dest) { diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 29f3925fc9..75d7c87299 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -92,6 +92,48 @@ namespace SixLabors.ImageSharp.Tests.Common HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkShuffleByte3Channel(int count) + { + static void RunTest(string serialized) + { + int size = 
FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle3 to the generic utils method. + ZYXShuffle3 zyx = default; + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), + zyx.Control); + + var xyz = new DefaultShuffle3(2, 1, 0); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), + xyz.Control); + + var yyy = new DefaultShuffle3(1, 1, 1); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), + yyy.Control); + + var zzz = new DefaultShuffle3(2, 2, 2); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), + zzz.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + [Theory] [MemberData(nameof(ArraySizesDivisibleBy3))] public void BulkPad3Shuffle4Channel(int count) @@ -102,7 +144,7 @@ namespace SixLabors.ImageSharp.Tests.Common // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass - // IComponentShuffle to the generic utils method. + // IPad3Shuffle4 to the generic utils method. XYZWPad3Shuffle4 xyzw = default; TestPad3Shuffle4Channel( size, @@ -144,7 +186,7 @@ namespace SixLabors.ImageSharp.Tests.Common // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass - // IComponentShuffle to the generic utils method. + // IShuffle4Slice3 to the generic utils method. 
XYZWShuffle4Slice3 xyzw = default; TestShuffle4Slice3Channel( size, @@ -237,6 +279,36 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(expected, result); } + private static void TestShuffleByte3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 3) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } + private static void TestPad3Shuffle4Channel( int count, Action, Memory> convert, From 466048ef0d9b43a380cc11ce5e1d948d3cda7c48 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 2 Nov 2020 11:46:49 +0000 Subject: [PATCH 094/104] Cleanup and fix tests --- .../Helpers/Shuffle/IComponentShuffle.cs | 23 ++++--- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 8 +-- .../Common/Helpers/Shuffle/IShuffle3.cs | 43 +------------ .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 13 ++-- .../Common/Helpers/SimdUtils.Shuffle.cs | 2 +- .../PixelFormats/Utils/PixelConverter.cs | 18 +++++- .../Color/Bulk/Shuffle3Channel.cs | 64 +++++++++++++++++++ .../Common/SimdUtilsTests.Shuffle.cs | 4 +- 8 files changed, 113 insertions(+), 62 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 1a4c6ab446..2056075e7c 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -31,7 +31,12 @@ namespace SixLabors.ImageSharp void RunFallbackShuffle(ReadOnlySpan source, Span dest); } - internal 
readonly struct DefaultShuffle4 : IComponentShuffle + /// + internal interface IShuffle4 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle4 : IShuffle4 { private readonly byte p3; private readonly byte p2; @@ -40,10 +45,10 @@ namespace SixLabors.ImageSharp public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) { - Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); this.p3 = p3; this.p2 = p2; @@ -75,7 +80,7 @@ namespace SixLabors.ImageSharp } } - internal readonly struct WXYZShuffle4 : IComponentShuffle + internal readonly struct WXYZShuffle4 : IShuffle4 { private static readonly byte WXYZ = SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); @@ -99,7 +104,7 @@ namespace SixLabors.ImageSharp } } - internal readonly struct WZYXShuffle4 : IComponentShuffle + internal readonly struct WZYXShuffle4 : IShuffle4 { private static readonly byte WZYX = SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); @@ -123,7 +128,7 @@ namespace SixLabors.ImageSharp } } - internal readonly struct YZWXShuffle4 : IComponentShuffle + internal readonly struct YZWXShuffle4 : IShuffle4 { private static readonly byte YZWX = SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); @@ -147,7 +152,7 @@ namespace SixLabors.ImageSharp } } - internal readonly struct ZYXWShuffle4 : IComponentShuffle + internal readonly struct ZYXWShuffle4 : IShuffle4 { private static readonly byte ZYXW = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index b223a6bc27..1f3fce541d 100644 --- 
a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -21,10 +21,10 @@ namespace SixLabors.ImageSharp public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) { - Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); this.p3 = p3; this.p2 = p2; diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index fa4260e63d..61e99890e7 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -20,9 +20,9 @@ namespace SixLabors.ImageSharp public DefaultShuffle3(byte p2, byte p1, byte p0) { - Guard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); - Guard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); - Guard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); this.p2 = p2; this.p1 = p1; @@ -50,41 +50,4 @@ namespace SixLabors.ImageSharp } } } - - internal readonly struct ZYXShuffle3 : IShuffle3 - { - private static readonly byte ZYX = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); - - public byte Control => ZYX; - - [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) - { - ref Byte3 sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); - ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); - int n = source.Length / 3; - - for (int i = 0; i < n; i++) - { 
- uint packed = Unsafe.As(ref Unsafe.Add(ref sBase, i)); - - // packed = [W Z Y X] - // tmp1 = [W 0 Y 0] - // tmp2 = [0 Z 0 X] - // tmp3=ROTL(16, tmp2) = [0 X 0 Z] - // tmp1 + tmp3 = [W X Y Z] - uint tmp1 = packed & 0xFF00FF00; - uint tmp2 = packed & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - packed = tmp1 + tmp3; - - Unsafe.Add(ref dBase, i) = Unsafe.As(ref packed); - } - } - } - - [StructLayout(LayoutKind.Explicit, Size = 3)] - internal readonly struct Byte3 - { - } } diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 1ceb38f1a4..c9b51aba2c 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -20,10 +20,10 @@ namespace SixLabors.ImageSharp public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) { - Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); this.p2 = p2; this.p1 = p1; @@ -71,4 +71,9 @@ namespace SixLabors.ImageSharp } } } + + [StructLayout(LayoutKind.Explicit, Size = 3)] + internal readonly struct Byte3 + { + } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 79cb0da372..8fd6bcce68 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp ReadOnlySpan source, Span dest, TShuffle shuffle) - where TShuffle : struct, IComponentShuffle + where TShuffle : struct, IShuffle4 { 
VerifyShuffle4SpanInput(source, dest); diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index c5f92648c0..7215fa860b 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -164,7 +164,14 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); - // TODO: Bgr24 + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); } public static class FromBgr24 @@ -196,7 +203,14 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, default); - // TODO: Rgb24 + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs new file mode 100644 index 0000000000..3667b973ef --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs @@ -0,0 +1,64 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle3Channel + { + private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2); + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle3() + { + SimdUtils.Shuffle3(this.source, this.destination, Control); + } + } + + // 2020-11-02 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 48.46 ns | 1.034 ns | 2.438 ns | 47.46 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 96 | 32.42 ns | 0.537 ns | 0.476 ns | 32.34 ns | 0.66 | 0.04 | - | - | - | - | + // | Shuffle3 | 3. 
SSE | COMPlus_EnableAVX=0 | 96 | 32.51 ns | 0.373 ns | 0.349 ns | 32.56 ns | 0.66 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 199.04 ns | 1.512 ns | 1.180 ns | 199.17 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 384 | 71.20 ns | 2.654 ns | 7.784 ns | 69.60 ns | 0.41 | 0.02 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 63.23 ns | 0.569 ns | 0.505 ns | 63.21 ns | 0.32 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 391.28 ns | 5.087 ns | 3.972 ns | 391.22 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 768 | 109.12 ns | 2.149 ns | 2.010 ns | 108.66 ns | 0.28 | 0.01 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 106.51 ns | 0.734 ns | 0.613 ns | 106.56 ns | 0.27 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 75d7c87299..f1bfaa4ad4 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -42,7 +42,7 @@ namespace SixLabors.ImageSharp.Tests.Common // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass - // IComponentShuffle to the generic utils method. 
+ // IShuffle4 to the generic utils method. WXYZShuffle4 wxyz = default; TestShuffleByte4Channel( size, @@ -103,7 +103,7 @@ namespace SixLabors.ImageSharp.Tests.Common // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass // IShuffle3 to the generic utils method. - ZYXShuffle3 zyx = default; + var zyx = new DefaultShuffle3(0, 1, 2); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), From f7b5a0ff79b028de37cace21774bb19eb58b1f67 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 2 Nov 2020 16:24:04 +0000 Subject: [PATCH 095/104] Fix Shuffle4Slice3, wire up shuffles. --- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 6 +- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 18 ++- .../Argb32.PixelOperations.Generated.cs | 72 +++++---- .../Bgr24.PixelOperations.Generated.cs | 140 +++++++++++------- .../Bgra32.PixelOperations.Generated.cs | 72 +++++---- .../Rgb24.PixelOperations.Generated.cs | 129 ++++++++++------ .../Rgba32.PixelOperations.Generated.cs | 72 +++++---- .../Generated/_Common.ttinclude | 6 +- 8 files changed, 323 insertions(+), 192 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index c9b51aba2c..70800f9de2 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -62,12 +62,12 @@ namespace SixLabors.ImageSharp public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); - ref byte dBase = ref MemoryMarshal.GetReference(dest); + ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); int n = source.Length / 4; - for (int i = 0, j = 0; i < n; i++, j += 3) + for (int i = 0; i < n; i++) { - Unsafe.As(ref Unsafe.Add(ref dBase, j)) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + Unsafe.Add(ref dBase, i) = 
Unsafe.As(ref Unsafe.Add(ref sBase, i)); } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 974516c3e5..296970ddcc 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -371,9 +371,11 @@ namespace SixLabors.ImageSharp for (int i = 0; i < n; i += 3) { - ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); - Vector128 v1 = Unsafe.Add(ref v0, 1); - Vector128 v2 = Unsafe.Add(ref v0, 2); + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); v2 = Ssse3.AlignRight(v2, v1, 8); @@ -474,10 +476,12 @@ namespace SixLabors.ImageSharp for (int i = 0, j = 0; i < n; i += 4, j += 3) { - ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); - Vector128 v1 = Unsafe.Add(ref v0, 1); - Vector128 v2 = Unsafe.Add(ref v0, 2); - Vector128 v3 = Unsafe.Add(ref v0, 3); + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); + Vector128 v3 = Unsafe.Add(ref vs, 3); v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske); v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko); diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs index 3f48d2acca..d30616997c 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs @@ -106,23 +106,59 @@ namespace SixLabors.ImageSharp.PixelFormats Span dest = MemoryMarshal.Cast(destinationPixels); PixelConverter.FromBgra32.ToArgb32(source, 
dest); } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, dest); + } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); + } - dp.FromArgb32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + 
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); } /// @@ -197,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromArgb32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs index b73bb8b831..50d4942ecb 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs @@ -52,146 +52,182 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { 
Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); } - /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration 
configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); } - /// - public override void ToL8(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L8 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); } - /// - public override void ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, 
nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L16 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); } /// - public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToL8(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La16 dp = ref Unsafe.Add(ref destRef, i); + ref L8 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void 
ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La32 dp = ref Unsafe.Add(ref destRef, i); + ref L16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); + ref La16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref 
La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); + ref La32 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs index 8cf2d5850a..b38e5f19d6 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs @@ -106,23 +106,59 @@ namespace SixLabors.ImageSharp.PixelFormats Span dest = MemoryMarshal.Cast(destinationPixels); PixelConverter.FromArgb32.ToBgra32(source, dest); } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); + } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + 
PixelConverter.FromRgb24.ToBgra32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); + } - dp.FromBgra32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); } /// @@ -197,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromBgra32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git 
a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs index 332683fc7f..9a4173892e 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs @@ -52,59 +52,114 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, 
dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, dest); + } + /// + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgra32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + 
Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); + } - dp.FromRgb24(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); } /// @@ -179,24 +234,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 
destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgb24(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs index 9a36ec29a4..5b60ec10e3 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs @@ -95,23 +95,59 @@ namespace SixLabors.ImageSharp.PixelFormats Span dest = MemoryMarshal.Cast(destinationPixels); PixelConverter.FromBgra32.ToRgba32(source, dest); } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, dest); + } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = 
MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); + } - dp.FromRgba32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); } /// @@ -186,24 +222,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgba32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span 
destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude index d8b5286cd7..b728b01152 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude @@ -17,7 +17,7 @@ using System.Runtime.InteropServices; <#+ static readonly string[] CommonPixelTypes = { "Argb32", "Bgr24", "Bgra32", "L8", "L16", "La16", "La32", "Rgb24", "Rgba32", "Rgb48", "Rgba64", "Bgra5551" }; - static readonly string[] Optimized32BitTypes = { "Rgba32", "Argb32", "Bgra32" }; + static readonly string[] OptimizedPixelTypes = { "Rgba32", "Argb32", "Bgra32", "Rgb24", "Bgr24" }; // Types with Rgba32-combatible to/from Vector4 conversion static readonly string[] Rgba32CompatibleTypes = { "Argb32", "Bgra32", "Rgb24", "Bgr24" }; @@ -148,8 +148,8 @@ using System.Runtime.InteropServices; GenerateRgba32CompatibleVector4ConversionMethods(pixelType, pixelType.EndsWith("32")); } - var matching32BitTypes = Optimized32BitTypes.Contains(pixelType) ? - Optimized32BitTypes.Where(p => p != pixelType) : + var matching32BitTypes = OptimizedPixelTypes.Contains(pixelType) ? 
+ OptimizedPixelTypes.Where(p => p != pixelType) : Enumerable.Empty(); foreach (string destPixelType in matching32BitTypes) From 7e57ebd57451081d3f08974c8525694487709b0d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 2 Nov 2020 18:14:11 +0000 Subject: [PATCH 096/104] Add Rgb24 <==> Vector4 benchmarks --- .../Color/Bulk/FromVector4.cs | 4 +- .../Color/Bulk/FromVector4_Rgb24.cs | 55 ++++++++++++++++ .../Color/Bulk/ToVector4_Rgb24.cs | 65 +++++++++++++++++++ 3 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index dc030e07a7..1db9147ad2 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -30,7 +30,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk protected Configuration Configuration => Configuration.Default; // [Params(64, 2048)] - [Params(1024)] + [Params(64, 256, 2048)] public int Count { get; set; } [GlobalSetup] @@ -58,7 +58,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk } } - [Benchmark] + [Benchmark(Baseline = true)] public void PixelOperations_Base() { new PixelOperations().FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan()); diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs new file mode 100644 index 0000000000..5da6edc6b5 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs @@ -0,0 +1,55 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class FromVector4_Rgb24 : FromVector4 + { + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 64 | 343.2 ns | 305.91 ns | 16.77 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 64 | 320.8 ns | 19.93 ns | 1.09 ns | 0.94 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 64 | 234.3 ns | 17.98 ns | 0.99 ns | 1.00 | 0.00 | 0.0052 | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 64 | 246.0 ns | 82.34 ns | 4.51 ns | 1.05 | 0.02 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 64 | 222.3 ns | 39.46 ns | 2.16 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 64 | 243.4 ns | 33.58 ns | 1.84 ns | 1.09 | 0.01 | - | - | - | - | +// | | 
| | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 256 | 824.9 ns | 32.77 ns | 1.80 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 256 | 967.0 ns | 39.09 ns | 2.14 ns | 1.17 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 256 | 756.9 ns | 94.43 ns | 5.18 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 256 | 1,003.3 ns | 3,192.09 ns | 174.97 ns | 1.32 | 0.22 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 256 | 748.6 ns | 248.03 ns | 13.60 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 256 | 437.0 ns | 36.48 ns | 2.00 ns | 0.58 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 2048 | 5,751.6 ns | 704.24 ns | 38.60 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 2048 | 4,391.6 ns | 718.17 ns | 39.37 ns | 0.76 | 0.00 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 2048 | 6,202.0 ns | 1,815.18 ns | 99.50 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 2048 | 4,225.6 ns | 1,004.03 ns | 55.03 ns | 0.68 | 0.01 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 2048 | 6,157.1 ns | 2,516.98 ns | 137.96 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 2048 | 1,822.7 ns | 1,764.43 ns | 96.71 ns | 0.30 | 0.02 | 0.0172 | - | - | 72 B | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs new file mode 
100644 index 0000000000..aecd418316 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs @@ -0,0 +1,65 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; + +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class ToVector4_Rgb24 : ToVector4 + { + [Benchmark(Baseline = true)] + public void PixelOperations_Base() + { + new PixelOperations().ToVector4( + this.Configuration, + this.source.GetSpan(), + this.destination.GetSpan()); + } + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 64 | 298.4 ns | 33.63 ns | 1.84 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 64 | 355.5 ns | 908.51 ns | 49.80 ns | 1.19 | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 64 | 220.1 ns | 13.77 ns | 0.75 ns | 1.00 | 0.00 | 0.0055 | - | - | 24 B | 
+// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 64 | 228.5 ns | 41.41 ns | 2.27 ns | 1.04 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 64 | 213.6 ns | 12.47 ns | 0.68 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 64 | 217.0 ns | 9.95 ns | 0.55 ns | 1.02 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 256 | 829.0 ns | 242.93 ns | 13.32 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 256 | 448.9 ns | 4.04 ns | 0.22 ns | 0.54 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 256 | 863.0 ns | 1,253.26 ns | 68.70 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 256 | 309.2 ns | 66.16 ns | 3.63 ns | 0.36 | 0.03 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 256 | 737.0 ns | 253.90 ns | 13.92 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 256 | 212.3 ns | 1.07 ns | 0.06 ns | 0.29 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 2048 | 5,625.6 ns | 404.35 ns | 22.16 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 2048 | 1,974.1 ns | 229.84 ns | 12.60 ns | 0.35 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 2048 | 5,467.2 ns | 537.29 ns | 29.45 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 2048 | 1,985.5 ns | 4,714.23 ns | 258.40 ns | 0.36 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | 
Job-VPSIRL | .NET Core 3.1 | 2048 | 5,888.2 ns | 1,622.23 ns | 88.92 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 2048 | 1,165.0 ns | 191.71 ns | 10.51 ns | 0.20 | 0.00 | - | - | - | - | From da4764645eb21f2a27953b1af765f09ca612b12b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 4 Nov 2020 14:10:18 +0000 Subject: [PATCH 097/104] Unroll XYZWShuffle4Slice3 --- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 70800f9de2..083b0b2a96 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -65,9 +65,27 @@ namespace SixLabors.ImageSharp ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); int n = source.Length / 4; - for (int i = 0; i < n; i++) + int m = ImageMaths.Modulo4(n); + int u = n - m; + + ref Byte3 dEnd = ref Unsafe.Add(ref dBase, u); + + while (Unsafe.IsAddressLessThan(ref dBase, ref dEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + Unsafe.Add(ref dBase, 1) = Unsafe.As(ref Unsafe.Add(ref sBase, 1)); + Unsafe.Add(ref dBase, 2) = Unsafe.As(ref Unsafe.Add(ref sBase, 2)); + Unsafe.Add(ref dBase, 3) = Unsafe.As(ref Unsafe.Add(ref sBase, 3)); + dBase = ref Unsafe.Add(ref dBase, 4); + sBase = ref Unsafe.Add(ref sBase, 4); + } + + if (m > 0) { - Unsafe.Add(ref dBase, i) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + for (int i = u; i < n; i++) + { + Unsafe.Add(ref dBase, i) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + } } } } From 863c672be1499dff4986e5ba2a517de4f241b566 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 4 Nov 2020 16:55:17 +0000 Subject: [PATCH 098/104] Fix shuffle +m slice fallback --- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 30 
+++++++++---------- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 18 ++++++----- .../Color/Bulk/FromVector4.cs | 2 +- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 1f3fce541d..fbd4a343db 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -72,29 +72,29 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ref byte rs = ref MemoryMarshal.GetReference(source); - ref byte rd = ref MemoryMarshal.GetReference(dest); + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); - ref byte rsEnd = ref Unsafe.Add(ref rs, source.Length); - ref byte rsLoopEnd = ref Unsafe.Subtract(ref rsEnd, 4); + ref byte sEnd = ref Unsafe.Add(ref sBase, source.Length); + ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4); - while (Unsafe.IsAddressLessThan(ref rs, ref rsLoopEnd)) + while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd)) { - Unsafe.As(ref rd) = Unsafe.As(ref rs) | 0xFF000000; + Unsafe.As(ref dBase) = Unsafe.As(ref sBase) | 0xFF000000; - rs = ref Unsafe.Add(ref rs, 3); - rd = ref Unsafe.Add(ref rd, 4); + sBase = ref Unsafe.Add(ref sBase, 3); + dBase = ref Unsafe.Add(ref dBase, 4); } - while (Unsafe.IsAddressLessThan(ref rs, ref rsEnd)) + while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd)) { - Unsafe.Add(ref rd, 0) = Unsafe.Add(ref rs, 0); - Unsafe.Add(ref rd, 1) = Unsafe.Add(ref rs, 1); - Unsafe.Add(ref rd, 2) = Unsafe.Add(ref rs, 2); - Unsafe.Add(ref rd, 3) = byte.MaxValue; + Unsafe.Add(ref dBase, 0) = Unsafe.Add(ref sBase, 0); + Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1); + Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2); + Unsafe.Add(ref dBase, 3) = byte.MaxValue; - rs = ref Unsafe.Add(ref rs, 3); - 
rd = ref Unsafe.Add(ref rd, 4); + sBase = ref Unsafe.Add(ref sBase, 3); + dBase = ref Unsafe.Add(ref dBase, 4); } } } diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 083b0b2a96..e11956c105 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -68,24 +68,26 @@ namespace SixLabors.ImageSharp int m = ImageMaths.Modulo4(n); int u = n - m; - ref Byte3 dEnd = ref Unsafe.Add(ref dBase, u); + ref uint sLoopEnd = ref Unsafe.Add(ref sBase, u); + ref uint sEnd = ref Unsafe.Add(ref sBase, n); - while (Unsafe.IsAddressLessThan(ref dBase, ref dEnd)) + while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd)) { Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); Unsafe.Add(ref dBase, 1) = Unsafe.As(ref Unsafe.Add(ref sBase, 1)); Unsafe.Add(ref dBase, 2) = Unsafe.As(ref Unsafe.Add(ref sBase, 2)); Unsafe.Add(ref dBase, 3) = Unsafe.As(ref Unsafe.Add(ref sBase, 3)); - dBase = ref Unsafe.Add(ref dBase, 4); + sBase = ref Unsafe.Add(ref sBase, 4); + dBase = ref Unsafe.Add(ref dBase, 4); } - if (m > 0) + while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd)) { - for (int i = u; i < n; i++) - { - Unsafe.Add(ref dBase, i) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); - } + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + + sBase = ref Unsafe.Add(ref sBase, 1); + dBase = ref Unsafe.Add(ref dBase, 1); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index 1db9147ad2..04ca8cd652 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -91,7 +91,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk SimdUtils.BasicIntrinsics256.NormalizedFloatToByteSaturate(sBytes, dFloats); } - [Benchmark(Baseline = true)] + [Benchmark] public void 
ExtendedIntrinsic() { Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); From 382b5bc1f155a0e2f9dbc68b52704e22d3aa704b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 4 Nov 2020 17:31:27 +0000 Subject: [PATCH 099/104] Inline controls as constants --- .../Helpers/Shuffle/IComponentShuffle.cs | 32 ++++++++++++------- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 8 +++-- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 8 +++-- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 2056075e7c..7687a5b95f 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -82,9 +82,11 @@ namespace SixLabors.ImageSharp internal readonly struct WXYZShuffle4 : IShuffle4 { - private static readonly byte WXYZ = SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); - - public byte Control => WXYZ; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -106,9 +108,11 @@ namespace SixLabors.ImageSharp internal readonly struct WZYXShuffle4 : IShuffle4 { - private static readonly byte WZYX = SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); - - public byte Control => WZYX; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -130,9 +134,11 @@ namespace SixLabors.ImageSharp internal readonly struct YZWXShuffle4 : IShuffle4 { - private static readonly byte YZWX = SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); - - public byte Control => YZWX; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => 
SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -154,9 +160,11 @@ namespace SixLabors.ImageSharp internal readonly struct ZYXWShuffle4 : IShuffle4 { - private static readonly byte ZYXW = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); - - public byte Control => ZYXW; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index fbd4a343db..0c2b1d5082 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -65,9 +65,11 @@ namespace SixLabors.ImageSharp internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 { - private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - - public byte Control => XYZW; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index e11956c105..86e4174f11 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -54,9 +54,11 @@ namespace SixLabors.ImageSharp internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 { - private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - - public byte Control => XYZW; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + } 
[MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) From 4812a8d0a5f139ec9fd7e4280f60af49776c5920 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Fri, 6 Nov 2020 19:30:49 +0100 Subject: [PATCH 100/104] Fix #1414 --- .../Formats/Jpg/JpegDecoderTests.cs | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs index 78218aec90..d233a8edaf 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs @@ -141,17 +141,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg TestEnvironment.InputImagesDirectoryFullPath, fileName); - var cts = new CancellationTokenSource(); - if (cancellationDelayMs == 0) - { - cts.Cancel(); - } - else + const int NumberOfRuns = 5; + + for (int i = 0; i < NumberOfRuns; i++) { - cts.CancelAfter(cancellationDelayMs); + var cts = new CancellationTokenSource(); + if (cancellationDelayMs == 0) + { + cts.Cancel(); + } + else + { + cts.CancelAfter(cancellationDelayMs); + } + + try + { + using var image = await Image.LoadAsync(hugeFile, cts.Token); + } + catch (TaskCanceledException) + { + // Succesfully observed a cancellation + return; + } } - await Assert.ThrowsAsync(() => Image.LoadAsync(hugeFile, cts.Token)); + throw new Exception($"No cancellation happened out of {NumberOfRuns} runs!"); } [Theory(Skip = "Identify is too fast, doesn't work reliably.")] From ded92172541f2ef6af3c44c250743218840ef70b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 19:49:52 +0000 Subject: [PATCH 101/104] Handle Bmp encoder padding. 
--- src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs | 4 ++-- tests/ImageSharp.Tests/Common/SimdUtilsTests.cs | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 8fd6bcce68..07744566a3 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -225,9 +225,9 @@ namespace SixLabors.ImageSharp "Output span must be divisable by 3!"); DebugGuard.IsTrue( - source.Length == dest.Length * 4 / 3, + dest.Length >= source.Length * 3 / 4, nameof(source), - "Output span must be 3/4 the length of the input span!"); + "Output span must be at least 3/4 the length of the input span!"); } public static class Shuffle diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index fe432107a2..ec09e43e57 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -164,8 +164,6 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArraySizesDivisibleBy8 = new TheoryData { 0, 8, 16, 1024 }; public static readonly TheoryData ArraySizesDivisibleBy4 = new TheoryData { 0, 4, 8, 28, 1020 }; public static readonly TheoryData ArraySizesDivisibleBy3 = new TheoryData { 0, 3, 9, 36, 957 }; - - public static readonly TheoryData ArraySizesDivisibleBy32 = new TheoryData { 0, 32, 512 }; public static readonly TheoryData ArbitraryArraySizes = From 090158ea335e3b28f8a8d0e3c1b6a617d081a51c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 19:50:29 +0000 Subject: [PATCH 102/104] Update src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs Co-authored-by: Clinton Ingram --- src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs 
b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 296970ddcc..86ba074280 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -489,12 +489,8 @@ namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v3 = Ssse3.AlignRight(v3, v2, 12); - - v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); - v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); - - v1 = Ssse3.AlignRight(v2, v1, 8); + v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); + v2 = Ssse3.AlignRight(v3, v2, 12); ref Vector128 vd = ref Unsafe.Add(ref destBase, j); From 699d8ff01d36bb1e337762501e4fe4adfbe48e71 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 20:25:28 +0000 Subject: [PATCH 103/104] Use ROS trick all round and optimize Shuffle3 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 86ba074280..51c81be066 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -18,6 +18,10 @@ namespace SixLabors.ImageSharp public static ReadOnlySpan PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; + private static ReadOnlySpan ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 }; + + private static ReadOnlySpan ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 }; + /// /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . 
@@ -352,10 +356,12 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80).AsByte(); + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); - Vector128 vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte(); - Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte(); + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); @@ -381,10 +387,10 @@ namespace SixLabors.ImageSharp v2 = Ssse3.AlignRight(v2, v1, 8); v1 = Ssse3.AlignRight(v1, v0, 12); - v0 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle); - v1 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); - v2 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); - v3 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle); v0 = Ssse3.Shuffle(v0, vmaske); v1 = Ssse3.Shuffle(v1, vmasko); @@ -392,12 +398,8 @@ namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(v3, vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v3 = Ssse3.AlignRight(v3, v2, 12); - - v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); - v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); - - v1 = Ssse3.AlignRight(v2, v1, 8); + v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); + v2 = Ssse3.AlignRight(v3, v2, 12); ref Vector128 vd = ref 
Unsafe.Add(ref destBase, i); @@ -416,7 +418,8 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80).AsByte(); + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); Span bytes = stackalloc byte[Vector128.Count]; @@ -459,8 +462,9 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte(); - Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte(); + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); From 3ee5a388022178397161ff50ffa97819c2d4af34 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 20:45:39 +0000 Subject: [PATCH 104/104] Fix shuffle --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 51c81be066..2ea7f2c9bd 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -358,7 +358,6 @@ namespace SixLabors.ImageSharp { ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); Vector128 vmask = Unsafe.As>(ref vmaskBase); - Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); Vector128 vmasko = Unsafe.As>(ref vmaskoBase); Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); @@ -398,8 +397,12 @@ 
namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(v3, vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); - v2 = Ssse3.AlignRight(v3, v2, 12); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); ref Vector128 vd = ref Unsafe.Add(ref destBase, i); @@ -493,8 +496,12 @@ namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); - v2 = Ssse3.AlignRight(v3, v2, 12); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); ref Vector128 vd = ref Unsafe.Add(ref destBase, j);