From ca85f1003f5539c7e1e70848d81e48772154d1dd Mon Sep 17 00:00:00 2001 From: David Rolland Date: Sat, 26 Sep 2020 20:29:10 +0900 Subject: [PATCH 001/131] Fixed typo in AdaptiveThreshold processor and added specific test --- .../Binarization/AdaptiveThresholdProcessor{TPixel}.cs | 3 ++- .../Processing/Binarization/AdaptiveThresholdTests.cs | 1 + tests/Images/External | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs index 43023c938..6d95d51b3 100644 --- a/src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Binarization/AdaptiveThresholdProcessor{TPixel}.cs @@ -67,7 +67,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Binarization ref TPixel color = ref Unsafe.Add(ref rowRef, x); color.ToRgba32(ref rgb); - sum += (ulong)(rgb.R + rgb.G + rgb.G); + sum += (ulong)(rgb.R + rgb.G + rgb.B); + if (x - startX != 0) { intImage[x - startX, y - startY] = intImage[x - startX - 1, y - startY] + sum; diff --git a/tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs b/tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs index 8efac7593..f4f800107 100644 --- a/tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs +++ b/tests/ImageSharp.Tests/Processing/Binarization/AdaptiveThresholdTests.cs @@ -100,6 +100,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Binarization [Theory] [WithFile(TestImages.Png.Bradley01, PixelTypes.Rgba32)] [WithFile(TestImages.Png.Bradley02, PixelTypes.Rgba32)] + [WithFile(TestImages.Png.Ducky, PixelTypes.Rgba32)] public void AdaptiveThreshold_Works(TestImageProvider provider) where TPixel : unmanaged, IPixel { diff --git a/tests/Images/External b/tests/Images/External index 6a0030806..28839f6ea 160000 --- a/tests/Images/External +++ b/tests/Images/External @@ -1 +1 @@ -Subproject commit 6a003080674d1fedc66292c13ce5a357b2a33083 +Subproject commit 28839f6eaa212a575005df630b6030bb4314a75b From 8ab45ccf242fa9b74f39a554feef6fd8c5df9fb9 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sat, 26 Sep 2020 16:49:00 +0200 Subject: [PATCH 002/131] Add reference output for #1362 --- tests/Images/External | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Images/External b/tests/Images/External index 28839f6ea..cc6465910 160000 --- a/tests/Images/External +++ b/tests/Images/External @@ -1 +1 @@ -Subproject commit 28839f6eaa212a575005df630b6030bb4314a75b +Subproject commit cc6465910d092319ef9bf4e99698a0649996d3c5 From 3091072e382e6a6205ecf999f4518ef6ba4dff2c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 15:42:51 +0100 Subject: [PATCH 003/131] Add AVX backed Block8x8F Transpose method --- .../Jpeg/Components/Block8x8F.Generated.cs | 6 +- .../Jpeg/Components/Block8x8F.Generated.tt | 6 +- .../Formats/Jpeg/Components/Block8x8F.cs | 83 +++++++++++++++++++ .../Jpeg/Components/FastFloatingPointDCT.cs | 6 +- .../BlockOperations/Block8x8F_Transpose.cs | 45 ++++++++++ .../Formats/Jpg/Block8x8FTests.cs | 22 ++++- 6 files changed, 157 insertions(+), 11 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs index f6f590368..10cbee5e6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs @@ -10,12 +10,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Transpose the block into the destination block. + /// + /// Fallback method to transpose a block into the destination block on non AVX supported CPUs. /// /// The destination block [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeInto(ref Block8x8F d) + public void TransposeIntoFallback(ref Block8x8F d) { d.V0L.X = V0L.X; d.V1L.X = V0L.Y; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt index 6ee054021..f47d9106e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt @@ -23,12 +23,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Transpose the block into the destination block. + /// + /// Fallback method to transpose a block into the destination block on non AVX supported CPUs. /// /// The destination block [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeInto(ref Block8x8F d) + public void TransposeIntoFallback(ref Block8x8F d) { <# PushIndent(" "); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index b7835d670..d698e90b3 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -6,6 +6,10 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif using System.Text; // ReSharper disable InconsistentNaming @@ -596,5 +600,84 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components DebugGuard.MustBeLessThan(idx, Size, nameof(idx)); DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx)); } + + /// + /// Transpose the block into the destination block. + /// + /// The destination block + [MethodImpl(InliningOptions.ShortMethod)] + public void TransposeInto(ref Block8x8F d) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + this.TransposeIntoAvx(ref d); + } + else +#endif + { + this.TransposeIntoFallback(ref d); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + /// + /// AVX-only variant for executing . + /// + /// + [MethodImpl(InliningOptions.ShortMethod)] + public void TransposeIntoAvx(ref Block8x8F d) + { + ref Vector256 r0 = ref Unsafe.As>(ref this.V0L); + ref Vector256 r1 = ref Unsafe.As>(ref this.V1L); + ref Vector256 r2 = ref Unsafe.As>(ref this.V2L); + ref Vector256 r3 = ref Unsafe.As>(ref this.V3L); + ref Vector256 r4 = ref Unsafe.As>(ref this.V4L); + ref Vector256 r5 = ref Unsafe.As>(ref this.V5L); + ref Vector256 r6 = ref Unsafe.As>(ref this.V6L); + ref Vector256 r7 = ref Unsafe.As>(ref this.V7L); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t2 = Avx.UnpackLow(r2, r3); + Vector256 t3 = Avx.UnpackHigh(r2, r3); + Vector256 t4 = Avx.UnpackLow(r4, r5); + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t6 = Avx.UnpackLow(r6, r7); + Vector256 t7 = Avx.UnpackHigh(r6, r7); + + // Controls generated via + // _MM_SHUFFLE(fp3, fp2, fp1, fp0)(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) + const byte Control1_0_1_0 = 0b1_00_01_00; // 1, 0, 1, 0 + const byte Control3_2_3_2 = 0b11_10_11_10; // 3, 2, 3, 2 + + r0 = Avx.Shuffle(t0, t2, Control1_0_1_0); + r1 = Avx.Shuffle(t0, t2, Control3_2_3_2); + r2 = Avx.Shuffle(t1, t3, Control1_0_1_0); + r3 = Avx.Shuffle(t1, t3, Control3_2_3_2); + r4 = Avx.Shuffle(t4, t6, Control1_0_1_0); + r5 = Avx.Shuffle(t4, t6, Control3_2_3_2); + r6 = Avx.Shuffle(t5, t7, Control1_0_1_0); + r7 = Avx.Shuffle(t5, t7, Control3_2_3_2); + + t0 = Avx.Permute2x128(r0, r4, 0x20); + t1 = Avx.Permute2x128(r1, r5, 0x20); + t2 = Avx.Permute2x128(r2, r6, 0x20); + t3 = Avx.Permute2x128(r3, r7, 0x20); + t4 = Avx.Permute2x128(r0, r4, 0x31); + t5 = Avx.Permute2x128(r1, r5, 0x31); + t6 = Avx.Permute2x128(r2, r6, 0x31); + t7 = Avx.Permute2x128(r3, r7, 0x31); + + Unsafe.As>(ref d.V0L) = t0; + Unsafe.As>(ref d.V1L) = t1; + Unsafe.As>(ref d.V2L) = t2; + Unsafe.As>(ref d.V3L) = t3; + Unsafe.As>(ref d.V4L) = t4; + Unsafe.As>(ref d.V5L) = t5; + Unsafe.As>(ref d.V6L) = t6; + Unsafe.As>(ref d.V7L) = t7; + } +#endif } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index ee06f2bde..d0b373609 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System.Numerics; @@ -50,8 +50,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// Temporary block provided by the caller public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) { - // TODO: Transpose is a bottleneck now. We need full AVX support to optimize it: - // https://github.com/dotnet/corefx/issues/22940 src.TransposeInto(ref temp); IDCT8x4_LeftPart(ref temp, ref dest); @@ -340,4 +338,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components dest.MultiplyInplace(C_0_125); } } -} \ No newline at end of file +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs new file mode 100644 index 000000000..d21e84835 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -0,0 +1,45 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + public class Block8x8F_Transpose + { + private static readonly Block8x8F Source = Create8x8FloatData(); + + [Benchmark] + public void TransposeIntoVector4() + { + var dest = default(Block8x8F); + Source.TransposeIntoFallback(ref dest); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Benchmark] + public void TransposeIntoAvx() + { + var dest = default(Block8x8F); + Source.TransposeIntoAvx(ref dest); + } +#endif + + private static Block8x8F Create8x8FloatData() + { + var result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[(i * 8) + j] = (i * 10) + j; + } + } + + var source = default(Block8x8F); + source.LoadFrom(result); + return source; + } + } +} diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 722521f98..050b1889a 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -172,7 +172,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg source.LoadFrom(Create8x8FloatData()); var dest = default(Block8x8F); - source.TransposeInto(ref dest); + source.TransposeIntoFallback(ref dest); float[] actual = new float[64]; dest.ScaledCopyTo(actual); @@ -180,6 +180,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Assert.Equal(expected, actual); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void TransposeIntoAvx() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); + + var dest = default(Block8x8F); + source.TransposeIntoAvx(ref dest); + + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); + + Assert.Equal(expected, actual); + } +#endif + private class BufferHolder { public Block8x8F Buffer; From 398e901840c8809942f41ca5feba7dd88e67551d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 17:42:50 +0100 Subject: [PATCH 004/131] Add variant 2 --- .../Formats/Jpeg/Components/Block8x8F.cs | 118 ++++++++++++++---- 1 file changed, 97 insertions(+), 21 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index d698e90b3..683308e35 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -628,14 +628,15 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components [MethodImpl(InliningOptions.ShortMethod)] public void TransposeIntoAvx(ref Block8x8F d) { - ref Vector256 r0 = ref Unsafe.As>(ref this.V0L); - ref Vector256 r1 = ref Unsafe.As>(ref this.V1L); - ref Vector256 r2 = ref Unsafe.As>(ref this.V2L); - ref Vector256 r3 = ref Unsafe.As>(ref this.V3L); - ref Vector256 r4 = ref Unsafe.As>(ref this.V4L); - ref Vector256 r5 = ref Unsafe.As>(ref this.V5L); - ref Vector256 r6 = ref Unsafe.As>(ref this.V6L); - ref Vector256 r7 = ref Unsafe.As>(ref this.V7L); +#if avxvariant1 + Vector256 r0 = Unsafe.As>(ref this.V0L); + Vector256 r1 = Unsafe.As>(ref this.V1L); + Vector256 r2 = Unsafe.As>(ref this.V2L); + Vector256 r3 = Unsafe.As>(ref this.V3L); + Vector256 r4 = Unsafe.As>(ref this.V4L); + Vector256 r5 = Unsafe.As>(ref this.V5L); + Vector256 r6 = Unsafe.As>(ref this.V6L); + Vector256 r7 = Unsafe.As>(ref this.V7L); Vector256 t0 = Avx.UnpackLow(r0, r1); Vector256 t1 = Avx.UnpackHigh(r0, r1); @@ -646,11 +647,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Vector256 t6 = Avx.UnpackLow(r6, r7); Vector256 t7 = Avx.UnpackHigh(r6, r7); - // Controls generated via - // _MM_SHUFFLE(fp3, fp2, fp1, fp0)(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) - const byte Control1_0_1_0 = 0b1_00_01_00; // 1, 0, 1, 0 - const byte Control3_2_3_2 = 0b11_10_11_10; // 3, 2, 3, 2 - + // Controls generated via _MM_SHUFFLE + const byte Control1_0_1_0 = 0b1000100; + const byte Control3_2_3_2 = 0b11101110; r0 = Avx.Shuffle(t0, t2, Control1_0_1_0); r1 = Avx.Shuffle(t0, t2, Control3_2_3_2); r2 = Avx.Shuffle(t1, t3, Control1_0_1_0); @@ -660,14 +659,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components r6 = Avx.Shuffle(t5, t7, Control1_0_1_0); r7 = Avx.Shuffle(t5, t7, Control3_2_3_2); - t0 = Avx.Permute2x128(r0, r4, 0x20); - t1 = Avx.Permute2x128(r1, r5, 0x20); - t2 = Avx.Permute2x128(r2, r6, 0x20); - t3 = Avx.Permute2x128(r3, r7, 0x20); - t4 = Avx.Permute2x128(r0, r4, 0x31); - t5 = Avx.Permute2x128(r1, r5, 0x31); - t6 = Avx.Permute2x128(r2, r6, 0x31); - t7 = Avx.Permute2x128(r3, r7, 0x31); + const byte Control0x20 = 0b100000; + const byte Control0x31 = 0b110001; + t0 = Avx.Permute2x128(r0, r4, Control0x20); + t1 = Avx.Permute2x128(r1, r5, Control0x20); + t2 = Avx.Permute2x128(r2, r6, Control0x20); + t3 = Avx.Permute2x128(r3, r7, Control0x20); + t4 = Avx.Permute2x128(r0, r4, Control0x31); + t5 = Avx.Permute2x128(r1, r5, Control0x31); + t6 = Avx.Permute2x128(r2, r6, Control0x31); + t7 = Avx.Permute2x128(r3, r7, Control0x31); Unsafe.As>(ref d.V0L) = t0; Unsafe.As>(ref d.V1L) = t1; @@ -677,6 +678,81 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components Unsafe.As>(ref d.V5L) = t5; Unsafe.As>(ref d.V6L) = t6; Unsafe.As>(ref d.V7L) = t7; +#else + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref this.V0L).ToVector256(), + Unsafe.As>(ref this.V4L), + 1); + + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref this.V1L).ToVector256(), + Unsafe.As>(ref this.V5L), + 1); + + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref this.V2L).ToVector256(), + Unsafe.As>(ref this.V6L), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref this.V3L).ToVector256(), + Unsafe.As>(ref this.V7L), + 1); + + Vector256 r4 = Avx.InsertVector128( + Unsafe.As>(ref this.V0R).ToVector256(), + Unsafe.As>(ref this.V4R), + 1); + + Vector256 r5 = Avx.InsertVector128( + Unsafe.As>(ref this.V1R).ToVector256(), + Unsafe.As>(ref this.V5R), + 1); + + Vector256 r6 = Avx.InsertVector128( + Unsafe.As>(ref this.V2R).ToVector256(), + Unsafe.As>(ref this.V6R), + 1); + + Vector256 r7 = Avx.InsertVector128( + Unsafe.As>(ref this.V3R).ToVector256(), + Unsafe.As>(ref this.V7R), + 1); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t2 = Avx.UnpackLow(r2, r3); + Vector256 t3 = Avx.UnpackHigh(r2, r3); + Vector256 t4 = Avx.UnpackLow(r4, r5); + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t6 = Avx.UnpackLow(r6, r7); + Vector256 t7 = Avx.UnpackHigh(r6, r7); + + Vector256 v = Avx.Shuffle(t0, t2, 0x4E); + r0 = Avx.Blend(t0, v, 0xCC); + r1 = Avx.Blend(t2, v, 0x33); + + v = Avx.Shuffle(t1, t3, 0x4E); + r2 = Avx.Blend(t1, v, 0xCC); + r3 = Avx.Blend(t3, v, 0x33); + + v = Avx.Shuffle(t4, t6, 0x4E); + r4 = Avx.Blend(t4, v, 0xCC); + r5 = Avx.Blend(t6, v, 0x33); + + v = Avx.Shuffle(t5, t7, 0x4E); + r6 = Avx.Blend(t5, v, 0xCC); + r7 = Avx.Blend(t7, v, 0x33); + + Unsafe.As>(ref d.V0L) = r0; + Unsafe.As>(ref d.V1L) = r1; + Unsafe.As>(ref d.V2L) = r2; + Unsafe.As>(ref d.V3L) = r3; + Unsafe.As>(ref d.V4L) = r4; + Unsafe.As>(ref d.V5L) = r5; + Unsafe.As>(ref d.V6L) = r6; + Unsafe.As>(ref d.V7L) = r7; +#endif } #endif } From 24641e74cd52b110fe8047950ec3e938150eb2af Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 20:38:32 +0100 Subject: [PATCH 005/131] Update tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs Co-authored-by: Anton Firszov --- .../Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs index d21e84835..ae1b23df9 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations { private static readonly Block8x8F Source = Create8x8FloatData(); - [Benchmark] + [Benchmark(Baseline=true)] public void TransposeIntoVector4() { var dest = default(Block8x8F); From ce74b9e820f931536397611731de9f47b0651c5e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 22:23:37 +0100 Subject: [PATCH 006/131] Use interleaving to prevent stack spills --- .../Formats/Jpeg/Components/Block8x8F.cs | 92 +++---------------- 1 file changed, 15 insertions(+), 77 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 683308e35..547e11623 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -628,57 +628,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components [MethodImpl(InliningOptions.ShortMethod)] public void TransposeIntoAvx(ref Block8x8F d) { -#if avxvariant1 - Vector256 r0 = Unsafe.As>(ref this.V0L); - Vector256 r1 = Unsafe.As>(ref this.V1L); - Vector256 r2 = Unsafe.As>(ref this.V2L); - Vector256 r3 = Unsafe.As>(ref this.V3L); - Vector256 r4 = Unsafe.As>(ref this.V4L); - Vector256 r5 = Unsafe.As>(ref this.V5L); - Vector256 r6 = Unsafe.As>(ref this.V6L); - Vector256 r7 = Unsafe.As>(ref this.V7L); - - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t1 = Avx.UnpackHigh(r0, r1); - Vector256 t2 = Avx.UnpackLow(r2, r3); - Vector256 t3 = Avx.UnpackHigh(r2, r3); - Vector256 t4 = Avx.UnpackLow(r4, r5); - Vector256 t5 = Avx.UnpackHigh(r4, r5); - Vector256 t6 = Avx.UnpackLow(r6, r7); - Vector256 t7 = Avx.UnpackHigh(r6, r7); - - // Controls generated via _MM_SHUFFLE - const byte Control1_0_1_0 = 0b1000100; - const byte Control3_2_3_2 = 0b11101110; - r0 = Avx.Shuffle(t0, t2, Control1_0_1_0); - r1 = Avx.Shuffle(t0, t2, Control3_2_3_2); - r2 = Avx.Shuffle(t1, t3, Control1_0_1_0); - r3 = Avx.Shuffle(t1, t3, Control3_2_3_2); - r4 = Avx.Shuffle(t4, t6, Control1_0_1_0); - r5 = Avx.Shuffle(t4, t6, Control3_2_3_2); - r6 = Avx.Shuffle(t5, t7, Control1_0_1_0); - r7 = Avx.Shuffle(t5, t7, Control3_2_3_2); - - const byte Control0x20 = 0b100000; - const byte Control0x31 = 0b110001; - t0 = Avx.Permute2x128(r0, r4, Control0x20); - t1 = Avx.Permute2x128(r1, r5, Control0x20); - t2 = Avx.Permute2x128(r2, r6, Control0x20); - t3 = Avx.Permute2x128(r3, r7, Control0x20); - t4 = Avx.Permute2x128(r0, r4, Control0x31); - t5 = Avx.Permute2x128(r1, r5, Control0x31); - t6 = Avx.Permute2x128(r2, r6, Control0x31); - t7 = Avx.Permute2x128(r3, r7, Control0x31); - - Unsafe.As>(ref d.V0L) = t0; - Unsafe.As>(ref d.V1L) = t1; - Unsafe.As>(ref d.V2L) = t2; - Unsafe.As>(ref d.V3L) = t3; - Unsafe.As>(ref d.V4L) = t4; - Unsafe.As>(ref d.V5L) = t5; - Unsafe.As>(ref d.V6L) = t6; - Unsafe.As>(ref d.V7L) = t7; -#else Vector256 r0 = Avx.InsertVector128( Unsafe.As>(ref this.V0L).ToVector256(), Unsafe.As>(ref this.V4L), @@ -720,39 +669,28 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components 1); Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t1 = Avx.UnpackHigh(r0, r1); Vector256 t2 = Avx.UnpackLow(r2, r3); - Vector256 t3 = Avx.UnpackHigh(r2, r3); + Vector256 v = Avx.Shuffle(t0, t2, 0x4E); + Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); + Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); + Vector256 t4 = Avx.UnpackLow(r4, r5); - Vector256 t5 = Avx.UnpackHigh(r4, r5); Vector256 t6 = Avx.UnpackLow(r6, r7); - Vector256 t7 = Avx.UnpackHigh(r6, r7); - - Vector256 v = Avx.Shuffle(t0, t2, 0x4E); - r0 = Avx.Blend(t0, v, 0xCC); - r1 = Avx.Blend(t2, v, 0x33); + v = Avx.Shuffle(t4, t6, 0x4E); + Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); + Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t3 = Avx.UnpackHigh(r2, r3); v = Avx.Shuffle(t1, t3, 0x4E); - r2 = Avx.Blend(t1, v, 0xCC); - r3 = Avx.Blend(t3, v, 0x33); - - v = Avx.Shuffle(t4, t6, 0x4E); - r4 = Avx.Blend(t4, v, 0xCC); - r5 = Avx.Blend(t6, v, 0x33); + Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); + Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t7 = Avx.UnpackHigh(r6, r7); v = Avx.Shuffle(t5, t7, 0x4E); - r6 = Avx.Blend(t5, v, 0xCC); - r7 = Avx.Blend(t7, v, 0x33); - - Unsafe.As>(ref d.V0L) = r0; - Unsafe.As>(ref d.V1L) = r1; - Unsafe.As>(ref d.V2L) = r2; - Unsafe.As>(ref d.V3L) = r3; - Unsafe.As>(ref d.V4L) = r4; - Unsafe.As>(ref d.V5L) = r5; - Unsafe.As>(ref d.V6L) = r6; - Unsafe.As>(ref d.V7L) = r7; -#endif + Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); + Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); } #endif } From a62347e761a0ef0393934c7f2483608266253239 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 7 Oct 2020 23:10:53 +0100 Subject: [PATCH 007/131] Update Block8x8FTests.cs --- tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 050b1889a..73a68063c 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void TransposeInto() + public void TransposeIntoFallback() { float[] expected = Create8x8FloatData(); ReferenceImplementations.Transpose8x8(expected); From 0c34ce36c6708f27abbdc0c0d2667870269fbfb3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 9 Oct 2020 17:02:13 +0100 Subject: [PATCH 008/131] First pass at HW feature tests Designed to fail to ensure RemoteExecutor is running. --- .../Formats/Jpg/Block8x8FTests.cs | 30 ++- .../FeatureTesting/FeatureTestRunner.cs | 232 ++++++++++++++++++ 2 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 73a68063c..a09472b46 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -8,7 +8,7 @@ using System.Diagnostics; using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -200,6 +200,34 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } #endif + [Fact] + public void TransposeInto() + { + static void RunTest() + { + // Just testing this fails in CI. RemoteExecutor is not working on my machine. + Assert.True(false); + + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); + + var dest = default(Block8x8F); + source.TransposeInto(ref dest); + + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); + + Assert.Equal(expected, actual); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); + } + private class BufferHolder { public Block8x8F Buffer; diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs new file mode 100644 index 000000000..8b5ed8d48 --- /dev/null +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -0,0 +1,232 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Numerics; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using Microsoft.DotNet.RemoteExecutor; +using Xunit; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities +{ + /// + /// Allows the testing against specific feature sets. + /// + public static class FeatureTestRunner + { + private static readonly char[] SplitChars = new[] { ',', ' ' }; + + /// + /// Allows the deserialization of parameters passed to the feature test. + /// + /// + /// This is required because does not allow + /// marshalling of fields so we cannot pass a wrapped + /// allowing automatic deserialization. + /// + /// + /// + /// The type to deserialize to. + /// The string value to deserialize. + /// The value. + public static T Deserialize(string value) + where T : IXunitSerializable + => BasicSerializer.Deserialize(value); + + // TODO: Write runner test and use this. + private static void AssertHwIntrinsicsFeatureDisabled(HwIntrinsics intrinsics) + { + switch (intrinsics) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Vector.IsHardwareAccelerated); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (string intrinsic in intrinsics.ToFeatureCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic != nameof(HwIntrinsics.AllowAll)) + { + processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + } + + RemoteExecutor.Invoke( + action, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (string intrinsic in intrinsics.ToFeatureCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic != nameof(HwIntrinsics.AllowAll)) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + } + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + } + + private static IEnumerable ToFeatureCollection(this HwIntrinsics intrinsics) + { + // Loop through and translate the given values into COMPlus equivaluents + var features = new List(); + var split = intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries).ToArray(); + foreach (string intrinsic in split) + { + switch (intrinsic) + { + case nameof(HwIntrinsics.DisableSIMD): + features.Add("FeatureSIMD"); + break; + + case nameof(HwIntrinsics.AllowAll): + + // Not a COMPlus value. We filter in calling method. + features.Add(nameof(HwIntrinsics.AllowAll)); + break; + + default: + features.Add(intrinsic.Replace("Disable", "Enable")); + break; + } + } + + return features; + } + } + + /// + /// See + /// + /// ends up impacting all SIMD support(including System.Numerics) + /// but not things like , , and . + /// + /// + [Flags] + public enum HwIntrinsics + { + // Use flags so we can pass multiple values without using params. + DisableSIMD = 0, + DisableHWIntrinsic = 1 << 0, + DisableSSE = 1 << 1, + DisableSSE2 = 1 << 2, + DisableAES = 1 << 3, + DisablePCLMULQDQ = 1 << 4, + DisableSSE3 = 1 << 5, + DisableSSSE3 = 1 << 6, + DisableSSE41 = 1 << 7, + DisableSSE42 = 1 << 8, + DisablePOPCNT = 1 << 9, + DisableAVX = 1 << 10, + DisableFMA = 1 << 11, + DisableAVX2 = 1 << 12, + DisableBMI1 = 1 << 13, + DisableBMI2 = 1 << 14, + DisableLZCNT = 1 << 15, + AllowAll = 1 << 16 + } +} From a15cf770f9dc5c40c3c761a0bec8611027e10d3f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 9 Oct 2020 17:09:02 +0100 Subject: [PATCH 009/131] Fix build --- tests/Directory.Build.targets | 4 ++-- .../TestUtilities/FeatureTesting/FeatureTestRunner.cs | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index e9e93a855..4edc9fdff 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -30,8 +30,8 @@ - - + + diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 8b5ed8d48..172c03d5a 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -174,8 +174,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities { // Loop through and translate the given values into COMPlus equivaluents var features = new List(); - var split = intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries).ToArray(); - foreach (string intrinsic in split) + foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) { switch (intrinsic) { From 10250ffc34f638b81099db36b5a1d6760ba50f73 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 9 Oct 2020 17:16:30 +0100 Subject: [PATCH 010/131] Test windows only --- .github/workflows/build-and-test.yml | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 0e093a834..8fc2dd2bc 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -14,26 +14,26 @@ jobs: strategy: matrix: options: - - os: ubuntu-latest - framework: netcoreapp3.1 - runtime: -x64 - codecov: false - - os: windows-latest - framework: netcoreapp3.1 - runtime: -x64 - codecov: true + # - os: ubuntu-latest + # framework: netcoreapp3.1 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: netcoreapp3.1 + # runtime: -x64 + # codecov: true - os: windows-latest framework: netcoreapp2.1 runtime: -x64 codecov: false - - os: windows-latest - framework: net472 - runtime: -x64 - codecov: false - - os: windows-latest - framework: net472 - runtime: -x86 - codecov: false + # - os: windows-latest + # framework: net472 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: net472 + # runtime: -x86 + # codecov: false runs-on: ${{matrix.options.os}} if: "!contains(github.event.head_commit.message, '[skip ci]')" From 37b0585febdce8465211b15f2786f22afadfb259 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 9 Oct 2020 18:15:59 +0100 Subject: [PATCH 011/131] Use single test, enable runners --- .github/workflows/build-and-test.yml | 32 +++++++------- .../Formats/Jpg/Block8x8FTests.cs | 42 ------------------- 2 files changed, 16 insertions(+), 58 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 8fc2dd2bc..0e093a834 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -14,26 +14,26 @@ jobs: strategy: matrix: options: - # - os: ubuntu-latest - # framework: netcoreapp3.1 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: netcoreapp3.1 - # runtime: -x64 - # codecov: true + - os: ubuntu-latest + framework: netcoreapp3.1 + runtime: -x64 + codecov: false + - os: windows-latest + framework: netcoreapp3.1 + runtime: -x64 + codecov: true - os: windows-latest framework: netcoreapp2.1 runtime: -x64 codecov: false - # - os: windows-latest - # framework: net472 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: net472 - # runtime: -x86 - # codecov: false + - os: windows-latest + framework: net472 + runtime: -x64 + codecov: false + - os: windows-latest + framework: net472 + runtime: -x86 + codecov: false runs-on: ${{matrix.options.os}} if: "!contains(github.event.head_commit.message, '[skip ci]')" diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index a09472b46..548238088 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -5,7 +5,6 @@ // #define BENCHMARKING using System; using System.Diagnostics; - using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -162,52 +161,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // PrintLinearData((Span)mirror); } - [Fact] - public void TransposeIntoFallback() - { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); - - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); - - var dest = default(Block8x8F); - source.TransposeIntoFallback(ref dest); - - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); - - Assert.Equal(expected, actual); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - [Fact] - public void TransposeIntoAvx() - { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); - - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); - - var dest = default(Block8x8F); - source.TransposeIntoAvx(ref dest); - - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); - - Assert.Equal(expected, actual); - } -#endif - [Fact] public void TransposeInto() { static void RunTest() { - // Just testing this fails in CI. RemoteExecutor is not working on my machine. - Assert.True(false); - float[] expected = Create8x8FloatData(); ReferenceImplementations.Transpose8x8(expected); From a7626e16dc36bd01eabe66c0ad3a41376de25b18 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 14:13:05 +0100 Subject: [PATCH 012/131] Revert to 8e5a59f --- tests/Directory.Build.targets | 4 +- .../Formats/Jpg/Block8x8FTests.cs | 48 ++-- .../FeatureTesting/FeatureTestRunner.cs | 231 ------------------ 3 files changed, 33 insertions(+), 250 deletions(-) delete mode 100644 tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 4edc9fdff..e9e93a855 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -30,8 +30,8 @@ - - + + diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 548238088..73a68063c 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -5,9 +5,10 @@ // #define BENCHMARKING using System; using System.Diagnostics; + using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; -using SixLabors.ImageSharp.Tests.TestUtilities; + using Xunit; using Xunit.Abstractions; @@ -162,29 +163,42 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void TransposeInto() + public void TransposeIntoFallback() { - static void RunTest() - { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); - var dest = default(Block8x8F); - source.TransposeInto(ref dest); + var dest = default(Block8x8F); + source.TransposeIntoFallback(ref dest); - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); - Assert.Equal(expected, actual); - } + Assert.Equal(expected, actual); + } - FeatureTestRunner.RunWithHwIntrinsicsFeature( - RunTest, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void TransposeIntoAvx() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); + + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); + + var dest = default(Block8x8F); + source.TransposeIntoAvx(ref dest); + + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); + + Assert.Equal(expected, actual); } +#endif private class BufferHolder { diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs deleted file mode 100644 index 172c03d5a..000000000 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Numerics; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif -using Microsoft.DotNet.RemoteExecutor; -using Xunit; -using Xunit.Abstractions; - -namespace SixLabors.ImageSharp.Tests.TestUtilities -{ - /// - /// Allows the testing against specific feature sets. - /// - public static class FeatureTestRunner - { - private static readonly char[] SplitChars = new[] { ',', ' ' }; - - /// - /// Allows the deserialization of parameters passed to the feature test. - /// - /// - /// This is required because does not allow - /// marshalling of fields so we cannot pass a wrapped - /// allowing automatic deserialization. - /// - /// - /// - /// The type to deserialize to. - /// The string value to deserialize. - /// The value. - public static T Deserialize(string value) - where T : IXunitSerializable - => BasicSerializer.Deserialize(value); - - // TODO: Write runner test and use this. - private static void AssertHwIntrinsicsFeatureDisabled(HwIntrinsics intrinsics) - { - switch (intrinsics) - { - case HwIntrinsics.DisableSIMD: - Assert.False(Vector.IsHardwareAccelerated); - break; -#if SUPPORTS_RUNTIME_INTRINSICS - case HwIntrinsics.DisableHWIntrinsic: - Assert.False(Vector.IsHardwareAccelerated); - break; - case HwIntrinsics.DisableSSE: - Assert.False(Sse.IsSupported); - break; - case HwIntrinsics.DisableSSE2: - Assert.False(Sse2.IsSupported); - break; - case HwIntrinsics.DisableAES: - Assert.False(Aes.IsSupported); - break; - case HwIntrinsics.DisablePCLMULQDQ: - Assert.False(Pclmulqdq.IsSupported); - break; - case HwIntrinsics.DisableSSE3: - Assert.False(Sse3.IsSupported); - break; - case HwIntrinsics.DisableSSSE3: - Assert.False(Ssse3.IsSupported); - break; - case HwIntrinsics.DisableSSE41: - Assert.False(Sse41.IsSupported); - break; - case HwIntrinsics.DisableSSE42: - Assert.False(Sse42.IsSupported); - break; - case HwIntrinsics.DisablePOPCNT: - Assert.False(Popcnt.IsSupported); - break; - case HwIntrinsics.DisableAVX: - Assert.False(Avx.IsSupported); - break; - case HwIntrinsics.DisableFMA: - Assert.False(Fma.IsSupported); - break; - case HwIntrinsics.DisableAVX2: - Assert.False(Avx2.IsSupported); - break; - case HwIntrinsics.DisableBMI1: - Assert.False(Bmi1.IsSupported); - break; - case HwIntrinsics.DisableBMI2: - Assert.False(Bmi2.IsSupported); - break; - case HwIntrinsics.DisableLZCNT: - Assert.False(Lzcnt.IsSupported); - break; -#endif - } - } - - /// - /// Runs the given test within an environment - /// where the given features. - /// - /// The test action to run. - /// The intrinsics features. - public static void RunWithHwIntrinsicsFeature( - Action action, - HwIntrinsics intrinsics) - { - if (!RemoteExecutor.IsSupported) - { - return; - } - - foreach (string intrinsic in intrinsics.ToFeatureCollection()) - { - var processStartInfo = new ProcessStartInfo(); - if (intrinsic != nameof(HwIntrinsics.AllowAll)) - { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; - } - - RemoteExecutor.Invoke( - action, - new RemoteInvokeOptions - { - StartInfo = processStartInfo - }) - .Dispose(); - } - } - - /// - /// Runs the given test within an environment - /// where the given features. - /// - /// The test action to run. - /// The intrinsics features. - /// The value to pass as a parameter to the test action. - public static void RunWithHwIntrinsicsFeature( - Action action, - HwIntrinsics intrinsics, - T serializable) - where T : IXunitSerializable - { - if (!RemoteExecutor.IsSupported) - { - return; - } - - foreach (string intrinsic in intrinsics.ToFeatureCollection()) - { - var processStartInfo = new ProcessStartInfo(); - if (intrinsic != nameof(HwIntrinsics.AllowAll)) - { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; - } - - RemoteExecutor.Invoke( - action, - BasicSerializer.Serialize(serializable), - new RemoteInvokeOptions - { - StartInfo = processStartInfo - }) - .Dispose(); - } - } - - private static IEnumerable ToFeatureCollection(this HwIntrinsics intrinsics) - { - // Loop through and translate the given values into COMPlus equivaluents - var features = new List(); - foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) - { - switch (intrinsic) - { - case nameof(HwIntrinsics.DisableSIMD): - features.Add("FeatureSIMD"); - break; - - case nameof(HwIntrinsics.AllowAll): - - // Not a COMPlus value. We filter in calling method. - features.Add(nameof(HwIntrinsics.AllowAll)); - break; - - default: - features.Add(intrinsic.Replace("Disable", "Enable")); - break; - } - } - - return features; - } - } - - /// - /// See - /// - /// ends up impacting all SIMD support(including System.Numerics) - /// but not things like , , and . - /// - /// - [Flags] - public enum HwIntrinsics - { - // Use flags so we can pass multiple values without using params. - DisableSIMD = 0, - DisableHWIntrinsic = 1 << 0, - DisableSSE = 1 << 1, - DisableSSE2 = 1 << 2, - DisableAES = 1 << 3, - DisablePCLMULQDQ = 1 << 4, - DisableSSE3 = 1 << 5, - DisableSSSE3 = 1 << 6, - DisableSSE41 = 1 << 7, - DisableSSE42 = 1 << 8, - DisablePOPCNT = 1 << 9, - DisableAVX = 1 << 10, - DisableFMA = 1 << 11, - DisableAVX2 = 1 << 12, - DisableBMI1 = 1 << 13, - DisableBMI2 = 1 << 14, - DisableLZCNT = 1 << 15, - AllowAll = 1 << 16 - } -} From f08b68247d09c9cafe3cacacaa8de6927fca86f3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 14:46:04 +0100 Subject: [PATCH 013/131] Use Ubuntu for coverage. Touch #1376 --- .github/workflows/build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 0e093a834..1422606a6 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -18,7 +18,7 @@ jobs: framework: netcoreapp3.1 runtime: -x64 codecov: false - - os: windows-latest + - os: ubuntu-latest framework: netcoreapp3.1 runtime: -x64 codecov: true From 90624b71fbb7b4bd2e84e38a5465f35db311826f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 14:57:54 +0100 Subject: [PATCH 014/131] Enable windows netcore 3.1 --- .github/workflows/build-and-test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 1422606a6..c8f399794 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -17,11 +17,11 @@ jobs: - os: ubuntu-latest framework: netcoreapp3.1 runtime: -x64 - codecov: false - - os: ubuntu-latest + codecov: true + - os: windows-latest framework: netcoreapp3.1 runtime: -x64 - codecov: true + codecov: false - os: windows-latest framework: netcoreapp2.1 runtime: -x64 From f1f4d49c01c00216a356f8033878c4a7f9c7517b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 15:31:51 +0100 Subject: [PATCH 015/131] Try bumping SDK --- tests/Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index e9e93a855..acdf68432 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -31,7 +31,7 @@ - + From e7988d21d248ae0984d08b14bfb6103461158d0a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 16:48:06 +0100 Subject: [PATCH 016/131] Update deterministic workaround --- Directory.Build.targets | 2 +- src/Directory.Build.targets | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 4e7ab9e6b..5d27adc65 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -18,7 +18,7 @@ - + diff --git a/src/Directory.Build.targets b/src/Directory.Build.targets index d1875262d..9b8be05b5 100644 --- a/src/Directory.Build.targets +++ b/src/Directory.Build.targets @@ -21,16 +21,25 @@ - + + $([System.IO.Path]::Combine('$(IntermediateOutputPath)','$(TargetFrameworkMoniker).AssemblyAttributes$(DefaultLanguageSourceExtension)')) + + + + + + + + + DependsOnTargets="InitializeSourceRootMappedPaths" + Returns="@(_LocalTopLevelSourceRoot)" + Condition="'$(DeterministicSourcePaths)' == 'true'"> <_LocalTopLevelSourceRoot Include="@(SourceRoot)" Condition="'%(SourceRoot.NestedRoot)' == ''"/> - + false @@ -62,7 +71,7 @@ - + @@ -74,7 +83,7 @@ SkipUnchangedFiles = "true" DestinationFolder="..\..\" /> - + - + From 9bbf90837306b84bbd432e6cba632fcd0f7fb40c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 17:03:39 +0100 Subject: [PATCH 017/131] Revert SDK bump --- Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 5d27adc65..4e7ab9e6b 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -18,7 +18,7 @@ - + From 8d3f5e79624041fc87ba04ad2c5f1a3a30ffe6a8 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 18:06:51 +0100 Subject: [PATCH 018/131] Use coverlet nightlies --- Directory.Build.props | 1 + tests/Directory.Build.targets | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Directory.Build.props b/Directory.Build.props index 0f9c5bdde..c4610d0ed 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -120,6 +120,7 @@ https://api.nuget.org/v3/index.json; https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-eng/nuget/v3/index.json; + https://www.myget.org/F/coverlet-dev/api/v3/index.json; true $(MSBuildThisFileDirectory)shared-infrastructure/SixLabors.snk diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index acdf68432..9ee4e2a2e 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -28,7 +28,7 @@ - + From f71d7a9cb30d0124e7949f44c88bc3022b6a1ae5 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 19:07:42 +0100 Subject: [PATCH 019/131] Update Directory.Build.targets --- tests/Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 9ee4e2a2e..335f3d106 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -31,7 +31,7 @@ - + From 87c7d3f89c5a563befde2b759b8e8c966ba75562 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 23:06:32 +0100 Subject: [PATCH 020/131] Update test 3rd party libraries --- tests/Directory.Build.targets | 6 +++--- tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs | 6 +++--- .../ReferenceCodecs/MagickReferenceDecoder.cs | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 335f3d106..b4e815eb4 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -29,12 +29,12 @@ - + - + - + diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs index 0f76d9931..58ed31e61 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Image image, bool useExactComparer = true, float compareTolerance = 0.01f) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { string path = TestImageProvider.GetFilePathOrNull(provider); if (path == null) @@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga } public static Image DecodeWithMagick(Configuration configuration, FileInfo fileInfo) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { using (var magickImage = new MagickImage(fileInfo)) { @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Assert.True(result.TryGetSinglePixelSpan(out Span resultPixels)); - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { byte[] data = pixels.ToByteArray(PixelMapping.RGBA); diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index de8278a33..bb8407f19 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs public static MagickReferenceDecoder Instance { get; } = new MagickReferenceDecoder(); private static void FromRgba32Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -33,7 +33,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } private static void FromRgba64Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -48,17 +48,17 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } public Task> DecodeAsync(Configuration configuration, Stream stream, CancellationToken cancellationToken) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel => Task.FromResult(this.Decode(configuration, stream)); public Image Decode(Configuration configuration, Stream stream) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { using var magickImage = new MagickImage(stream); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { if (magickImage.Depth == 8) { From 8f94dc29c7a06c3694d6bb18c07a5d0dda81f43c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 23:13:55 +0100 Subject: [PATCH 021/131] Update Test SDK. --- tests/Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index b4e815eb4..7a2ed7061 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -31,7 +31,7 @@ - + From cb87ae166480321a3cf28df5d7411170ddc5b388 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 23:42:15 +0100 Subject: [PATCH 022/131] Tweak decode cancel timings --- tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs index 78218aec9..088421549 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs @@ -129,10 +129,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [InlineData(TestImages.Jpeg.Baseline.Jpeg420Small, 0)] [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 1)] - [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 10)] + [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 15)] [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 30)] [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 1)] - [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 10)] + [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 15)] [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 30)] public async Task Decode_IsCancellable(string fileName, int cancellationDelayMs) { From ad21822ff9a63ff0a3267b58b2b7555ca534c4ea Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 Oct 2020 23:42:22 +0100 Subject: [PATCH 023/131] Bump src deps. --- Directory.Build.targets | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 4e7ab9e6b..91d5a5d1f 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -24,16 +24,12 @@ - + - - + From e356bdecac2d344d010797fe19f38a4ae01226bb Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 00:19:23 +0100 Subject: [PATCH 024/131] Fix missing dll.config file The file only exists if there are binding redirects. --- tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs index 1375b5763..48728faf0 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs @@ -170,7 +170,10 @@ namespace SixLabors.ImageSharp.Tests } string testProjectConfigPath = TestAssemblyFile.FullName + ".config"; - File.Copy(testProjectConfigPath, remoteExecutorConfigPath); + if (File.Exists(testProjectConfigPath)) + { + File.Copy(testProjectConfigPath, remoteExecutorConfigPath); + } if (Is64BitProcess) { From 31f7480c29c42d649606fa2db2b6c781dfe327fb Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 00:19:35 +0100 Subject: [PATCH 025/131] Update xunit runner --- tests/Directory.Build.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 7a2ed7061..f9efbf79f 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -37,7 +37,7 @@ - + From c15a552f9aaf2019783f453c83081d5adcd4bd93 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 18:04:03 +0100 Subject: [PATCH 026/131] Try System.Drawing BMP Decoder everywhere. --- .../TestUtilities/TestEnvironment.Formats.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs index 6e204e2d4..5370506dd 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs @@ -69,7 +69,10 @@ namespace SixLabors.ImageSharp.Tests cfg.ConfigureCodecs( BmpFormat.Instance, - IsWindows ? (IImageDecoder)SystemDrawingReferenceDecoder.Instance : MagickReferenceDecoder.Instance, + + // Try SD Bitmap decoder everywhere. + // IsWindows ? (IImageDecoder)SystemDrawingReferenceDecoder.Instance : MagickReferenceDecoder.Instance, + SystemDrawingReferenceDecoder.Instance, bmpEncoder, new BmpImageFormatDetector()); From 9ab1200f4c0e00d8e9c12a5cda66439548e971bd Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 21:06:33 +0100 Subject: [PATCH 027/131] Revert "Try System.Drawing BMP Decoder everywhere." This reverts commit 94ba508fd7d07beec309af00fbd0bcbd409dcc09. --- .../TestUtilities/TestEnvironment.Formats.cs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs index 5370506dd..6e204e2d4 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Formats.cs @@ -69,10 +69,7 @@ namespace SixLabors.ImageSharp.Tests cfg.ConfigureCodecs( BmpFormat.Instance, - - // Try SD Bitmap decoder everywhere. - // IsWindows ? (IImageDecoder)SystemDrawingReferenceDecoder.Instance : MagickReferenceDecoder.Instance, - SystemDrawingReferenceDecoder.Instance, + IsWindows ? (IImageDecoder)SystemDrawingReferenceDecoder.Instance : MagickReferenceDecoder.Instance, bmpEncoder, new BmpImageFormatDetector()); From 7c4a8a1a6fbf0364f9651c737991552468e57ce2 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 13 Oct 2020 23:41:29 +0100 Subject: [PATCH 028/131] Skip tests on Linux with known Magick issue --- .runsettings | 6 ++++++ Directory.Build.props | 1 + ImageSharp.sln | 1 + tests/Directory.Build.targets | 5 +++-- tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs | 1 + tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs | 6 ++++++ tests/ImageSharp.Tests/ImageSharp.Tests.csproj | 1 + 7 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 .runsettings diff --git a/.runsettings b/.runsettings new file mode 100644 index 000000000..7af0056c8 --- /dev/null +++ b/.runsettings @@ -0,0 +1,6 @@ + + + + category!=failing + + diff --git a/Directory.Build.props b/Directory.Build.props index c4610d0ed..bb97810a8 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -15,6 +15,7 @@ $(MSBuildThisFileDirectory)artifacts/ $(SixLaborsProjectCategory)/$(MSBuildProjectName) https://github.com/SixLabors/ImageSharp/ + $(MSBuildThisFileDirectory)/.runsettings diff --git a/ImageSharp.sln b/ImageSharp.sln index 509dcf96b..b1d3176ad 100644 --- a/ImageSharp.sln +++ b/ImageSharp.sln @@ -9,6 +9,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution .gitattributes = .gitattributes .gitignore = .gitignore .gitmodules = .gitmodules + .runsettings = .runsettings ci-build.ps1 = ci-build.ps1 ci-pack.ps1 = ci-pack.ps1 ci-test.ps1 = ci-test.ps1 diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index f9efbf79f..1f699c9dd 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -26,11 +26,12 @@ - + - + + diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 3f767620a..5015faa70 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -339,6 +339,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv2, PixelTypes.Rgba32)] [WithFile(CoreHeader, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs index b05486e35..d6839198d 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs @@ -152,18 +152,21 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp public void Encode_16Bit_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: true); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv5, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] [WithFile(Bit8Palette4, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] public void Encode_8Bit_WithV3Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: false); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv5, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] [WithFile(Bit8Palette4, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] public void Encode_8Bit_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: true); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit8Gs, PixelTypes.L8, BmpBitsPerPixel.Pixel8)] public void Encode_8BitGray_WithV3Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) @@ -173,6 +176,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp bitsPerPixel, supportTransparency: false); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit8Gs, PixelTypes.L8, BmpBitsPerPixel.Pixel8)] public void Encode_8BitGray_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) @@ -182,6 +186,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp bitsPerPixel, supportTransparency: true); + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit32Rgb, PixelTypes.Rgba32)] public void Encode_8BitColor_WithWuQuantizer(TestImageProvider provider) @@ -208,6 +213,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } + [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit32Rgb, PixelTypes.Rgba32)] public void Encode_8BitColor_WithOctreeQuantizer(TestImageProvider provider) diff --git a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj index ba849ab25..07ade97d5 100644 --- a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj +++ b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj @@ -20,6 +20,7 @@ + From 4454395cd8b2407627f542749f9f5d564a5eb3ff Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 00:13:05 +0100 Subject: [PATCH 029/131] Add rule to coverlet settings. --- .runsettings | 1 + tests/coverlet.runsettings | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/.runsettings b/.runsettings index 7af0056c8..ca48342bd 100644 --- a/.runsettings +++ b/.runsettings @@ -1,3 +1,4 @@ + diff --git a/tests/coverlet.runsettings b/tests/coverlet.runsettings index ee408a5f0..cffce3540 100644 --- a/tests/coverlet.runsettings +++ b/tests/coverlet.runsettings @@ -1,5 +1,9 @@ + + + category!=failing + From 722a014f12c1e3136cb14b1bbd30a1b7611b3ee3 Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 11:51:33 +0200 Subject: [PATCH 030/131] Upgraded Magick.NET. --- tests/Directory.Build.targets | 2 +- .../Formats/Tga/TgaTestUtils.cs | 6 +++--- .../ReferenceCodecs/MagickReferenceDecoder.cs | 19 ++++++++++++++----- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 335f3d106..ef67b122e 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -29,7 +29,7 @@ - + diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs index 0f76d9931..58ed31e61 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Image image, bool useExactComparer = true, float compareTolerance = 0.01f) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { string path = TestImageProvider.GetFilePathOrNull(provider); if (path == null) @@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga } public static Image DecodeWithMagick(Configuration configuration, FileInfo fileInfo) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { using (var magickImage = new MagickImage(fileInfo)) { @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Assert.True(result.TryGetSinglePixelSpan(out Span resultPixels)); - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { byte[] data = pixels.ToByteArray(PixelMapping.RGBA); diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index de8278a33..615e91890 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -7,6 +7,7 @@ using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; using ImageMagick; +using ImageMagick.Formats.Bmp; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -18,7 +19,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs public static MagickReferenceDecoder Instance { get; } = new MagickReferenceDecoder(); private static void FromRgba32Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -33,7 +34,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } private static void FromRgba64Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -48,17 +49,25 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } public Task> DecodeAsync(Configuration configuration, Stream stream, CancellationToken cancellationToken) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel => Task.FromResult(this.Decode(configuration, stream)); public Image Decode(Configuration configuration, Stream stream) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { + var bmpReadDefines = new BmpReadDefines + { + IgnoreFileSize = true + }; + + var settings = new MagickReadSettings(); + settings.SetDefines(bmpReadDefines); + using var magickImage = new MagickImage(stream); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { if (magickImage.Depth == 8) { From 97d6413640e062c3453c076b47f8c2d1e859ef0a Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 12:22:58 +0200 Subject: [PATCH 031/131] Use settings in the constructor. --- .../TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index 615e91890..d4c35a1ad 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -63,7 +63,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs var settings = new MagickReadSettings(); settings.SetDefines(bmpReadDefines); - using var magickImage = new MagickImage(stream); + using var magickImage = new MagickImage(stream, settings); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; From 08771fe8abb5085ffbfddbd2ec1b4df3ca11f37e Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 13:53:32 +0000 Subject: [PATCH 032/131] Update tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs Co-authored-by: James Jackson-South --- .../TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index d4c35a1ad..dda5f751c 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -56,6 +56,12 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { var bmpReadDefines = new BmpReadDefines + { + // See https://github.com/SixLabors/ImageSharp/issues/1380 + // Validation fails on Ubuntu despite identical header generation + // on all platforms. + IgnoreFileSize = !TestEnvironment.IsWindows + }; { IgnoreFileSize = true }; From b7185acf454a88130d53a329618512e4fad0c666 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 15:50:13 +0100 Subject: [PATCH 033/131] Fix bad merge --- .../TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index dda5f751c..8efc9a023 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -62,9 +62,6 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs // on all platforms. IgnoreFileSize = !TestEnvironment.IsWindows }; - { - IgnoreFileSize = true - }; var settings = new MagickReadSettings(); settings.SetDefines(bmpReadDefines); From 4992c87323916803ad35f095a33bb2ca8aa1b9ec Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 16:56:48 +0100 Subject: [PATCH 034/131] Remove ActiveIssue filters --- tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs | 1 - tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs | 6 ------ 2 files changed, 7 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 5015faa70..3f767620a 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -339,7 +339,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv2, PixelTypes.Rgba32)] [WithFile(CoreHeader, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs index d6839198d..b05486e35 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs @@ -152,21 +152,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp public void Encode_16Bit_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: true); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv5, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] [WithFile(Bit8Palette4, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] public void Encode_8Bit_WithV3Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: false); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(WinBmpv5, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] [WithFile(Bit8Palette4, PixelTypes.Rgba32, BmpBitsPerPixel.Pixel8)] public void Encode_8Bit_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) where TPixel : unmanaged, IPixel => TestBmpEncoderCore(provider, bitsPerPixel, supportTransparency: true); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit8Gs, PixelTypes.L8, BmpBitsPerPixel.Pixel8)] public void Encode_8BitGray_WithV3Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) @@ -176,7 +173,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp bitsPerPixel, supportTransparency: false); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit8Gs, PixelTypes.L8, BmpBitsPerPixel.Pixel8)] public void Encode_8BitGray_WithV4Header_Works(TestImageProvider provider, BmpBitsPerPixel bitsPerPixel) @@ -186,7 +182,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp bitsPerPixel, supportTransparency: true); - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit32Rgb, PixelTypes.Rgba32)] public void Encode_8BitColor_WithWuQuantizer(TestImageProvider provider) @@ -213,7 +208,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } - [ActiveIssue("https://github.com/SixLabors/ImageSharp/issues/1380", TestPlatforms.Linux)] [Theory] [WithFile(Bit32Rgb, PixelTypes.Rgba32)] public void Encode_8BitColor_WithOctreeQuantizer(TestImageProvider provider) From 11f2c9666734779daa99d7a43c5e6bb31b4f5846 Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 18:25:17 +0200 Subject: [PATCH 035/131] Reverted using the read defines. --- .../ReferenceCodecs/MagickReferenceDecoder.cs | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index 8efc9a023..bb8407f19 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -7,7 +7,6 @@ using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; using ImageMagick; -using ImageMagick.Formats.Bmp; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -55,18 +54,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs public Image Decode(Configuration configuration, Stream stream) where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { - var bmpReadDefines = new BmpReadDefines - { - // See https://github.com/SixLabors/ImageSharp/issues/1380 - // Validation fails on Ubuntu despite identical header generation - // on all platforms. - IgnoreFileSize = !TestEnvironment.IsWindows - }; - - var settings = new MagickReadSettings(); - settings.SetDefines(bmpReadDefines); - - using var magickImage = new MagickImage(stream, settings); + using var magickImage = new MagickImage(stream); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; From bf7c00441cac8104e648fdff70b898f64aa31ed1 Mon Sep 17 00:00:00 2001 From: Dirk Lemstra Date: Wed, 14 Oct 2020 18:25:50 +0200 Subject: [PATCH 036/131] The colorPaletteSize is also part of the file size. --- src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs index eb29c4405..454440f63 100644 --- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs +++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs @@ -171,7 +171,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp var fileHeader = new BmpFileHeader( type: BmpConstants.TypeMarkers.Bitmap, - fileSize: BmpFileHeader.Size + infoHeaderSize + infoHeader.ImageSize, + fileSize: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize + infoHeader.ImageSize, reserved: 0, offset: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize); From 8d12e539a94e41eb44db11392371711809a1dc49 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 21:02:50 +0100 Subject: [PATCH 037/131] Allow selectively bypassing bmp verification. --- .../Formats/Bmp/BmpDecoderTests.cs | 4 +++- .../Formats/Bmp/BmpEncoderTests.cs | 22 +++++++++++++++--- .../ReferenceCodecs/MagickReferenceDecoder.cs | 23 ++++++++++++++++++- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 3f767620a..0c7c9a007 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -348,7 +348,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp using (Image image = provider.GetImage(BmpDecoder)) { image.DebugSave(provider); - image.CompareToOriginal(provider); + + // Do not validate. Reference files will fail validation. + image.CompareToOriginal(provider, new MagickReferenceDecoder(false)); } } diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs index b05486e35..83b67a01a 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs @@ -10,7 +10,7 @@ using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing.Processors.Quantization; using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison; - +using SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs; using Xunit; using Xunit.Abstractions; @@ -200,10 +200,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp Quantizer = new WuQuantizer() }; string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false); + + // Use the default decoder to test our encoded image. This verifies the content. + // We do not verify the reference image though as some are invalid. IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile); using (var referenceImage = Image.Load(actualOutputFile, referenceDecoder)) { - referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false); + referenceImage.CompareToReferenceOutput( + ImageComparer.TolerantPercentage(0.01f), + provider, + extension: "bmp", + appendPixelTypeToFileName: false, + decoder: new MagickReferenceDecoder(false)); } } } @@ -226,10 +234,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp Quantizer = new OctreeQuantizer() }; string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false); + + // Use the default decoder to test our encoded image. This verifies the content. + // We do not verify the reference image though as some are invalid. IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile); using (var referenceImage = Image.Load(actualOutputFile, referenceDecoder)) { - referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false); + referenceImage.CompareToReferenceOutput( + ImageComparer.TolerantPercentage(0.01f), + provider, + extension: "bmp", + appendPixelTypeToFileName: false, + decoder: new MagickReferenceDecoder(false)); } } } diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index bb8407f19..d20e7d2a7 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -7,6 +7,7 @@ using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; using ImageMagick; +using ImageMagick.Formats.Bmp; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -15,6 +16,18 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs { public class MagickReferenceDecoder : IImageDecoder { + private readonly bool validate; + + public MagickReferenceDecoder() + : this(true) + { + } + + public MagickReferenceDecoder(bool validate) + { + this.validate = validate; + } + public static MagickReferenceDecoder Instance { get; } = new MagickReferenceDecoder(); private static void FromRgba32Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) @@ -54,7 +67,15 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs public Image Decode(Configuration configuration, Stream stream) where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { - using var magickImage = new MagickImage(stream); + var bmpReadDefines = new BmpReadDefines + { + IgnoreFileSize = !this.validate + }; + + var settings = new MagickReadSettings(); + settings.SetDefines(bmpReadDefines); + + using var magickImage = new MagickImage(stream, settings); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; From 7909bc1e6529c913ead01fca055193d1af21940f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 22:25:50 +0100 Subject: [PATCH 038/131] Disable failing RemoteExecutor tests on NETFX. --- Directory.Build.targets | 2 +- .../Formats/Gif/GifDecoderTests.cs | 1 + .../Jpg/JpegDecoderTests.Progressive.cs | 1 + .../Formats/Png/PngDecoderTests.cs | 1 + .../Formats/Png/PngEncoderTests.cs | 1 + .../Formats/Tga/TgaDecoderTests.cs | 1 + .../Processors/Convolution/BokehBlurTest.cs | 59 +++++-------------- 7 files changed, 20 insertions(+), 46 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 91d5a5d1f..2a7d25b97 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -18,7 +18,7 @@ - + diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index 63aae5c55..2e2da8d6a 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -195,6 +195,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif Assert.IsType(ex.InnerException); } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Gif.Giphy, PixelTypes.Rgba32)] [WithFile(TestImages.Gif.Kumin, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index e29d8f158..62873a1c7 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -13,6 +13,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { public const string DecodeProgressiveJpegOutputName = "DecodeProgressiveJpeg"; + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32, false)] [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32, true)] diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 5b6adfe1a..80ca74719 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -401,6 +401,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png Assert.IsType(ex.InnerException); } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Png.Splash, PixelTypes.Rgba32)] [WithFile(TestImages.Png.Bike, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index b9f5f16fa..47419d47b 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -534,6 +534,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png } } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithTestPatternImages(100, 100, PixelTypes.Rgba32)] public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs index 5fb15541e..7514823fe 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs @@ -744,6 +744,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Assert.IsType(ex.InnerException); } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(Bit24BottomLeft, PixelTypes.Rgba32)] [WithFile(Bit32BottomLeft, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs b/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs index 50b8782e4..6c48cf843 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs @@ -138,21 +138,10 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor(TestImageProvider provider, BokehBlurInfo value) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump, string infoDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - BokehBlurInfo value = BasicSerializer.Deserialize(infoDump); - - provider.RunValidatingProcessorTest( - x => x.BokehBlur(value.Radius, value.Components, value.Gamma), - testOutputDetails: value.ToString(), - appendPixelTypeToFileName: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider), BasicSerializer.Serialize(value)) - .Dispose(); + provider.RunValidatingProcessorTest( + x => x.BokehBlur(value.Radius, value.Components, value.Gamma), + testOutputDetails: value.ToString(), + appendPixelTypeToFileName: false); } [Theory] @@ -164,18 +153,9 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor_WorksWithAllPixelTypes(TestImageProvider provider) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - provider.RunValidatingProcessorTest( + provider.RunValidatingProcessorTest( x => x.BokehBlur(8, 2, 3), appendSourceFileOrDescription: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider)) - .Dispose(); } [Theory] @@ -183,26 +163,15 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor_Bounded(TestImageProvider provider, BokehBlurInfo value) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump, string infoDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - BokehBlurInfo value = BasicSerializer.Deserialize(infoDump); - - provider.RunValidatingProcessorTest( - x => - { - Size size = x.GetCurrentSize(); - var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2); - x.BokehBlur(value.Radius, value.Components, value.Gamma, bounds); - }, - testOutputDetails: value.ToString(), - appendPixelTypeToFileName: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider), BasicSerializer.Serialize(value)) - .Dispose(); + provider.RunValidatingProcessorTest( + x => + { + Size size = x.GetCurrentSize(); + var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2); + x.BokehBlur(value.Radius, value.Components, value.Gamma, bounds); + }, + testOutputDetails: value.ToString(), + appendPixelTypeToFileName: false); } [Theory] From 21b0982327ad6a57245a310a9f8ec5a5b2f008ea Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 22:48:56 +0100 Subject: [PATCH 039/131] Split out progressive jpeg decoding tests --- .../Formats/Gif/GifDecoderTests.cs | 2 +- .../Jpg/JpegDecoderTests.Progressive.cs | 27 +++++++++++++------ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index 2e2da8d6a..1256520e6 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -209,7 +209,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); using Image image = provider.GetImage(GifDecoder); - image.DebugSave(provider); + image.DebugSave(provider, nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index 62873a1c7..5bf80580e 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -13,11 +13,25 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg { public const string DecodeProgressiveJpegOutputName = "DecodeProgressiveJpeg"; + [Theory] + [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32)] + public void DecodeProgressiveJpeg(TestImageProvider provider) + where TPixel : unmanaged, IPixel + { + using Image image = provider.GetImage(JpegDecoder); + image.DebugSave(provider); + + provider.Utility.TestName = DecodeProgressiveJpegOutputName; + image.CompareToReferenceOutput( + GetImageComparer(provider), + provider, + appendPixelTypeToFileName: false); + } + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] - [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32, false)] - [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32, true)] - public void DecodeProgressiveJpeg(TestImageProvider provider, bool enforceDiscontiguousBuffers) + [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32)] + public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { static void RunTest(string providerDump, string nonContiguousBuffersStr) @@ -25,10 +39,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); - if (!string.IsNullOrEmpty(nonContiguousBuffersStr)) - { - provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); - } + provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); using Image image = provider.GetImage(JpegDecoder); image.DebugSave(provider, nonContiguousBuffersStr); @@ -45,7 +56,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg RemoteExecutor.Invoke( RunTest, providerDump, - enforceDiscontiguousBuffers ? "Disco" : string.Empty) + "Disco") .Dispose(); } } From 586f2dca750037d918cfa6572ed40ef1483628f7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 23:02:33 +0100 Subject: [PATCH 040/131] Skip limited buffer bitmap test on NETFX --- .../Formats/Bmp/BmpDecoderTests.cs | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 0c7c9a007..e56ffa143 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -39,22 +39,34 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp }; [Theory] - [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, false)] - [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, true)] - public void BmpDecoder_CanDecode_MiscellaneousBitmaps(TestImageProvider provider, bool enforceDiscontiguousBuffers) + [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)] + public void BmpDecoder_CanDecode_MiscellaneousBitmaps(TestImageProvider provider) + where TPixel : unmanaged, IPixel + { + using Image image = provider.GetImage(BmpDecoder); + image.DebugSave(provider); + + if (TestEnvironment.IsWindows) + { + image.CompareToOriginal(provider); + } + } + + [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] + [Theory] + [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)] + public void BmpDecoder_CanDecode_MiscellaneousBitmaps_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) where TPixel : unmanaged, IPixel { static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); - if (!string.IsNullOrEmpty(nonContiguousBuffersStr)) - { - provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - } + provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); using Image image = provider.GetImage(BmpDecoder); - image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); + image.DebugSave(provider, nonContiguousBuffersStr); if (TestEnvironment.IsWindows) { @@ -66,7 +78,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp RemoteExecutor.Invoke( RunTest, providerDump, - enforceDiscontiguousBuffers ? "Disco" : string.Empty) + "Disco") .Dispose(); } From 147aea2448ace014ff669901d653cba0af6eb412 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 14 Oct 2020 23:50:04 +0100 Subject: [PATCH 041/131] Add skip for dotnet xunit --- .../ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs | 6 ++++++ .../ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs | 6 ++++++ .../Formats/Jpg/JpegDecoderTests.Progressive.cs | 6 ++++++ .../ImageSharp.Tests/Formats/Png/PngDecoderTests.cs | 12 ++++++++++++ .../ImageSharp.Tests/Formats/Png/PngEncoderTests.cs | 6 ++++++ .../ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs | 6 ++++++ 6 files changed, 42 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index e56ffa143..68a3213d3 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -59,6 +59,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index 1256520e6..b76ea264c 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -202,6 +202,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index 5bf80580e..1edbe5ba9 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -34,6 +34,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 80ca74719..851115478 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -396,6 +396,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png public void PngDecoder_DegenerateMemoryRequest_ShouldTranslateTo_ImageFormatException(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + provider.LimitAllocatorBufferCapacity().InPixelsSqrt(10); InvalidImageContentException ex = Assert.Throws(() => provider.GetImage(PngDecoder)); Assert.IsType(ex.InnerException); @@ -408,6 +414,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 47419d47b..8c4f17f02 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -540,6 +540,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump) { TestImageProvider provider = diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs index 7514823fe..8fff47719 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs @@ -751,6 +751,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) where TPixel : unmanaged, IPixel { + // dotnet xunit doesn't respect filter. + if (TestEnvironment.IsFramework) + { + return; + } + static void RunTest(string providerDump, string nonContiguousBuffersStr) { TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); From 45f303d094e60798f9ffe1650bf86fca92074b13 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 02:19:35 +0100 Subject: [PATCH 042/131] Don't use generics. Could have saved myself days of issues if i'd just thought of this first. --- .../Formats/Bmp/BmpDecoderTests.cs | 16 ++++----------- .../Formats/Gif/GifDecoderTests.cs | 16 +++++---------- .../Jpg/JpegDecoderTests.Progressive.cs | 20 ++++++------------- .../Formats/Png/PngDecoderTests.cs | 20 +++---------------- .../Formats/Png/PngEncoderTests.cs | 14 +++---------- .../Formats/Tga/TgaDecoderTests.cs | 14 +++---------- 6 files changed, 24 insertions(+), 76 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 68a3213d3..f98fa3c7f 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -52,26 +52,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp } } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)] - public void BmpDecoder_CanDecode_MiscellaneousBitmaps_WithLimitedAllocatorBufferCapacity( - TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void BmpDecoder_CanDecode_MiscellaneousBitmaps_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) { - // dotnet xunit doesn't respect filter. - if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(BmpDecoder); + using Image image = provider.GetImage(BmpDecoder); image.DebugSave(provider, nonContiguousBuffersStr); if (TestEnvironment.IsWindows) diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index b76ea264c..eb2643b8c 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -195,26 +195,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif Assert.IsType(ex.InnerException); } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Gif.Giphy, PixelTypes.Rgba32)] [WithFile(TestImages.Gif.Kumin, PixelTypes.Rgba32)] - public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) { - // dotnet xunit doesn't respect filter. - if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider + = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(GifDecoder); + using Image image = provider.GetImage(GifDecoder); image.DebugSave(provider, nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index 1edbe5ba9..98421ca5d 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -28,26 +28,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg appendPixelTypeToFileName: false); } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32)] - public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { - // dotnet xunit doesn't respect filter. - if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = + BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); - using Image image = provider.GetImage(JpegDecoder); + using Image image = provider.GetImage(JpegDecoder); image.DebugSave(provider, nonContiguousBuffersStr); provider.Utility.TestName = DecodeProgressiveJpegOutputName; @@ -62,8 +54,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg RemoteExecutor.Invoke( RunTest, providerDump, - "Disco") - .Dispose(); + "Disco") + .Dispose(); } } } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 851115478..2164975df 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -396,37 +396,23 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png public void PngDecoder_DegenerateMemoryRequest_ShouldTranslateTo_ImageFormatException(TestImageProvider provider) where TPixel : unmanaged, IPixel { - // dotnet xunit doesn't respect filter. - if (TestEnvironment.IsFramework) - { - return; - } - provider.LimitAllocatorBufferCapacity().InPixelsSqrt(10); InvalidImageContentException ex = Assert.Throws(() => provider.GetImage(PngDecoder)); Assert.IsType(ex.InnerException); } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(TestImages.Png.Splash, PixelTypes.Rgba32)] [WithFile(TestImages.Png.Bike, PixelTypes.Rgba32)] - public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { - // dotnet xunit doesn't respect filter. - if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(PngDecoder); + using Image image = provider.GetImage(PngDecoder); image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 8c4f17f02..b35e55887 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -534,22 +534,14 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png } } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithTestPatternImages(100, 100, PixelTypes.Rgba32)] - public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) { - // dotnet xunit doesn't respect filter. - if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump) { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = + BasicSerializer.Deserialize>(providerDump); #if SUPPORTS_RUNTIME_INTRINSICS Assert.False(Ssse3.IsSupported); #endif diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs index 8fff47719..edb43aa12 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs @@ -744,26 +744,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Assert.IsType(ex.InnerException); } - [ActiveIssue("https://github.com/dotnet/arcade/issues/6393", TargetFrameworkMonikers.NetFramework)] [Theory] [WithFile(Bit24BottomLeft, PixelTypes.Rgba32)] [WithFile(Bit32BottomLeft, PixelTypes.Rgba32)] - public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { - // dotnet xunit doesn't respect filter. - if (TestEnvironment.IsFramework) - { - return; - } - static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(TgaDecoder); + using Image image = provider.GetImage(TgaDecoder); image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); if (TestEnvironment.IsWindows) From 310710e3113223b976b638334b0290549da67bae Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 21:32:37 +0100 Subject: [PATCH 043/131] Add FeatureTestRunner --- .../FeatureTesting/FeatureTestRunner.cs | 266 ++++++++++++++++++ .../Tests/FeatureTestRunnerTests.cs | 237 ++++++++++++++++ 2 files changed, 503 insertions(+) create mode 100644 tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs create mode 100644 tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs new file mode 100644 index 000000000..57ed85a18 --- /dev/null +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -0,0 +1,266 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using Microsoft.DotNet.RemoteExecutor; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities +{ + /// + /// Allows the testing against specific feature sets. + /// + public static class FeatureTestRunner + { + private static readonly char[] SplitChars = new[] { ',', ' ' }; + + /// + /// Allows the deserialization of parameters passed to the feature test. + /// + /// + /// This is required because does not allow + /// marshalling of fields so we cannot pass a wrapped + /// allowing automatic deserialization. + /// + /// + /// + /// The type to deserialize to. + /// The string value to deserialize. + /// The value. + public static T Deserialize(string value) + where T : IXunitSerializable + => BasicSerializer.Deserialize(value); + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + + RemoteExecutor.Invoke( + action, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// + /// The test action to run. + /// The parameter passed will be a string representing the currently testing . + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + + RemoteExecutor.Invoke( + action, + intrinsic.Key.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(intrinsic.Key.ToString()); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(serializable)); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + intrinsic.Key.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(serializable), intrinsic.Key.ToString()); + } + } + } + + internal static Dictionary ToFeatureKeyValueCollection(this HwIntrinsics intrinsics) + { + // Loop through and translate the given values into COMPlus equivaluents + var features = new Dictionary(); + foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) + { + var key = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic); + switch (intrinsic) + { + case nameof(HwIntrinsics.DisableSIMD): + features.Add(key, "FeatureSIMD"); + break; + + case nameof(HwIntrinsics.AllowAll): + + // Not a COMPlus value. We filter in calling method. + features.Add(key, nameof(HwIntrinsics.AllowAll)); + break; + + default: + features.Add(key, intrinsic.Replace("Disable", "Enable")); + break; + } + } + + return features; + } + } + + /// + /// See + /// + /// ends up impacting all SIMD support(including System.Numerics) + /// but not things like , , and . + /// + /// + [Flags] +#pragma warning disable RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute). + public enum HwIntrinsics +#pragma warning restore RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute). + { + // Use flags so we can pass multiple values without using params. + // Don't base on 0 or use inverse for All as that doesn't translate to string values. + DisableSIMD = 1 << 0, + DisableHWIntrinsic = 1 << 1, + DisableSSE = 1 << 2, + DisableSSE2 = 1 << 3, + DisableAES = 1 << 4, + DisablePCLMULQDQ = 1 << 5, + DisableSSE3 = 1 << 6, + DisableSSSE3 = 1 << 7, + DisableSSE41 = 1 << 8, + DisableSSE42 = 1 << 9, + DisablePOPCNT = 1 << 10, + DisableAVX = 1 << 11, + DisableFMA = 1 << 12, + DisableAVX2 = 1 << 13, + DisableBMI1 = 1 << 14, + DisableBMI2 = 1 << 15, + DisableLZCNT = 1 << 16, + AllowAll = 1 << 17 + } +} diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs new file mode 100644 index 000000000..9852ba347 --- /dev/null +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -0,0 +1,237 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using Xunit; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests +{ + public class FeatureTestRunnerTests + { + public static TheoryData Intrinsics => + new TheoryData + { + { HwIntrinsics.DisableAES | HwIntrinsics.AllowAll, new string[] { "EnableAES", "AllowAll" } }, + { HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic, new string[] { "FeatureSIMD", "EnableHWIntrinsic" } }, + { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } + }; + + [Theory] + [MemberData(nameof(Intrinsics))] + public void ToFeatureCollectionReturnsExpectedResult(HwIntrinsics expectedItrinsics, string[] expectedValues) + { + Dictionary features = expectedItrinsics.ToFeatureKeyValueCollection(); + HwIntrinsics[] keys = features.Keys.ToArray(); + + HwIntrinsics actualIntrinsics = keys[0]; + for (int i = 1; i < keys.Length; i++) + { + actualIntrinsics |= keys[i]; + } + + Assert.Equal(expectedItrinsics, actualIntrinsics); + + IEnumerable actualValues = features.Select(x => x.Value); + Assert.Equal(expectedValues, actualValues); + } + + [Fact] + public void AllowsAllHwIntrinsicFeatures() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.True(Vector.IsHardwareAccelerated), + HwIntrinsics.AllowAll); + } + + [Fact] + public void CanLimitHwIntrinsicFeatures() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.False(Vector.IsHardwareAccelerated), + HwIntrinsics.DisableSIMD); + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) + { + Assert.NotNull(intrinsic); + + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Vector.IsHardwareAccelerated); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); + } + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + +#if SUPPORTS_RUNTIME_INTRINSICS + Assert.False(Sse.IsSupported); +#endif + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + AssertHwIntrinsicsFeatureDisabled, + HwIntrinsics.DisableSSE, + new FakeSerializable()); + } + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Vector.IsHardwareAccelerated); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); + } + } + + public class FakeSerializable : IXunitSerializable + { + public void Deserialize(IXunitSerializationInfo info) + { + } + + public void Serialize(IXunitSerializationInfo info) + { + } + } + } +} From 9380dd5b1e3487837c9494247a5bf8e6cc548c66 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 21:36:51 +0100 Subject: [PATCH 044/131] Use single Block8x8F.TransponseInto test --- .../Formats/Jpg/Block8x8FTests.cs | 48 +++++++------------ 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 73a68063c..548238088 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -5,10 +5,9 @@ // #define BENCHMARKING using System; using System.Diagnostics; - using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -163,42 +162,29 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void TransposeIntoFallback() + public void TransposeInto() { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); - - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); - - var dest = default(Block8x8F); - source.TransposeIntoFallback(ref dest); - - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); - - Assert.Equal(expected, actual); - } + static void RunTest() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); -#if SUPPORTS_RUNTIME_INTRINSICS - [Fact] - public void TransposeIntoAvx() - { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); + var dest = default(Block8x8F); + source.TransposeInto(ref dest); - var dest = default(Block8x8F); - source.TransposeIntoAvx(ref dest); + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); + Assert.Equal(expected, actual); + } - Assert.Equal(expected, actual); + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } -#endif private class BufferHolder { From 86198504ee6d3ea9841be70e638d8f307fca4e94 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 21:58:24 +0100 Subject: [PATCH 045/131] Ensure remoteexecutor tests run --- .github/workflows/build-and-test.yml | 32 +++++++++---------- .../Tests/FeatureTestRunnerTests.cs | 8 +++++ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c8f399794..ecb5ceb0e 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -17,23 +17,23 @@ jobs: - os: ubuntu-latest framework: netcoreapp3.1 runtime: -x64 - codecov: true - - os: windows-latest - framework: netcoreapp3.1 - runtime: -x64 - codecov: false - - os: windows-latest - framework: netcoreapp2.1 - runtime: -x64 - codecov: false - - os: windows-latest - framework: net472 - runtime: -x64 - codecov: false - - os: windows-latest - framework: net472 - runtime: -x86 codecov: false + # - os: windows-latest + # framework: netcoreapp3.1 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: netcoreapp2.1 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: net472 + # runtime: -x64 + # codecov: false + # - os: windows-latest + # framework: net472 + # runtime: -x86 + # codecov: false runs-on: ${{matrix.options.os}} if: "!contains(github.event.head_commit.message, '[skip ci]')" diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 9852ba347..070f20574 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -23,6 +23,14 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } }; + [Fact] + public void TempAssertThrow() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.True(false), + HwIntrinsics.DisableAVX); + } + [Theory] [MemberData(nameof(Intrinsics))] public void ToFeatureCollectionReturnsExpectedResult(HwIntrinsics expectedItrinsics, string[] expectedValues) From d3208ad354b14a253f217c1bf79c2271beb36b5e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:12:40 +0100 Subject: [PATCH 046/131] Increment tests --- .../Tests/FeatureTestRunnerTests.cs | 340 +++++++++--------- 1 file changed, 166 insertions(+), 174 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 070f20574..023679b2e 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -23,14 +23,6 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } }; - [Fact] - public void TempAssertThrow() - { - FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.True(false), - HwIntrinsics.DisableAVX); - } - [Theory] [MemberData(nameof(Intrinsics))] public void ToFeatureCollectionReturnsExpectedResult(HwIntrinsics expectedItrinsics, string[] expectedValues) @@ -54,7 +46,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void AllowsAllHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.True(Vector.IsHardwareAccelerated), + () => Assert.True(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), HwIntrinsics.AllowAll); } @@ -62,174 +54,174 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void CanLimitHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.False(Vector.IsHardwareAccelerated), + () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), HwIntrinsics.DisableSIMD); } - [Fact] - public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() - { - static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) - { - Assert.NotNull(intrinsic); - - switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) - { - case HwIntrinsics.DisableSIMD: - Assert.False(Vector.IsHardwareAccelerated); - break; -#if SUPPORTS_RUNTIME_INTRINSICS - case HwIntrinsics.DisableHWIntrinsic: - Assert.False(Vector.IsHardwareAccelerated); - break; - case HwIntrinsics.DisableSSE: - Assert.False(Sse.IsSupported); - break; - case HwIntrinsics.DisableSSE2: - Assert.False(Sse2.IsSupported); - break; - case HwIntrinsics.DisableAES: - Assert.False(Aes.IsSupported); - break; - case HwIntrinsics.DisablePCLMULQDQ: - Assert.False(Pclmulqdq.IsSupported); - break; - case HwIntrinsics.DisableSSE3: - Assert.False(Sse3.IsSupported); - break; - case HwIntrinsics.DisableSSSE3: - Assert.False(Ssse3.IsSupported); - break; - case HwIntrinsics.DisableSSE41: - Assert.False(Sse41.IsSupported); - break; - case HwIntrinsics.DisableSSE42: - Assert.False(Sse42.IsSupported); - break; - case HwIntrinsics.DisablePOPCNT: - Assert.False(Popcnt.IsSupported); - break; - case HwIntrinsics.DisableAVX: - Assert.False(Avx.IsSupported); - break; - case HwIntrinsics.DisableFMA: - Assert.False(Fma.IsSupported); - break; - case HwIntrinsics.DisableAVX2: - Assert.False(Avx2.IsSupported); - break; - case HwIntrinsics.DisableBMI1: - Assert.False(Bmi1.IsSupported); - break; - case HwIntrinsics.DisableBMI2: - Assert.False(Bmi2.IsSupported); - break; - case HwIntrinsics.DisableLZCNT: - Assert.False(Lzcnt.IsSupported); - break; -#endif - } - } - - foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - { - FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); - } - } - - [Fact] - public void CanLimitHwIntrinsicFeaturesWithSerializableParam() - { - static void AssertHwIntrinsicsFeatureDisabled(string serializable) - { - Assert.NotNull(serializable); - Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); - -#if SUPPORTS_RUNTIME_INTRINSICS - Assert.False(Sse.IsSupported); -#endif - } - - foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - { - FeatureTestRunner.RunWithHwIntrinsicsFeature( - AssertHwIntrinsicsFeatureDisabled, - HwIntrinsics.DisableSSE, - new FakeSerializable()); - } - } - - [Fact] - public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() - { - static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) - { - Assert.NotNull(serializable); - Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); - - switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) - { - case HwIntrinsics.DisableSIMD: - Assert.False(Vector.IsHardwareAccelerated); - break; -#if SUPPORTS_RUNTIME_INTRINSICS - case HwIntrinsics.DisableHWIntrinsic: - Assert.False(Vector.IsHardwareAccelerated); - break; - case HwIntrinsics.DisableSSE: - Assert.False(Sse.IsSupported); - break; - case HwIntrinsics.DisableSSE2: - Assert.False(Sse2.IsSupported); - break; - case HwIntrinsics.DisableAES: - Assert.False(Aes.IsSupported); - break; - case HwIntrinsics.DisablePCLMULQDQ: - Assert.False(Pclmulqdq.IsSupported); - break; - case HwIntrinsics.DisableSSE3: - Assert.False(Sse3.IsSupported); - break; - case HwIntrinsics.DisableSSSE3: - Assert.False(Ssse3.IsSupported); - break; - case HwIntrinsics.DisableSSE41: - Assert.False(Sse41.IsSupported); - break; - case HwIntrinsics.DisableSSE42: - Assert.False(Sse42.IsSupported); - break; - case HwIntrinsics.DisablePOPCNT: - Assert.False(Popcnt.IsSupported); - break; - case HwIntrinsics.DisableAVX: - Assert.False(Avx.IsSupported); - break; - case HwIntrinsics.DisableFMA: - Assert.False(Fma.IsSupported); - break; - case HwIntrinsics.DisableAVX2: - Assert.False(Avx2.IsSupported); - break; - case HwIntrinsics.DisableBMI1: - Assert.False(Bmi1.IsSupported); - break; - case HwIntrinsics.DisableBMI2: - Assert.False(Bmi2.IsSupported); - break; - case HwIntrinsics.DisableLZCNT: - Assert.False(Lzcnt.IsSupported); - break; -#endif - } - } - - foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - { - FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); - } - } + // [Fact] + // public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() + // { + // static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) + // { + // Assert.NotNull(intrinsic); + + // switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + // { + // case HwIntrinsics.DisableSIMD: + // Assert.False(Vector.IsHardwareAccelerated); + // break; + //#if SUPPORTS_RUNTIME_INTRINSICS + // case HwIntrinsics.DisableHWIntrinsic: + // Assert.False(Vector.IsHardwareAccelerated); + // break; + // case HwIntrinsics.DisableSSE: + // Assert.False(Sse.IsSupported); + // break; + // case HwIntrinsics.DisableSSE2: + // Assert.False(Sse2.IsSupported); + // break; + // case HwIntrinsics.DisableAES: + // Assert.False(Aes.IsSupported); + // break; + // case HwIntrinsics.DisablePCLMULQDQ: + // Assert.False(Pclmulqdq.IsSupported); + // break; + // case HwIntrinsics.DisableSSE3: + // Assert.False(Sse3.IsSupported); + // break; + // case HwIntrinsics.DisableSSSE3: + // Assert.False(Ssse3.IsSupported); + // break; + // case HwIntrinsics.DisableSSE41: + // Assert.False(Sse41.IsSupported); + // break; + // case HwIntrinsics.DisableSSE42: + // Assert.False(Sse42.IsSupported); + // break; + // case HwIntrinsics.DisablePOPCNT: + // Assert.False(Popcnt.IsSupported); + // break; + // case HwIntrinsics.DisableAVX: + // Assert.False(Avx.IsSupported); + // break; + // case HwIntrinsics.DisableFMA: + // Assert.False(Fma.IsSupported); + // break; + // case HwIntrinsics.DisableAVX2: + // Assert.False(Avx2.IsSupported); + // break; + // case HwIntrinsics.DisableBMI1: + // Assert.False(Bmi1.IsSupported); + // break; + // case HwIntrinsics.DisableBMI2: + // Assert.False(Bmi2.IsSupported); + // break; + // case HwIntrinsics.DisableLZCNT: + // Assert.False(Lzcnt.IsSupported); + // break; + //#endif + // } + // } + + // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + // { + // FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); + // } + // } + + // [Fact] + // public void CanLimitHwIntrinsicFeaturesWithSerializableParam() + // { + // static void AssertHwIntrinsicsFeatureDisabled(string serializable) + // { + // Assert.NotNull(serializable); + // Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + + //#if SUPPORTS_RUNTIME_INTRINSICS + // Assert.False(Sse.IsSupported); + //#endif + // } + + // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + // { + // FeatureTestRunner.RunWithHwIntrinsicsFeature( + // AssertHwIntrinsicsFeatureDisabled, + // HwIntrinsics.DisableSSE, + // new FakeSerializable()); + // } + // } + + // [Fact] + // public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() + // { + // static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) + // { + // Assert.NotNull(serializable); + // Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + + // switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + // { + // case HwIntrinsics.DisableSIMD: + // Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); + // break; + //#if SUPPORTS_RUNTIME_INTRINSICS + // case HwIntrinsics.DisableHWIntrinsic: + // Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); + // break; + // case HwIntrinsics.DisableSSE: + // Assert.False(Sse.IsSupported); + // break; + // case HwIntrinsics.DisableSSE2: + // Assert.False(Sse2.IsSupported); + // break; + // case HwIntrinsics.DisableAES: + // Assert.False(Aes.IsSupported); + // break; + // case HwIntrinsics.DisablePCLMULQDQ: + // Assert.False(Pclmulqdq.IsSupported); + // break; + // case HwIntrinsics.DisableSSE3: + // Assert.False(Sse3.IsSupported); + // break; + // case HwIntrinsics.DisableSSSE3: + // Assert.False(Ssse3.IsSupported); + // break; + // case HwIntrinsics.DisableSSE41: + // Assert.False(Sse41.IsSupported); + // break; + // case HwIntrinsics.DisableSSE42: + // Assert.False(Sse42.IsSupported); + // break; + // case HwIntrinsics.DisablePOPCNT: + // Assert.False(Popcnt.IsSupported); + // break; + // case HwIntrinsics.DisableAVX: + // Assert.False(Avx.IsSupported); + // break; + // case HwIntrinsics.DisableFMA: + // Assert.False(Fma.IsSupported); + // break; + // case HwIntrinsics.DisableAVX2: + // Assert.False(Avx2.IsSupported); + // break; + // case HwIntrinsics.DisableBMI1: + // Assert.False(Bmi1.IsSupported); + // break; + // case HwIntrinsics.DisableBMI2: + // Assert.False(Bmi2.IsSupported); + // break; + // case HwIntrinsics.DisableLZCNT: + // Assert.False(Lzcnt.IsSupported); + // break; + //#endif + // } + // } + + // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + // { + // FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); + // } + // } public class FakeSerializable : IXunitSerializable { From 5e53e8363d7d5f27bfa07ac561b5a4f63dbaf95e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:22:08 +0100 Subject: [PATCH 047/131] Fix COMPlus environmental parameters --- .../TestUtilities/FeatureTesting/FeatureTestRunner.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index 57ed85a18..a05347139 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -53,7 +53,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; RemoteExecutor.Invoke( action, @@ -94,7 +94,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; RemoteExecutor.Invoke( action, @@ -137,7 +137,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; RemoteExecutor.Invoke( action, @@ -180,7 +180,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_Enable{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; RemoteExecutor.Invoke( action, From e794901bf6cf802f90d9c5fd8dac9e4bf0f4272d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:30:11 +0100 Subject: [PATCH 048/131] Test another property to be sure --- .../TestUtilities/Tests/FeatureTestRunnerTests.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 023679b2e..13a080612 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -50,13 +50,15 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests HwIntrinsics.AllowAll); } +#if SUPPORTS_RUNTIME_INTRINSICS [Fact] public void CanLimitHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), - HwIntrinsics.DisableSIMD); + () => Assert.False(Sse.IsSupported, nameof(Sse.IsSupported)), + HwIntrinsics.DisableSSE); } +#endif // [Fact] // public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() From 54836a623bf27c2b97e8abbc159cd60fe077dbfe Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:44:35 +0100 Subject: [PATCH 049/131] Fix COMPlus value properly. --- .../TestUtilities/FeatureTesting/FeatureTestRunner.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index a05347139..eb1714baa 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -53,7 +53,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -94,7 +94,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -137,7 +137,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, @@ -180,7 +180,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities var processStartInfo = new ProcessStartInfo(); if (intrinsic.Key != HwIntrinsics.AllowAll) { - processStartInfo.Environment[$"COMPlus_{intrinsic}"] = "0"; + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; RemoteExecutor.Invoke( action, From 13b570dd02c57caa059fdcb169124a26d641dae6 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:50:22 +0100 Subject: [PATCH 050/131] Test FeatureSIMD --- .../TestUtilities/Tests/FeatureTestRunnerTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 13a080612..eb6f82885 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -55,8 +55,8 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void CanLimitHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.False(Sse.IsSupported, nameof(Sse.IsSupported)), - HwIntrinsics.DisableSSE); + () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), + HwIntrinsics.DisableSIMD); } #endif From b90cac23dfa8214aa2ffb91e73326d2741984f12 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 22:55:46 +0100 Subject: [PATCH 051/131] Now test both disable values. --- .../TestUtilities/Tests/FeatureTestRunnerTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index eb6f82885..0f6cdc066 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -56,7 +56,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests { FeatureTestRunner.RunWithHwIntrinsicsFeature( () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), - HwIntrinsics.DisableSIMD); + HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic); } #endif From 54ea7d669099b55d1421b9bf94e141958d02e6d6 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 23:08:27 +0100 Subject: [PATCH 052/131] Split base disabled checks --- .../Tests/FeatureTestRunnerTests.cs | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 0f6cdc066..8eb5c9fd6 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -50,13 +50,40 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests HwIntrinsics.AllowAll); } -#if SUPPORTS_RUNTIME_INTRINSICS [Fact] - public void CanLimitHwIntrinsicFeatures() + public void CanLimitHwIntrinsicSIMDFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), - HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic); + HwIntrinsics.DisableSIMD); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void CanLimitHwIntrinsicBaseFeatures() + { + static void AssertDisabled() + { + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + AssertDisabled, + HwIntrinsics.DisableHWIntrinsic); } #endif From 911d28993cfd3a30304afb2b9e327f941a8afc35 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 23:16:00 +0100 Subject: [PATCH 053/131] Enable additional tests --- .../Tests/FeatureTestRunnerTests.cs | 341 ++++++++++-------- 1 file changed, 183 insertions(+), 158 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 8eb5c9fd6..eea22592b 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void AllowsAllHwIntrinsicFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.True(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), + () => Assert.True(Vector.IsHardwareAccelerated), HwIntrinsics.AllowAll); } @@ -54,7 +54,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests public void CanLimitHwIntrinsicSIMDFeatures() { FeatureTestRunner.RunWithHwIntrinsicsFeature( - () => Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)), + () => Assert.False(Vector.IsHardwareAccelerated), HwIntrinsics.DisableSIMD); } @@ -87,170 +87,195 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests } #endif - // [Fact] - // public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() - // { - // static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) - // { - // Assert.NotNull(intrinsic); + [Fact] + public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) + { + Assert.NotNull(intrinsic); - // switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) - // { - // case HwIntrinsics.DisableSIMD: - // Assert.False(Vector.IsHardwareAccelerated); - // break; - //#if SUPPORTS_RUNTIME_INTRINSICS - // case HwIntrinsics.DisableHWIntrinsic: - // Assert.False(Vector.IsHardwareAccelerated); - // break; - // case HwIntrinsics.DisableSSE: - // Assert.False(Sse.IsSupported); - // break; - // case HwIntrinsics.DisableSSE2: - // Assert.False(Sse2.IsSupported); - // break; - // case HwIntrinsics.DisableAES: - // Assert.False(Aes.IsSupported); - // break; - // case HwIntrinsics.DisablePCLMULQDQ: - // Assert.False(Pclmulqdq.IsSupported); - // break; - // case HwIntrinsics.DisableSSE3: - // Assert.False(Sse3.IsSupported); - // break; - // case HwIntrinsics.DisableSSSE3: - // Assert.False(Ssse3.IsSupported); - // break; - // case HwIntrinsics.DisableSSE41: - // Assert.False(Sse41.IsSupported); - // break; - // case HwIntrinsics.DisableSSE42: - // Assert.False(Sse42.IsSupported); - // break; - // case HwIntrinsics.DisablePOPCNT: - // Assert.False(Popcnt.IsSupported); - // break; - // case HwIntrinsics.DisableAVX: - // Assert.False(Avx.IsSupported); - // break; - // case HwIntrinsics.DisableFMA: - // Assert.False(Fma.IsSupported); - // break; - // case HwIntrinsics.DisableAVX2: - // Assert.False(Avx2.IsSupported); - // break; - // case HwIntrinsics.DisableBMI1: - // Assert.False(Bmi1.IsSupported); - // break; - // case HwIntrinsics.DisableBMI2: - // Assert.False(Bmi2.IsSupported); - // break; - // case HwIntrinsics.DisableLZCNT: - // Assert.False(Lzcnt.IsSupported); - // break; - //#endif - // } - // } + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } - // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - // { - // FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); - // } - // } + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); + } + } - // [Fact] - // public void CanLimitHwIntrinsicFeaturesWithSerializableParam() - // { - // static void AssertHwIntrinsicsFeatureDisabled(string serializable) - // { - // Assert.NotNull(serializable); - // Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); - //#if SUPPORTS_RUNTIME_INTRINSICS - // Assert.False(Sse.IsSupported); - //#endif - // } +#if SUPPORTS_RUNTIME_INTRINSICS + Assert.False(Sse.IsSupported); +#endif + } - // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - // { - // FeatureTestRunner.RunWithHwIntrinsicsFeature( - // AssertHwIntrinsicsFeatureDisabled, - // HwIntrinsics.DisableSSE, - // new FakeSerializable()); - // } - // } + FeatureTestRunner.RunWithHwIntrinsicsFeature( + AssertHwIntrinsicsFeatureDisabled, + HwIntrinsics.DisableSSE, + new FakeSerializable()); + } - // [Fact] - // public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() - // { - // static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) - // { - // Assert.NotNull(serializable); - // Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); - // switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) - // { - // case HwIntrinsics.DisableSIMD: - // Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); - // break; - //#if SUPPORTS_RUNTIME_INTRINSICS - // case HwIntrinsics.DisableHWIntrinsic: - // Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); - // break; - // case HwIntrinsics.DisableSSE: - // Assert.False(Sse.IsSupported); - // break; - // case HwIntrinsics.DisableSSE2: - // Assert.False(Sse2.IsSupported); - // break; - // case HwIntrinsics.DisableAES: - // Assert.False(Aes.IsSupported); - // break; - // case HwIntrinsics.DisablePCLMULQDQ: - // Assert.False(Pclmulqdq.IsSupported); - // break; - // case HwIntrinsics.DisableSSE3: - // Assert.False(Sse3.IsSupported); - // break; - // case HwIntrinsics.DisableSSSE3: - // Assert.False(Ssse3.IsSupported); - // break; - // case HwIntrinsics.DisableSSE41: - // Assert.False(Sse41.IsSupported); - // break; - // case HwIntrinsics.DisableSSE42: - // Assert.False(Sse42.IsSupported); - // break; - // case HwIntrinsics.DisablePOPCNT: - // Assert.False(Popcnt.IsSupported); - // break; - // case HwIntrinsics.DisableAVX: - // Assert.False(Avx.IsSupported); - // break; - // case HwIntrinsics.DisableFMA: - // Assert.False(Fma.IsSupported); - // break; - // case HwIntrinsics.DisableAVX2: - // Assert.False(Avx2.IsSupported); - // break; - // case HwIntrinsics.DisableBMI1: - // Assert.False(Bmi1.IsSupported); - // break; - // case HwIntrinsics.DisableBMI2: - // Assert.False(Bmi2.IsSupported); - // break; - // case HwIntrinsics.DisableLZCNT: - // Assert.False(Lzcnt.IsSupported); - // break; - //#endif - // } - // } + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } - // foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) - // { - // FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); - // } - // } + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); + } + } public class FakeSerializable : IXunitSerializable { From e6c73e4b2b28a0e99b6e5f688d4536acd5da2be3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Oct 2020 23:31:56 +0100 Subject: [PATCH 054/131] Update PngEncoderTests.cs --- .../Formats/Png/PngEncoderTests.cs | 28 ++++--------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index b35e55887..9ba956d72 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -2,13 +2,8 @@ // Licensed under the Apache License, Version 2.0. // ReSharper disable InconsistentNaming -using System.Diagnostics; using System.IO; using System.Linq; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif -using Microsoft.DotNet.RemoteExecutor; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Formats.Png; using SixLabors.ImageSharp.Metadata; @@ -16,7 +11,6 @@ using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing.Processors.Quantization; using SixLabors.ImageSharp.Tests.TestUtilities; using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison; - using Xunit; namespace SixLabors.ImageSharp.Tests.Formats.Png @@ -538,13 +532,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png [WithTestPatternImages(100, 100, PixelTypes.Rgba32)] public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) { - static void RunTest(string providerDump) + static void RunTest(string serialized) { TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); -#if SUPPORTS_RUNTIME_INTRINSICS - Assert.False(Ssse3.IsSupported); -#endif + FeatureTestRunner.Deserialize>(serialized); foreach (PngInterlaceMode interlaceMode in InterlaceMode) { @@ -559,19 +550,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png } } - string providerDump = BasicSerializer.Serialize(provider); - - var processStartInfo = new ProcessStartInfo(); - processStartInfo.Environment[TestEnvironment.Features.EnableSSE3] = TestEnvironment.Features.Off; - - RemoteExecutor.Invoke( + FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - providerDump, - new RemoteInvokeOptions - { - StartInfo = processStartInfo - }) - .Dispose(); + HwIntrinsics.DisableSSSE3, + provider); } private static void TestPngEncoderCore( From ec36095092e207ba51267444d3dedbee11aed76f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 00:06:31 +0100 Subject: [PATCH 055/131] Use envonment aware benchmarking for intrinsics. --- .../BlockOperations/Block8x8F_Transpose.cs | 16 +--- .../Config.HwIntrinsics.cs | 81 +++++++++++++++++++ tests/ImageSharp.Benchmarks/Config.cs | 2 +- .../TestUtilities/TestEnvironment.Features.cs | 54 ------------- 4 files changed, 86 insertions(+), 67 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs delete mode 100644 tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs index ae1b23df9..814c91038 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -6,26 +6,18 @@ using SixLabors.ImageSharp.Formats.Jpeg.Components; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations { + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Block8x8F_Transpose { private static readonly Block8x8F Source = Create8x8FloatData(); - [Benchmark(Baseline=true)] - public void TransposeIntoVector4() + [Benchmark(Baseline = true)] + public void TransposeInto() { var dest = default(Block8x8F); - Source.TransposeIntoFallback(ref dest); + Source.TransposeInto(ref dest); } -#if SUPPORTS_RUNTIME_INTRINSICS - [Benchmark] - public void TransposeIntoAvx() - { - var dest = default(Block8x8F); - Source.TransposeIntoAvx(ref dest); - } -#endif - private static Block8x8F Create8x8FloatData() { var result = new float[64]; diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs new file mode 100644 index 000000000..e860c5491 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -0,0 +1,81 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using BenchmarkDotNet.Environments; +using BenchmarkDotNet.Jobs; + +namespace SixLabors.ImageSharp.Benchmarks +{ + public partial class Config + { + private const string On = "1"; + private const string Off = "0"; + + // See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861 + // * EnableHWIntrinsic + // * EnableSSE + // * EnableSSE2 + // * EnableAES + // * EnablePCLMULQDQ + // * EnableSSE3 + // * EnableSSSE3 + // * EnableSSE41 + // * EnableSSE42 + // * EnablePOPCNT + // * EnableAVX + // * EnableFMA + // * EnableAVX2 + // * EnableBMI1 + // * EnableBMI2 + // * EnableLZCNT + // + // `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things + // like `LZCNT`, `BMI1`, or `BMI2` + // `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3` + private const string EnableAES = "COMPlus_EnableAES"; + private const string EnableAVX = "COMPlus_EnableAVX"; + private const string EnableAVX2 = "COMPlus_EnableAVX2"; + private const string EnableBMI1 = "COMPlus_EnableBMI1"; + private const string EnableBMI2 = "COMPlus_EnableBMI2"; + private const string EnableFMA = "COMPlus_EnableFMA"; + private const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; + private const string EnableLZCNT = "COMPlus_EnableLZCNT"; + private const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; + private const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; + private const string EnableSSE = "COMPlus_EnableSSE"; + private const string EnableSSE2 = "COMPlus_EnableSSE2"; + private const string EnableSSE3 = "COMPlus_EnableSSE3"; + private const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; + private const string EnableSSE41 = "COMPlus_EnableSSE41"; + private const string EnableSSE42 = "COMPlus_EnableSSE42"; + private const string EnableSSSE3 = "COMPlus_EnableSSSE3"; + private const string FeatureSIMD = "COMPlus_FeatureSIMD"; + + public class HwIntrinsics_SSE_AVX : Config + { + public HwIntrinsics_SSE_AVX() + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithId("AVX").AsBaseline()); + } + + if (Sse.IsSupported) + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off)) + .WithId("SSE")); + } +#endif + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables(new EnvironmentVariable(EnableHWIntrinsic, Off)) + .WithId("No HwIntrinsics")); + } + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Config.cs b/tests/ImageSharp.Benchmarks/Config.cs index f9240779b..53271f522 100644 --- a/tests/ImageSharp.Benchmarks/Config.cs +++ b/tests/ImageSharp.Benchmarks/Config.cs @@ -12,7 +12,7 @@ using BenchmarkDotNet.Jobs; namespace SixLabors.ImageSharp.Benchmarks { - public class Config : ManualConfig + public partial class Config : ManualConfig { public Config() { diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs deleted file mode 100644 index 3568c1e5d..000000000 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -namespace SixLabors.ImageSharp.Tests -{ - public static partial class TestEnvironment - { - internal static class Features - { - public const string On = "1"; - public const string Off = "0"; - - // See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861 - // * EnableHWIntrinsic - // * EnableSSE - // * EnableSSE2 - // * EnableAES - // * EnablePCLMULQDQ - // * EnableSSE3 - // * EnableSSSE3 - // * EnableSSE41 - // * EnableSSE42 - // * EnablePOPCNT - // * EnableAVX - // * EnableFMA - // * EnableAVX2 - // * EnableBMI1 - // * EnableBMI2 - // * EnableLZCNT - // - // `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things - // like `LZCNT`, `BMI1`, or `BMI2` - // `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3` - public const string EnableAES = "COMPlus_EnableAES"; - public const string EnableAVX = "COMPlus_EnableAVX"; - public const string EnableAVX2 = "COMPlus_EnableAVX2"; - public const string EnableBMI1 = "COMPlus_EnableBMI1"; - public const string EnableBMI2 = "COMPlus_EnableBMI2"; - public const string EnableFMA = "COMPlus_EnableFMA"; - public const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; - public const string EnableLZCNT = "COMPlus_EnableLZCNT"; - public const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; - public const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; - public const string EnableSSE = "COMPlus_EnableSSE"; - public const string EnableSSE2 = "COMPlus_EnableSSE2"; - public const string EnableSSE3 = "COMPlus_EnableSSE3"; - public const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; - public const string EnableSSE41 = "COMPlus_EnableSSE41"; - public const string EnableSSE42 = "COMPlus_EnableSSE42"; - public const string EnableSSSE3 = "COMPlus_EnableSSSE3"; - public const string FeatureSIMD = "COMPlus_FeatureSIMD"; - } - } -} From 75e0ffc522bd28245384fe81ee6e2af459662dd1 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 00:14:47 +0100 Subject: [PATCH 056/131] Use a single method for Block8x8F.TransposeInto. --- .../Jpeg/Components/Block8x8F.Generated.cs | 80 ------- .../Jpeg/Components/Block8x8F.Generated.tt | 32 --- .../Formats/Jpeg/Components/Block8x8F.cs | 211 +++++++++++------- 3 files changed, 135 insertions(+), 188 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs index 10cbee5e6..6a336ad2b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs @@ -10,86 +10,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Fallback method to transpose a block into the destination block on non AVX supported CPUs. - /// - /// The destination block - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeIntoFallback(ref Block8x8F d) - { - d.V0L.X = V0L.X; - d.V1L.X = V0L.Y; - d.V2L.X = V0L.Z; - d.V3L.X = V0L.W; - d.V4L.X = V0R.X; - d.V5L.X = V0R.Y; - d.V6L.X = V0R.Z; - d.V7L.X = V0R.W; - - d.V0L.Y = V1L.X; - d.V1L.Y = V1L.Y; - d.V2L.Y = V1L.Z; - d.V3L.Y = V1L.W; - d.V4L.Y = V1R.X; - d.V5L.Y = V1R.Y; - d.V6L.Y = V1R.Z; - d.V7L.Y = V1R.W; - - d.V0L.Z = V2L.X; - d.V1L.Z = V2L.Y; - d.V2L.Z = V2L.Z; - d.V3L.Z = V2L.W; - d.V4L.Z = V2R.X; - d.V5L.Z = V2R.Y; - d.V6L.Z = V2R.Z; - d.V7L.Z = V2R.W; - - d.V0L.W = V3L.X; - d.V1L.W = V3L.Y; - d.V2L.W = V3L.Z; - d.V3L.W = V3L.W; - d.V4L.W = V3R.X; - d.V5L.W = V3R.Y; - d.V6L.W = V3R.Z; - d.V7L.W = V3R.W; - - d.V0R.X = V4L.X; - d.V1R.X = V4L.Y; - d.V2R.X = V4L.Z; - d.V3R.X = V4L.W; - d.V4R.X = V4R.X; - d.V5R.X = V4R.Y; - d.V6R.X = V4R.Z; - d.V7R.X = V4R.W; - - d.V0R.Y = V5L.X; - d.V1R.Y = V5L.Y; - d.V2R.Y = V5L.Z; - d.V3R.Y = V5L.W; - d.V4R.Y = V5R.X; - d.V5R.Y = V5R.Y; - d.V6R.Y = V5R.Z; - d.V7R.Y = V5R.W; - - d.V0R.Z = V6L.X; - d.V1R.Z = V6L.Y; - d.V2R.Z = V6L.Z; - d.V3R.Z = V6L.W; - d.V4R.Z = V6R.X; - d.V5R.Z = V6R.Y; - d.V6R.Z = V6R.Z; - d.V7R.Z = V6R.W; - - d.V0R.W = V7L.X; - d.V1R.W = V7L.Y; - d.V2R.W = V7L.Z; - d.V3R.W = V7L.W; - d.V4R.W = V7R.X; - d.V5R.W = V7R.Y; - d.V6R.W = V7R.Z; - d.V7R.W = V7R.W; - } - /// /// Level shift by +maximum/2, clip to [0, maximum] /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt index f47d9106e..26cd5c2ac 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt @@ -23,38 +23,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Fallback method to transpose a block into the destination block on non AVX supported CPUs. - /// - /// The destination block - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeIntoFallback(ref Block8x8F d) - { - <# - PushIndent(" "); - - for (int i = 0; i < 8; i++) - { - char destCoord = coordz[i % 4]; - char destSide = (i / 4) % 2 == 0 ? 'L' : 'R'; - - for (int j = 0; j < 8; j++) - { - if(i > 0 && j == 0){ - WriteLine(""); - } - - char srcCoord = coordz[j % 4]; - char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R'; - - var expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord};\r\n"; - Write(expression); - } - } - PopIndent(); - #> - } - /// /// Level shift by +maximum/2, clip to [0, maximum] /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 547e11623..ccdba4885 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -611,87 +611,146 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components #if SUPPORTS_RUNTIME_INTRINSICS if (Avx.IsSupported) { - this.TransposeIntoAvx(ref d); + // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536 + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref this.V0L).ToVector256(), + Unsafe.As>(ref this.V4L), + 1); + + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref this.V1L).ToVector256(), + Unsafe.As>(ref this.V5L), + 1); + + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref this.V2L).ToVector256(), + Unsafe.As>(ref this.V6L), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref this.V3L).ToVector256(), + Unsafe.As>(ref this.V7L), + 1); + + Vector256 r4 = Avx.InsertVector128( + Unsafe.As>(ref this.V0R).ToVector256(), + Unsafe.As>(ref this.V4R), + 1); + + Vector256 r5 = Avx.InsertVector128( + Unsafe.As>(ref this.V1R).ToVector256(), + Unsafe.As>(ref this.V5R), + 1); + + Vector256 r6 = Avx.InsertVector128( + Unsafe.As>(ref this.V2R).ToVector256(), + Unsafe.As>(ref this.V6R), + 1); + + Vector256 r7 = Avx.InsertVector128( + Unsafe.As>(ref this.V3R).ToVector256(), + Unsafe.As>(ref this.V7R), + 1); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackLow(r2, r3); + Vector256 v = Avx.Shuffle(t0, t2, 0x4E); + Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); + Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); + + Vector256 t4 = Avx.UnpackLow(r4, r5); + Vector256 t6 = Avx.UnpackLow(r6, r7); + v = Avx.Shuffle(t4, t6, 0x4E); + Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); + Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); + + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t3 = Avx.UnpackHigh(r2, r3); + v = Avx.Shuffle(t1, t3, 0x4E); + Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); + Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); + + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t7 = Avx.UnpackHigh(r6, r7); + v = Avx.Shuffle(t5, t7, 0x4E); + Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); + Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); } else #endif { - this.TransposeIntoFallback(ref d); + d.V0L.X = this.V0L.X; + d.V1L.X = this.V0L.Y; + d.V2L.X = this.V0L.Z; + d.V3L.X = this.V0L.W; + d.V4L.X = this.V0R.X; + d.V5L.X = this.V0R.Y; + d.V6L.X = this.V0R.Z; + d.V7L.X = this.V0R.W; + + d.V0L.Y = this.V1L.X; + d.V1L.Y = this.V1L.Y; + d.V2L.Y = this.V1L.Z; + d.V3L.Y = this.V1L.W; + d.V4L.Y = this.V1R.X; + d.V5L.Y = this.V1R.Y; + d.V6L.Y = this.V1R.Z; + d.V7L.Y = this.V1R.W; + + d.V0L.Z = this.V2L.X; + d.V1L.Z = this.V2L.Y; + d.V2L.Z = this.V2L.Z; + d.V3L.Z = this.V2L.W; + d.V4L.Z = this.V2R.X; + d.V5L.Z = this.V2R.Y; + d.V6L.Z = this.V2R.Z; + d.V7L.Z = this.V2R.W; + + d.V0L.W = this.V3L.X; + d.V1L.W = this.V3L.Y; + d.V2L.W = this.V3L.Z; + d.V3L.W = this.V3L.W; + d.V4L.W = this.V3R.X; + d.V5L.W = this.V3R.Y; + d.V6L.W = this.V3R.Z; + d.V7L.W = this.V3R.W; + + d.V0R.X = this.V4L.X; + d.V1R.X = this.V4L.Y; + d.V2R.X = this.V4L.Z; + d.V3R.X = this.V4L.W; + d.V4R.X = this.V4R.X; + d.V5R.X = this.V4R.Y; + d.V6R.X = this.V4R.Z; + d.V7R.X = this.V4R.W; + + d.V0R.Y = this.V5L.X; + d.V1R.Y = this.V5L.Y; + d.V2R.Y = this.V5L.Z; + d.V3R.Y = this.V5L.W; + d.V4R.Y = this.V5R.X; + d.V5R.Y = this.V5R.Y; + d.V6R.Y = this.V5R.Z; + d.V7R.Y = this.V5R.W; + + d.V0R.Z = this.V6L.X; + d.V1R.Z = this.V6L.Y; + d.V2R.Z = this.V6L.Z; + d.V3R.Z = this.V6L.W; + d.V4R.Z = this.V6R.X; + d.V5R.Z = this.V6R.Y; + d.V6R.Z = this.V6R.Z; + d.V7R.Z = this.V6R.W; + + d.V0R.W = this.V7L.X; + d.V1R.W = this.V7L.Y; + d.V2R.W = this.V7L.Z; + d.V3R.W = this.V7L.W; + d.V4R.W = this.V7R.X; + d.V5R.W = this.V7R.Y; + d.V6R.W = this.V7R.Z; + d.V7R.W = this.V7R.W; } } - -#if SUPPORTS_RUNTIME_INTRINSICS - /// - /// AVX-only variant for executing . - /// - /// - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeIntoAvx(ref Block8x8F d) - { - Vector256 r0 = Avx.InsertVector128( - Unsafe.As>(ref this.V0L).ToVector256(), - Unsafe.As>(ref this.V4L), - 1); - - Vector256 r1 = Avx.InsertVector128( - Unsafe.As>(ref this.V1L).ToVector256(), - Unsafe.As>(ref this.V5L), - 1); - - Vector256 r2 = Avx.InsertVector128( - Unsafe.As>(ref this.V2L).ToVector256(), - Unsafe.As>(ref this.V6L), - 1); - - Vector256 r3 = Avx.InsertVector128( - Unsafe.As>(ref this.V3L).ToVector256(), - Unsafe.As>(ref this.V7L), - 1); - - Vector256 r4 = Avx.InsertVector128( - Unsafe.As>(ref this.V0R).ToVector256(), - Unsafe.As>(ref this.V4R), - 1); - - Vector256 r5 = Avx.InsertVector128( - Unsafe.As>(ref this.V1R).ToVector256(), - Unsafe.As>(ref this.V5R), - 1); - - Vector256 r6 = Avx.InsertVector128( - Unsafe.As>(ref this.V2R).ToVector256(), - Unsafe.As>(ref this.V6R), - 1); - - Vector256 r7 = Avx.InsertVector128( - Unsafe.As>(ref this.V3R).ToVector256(), - Unsafe.As>(ref this.V7R), - 1); - - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t2 = Avx.UnpackLow(r2, r3); - Vector256 v = Avx.Shuffle(t0, t2, 0x4E); - Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); - Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); - - Vector256 t4 = Avx.UnpackLow(r4, r5); - Vector256 t6 = Avx.UnpackLow(r6, r7); - v = Avx.Shuffle(t4, t6, 0x4E); - Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); - Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); - - Vector256 t1 = Avx.UnpackHigh(r0, r1); - Vector256 t3 = Avx.UnpackHigh(r2, r3); - v = Avx.Shuffle(t1, t3, 0x4E); - Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); - Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); - - Vector256 t5 = Avx.UnpackHigh(r4, r5); - Vector256 t7 = Avx.UnpackHigh(r6, r7); - v = Avx.Shuffle(t5, t7, 0x4E); - Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); - Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); - } -#endif } } From a79e6d3425a53a17891997b6cc3147d5c0b5f02b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 00:20:41 +0100 Subject: [PATCH 057/131] Enable all test platforms --- .github/workflows/build-and-test.yml | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index ecb5ceb0e..c8f399794 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -17,23 +17,23 @@ jobs: - os: ubuntu-latest framework: netcoreapp3.1 runtime: -x64 + codecov: true + - os: windows-latest + framework: netcoreapp3.1 + runtime: -x64 + codecov: false + - os: windows-latest + framework: netcoreapp2.1 + runtime: -x64 + codecov: false + - os: windows-latest + framework: net472 + runtime: -x64 + codecov: false + - os: windows-latest + framework: net472 + runtime: -x86 codecov: false - # - os: windows-latest - # framework: netcoreapp3.1 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: netcoreapp2.1 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: net472 - # runtime: -x64 - # codecov: false - # - os: windows-latest - # framework: net472 - # runtime: -x86 - # codecov: false runs-on: ${{matrix.options.os}} if: "!contains(github.event.head_commit.message, '[skip ci]')" From aad4c1e89e6372e4a0dc066aca85caa78d6127eb Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 00:36:39 +0100 Subject: [PATCH 058/131] Remove method baseline property --- .../Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs index 814c91038..1d103cd1a 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -11,7 +11,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations { private static readonly Block8x8F Source = Create8x8FloatData(); - [Benchmark(Baseline = true)] + [Benchmark] public void TransposeInto() { var dest = default(Block8x8F); From 7b53df11c82e6500596a598b5769aa7c0f48c1ec Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 01:05:40 +0100 Subject: [PATCH 059/131] Fix incorrect test on NET 32bit --- .../TestUtilities/Tests/FeatureTestRunnerTests.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index eea22592b..646000120 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -45,6 +45,11 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests [Fact] public void AllowsAllHwIntrinsicFeatures() { + if (!Vector.IsHardwareAccelerated) + { + return; + } + FeatureTestRunner.RunWithHwIntrinsicsFeature( () => Assert.True(Vector.IsHardwareAccelerated), HwIntrinsics.AllowAll); From 68d2406f4ead2fd3fa53a04e40bea9c279db19e9 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 18:02:14 +0100 Subject: [PATCH 060/131] Optimize low hanging fruit and fix naming --- .../Jpeg/Components/Block8x8F.Generated.cs | 6 +- .../Jpeg/Components/Block8x8F.Generated.tt | 6 +- .../Formats/Jpeg/Components/Block8x8F.cs | 203 ++++++++++++------ .../Decoder/JpegBlockPostProcessor.cs | 4 +- .../Jpeg/Components/FastFloatingPointDCT.cs | 6 +- .../BlockOperations/Block8x8F_AddInPlace.cs | 21 ++ .../Block8x8F_MultiplyInPlaceBlock.cs | 37 ++++ .../Block8x8F_MultiplyInPlaceScalar.cs | 21 ++ .../Formats/Jpg/Block8x8FTests.cs | 160 ++++++++------ 9 files changed, 331 insertions(+), 133 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs index 6a336ad2b..0efefc06b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs @@ -13,7 +13,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Level shift by +maximum/2, clip to [0, maximum] /// - public void NormalizeColorsInplace(float maximum) + public void NormalizeColorsInPlace(float maximum) { var CMin4 = new Vector4(0F); var CMax4 = new Vector4(maximum); @@ -38,10 +38,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } /// - /// AVX2-only variant for executing and in one step. + /// AVX2-only variant for executing and in one step. /// [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInplaceVector8(float maximum) + public void NormalizeColorsAndRoundInPlaceVector8(float maximum) { var off = new Vector(MathF.Ceiling(maximum / 2)); var max = new Vector(maximum); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt index 26cd5c2ac..e5a62dc07 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Level shift by +maximum/2, clip to [0, maximum] /// - public void NormalizeColorsInplace(float maximum) + public void NormalizeColorsInPlace(float maximum) { var CMin4 = new Vector4(0F); var CMax4 = new Vector4(maximum); @@ -49,10 +49,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } /// - /// AVX2-only variant for executing and in one step. + /// AVX2-only variant for executing and in one step. /// [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInplaceVector8(float maximum) + public void NormalizeColorsAndRoundInPlaceVector8(float maximum) { var off = new Vector(MathF.Ceiling(maximum / 2)); var max = new Vector(maximum); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index ccdba4885..0dbdadbeb 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -281,73 +281,156 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// The value to multiply by. [MethodImpl(InliningOptions.ShortMethod)] - public void MultiplyInplace(float value) - { - this.V0L *= value; - this.V0R *= value; - this.V1L *= value; - this.V1R *= value; - this.V2L *= value; - this.V2R *= value; - this.V3L *= value; - this.V3R *= value; - this.V4L *= value; - this.V4R *= value; - this.V5L *= value; - this.V5R *= value; - this.V6L *= value; - this.V6R *= value; - this.V7L *= value; - this.V7R *= value; + public void MultiplyInPlace(float value) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + var valueVec = Vector256.Create(value); + Unsafe.As>(ref this.V0L) = Avx.Multiply(Unsafe.As>(ref this.V0L), valueVec); + Unsafe.As>(ref this.V1L) = Avx.Multiply(Unsafe.As>(ref this.V1L), valueVec); + Unsafe.As>(ref this.V2L) = Avx.Multiply(Unsafe.As>(ref this.V2L), valueVec); + Unsafe.As>(ref this.V3L) = Avx.Multiply(Unsafe.As>(ref this.V3L), valueVec); + Unsafe.As>(ref this.V4L) = Avx.Multiply(Unsafe.As>(ref this.V4L), valueVec); + Unsafe.As>(ref this.V5L) = Avx.Multiply(Unsafe.As>(ref this.V5L), valueVec); + Unsafe.As>(ref this.V6L) = Avx.Multiply(Unsafe.As>(ref this.V6L), valueVec); + Unsafe.As>(ref this.V7L) = Avx.Multiply(Unsafe.As>(ref this.V7L), valueVec); + } + else +#endif + { + var valueVec = new Vector4(value); + this.V0L *= valueVec; + this.V0R *= valueVec; + this.V1L *= valueVec; + this.V1R *= valueVec; + this.V2L *= valueVec; + this.V2R *= valueVec; + this.V3L *= valueVec; + this.V3R *= valueVec; + this.V4L *= valueVec; + this.V4R *= valueVec; + this.V5L *= valueVec; + this.V5R *= valueVec; + this.V6L *= valueVec; + this.V6R *= valueVec; + this.V7L *= valueVec; + this.V7R *= valueVec; + } } /// /// Multiply all elements of the block by the corresponding elements of 'other'. /// [MethodImpl(InliningOptions.ShortMethod)] - public void MultiplyInplace(ref Block8x8F other) - { - this.V0L *= other.V0L; - this.V0R *= other.V0R; - this.V1L *= other.V1L; - this.V1R *= other.V1R; - this.V2L *= other.V2L; - this.V2R *= other.V2R; - this.V3L *= other.V3L; - this.V3R *= other.V3R; - this.V4L *= other.V4L; - this.V4R *= other.V4R; - this.V5L *= other.V5L; - this.V5R *= other.V5R; - this.V6L *= other.V6L; - this.V6R *= other.V6R; - this.V7L *= other.V7L; - this.V7R *= other.V7R; + public unsafe void MultiplyInPlace(ref Block8x8F other) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + Unsafe.As>(ref this.V0L) + = Avx.Multiply( + Unsafe.As>(ref this.V0L), + Unsafe.As>(ref other.V0L)); + + Unsafe.As>(ref this.V1L) + = Avx.Multiply( + Unsafe.As>(ref this.V1L), + Unsafe.As>(ref other.V1L)); + + Unsafe.As>(ref this.V2L) + = Avx.Multiply( + Unsafe.As>(ref this.V2L), + Unsafe.As>(ref other.V2L)); + + Unsafe.As>(ref this.V3L) + = Avx.Multiply( + Unsafe.As>(ref this.V3L), + Unsafe.As>(ref other.V3L)); + + Unsafe.As>(ref this.V4L) + = Avx.Multiply( + Unsafe.As>(ref this.V4L), + Unsafe.As>(ref other.V4L)); + + Unsafe.As>(ref this.V5L) + = Avx.Multiply( + Unsafe.As>(ref this.V5L), + Unsafe.As>(ref other.V5L)); + + Unsafe.As>(ref this.V6L) + = Avx.Multiply( + Unsafe.As>(ref this.V6L), + Unsafe.As>(ref other.V6L)); + + Unsafe.As>(ref this.V7L) + = Avx.Multiply( + Unsafe.As>(ref this.V7L), + Unsafe.As>(ref other.V7L)); + } + else +#endif + { + this.V0L *= other.V0L; + this.V0R *= other.V0R; + this.V1L *= other.V1L; + this.V1R *= other.V1R; + this.V2L *= other.V2L; + this.V2R *= other.V2R; + this.V3L *= other.V3L; + this.V3R *= other.V3R; + this.V4L *= other.V4L; + this.V4R *= other.V4R; + this.V5L *= other.V5L; + this.V5R *= other.V5R; + this.V6L *= other.V6L; + this.V6R *= other.V6R; + this.V7L *= other.V7L; + this.V7R *= other.V7R; + } } /// /// Adds a vector to all elements of the block. /// - /// The added vector + /// The added vector. [MethodImpl(InliningOptions.ShortMethod)] - public void AddToAllInplace(Vector4 diff) - { - this.V0L += diff; - this.V0R += diff; - this.V1L += diff; - this.V1R += diff; - this.V2L += diff; - this.V2R += diff; - this.V3L += diff; - this.V3R += diff; - this.V4L += diff; - this.V4R += diff; - this.V5L += diff; - this.V5R += diff; - this.V6L += diff; - this.V6R += diff; - this.V7L += diff; - this.V7R += diff; + public void AddInPlace(float value) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + var valueVec = Vector256.Create(value); + Unsafe.As>(ref this.V0L) = Avx.Add(Unsafe.As>(ref this.V0L), valueVec); + Unsafe.As>(ref this.V1L) = Avx.Add(Unsafe.As>(ref this.V1L), valueVec); + Unsafe.As>(ref this.V2L) = Avx.Add(Unsafe.As>(ref this.V2L), valueVec); + Unsafe.As>(ref this.V3L) = Avx.Add(Unsafe.As>(ref this.V3L), valueVec); + Unsafe.As>(ref this.V4L) = Avx.Add(Unsafe.As>(ref this.V4L), valueVec); + Unsafe.As>(ref this.V5L) = Avx.Add(Unsafe.As>(ref this.V5L), valueVec); + Unsafe.As>(ref this.V6L) = Avx.Add(Unsafe.As>(ref this.V6L), valueVec); + Unsafe.As>(ref this.V7L) = Avx.Add(Unsafe.As>(ref this.V7L), valueVec); + } + else +#endif + { + var valueVec = new Vector4(value); + this.V0L += valueVec; + this.V0R += valueVec; + this.V1L += valueVec; + this.V1R += valueVec; + this.V2L += valueVec; + this.V2R += valueVec; + this.V3L += valueVec; + this.V3R += valueVec; + this.V4L += valueVec; + this.V4R += valueVec; + this.V5L += valueVec; + this.V5R += valueVec; + this.V6L += valueVec; + this.V6R += valueVec; + this.V7L += valueVec; + this.V7R += valueVec; + } } /// @@ -468,23 +551,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Level shift by +maximum/2, clip to [0..maximum], and round all the values in the block. /// - public void NormalizeColorsAndRoundInplace(float maximum) + public void NormalizeColorsAndRoundInPlace(float maximum) { if (SimdUtils.HasVector8) { - this.NormalizeColorsAndRoundInplaceVector8(maximum); + this.NormalizeColorsAndRoundInPlaceVector8(maximum); } else { - this.NormalizeColorsInplace(maximum); - this.RoundInplace(); + this.NormalizeColorsInPlace(maximum); + this.RoundInPlace(); } } /// /// Rounds all values in the block. /// - public void RoundInplace() + public void RoundInPlace() { for (int i = 0; i < Size; i++) { diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs index 40683e25a..e0311dafe 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs @@ -81,14 +81,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder b.LoadFrom(ref sourceBlock); // Dequantize: - b.MultiplyInplace(ref this.DequantiazationTable); + b.MultiplyInPlace(ref this.DequantiazationTable); FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2); // To conform better to libjpeg we actually NEED TO loose precision here. // This is because they store blocks as Int16 between all the operations. // To be "more accurate", we need to emulate this by rounding! - this.WorkspaceBlock1.NormalizeColorsAndRoundInplace(maximumValue); + this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue); this.WorkspaceBlock1.ScaledCopyTo( ref destAreaOrigin, diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index d0b373609..a6d0622dd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -61,7 +61,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components IDCT8x4_RightPart(ref temp, ref dest); // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? - dest.MultiplyInplace(C_0_125); + dest.MultiplyInPlace(C_0_125); } /// @@ -324,7 +324,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components src.TransposeInto(ref temp); if (offsetSourceByNeg128) { - temp.AddToAllInplace(new Vector4(-128)); + temp.AddInPlace(-128F); } FDCT8x4_LeftPart(ref temp, ref dest); @@ -335,7 +335,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components FDCT8x4_LeftPart(ref temp, ref dest); FDCT8x4_RightPart(ref temp, ref dest); - dest.MultiplyInplace(C_0_125); + dest.MultiplyInPlace(C_0_125); } } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs new file mode 100644 index 000000000..61fb2745b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs @@ -0,0 +1,21 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_AddInPlace + { + [Benchmark] + public float AddInplace() + { + float f = 42F; + Block8x8F b = default; + b.AddInPlace(f); + return f; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs new file mode 100644 index 000000000..0d1e67112 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs @@ -0,0 +1,37 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_MultiplyInPlaceBlock + { + private static readonly Block8x8F Source = Create8x8FloatData(); + + [Benchmark] + public void MultiplyInPlaceBlock() + { + Block8x8F dest = default; + Source.MultiplyInPlace(ref dest); + } + + private static Block8x8F Create8x8FloatData() + { + var result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[(i * 8) + j] = (i * 10) + j; + } + } + + var source = default(Block8x8F); + source.LoadFrom(result); + return source; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs new file mode 100644 index 000000000..31a6ca713 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs @@ -0,0 +1,21 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_MultiplyInPlaceScalar + { + [Benchmark] + public float MultiplyInPlaceScalar() + { + float f = 42F; + Block8x8F b = default; + b.MultiplyInPlace(f); + return f; + } + } +} diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 548238088..927d7c252 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -5,6 +5,7 @@ // #define BENCHMARKING using System; using System.Diagnostics; + using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -44,20 +45,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => + { + var block = default(Block8x8F); + + for (int i = 0; i < Block8x8F.Size; i++) { - var block = default(Block8x8F); - - for (int i = 0; i < Block8x8F.Size; i++) - { - block[i] = i; - } - - sum = 0; - for (int i = 0; i < Block8x8F.Size; i++) - { - sum += block[i]; - } - }); + block[i] = i; + } + + sum = 0; + for (int i = 0; i < Block8x8F.Size; i++) + { + sum += block[i]; + } + }); Assert.Equal(sum, 64f * 63f * 0.5f); } @@ -69,20 +70,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => + { + // Block8x8F block = new Block8x8F(); + float[] block = new float[64]; + for (int i = 0; i < Block8x8F.Size; i++) { - // Block8x8F block = new Block8x8F(); - float[] block = new float[64]; - for (int i = 0; i < Block8x8F.Size; i++) - { - block[i] = i; - } - - sum = 0; - for (int i = 0; i < Block8x8F.Size; i++) - { - sum += block[i]; - } - }); + block[i] = i; + } + + sum = 0; + for (int i = 0; i < Block8x8F.Size; i++) + { + sum += block[i]; + } + }); Assert.Equal(sum, 64f * 63f * 0.5f); } @@ -100,11 +101,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var b = default(Block8x8F); - b.LoadFrom(data); - b.ScaledCopyTo(mirror); - }); + { + var b = default(Block8x8F); + b.LoadFrom(data); + b.ScaledCopyTo(mirror); + }); Assert.Equal(data, mirror); @@ -125,11 +126,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var b = default(Block8x8F); - Block8x8F.LoadFrom(&b, data); - Block8x8F.ScaledCopyTo(&b, mirror); - }); + { + var b = default(Block8x8F); + Block8x8F.LoadFrom(&b, data); + Block8x8F.ScaledCopyTo(&b, mirror); + }); Assert.Equal(data, mirror); @@ -150,11 +151,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var v = default(Block8x8F); - v.LoadFrom(data); - v.ScaledCopyTo(mirror); - }); + { + var v = default(Block8x8F); + v.LoadFrom(data); + v.ScaledCopyTo(mirror); + }); Assert.Equal(data, mirror); @@ -234,7 +235,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.PrintLinearData(input); Block8x8F dest = block; - dest.NormalizeColorsInplace(255); + dest.NormalizeColorsInPlace(255); float[] array = new float[64]; dest.ScaledCopyTo(array); @@ -259,11 +260,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Block8x8F source = CreateRandomFloatBlock(-200, 200, seed); Block8x8F expected = source; - expected.NormalizeColorsInplace(255); - expected.RoundInplace(); + expected.NormalizeColorsInPlace(255); + expected.RoundInPlace(); Block8x8F actual = source; - actual.NormalizeColorsAndRoundInplaceVector8(255); + actual.NormalizeColorsAndRoundInPlaceVector8(255); this.Output.WriteLine(expected.ToString()); this.Output.WriteLine(actual.ToString()); @@ -324,12 +325,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(1)] [InlineData(2)] [InlineData(3)] - public void RoundInplaceSlow(int seed) + public void RoundInPlaceSlow(int seed) { Block8x8F s = CreateRandomFloatBlock(-500, 500, seed); Block8x8F d = s; - d.RoundInplace(); + d.RoundInPlace(); this.Output.WriteLine(s.ToString()); this.Output.WriteLine(d.ToString()); @@ -344,19 +345,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void MultiplyInplace_ByOtherBlock() + public void MultiplyInPlace_ByOtherBlock() { - Block8x8F original = CreateRandomFloatBlock(-500, 500, 42); - Block8x8F m = CreateRandomFloatBlock(-500, 500, 42); + static void RunTest() + { + Block8x8F original = CreateRandomFloatBlock(-500, 500, 42); + Block8x8F m = CreateRandomFloatBlock(-500, 500, 42); - Block8x8F actual = original; + Block8x8F actual = original; - actual.MultiplyInplace(ref m); + actual.MultiplyInPlace(ref m); - for (int i = 0; i < Block8x8F.Size; i++) - { - Assert.Equal(original[i] * m[i], actual[i]); + for (int i = 0; i < Block8x8F.Size; i++) + { + Assert.Equal(original[i] * m[i], actual[i]); + } } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } [Theory] @@ -396,23 +404,51 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data); - actual.MultiplyInplace(ref zigQt); + actual.MultiplyInPlace(ref zigQt); this.CompareBlocks(expected, actual, 0); } [Fact] - public void MultiplyInplace_ByScalar() + public void AddToAllInPlace() { - Block8x8F original = CreateRandomFloatBlock(-500, 500); + static void RunTest() + { + Block8x8F original = CreateRandomFloatBlock(-500, 500); - Block8x8F actual = original; - actual.MultiplyInplace(42f); + Block8x8F actual = original; + actual.AddInPlace(42f); - for (int i = 0; i < 64; i++) + for (int i = 0; i < 64; i++) + { + Assert.Equal(original[i] + 42f, actual[i]); + } + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); + } + + [Fact] + public void MultiplyInPlace_ByScalar() + { + static void RunTest() { - Assert.Equal(original[i] * 42f, actual[i]); + Block8x8F original = CreateRandomFloatBlock(-500, 500); + + Block8x8F actual = original; + actual.MultiplyInPlace(42f); + + for (int i = 0; i < 64; i++) + { + Assert.Equal(original[i] * 42f, actual[i]); + } } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } [Fact] From 5fadafe1689a2715b0f25458590c8a3ad743ddc7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 18:22:39 +0100 Subject: [PATCH 061/131] Fix warning --- .../Formats/Jpg/JpegDecoderTests.Progressive.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index 98421ca5d..cc23a45fc 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -54,8 +54,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg RemoteExecutor.Invoke( RunTest, providerDump, - "Disco") - .Dispose(); + "Disco").Dispose(); } } } From 102876b60d417a1513eecbaf0d4c50f0cb856a7c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 16 Oct 2020 22:18:00 +0100 Subject: [PATCH 062/131] Disable NetNative optimization on UWP. Fix #1204 --- ...lHistogramEqualizationProcessor{TPixel}.cs | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs index 19514c4b6..274376671 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs @@ -106,15 +106,23 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization } /// +#if NETSTANDARD2_0 + // https://github.com/SixLabors/ImageSharp/issues/1204 + [MethodImpl(MethodImplOptions.NoOptimization)] +#else [MethodImpl(InliningOptions.ShortMethod)] +#endif public void Invoke(int y) { ref int histogramBase = ref MemoryMarshal.GetReference(this.histogramBuffer.GetSpan()); ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + int levels = this.luminanceLevels; for (int x = 0; x < this.bounds.Width; x++) { - int luminance = GetLuminance(Unsafe.Add(ref pixelBase, x), this.luminanceLevels); + // TODO: We should bulk convert here. + var vector = Unsafe.Add(ref pixelBase, x).ToVector4(); + int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); Unsafe.Add(ref histogramBase, luminance)++; } } @@ -147,18 +155,27 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization } /// +#if NETSTANDARD2_0 + // https://github.com/SixLabors/ImageSharp/issues/1204 + [MethodImpl(MethodImplOptions.NoOptimization)] +#else [MethodImpl(InliningOptions.ShortMethod)] +#endif public void Invoke(int y) { ref int cdfBase = ref MemoryMarshal.GetReference(this.cdfBuffer.GetSpan()); ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + int levels = this.luminanceLevels; + float noOfPixelsMinusCdfMin = this.numberOfPixelsMinusCdfMin; for (int x = 0; x < this.bounds.Width; x++) { + // TODO: We should bulk convert here. ref TPixel pixel = ref Unsafe.Add(ref pixelBase, x); - int luminance = GetLuminance(pixel, this.luminanceLevels); - float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / this.numberOfPixelsMinusCdfMin; - pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + var vector = pixel.ToVector4(); + int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); + float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / noOfPixelsMinusCdfMin; + pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, vector.W)); } } } From abe1263b17e2c347d0d436d1176fe2d89defb0e2 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 20 Oct 2020 18:42:30 +0100 Subject: [PATCH 063/131] Auto repair Png options to use Bit8. Fixes #935 --- .../Formats/Png/PngEncoderOptionsHelpers.cs | 22 +++++++++---------- .../Formats/Png/PngEncoderTests.cs | 11 ++++++++++ tests/ImageSharp.Tests/TestImages.cs | 3 +++ tests/Images/Input/Png/issues/Issue_935.png | 3 +++ 4 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 tests/Images/Input/Png/issues/Issue_935.png diff --git a/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs b/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs index b0311f088..9342e09df 100644 --- a/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs +++ b/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs @@ -35,6 +35,15 @@ namespace SixLabors.ImageSharp.Formats.Png options.ColorType ??= pngMetadata.ColorType ?? SuggestColorType(); options.BitDepth ??= pngMetadata.BitDepth ?? SuggestBitDepth(); + // Ensure bit depth and color type are a supported combination. + // Bit8 is the only bit depth supported by all color types. + byte bits = (byte)options.BitDepth; + byte[] validBitDepths = PngConstants.ColorTypes[options.ColorType.Value]; + if (Array.IndexOf(validBitDepths, bits) == -1) + { + options.BitDepth = PngBitDepth.Bit8; + } + options.InterlaceMethod ??= pngMetadata.InterlaceMethod; use16Bit = options.BitDepth == PngBitDepth.Bit16; @@ -44,12 +53,6 @@ namespace SixLabors.ImageSharp.Formats.Png { options.ChunkFilter = PngChunkFilter.ExcludeAll; } - - // Ensure we are not allowing impossible combinations. - if (!PngConstants.ColorTypes.ContainsKey(options.ColorType.Value)) - { - throw new NotSupportedException("Color type is not supported or not valid."); - } } /// @@ -68,15 +71,10 @@ namespace SixLabors.ImageSharp.Formats.Png return null; } - byte bits = (byte)options.BitDepth; - if (Array.IndexOf(PngConstants.ColorTypes[options.ColorType.Value], bits) == -1) - { - throw new NotSupportedException("Bit depth is not supported or not valid."); - } - // Use the metadata to determine what quantization depth to use if no quantizer has been set. if (options.Quantizer is null) { + byte bits = (byte)options.BitDepth; var maxColors = ImageMaths.GetColorCountForBitDepth(bits); options.Quantizer = new WuQuantizer(new QuantizerOptions { MaxColors = maxColors }); } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 9ba956d72..465bed8a1 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -556,6 +556,17 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png provider); } + [Fact] + public void EncodeFixesInvalidOptions() + { + // https://github.com/SixLabors/ImageSharp/issues/935 + using var ms = new MemoryStream(); + var testFile = TestFile.Create(TestImages.Png.Issue935); + using Image image = testFile.CreateRgba32Image(new PngDecoder()); + + image.Save(ms, new PngEncoder { ColorType = PngColorType.RgbWithAlpha }); + } + private static void TestPngEncoderCore( TestImageProvider provider, PngColorType pngColorType, diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs index fd5296c37..dce36bb0f 100644 --- a/tests/ImageSharp.Tests/TestImages.cs +++ b/tests/ImageSharp.Tests/TestImages.cs @@ -107,6 +107,9 @@ namespace SixLabors.ImageSharp.Tests public const string Issue1177_1 = "Png/issues/Issue_1177_1.png"; public const string Issue1177_2 = "Png/issues/Issue_1177_2.png"; + // Issue 935: https://github.com/SixLabors/ImageSharp/issues/935 + public const string Issue935 = "Png/issues/Issue_935.png"; + public static class Bad { public const string MissingDataChunk = "Png/xdtn0g01.png"; diff --git a/tests/Images/Input/Png/issues/Issue_935.png b/tests/Images/Input/Png/issues/Issue_935.png new file mode 100644 index 000000000..9f9e84dc3 --- /dev/null +++ b/tests/Images/Input/Png/issues/Issue_935.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9c5cdacc9bedf481c883828de5bfb7902e2bec038fff08830171cf7075e4f9 +size 870 From f493aa4efa5ed84f1cd2c659e19bb5fa9177d82b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 00:00:46 +0100 Subject: [PATCH 064/131] Implement SimdUtils.HwIntrisics --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 276 ++++++++++++++++++ src/ImageSharp/Common/Helpers/SimdUtils.cs | 7 +- .../Color/Bulk/FromVector4.cs | 4 +- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 20 +- 4 files changed, 295 insertions(+), 12 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs new file mode 100644 index 000000000..2fe2f99ac --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -0,0 +1,276 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + public static class HwIntrinsics + { + private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void ByteToNormalizedFloatReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (Avx2.IsSupported || Sse2.IsSupported) + { + int remainder; + if (Avx2.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + ByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Implementation , which is faster on new RyuJIT runtime. + /// + /// + /// Implementation is based on MagicScaler code: + /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182 + /// + internal static unsafe void ByteToNormalizedFloat( + ReadOnlySpan source, + Span dest) + { + if (Avx2.IsSupported) + { + VerifySpanInput(source, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector256.Create(1 / (float)byte.MaxValue); + + for (int i = 0; i < n; i++) + { + int si = Vector256.Count * i; + Vector256 i0 = Avx2.ConvertToVector256Int32(sourceBase + si); + Vector256 i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256.Count); + Vector256 i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 2)); + Vector256 i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 3)); + + Vector256 f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0)); + Vector256 f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1)); + Vector256 f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2)); + Vector256 f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3)); + + ref Vector256 d = ref Unsafe.Add(ref destBase, i * 4); + + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = f3; + } + } + else + { + // Sse + VerifySpanInput(source, dest, Vector128.Count); + + int n = dest.Length / Vector128.Count; + + byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector128.Create(1 / (float)byte.MaxValue); + Vector128 zero = Vector128.Zero; + + for (int i = 0; i < n; i++) + { + int si = Vector128.Count * i; + + Vector128 i0, i1, i2, i3; + if (Sse41.IsSupported) + { + i0 = Sse41.ConvertToVector128Int32(sourceBase + si); + i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128.Count); + i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 2)); + i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 3)); + } + else + { + Vector128 b = Sse2.LoadVector128(sourceBase + si); + Vector128 s0 = Sse2.UnpackLow(b, zero).AsInt16(); + Vector128 s1 = Sse2.UnpackHigh(b, zero).AsInt16(); + + i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32(); + i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32(); + i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32(); + i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32(); + } + + Vector128 f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0)); + Vector128 f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1)); + Vector128 f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2)); + Vector128 f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3)); + + ref Vector128 d = ref Unsafe.Add(ref destBase, i * 4); + + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = f3; + } + } + } + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void NormalizedFloatToByteSaturateReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (Avx2.IsSupported || Sse2.IsSupported) + { + int remainder; + if (Avx2.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + NormalizedFloatToByteSaturate( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Implementation of , which is faster on new .NET runtime. + /// + /// + /// Implementation is based on MagicScaler code: + /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L541-L622 + /// + internal static void NormalizedFloatToByteSaturate( + ReadOnlySpan source, + Span dest) + { + if (Avx2.IsSupported) + { + VerifySpanInput(source, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector256.Create((float)byte.MaxValue); + ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32); + Vector256 mask = Unsafe.As>(ref maskBase); + + for (int i = 0; i < n; i++) + { + ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); + + Vector256 f0 = Avx.Multiply(scale, s); + Vector256 f1 = Avx.Multiply(scale, Unsafe.Add(ref s, 1)); + Vector256 f2 = Avx.Multiply(scale, Unsafe.Add(ref s, 2)); + Vector256 f3 = Avx.Multiply(scale, Unsafe.Add(ref s, 3)); + + Vector256 w0 = Avx.ConvertToVector256Int32(f0); + Vector256 w1 = Avx.ConvertToVector256Int32(f1); + Vector256 w2 = Avx.ConvertToVector256Int32(f2); + Vector256 w3 = Avx.ConvertToVector256Int32(f3); + + Vector256 u0 = Avx2.PackSignedSaturate(w0, w1); + Vector256 u1 = Avx2.PackSignedSaturate(w2, w3); + Vector256 b = Avx2.PackUnsignedSaturate(u0, u1); + b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte(); + + Unsafe.Add(ref destBase, i) = b; + } + } + else + { + // Sse + VerifySpanInput(source, dest, Vector128.Count); + + int n = dest.Length / Vector128.Count; + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector128.Create((float)byte.MaxValue); + + for (int i = 0; i < n; i++) + { + ref Vector128 s = ref Unsafe.Add(ref sourceBase, i * 4); + + Vector128 f0 = Sse.Multiply(scale, s); + Vector128 f1 = Sse.Multiply(scale, Unsafe.Add(ref s, 1)); + Vector128 f2 = Sse.Multiply(scale, Unsafe.Add(ref s, 2)); + Vector128 f3 = Sse.Multiply(scale, Unsafe.Add(ref s, 3)); + + Vector128 w0 = Sse2.ConvertToVector128Int32(f0); + Vector128 w1 = Sse2.ConvertToVector128Int32(f1); + Vector128 w2 = Sse2.ConvertToVector128Int32(f2); + Vector128 w3 = Sse2.ConvertToVector128Int32(f3); + + Vector128 u0 = Sse2.PackSignedSaturate(w0, w1); + Vector128 u1 = Sse2.PackSignedSaturate(w2, w3); + + Unsafe.Add(ref destBase, i) = Sse2.PackUnsignedSaturate(u0, u1); + } + } + } + } + } +} +#endif diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 7f917648d..df533cedf 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -79,8 +79,9 @@ namespace SixLabors.ImageSharp internal static void ByteToNormalizedFloat(ReadOnlySpan source, Span dest) { DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); - -#if SUPPORTS_EXTENDED_INTRINSICS +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest); +#elif SUPPORTS_EXTENDED_INTRINSICS ExtendedIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest); #else BasicIntrinsics256.ByteToNormalizedFloatReduce(ref source, ref dest); @@ -110,7 +111,7 @@ namespace SixLabors.ImageSharp DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); #if SUPPORTS_RUNTIME_INTRINSICS - Avx2Intrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); + HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); #elif SUPPORTS_EXTENDED_INTRINSICS ExtendedIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); #else diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index da15da24c..267bca4ad 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -104,12 +104,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk #if SUPPORTS_RUNTIME_INTRINSICS [Benchmark] - public void UseAvx2() + public void UseHwIntrinsics() { Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); - SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats); + SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats); } private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 6dce48935..eca4e72cb 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -204,6 +204,17 @@ namespace SixLabors.ImageSharp.Tests.Common (s, d) => SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy32))] + public void HwIntrinsics_BulkConvertByteToNormalizedFloat(int count) + { + TestImpl_BulkConvertByteToNormalizedFloat( + count, + (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); + } +#endif + [Theory] [MemberData(nameof(ArbitraryArraySizes))] public void BulkConvertByteToNormalizedFloat(int count) @@ -281,16 +292,11 @@ namespace SixLabors.ImageSharp.Tests.Common [Theory] [MemberData(nameof(ArraySizesDivisibleBy32))] - public void Avx2_BulkConvertNormalizedFloatToByteClampOverflows(int count) + public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count) { - if (!System.Runtime.Intrinsics.X86.Avx2.IsSupported) - { - return; - } - TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( count, - (s, d) => SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); + (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); } #endif From 8e993394b44eb7a436311aa88c541fd30144cbdd Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 14:49:05 +0100 Subject: [PATCH 065/131] Benchmarks, tests, and cleanup. --- .../Helpers/SimdUtils.Avx2Intrinsics.cs | 103 ------------------ .../Color/Bulk/FromVector4.cs | 4 +- .../Color/Bulk/ToVector4_Rgba32.cs | 13 ++- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 30 +++-- .../FeatureTesting/FeatureTestRunner.cs | 50 +++++++++ 5 files changed, 86 insertions(+), 114 deletions(-) delete mode 100644 src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs deleted file mode 100644 index b56c92dab..000000000 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -#if SUPPORTS_RUNTIME_INTRINSICS - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; - -namespace SixLabors.ImageSharp -{ - internal static partial class SimdUtils - { - public static class Avx2Intrinsics - { - private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; - - /// - /// as many elements as possible, slicing them down (keeping the remainder). - /// - [MethodImpl(InliningOptions.ShortMethod)] - internal static void NormalizedFloatToByteSaturateReduce( - ref ReadOnlySpan source, - ref Span dest) - { - DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); - - if (Avx2.IsSupported) - { - int remainder = ImageMaths.ModuloP2(source.Length, Vector.Count); - int adjustedCount = source.Length - remainder; - - if (adjustedCount > 0) - { - NormalizedFloatToByteSaturate( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount)); - - source = source.Slice(adjustedCount); - dest = dest.Slice(adjustedCount); - } - } - } - - /// - /// Implementation of , which is faster on new .NET runtime. - /// - /// - /// Implementation is based on MagicScaler code: - /// https://github.com/saucecontrol/PhotoSauce/blob/a9bd6e5162d2160419f0cf743fd4f536c079170b/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L453-L477 - /// - internal static void NormalizedFloatToByteSaturate( - ReadOnlySpan source, - Span dest) - { - VerifySpanInput(source, dest, Vector256.Count); - - int n = dest.Length / Vector256.Count; - - ref Vector256 sourceBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - - var maxBytes = Vector256.Create(255f); - ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32); - Vector256 mask = Unsafe.As>(ref maskBase); - - for (int i = 0; i < n; i++) - { - ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); - - Vector256 f0 = s; - Vector256 f1 = Unsafe.Add(ref s, 1); - Vector256 f2 = Unsafe.Add(ref s, 2); - Vector256 f3 = Unsafe.Add(ref s, 3); - - Vector256 w0 = ConvertToInt32(f0, maxBytes); - Vector256 w1 = ConvertToInt32(f1, maxBytes); - Vector256 w2 = ConvertToInt32(f2, maxBytes); - Vector256 w3 = ConvertToInt32(f3, maxBytes); - - Vector256 u0 = Avx2.PackSignedSaturate(w0, w1); - Vector256 u1 = Avx2.PackSignedSaturate(w2, w3); - Vector256 b = Avx2.PackUnsignedSaturate(u0, u1); - b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte(); - - Unsafe.Add(ref destBase, i) = b; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 ConvertToInt32(Vector256 vf, Vector256 scale) - { - vf = Avx.Multiply(vf, scale); - return Avx.ConvertToVector256Int32(vf); - } - } - } -} -#endif diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index 267bca4ad..dc030e07a 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -13,15 +13,13 @@ using System.Runtime.Intrinsics.X86; #endif using BenchmarkDotNet.Attributes; -using BenchmarkDotNet.Environments; -using BenchmarkDotNet.Jobs; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.ShortCore31))] public abstract class FromVector4 where TPixel : unmanaged, IPixel { diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs index 145bf9889..9ae3b073d 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs @@ -13,7 +13,7 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.ShortCore31))] public class ToVector4_Rgba32 : ToVector4 { [Benchmark] @@ -52,6 +52,17 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(sBytes, dFloats); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Benchmark] + public void HwIntrinsics() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.HwIntrinsics.ByteToNormalizedFloat(sBytes, dFloats); + } +#endif + // [Benchmark] public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops() { diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index eca4e72cb..838db742a 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -7,7 +7,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Common.Tuples; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -209,9 +209,17 @@ namespace SixLabors.ImageSharp.Tests.Common [MemberData(nameof(ArraySizesDivisibleBy32))] public void HwIntrinsics_BulkConvertByteToNormalizedFloat(int count) { - TestImpl_BulkConvertByteToNormalizedFloat( - count, - (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); + static void RunTest(string serialized) + { + TestImpl_BulkConvertByteToNormalizedFloat( + FeatureTestRunner.Deserialize(serialized), + (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41, + count); } #endif @@ -294,9 +302,17 @@ namespace SixLabors.ImageSharp.Tests.Common [MemberData(nameof(ArraySizesDivisibleBy32))] public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count) { - TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( - count, - (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); + static void RunTest(string serialized) + { + TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + FeatureTestRunner.Deserialize(serialized), + (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2, + count); } #endif diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index eb1714baa..fdba9ce98 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -33,6 +33,14 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities where T : IXunitSerializable => BasicSerializer.Deserialize(value); + /// + /// Allows the deserialization of integers passed to the feature test. + /// + /// The string value to deserialize. + /// The value. + public static int Deserialize(string value) + => Convert.ToInt32(value); + /// /// Runs the given test within an environment /// where the given features. @@ -201,6 +209,48 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities } } + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + int serializable) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + serializable.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(serializable.ToString()); + } + } + } + internal static Dictionary ToFeatureKeyValueCollection(this HwIntrinsics intrinsics) { // Loop through and translate the given values into COMPlus equivaluents From aecf80388cd4f8a33d709d2ff1f359de9cfa8319 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 17:59:57 +0100 Subject: [PATCH 066/131] Add Avx2 Span Premultiplication and Reverse --- src/ImageSharp/Common/Helpers/ImageMaths.cs | 6 ++ .../Common/Helpers/Vector4Utilities.cs | 80 ++++++++++++++++--- .../Color/Bulk/PremultiplyVector4.cs | 68 ++++++++++++++++ .../Color/Bulk/UnPremultiplyVector4.cs | 68 ++++++++++++++++ .../Helpers/ImageMathsTests.cs | 15 ++++ .../Helpers/Vector4UtilsTests.cs | 2 + 6 files changed, 229 insertions(+), 10 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs index 977432f8b..d24230fe1 100644 --- a/src/ImageSharp/Common/Helpers/ImageMaths.cs +++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs @@ -132,6 +132,12 @@ namespace SixLabors.ImageSharp return (a / GreatestCommonDivisor(a, b)) * b; } + /// + /// Calculates % 2 + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static int Modulo2(int x) => x & 1; + /// /// Calculates % 4 /// diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index fccc50755..848a91791 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -5,6 +5,10 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp { @@ -13,6 +17,10 @@ namespace SixLabors.ImageSharp /// internal static class Vector4Utilities { + private const int BlendAlphaControl = 0b10001000; + + private static ReadOnlySpan PermuteAlphaMask8x32 => new byte[] { 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0 }; + /// /// Restricts a vector between a minimum and a maximum value. /// 5x Faster then . @@ -56,13 +64,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + Vector256 mask = + Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); + + int n = (vectors.Length * 4) / Vector256.Count; + for (int i = 0; i < n; i++) + { + ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 multiply = Avx2.PermuteVar8x32(source, mask); + source = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + Premultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Premultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } } } @@ -73,13 +107,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + Vector256 mask = + Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); + + int n = (vectors.Length * 4) / Vector256.Count; + for (int i = 0; i < n; i++) + { + ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 multiply = Avx2.PermuteVar8x32(source, mask); + source = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + UnPremultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - UnPremultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs new file mode 100644 index 000000000..2a886c687 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class PremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void PremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } + } + + [Benchmark] + public void Premultiply() + { + Vector4Utilities.Premultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Premultiply(ref Vector4 source) + { + float w = source.W; + source *= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + } + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = GetRandomFloat(rnd, minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs new file mode 100644 index 000000000..89e055da4 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class UnPremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void UnPremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } + } + + [Benchmark] + public void UnPremultiply() + { + Vector4Utilities.UnPremultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void UnPremultiply(ref Vector4 source) + { + float w = source.W; + source *= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + } + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = GetRandomFloat(rnd, minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs index 27689f681..7d1662387 100644 --- a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs @@ -10,6 +10,21 @@ namespace SixLabors.ImageSharp.Tests.Helpers { public class ImageMathsTests { + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + [InlineData(4)] + [InlineData(100)] + [InlineData(123)] + [InlineData(53436353)] + public void Modulo2(int x) + { + int actual = ImageMaths.Modulo2(x); + Assert.Equal(x % 2, actual); + } + [Theory] [InlineData(0)] [InlineData(1)] diff --git a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs index c3b8e79ee..2bb43c440 100644 --- a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs @@ -17,6 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void Premultiply_VectorSpan(int length) { var rnd = new Random(42); @@ -36,6 +37,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void UnPremultiply_VectorSpan(int length) { var rnd = new Random(42); From 1067acbe4c57ba7fc601186ad58b1087380a8a69 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 22:22:47 +0100 Subject: [PATCH 067/131] Use Tanner's updated code. --- .../Common/Helpers/Vector4Utilities.cs | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index 848a91791..5ae7ac1b7 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -61,7 +61,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS @@ -73,12 +73,15 @@ namespace SixLabors.ImageSharp Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - int n = (vectors.Length * 4) / Vector256.Count; - for (int i = 0; i < n; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { - ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 source = vectorsBase; Vector256 multiply = Avx2.PermuteVar8x32(source, mask); - source = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } if (ImageMaths.Modulo2(vectors.Length) != 0) @@ -104,7 +107,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS @@ -116,12 +119,15 @@ namespace SixLabors.ImageSharp Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - int n = (vectors.Length * 4) / Vector256.Count; - for (int i = 0; i < n; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { - ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 source = vectorsBase; Vector256 multiply = Avx2.PermuteVar8x32(source, mask); - source = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } if (ImageMaths.Modulo2(vectors.Length) != 0) From d4e0bdd7b7949072c6bc47e07301fce8ab5a96af Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 21 Oct 2020 23:59:37 +0100 Subject: [PATCH 068/131] Remove hotpath attr --- src/ImageSharp/Common/Helpers/Vector4Utilities.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index 5ae7ac1b7..0137d0256 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -61,7 +61,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS @@ -107,7 +107,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS From e3faadbf2edac8a51d09bf593088f42a073bd60b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 22 Oct 2020 10:34:42 +0100 Subject: [PATCH 069/131] Use Avx.Shuffle for lower latency --- src/ImageSharp/Common/Helpers/Vector4Utilities.cs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index 0137d0256..f617e9a3e 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -17,9 +17,8 @@ namespace SixLabors.ImageSharp /// internal static class Vector4Utilities { - private const int BlendAlphaControl = 0b10001000; - - private static ReadOnlySpan PermuteAlphaMask8x32 => new byte[] { 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0 }; + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; /// /// Restricts a vector between a minimum and a maximum value. @@ -70,16 +69,13 @@ namespace SixLabors.ImageSharp ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - Vector256 mask = - Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { Vector256 source = vectorsBase; - Vector256 multiply = Avx2.PermuteVar8x32(source, mask); + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } @@ -116,16 +112,13 @@ namespace SixLabors.ImageSharp ref Vector256 vectorsBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - Vector256 mask = - Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { Vector256 source = vectorsBase; - Vector256 multiply = Avx2.PermuteVar8x32(source, mask); + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } From 05b66da9f79a8faba536d3614469d7b477e93eaa Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 22 Oct 2020 10:53:14 +0100 Subject: [PATCH 070/131] Fix base unpremultiply benchmark --- tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs index 89e055da4..1312c767b 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs @@ -36,7 +36,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk private static void UnPremultiply(ref Vector4 source) { float w = source.W; - source *= w; + source /= w; source.W = w; } From 9629f1c16e87e1a960e9e635f1595ba2af7dae02 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 23 Oct 2020 14:03:33 +0100 Subject: [PATCH 071/131] Add AVX2 implementation --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 2 +- .../ColorConverters/JpegColorConverter.cs | 130 ++++++++++++------ .../Codecs/Jpeg/Vector4OctetPack.cs | 40 ++++++ .../Config.HwIntrinsics.cs | 4 +- 4 files changed, 134 insertions(+), 42 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 2fe2f99ac..a51c21b37 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -14,7 +14,7 @@ namespace SixLabors.ImageSharp { public static class HwIntrinsics { - private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + public static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; /// /// as many elements as possible, slicing them down (keeping the remainder). diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index f68bca041..f2a1c1e91 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -4,7 +4,12 @@ using System; using System.Collections.Generic; using System.Numerics; - +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; @@ -190,45 +195,90 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b) { - this.V0.X = r.A.X; - this.V0.Y = g.A.X; - this.V0.Z = b.A.X; - this.V0.W = 1f; - - this.V1.X = r.A.Y; - this.V1.Y = g.A.Y; - this.V1.Z = b.A.Y; - this.V1.W = 1f; - - this.V2.X = r.A.Z; - this.V2.Y = g.A.Z; - this.V2.Z = b.A.Z; - this.V2.W = 1f; - - this.V3.X = r.A.W; - this.V3.Y = g.A.W; - this.V3.Z = b.A.W; - this.V3.W = 1f; - - this.V4.X = r.B.X; - this.V4.Y = g.B.X; - this.V4.Z = b.B.X; - this.V4.W = 1f; - - this.V5.X = r.B.Y; - this.V5.Y = g.B.Y; - this.V5.Z = b.B.Y; - this.V5.W = 1f; - - this.V6.X = r.B.Z; - this.V6.Y = g.B.Z; - this.V6.Z = b.B.Z; - this.V6.W = 1f; - - this.V7.X = r.B.W; - this.V7.Y = g.B.W; - this.V7.Z = b.B.W; - this.V7.W = 1f; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + Vector4 vo = Vector4.One; + Vector128 valpha = Unsafe.As>(ref vo); + + ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskDeinterleave8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref r.A).ToVector256(), + Unsafe.As>(ref g.A), + 1); + + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref b.A).ToVector256(), + valpha, + 1); + + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref r.B).ToVector256(), + Unsafe.As>(ref g.B), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref b.B).ToVector256(), + valpha, + 1); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackHigh(r0, r1); + + Unsafe.As>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol); + Unsafe.As>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol); + + Vector256 t4 = Avx.UnpackLow(r2, r3); + Vector256 t6 = Avx.UnpackHigh(r2, r3); + + Unsafe.As>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol); + Unsafe.As>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol); + } + else +#endif + { + this.V0.X = r.A.X; + this.V0.Y = g.A.X; + this.V0.Z = b.A.X; + this.V0.W = 1f; + + this.V1.X = r.A.Y; + this.V1.Y = g.A.Y; + this.V1.Z = b.A.Y; + this.V1.W = 1f; + + this.V2.X = r.A.Z; + this.V2.Y = g.A.Z; + this.V2.Z = b.A.Z; + this.V2.W = 1f; + + this.V3.X = r.A.W; + this.V3.Y = g.A.W; + this.V3.Z = b.A.W; + this.V3.W = 1f; + + this.V4.X = r.B.X; + this.V4.Y = g.B.X; + this.V4.Z = b.B.X; + this.V4.W = 1f; + + this.V5.X = r.B.Y; + this.V5.Y = g.B.Y; + this.V5.Z = b.B.Y; + this.V5.W = 1f; + + this.V6.X = r.B.Z; + this.V6.Y = g.B.Z; + this.V6.Z = b.B.Z; + this.V6.W = 1f; + + this.V7.X = r.B.W; + this.V7.Y = g.B.W; + this.V7.Z = b.B.W; + this.V7.W = 1f; + } } } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs new file mode 100644 index 000000000..a7ea77198 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs @@ -0,0 +1,40 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System.Numerics; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Tuples; +using static SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters.JpegColorConverter; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Vector4OctetPack + { + private static Vector4Pair r = new Vector4Pair + { + A = new Vector4(1, 2, 3, 4), + B = new Vector4(5, 6, 7, 8) + }; + + private static Vector4Pair g = new Vector4Pair + { + A = new Vector4(9, 10, 11, 12), + B = new Vector4(13, 14, 15, 16) + }; + + private static Vector4Pair b = new Vector4Pair + { + A = new Vector4(17, 18, 19, 20), + B = new Vector4(21, 22, 23, 24) + }; + + [Benchmark] + public void Pack() + { + Vector4Octet v = default; + + v.Pack(ref r, ref g, ref b); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs index e860c5491..e8a06bf24 100644 --- a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -73,7 +73,9 @@ namespace SixLabors.ImageSharp.Benchmarks } #endif this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) - .WithEnvironmentVariables(new EnvironmentVariable(EnableHWIntrinsic, Off)) + .WithEnvironmentVariables( + new EnvironmentVariable(EnableHWIntrinsic, Off), + new EnvironmentVariable(FeatureSIMD, Off)) .WithId("No HwIntrinsics")); } } From b8081fd3e94e2b338a9e12e9a0d859f0d9f785d6 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 23 Oct 2020 16:54:10 +0100 Subject: [PATCH 072/131] Use HW color conversion --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 23 +++ .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 74 ++++++++- .../ColorConverters/JpegColorConverter.cs | 156 +++++++++--------- .../Codecs/Jpeg/Vector4OctetPack.cs | 40 ----- 4 files changed, 174 insertions(+), 119 deletions(-) delete mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index a51c21b37..c5a7f5e90 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -16,6 +16,29 @@ namespace SixLabors.ImageSharp { public static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + /// + /// Performs a multiplication and an addition of the . + /// + /// The vector to add to the intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAdd( + in Vector256 va, + in Vector256 vm0, + in Vector256 vm1) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAdd(vm1, vm0, va); + } + else + { + return Avx.Add(Avx.Multiply(vm0, vm1), va); + } + } + /// /// as many elements as possible, slicing them down (keeping the remainder). /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index c4d1408a2..8c34baa1d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -1,11 +1,15 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; - +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif using SixLabors.ImageSharp.Tuples; // ReSharper disable ImpureMethodCallOnReadonlyValueField @@ -47,6 +51,71 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); } +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-halfValue); + var scale = Vector256.Create(1 / maxValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(0.344136F); + var gCrMult = Vector256.Create(0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + Vector4 vo = Vector4.One; + Vector128 valpha = Unsafe.As>(ref vo); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskDeinterleave8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector256 rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector256 ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector256 bbRefAsVector = ref Unsafe.As>(ref bb); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = Avx.Subtract(Avx.Subtract(y, Avx.Multiply(cb, gCbMult)), Avx.Multiply(cr, gCrMult)); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); + g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); + b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.PackAvx2(ref rr, ref gg, ref bb, in valpha, in vcontrol); + } +#else ref Vector yBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); ref Vector cbBase = @@ -104,6 +173,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); destination.Pack(ref rr, ref gg, ref bb); } +#endif } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index f2a1c1e91..4e96f3471 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -190,95 +190,97 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters #pragma warning disable SA1132 // Do not combine fields public Vector4 V0, V1, V2, V3, V4, V5, V6, V7; +#if SUPPORTS_RUNTIME_INTRINSICS + /// /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... /// - public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b) + [MethodImpl(InliningOptions.ShortMethod)] + public void PackAvx2( + ref Vector4Pair r, + ref Vector4Pair g, + ref Vector4Pair b, + in Vector128 a, + in Vector256 vcontrol) { -#if SUPPORTS_RUNTIME_INTRINSICS - if (Avx2.IsSupported) - { - Vector4 vo = Vector4.One; - Vector128 valpha = Unsafe.As>(ref vo); + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref r.A), + Unsafe.As>(ref g.A), + 1); - ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskDeinterleave8x32); - Vector256 vcontrol = Unsafe.As>(ref control); + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref b.A), + a, + 1); - Vector256 r0 = Avx.InsertVector128( - Unsafe.As>(ref r.A).ToVector256(), - Unsafe.As>(ref g.A), - 1); + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref r.B).ToVector256(), + Unsafe.As>(ref g.B), + 1); - Vector256 r1 = Avx.InsertVector128( - Unsafe.As>(ref b.A).ToVector256(), - valpha, - 1); + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref b.B).ToVector256(), + a, + 1); - Vector256 r2 = Avx.InsertVector128( - Unsafe.As>(ref r.B).ToVector256(), - Unsafe.As>(ref g.B), - 1); + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackHigh(r0, r1); - Vector256 r3 = Avx.InsertVector128( - Unsafe.As>(ref b.B).ToVector256(), - valpha, - 1); + Unsafe.As>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol); + Unsafe.As>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol); - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t2 = Avx.UnpackHigh(r0, r1); + Vector256 t4 = Avx.UnpackLow(r2, r3); + Vector256 t6 = Avx.UnpackHigh(r2, r3); - Unsafe.As>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol); - Unsafe.As>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol); - - Vector256 t4 = Avx.UnpackLow(r2, r3); - Vector256 t6 = Avx.UnpackHigh(r2, r3); - - Unsafe.As>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol); - Unsafe.As>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol); - } - else + Unsafe.As>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol); + Unsafe.As>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol); + } #endif - { - this.V0.X = r.A.X; - this.V0.Y = g.A.X; - this.V0.Z = b.A.X; - this.V0.W = 1f; - - this.V1.X = r.A.Y; - this.V1.Y = g.A.Y; - this.V1.Z = b.A.Y; - this.V1.W = 1f; - - this.V2.X = r.A.Z; - this.V2.Y = g.A.Z; - this.V2.Z = b.A.Z; - this.V2.W = 1f; - - this.V3.X = r.A.W; - this.V3.Y = g.A.W; - this.V3.Z = b.A.W; - this.V3.W = 1f; - - this.V4.X = r.B.X; - this.V4.Y = g.B.X; - this.V4.Z = b.B.X; - this.V4.W = 1f; - - this.V5.X = r.B.Y; - this.V5.Y = g.B.Y; - this.V5.Z = b.B.Y; - this.V5.W = 1f; - - this.V6.X = r.B.Z; - this.V6.Y = g.B.Z; - this.V6.Z = b.B.Z; - this.V6.W = 1f; - - this.V7.X = r.B.W; - this.V7.Y = g.B.W; - this.V7.Z = b.B.W; - this.V7.W = 1f; - } + + /// + /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... + /// + public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b) + { + this.V0.X = r.A.X; + this.V0.Y = g.A.X; + this.V0.Z = b.A.X; + this.V0.W = 1f; + + this.V1.X = r.A.Y; + this.V1.Y = g.A.Y; + this.V1.Z = b.A.Y; + this.V1.W = 1f; + + this.V2.X = r.A.Z; + this.V2.Y = g.A.Z; + this.V2.Z = b.A.Z; + this.V2.W = 1f; + + this.V3.X = r.A.W; + this.V3.Y = g.A.W; + this.V3.Z = b.A.W; + this.V3.W = 1f; + + this.V4.X = r.B.X; + this.V4.Y = g.B.X; + this.V4.Z = b.B.X; + this.V4.W = 1f; + + this.V5.X = r.B.Y; + this.V5.Y = g.B.Y; + this.V5.Z = b.B.Y; + this.V5.W = 1f; + + this.V6.X = r.B.Z; + this.V6.Y = g.B.Z; + this.V6.Z = b.B.Z; + this.V6.W = 1f; + + this.V7.X = r.B.W; + this.V7.Y = g.B.W; + this.V7.Z = b.B.W; + this.V7.W = 1f; } } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs deleted file mode 100644 index a7ea77198..000000000 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/Vector4OctetPack.cs +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System.Numerics; -using BenchmarkDotNet.Attributes; -using SixLabors.ImageSharp.Tuples; -using static SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters.JpegColorConverter; - -namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg -{ - [Config(typeof(Config.HwIntrinsics_SSE_AVX))] - public class Vector4OctetPack - { - private static Vector4Pair r = new Vector4Pair - { - A = new Vector4(1, 2, 3, 4), - B = new Vector4(5, 6, 7, 8) - }; - - private static Vector4Pair g = new Vector4Pair - { - A = new Vector4(9, 10, 11, 12), - B = new Vector4(13, 14, 15, 16) - }; - - private static Vector4Pair b = new Vector4Pair - { - A = new Vector4(17, 18, 19, 20), - B = new Vector4(21, 22, 23, 24) - }; - - [Benchmark] - public void Pack() - { - Vector4Octet v = default; - - v.Pack(ref r, ref g, ref b); - } - } -} From 50bc02764398f78ae862ec2b30363cdf3d71f52e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 23 Oct 2020 18:09:06 +0100 Subject: [PATCH 073/131] Fix access violation --- .../Components/Decoder/ColorConverters/JpegColorConverter.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 4e96f3471..b40d9b9e6 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -204,12 +204,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters in Vector256 vcontrol) { Vector256 r0 = Avx.InsertVector128( - Unsafe.As>(ref r.A), + Unsafe.As>(ref r.A).ToVector256(), Unsafe.As>(ref g.A), 1); Vector256 r1 = Avx.InsertVector128( - Unsafe.As>(ref b.A), + Unsafe.As>(ref b.A).ToVector256(), a, 1); From 40442c24424ca43583700b5577c557f2ba21bd75 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 23 Oct 2020 18:46:00 +0100 Subject: [PATCH 074/131] Inline the packing. --- .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 59 +++++++++++++------ .../ColorConverters/JpegColorConverter.cs | 53 ----------------- 2 files changed, 42 insertions(+), 70 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index 8c34baa1d..ca7971a07 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -59,8 +59,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector256 crBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); // Used for the color conversion var chromaOffset = Vector256.Create(-halfValue); @@ -76,14 +76,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskDeinterleave8x32); Vector256 vcontrol = Unsafe.As>(ref control); - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - - ref Vector256 rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector256 ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector256 bbRefAsVector = ref Unsafe.As>(ref bb); - // Walking 8 elements at one step: int n = result.Length / 8; for (int i = 0; i < n; i++) @@ -107,13 +99,46 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.PackAvx2(ref rr, ref gg, ref bb, in valpha, in vcontrol); + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the + // expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + // + // Left side. + Vector256 r0 = Avx.InsertVector128( + r, + Unsafe.As, Vector128>(ref g), + 1); + + Vector256 r1 = Avx.InsertVector128( + b, + valpha, + 1); + + // Right side + Vector256 r2 = Avx.InsertVector128( + Unsafe.Add(ref Unsafe.As, Vector128>(ref r), 1).ToVector256(), + Unsafe.Add(ref Unsafe.As, Vector128>(ref g), 1), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.Add(ref Unsafe.As, Vector128>(ref b), 1).ToVector256(), + valpha, + 1); + + // Split into separate rows + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackHigh(r0, r1); + + // Deinterleave and set + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + destination = Avx2.PermuteVar8x32(t0, vcontrol); + Unsafe.Add(ref destination, 1) = Avx2.PermuteVar8x32(t2, vcontrol); + + // Repeat for right side. + Vector256 t4 = Avx.UnpackLow(r2, r3); + Vector256 t6 = Avx.UnpackHigh(r2, r3); + + Unsafe.Add(ref destination, 2) = Avx2.PermuteVar8x32(t4, vcontrol); + Unsafe.Add(ref destination, 3) = Avx2.PermuteVar8x32(t6, vcontrol); } #else ref Vector yBase = diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index b40d9b9e6..7c780700c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -4,12 +4,6 @@ using System; using System.Collections.Generic; using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -#endif using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; @@ -190,53 +184,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters #pragma warning disable SA1132 // Do not combine fields public Vector4 V0, V1, V2, V3, V4, V5, V6, V7; -#if SUPPORTS_RUNTIME_INTRINSICS - - /// - /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... - /// - [MethodImpl(InliningOptions.ShortMethod)] - public void PackAvx2( - ref Vector4Pair r, - ref Vector4Pair g, - ref Vector4Pair b, - in Vector128 a, - in Vector256 vcontrol) - { - Vector256 r0 = Avx.InsertVector128( - Unsafe.As>(ref r.A).ToVector256(), - Unsafe.As>(ref g.A), - 1); - - Vector256 r1 = Avx.InsertVector128( - Unsafe.As>(ref b.A).ToVector256(), - a, - 1); - - Vector256 r2 = Avx.InsertVector128( - Unsafe.As>(ref r.B).ToVector256(), - Unsafe.As>(ref g.B), - 1); - - Vector256 r3 = Avx.InsertVector128( - Unsafe.As>(ref b.B).ToVector256(), - a, - 1); - - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t2 = Avx.UnpackHigh(r0, r1); - - Unsafe.As>(ref this.V0) = Avx2.PermuteVar8x32(t0, vcontrol); - Unsafe.As>(ref this.V2) = Avx2.PermuteVar8x32(t2, vcontrol); - - Vector256 t4 = Avx.UnpackLow(r2, r3); - Vector256 t6 = Avx.UnpackHigh(r2, r3); - - Unsafe.As>(ref this.V4) = Avx2.PermuteVar8x32(t4, vcontrol); - Unsafe.As>(ref this.V6) = Avx2.PermuteVar8x32(t6, vcontrol); - } -#endif - /// /// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ... /// From 238564b6096f540e247cfa725e334b21a79da5ab Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 24 Oct 2020 00:55:22 +0100 Subject: [PATCH 075/131] Use less permutes and more multiply/add --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 2 + .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 63 ++++++------------- 2 files changed, 22 insertions(+), 43 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index c5a7f5e90..2d788992e 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -16,6 +16,8 @@ namespace SixLabors.ImageSharp { public static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + public static ReadOnlySpan PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; + /// /// Performs a multiplication and an addition of the . /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index ca7971a07..1319b56ee 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -66,14 +66,13 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters var chromaOffset = Vector256.Create(-halfValue); var scale = Vector256.Create(1 / maxValue); var rCrMult = Vector256.Create(1.402F); - var gCbMult = Vector256.Create(0.344136F); - var gCrMult = Vector256.Create(0.714136F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); var bCbMult = Vector256.Create(1.772F); // Used for packing. - Vector4 vo = Vector4.One; - Vector128 valpha = Unsafe.As>(ref vo); - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskDeinterleave8x32); + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); Vector256 vcontrol = Unsafe.As>(ref control); // Walking 8 elements at one step: @@ -87,58 +86,36 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + // r = y + (1.402F * cr); // g = y - (0.344136F * cb) - (0.714136F * cr); // b = y + (1.772F * cb); // Adding & multiplying 8 elements at one time: Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); - Vector256 g = Avx.Subtract(Avx.Subtract(y, Avx.Multiply(cb, gCbMult)), Avx.Multiply(cr, gCrMult)); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + // TODO: We should be savving to RGBA not Vector4 r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the - // expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - // - // Left side. - Vector256 r0 = Avx.InsertVector128( - r, - Unsafe.As, Vector128>(ref g), - 1); - - Vector256 r1 = Avx.InsertVector128( - b, - valpha, - 1); - - // Right side - Vector256 r2 = Avx.InsertVector128( - Unsafe.Add(ref Unsafe.As, Vector128>(ref r), 1).ToVector256(), - Unsafe.Add(ref Unsafe.As, Vector128>(ref g), 1), - 1); - - Vector256 r3 = Avx.InsertVector128( - Unsafe.Add(ref Unsafe.As, Vector128>(ref b), 1).ToVector256(), - valpha, - 1); - - // Split into separate rows - Vector256 t0 = Avx.UnpackLow(r0, r1); - Vector256 t2 = Avx.UnpackHigh(r0, r1); - - // Deinterleave and set + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - destination = Avx2.PermuteVar8x32(t0, vcontrol); - Unsafe.Add(ref destination, 1) = Avx2.PermuteVar8x32(t2, vcontrol); - // Repeat for right side. - Vector256 t4 = Avx.UnpackLow(r2, r3); - Vector256 t6 = Avx.UnpackHigh(r2, r3); + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); - Unsafe.Add(ref destination, 2) = Avx2.PermuteVar8x32(t4, vcontrol); - Unsafe.Add(ref destination, 3) = Avx2.PermuteVar8x32(t6, vcontrol); + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); } #else ref Vector yBase = From 893247bd882674a6cce48d505b1b34e68a3e27da Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 26 Oct 2020 17:05:50 +0000 Subject: [PATCH 076/131] Add 4 channel float shuffling. --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 80 +++++++++++ .../Common/Helpers/SimdUtils.Shuffle.cs | 131 ++++++++++++++++++ .../Color/Bulk/ShuffleFloat4Channel.cs | 68 +++++++++ .../ImageSharp.Benchmarks.csproj | 1 + .../Common/SimdUtilsTests.Shuffle.cs | 75 ++++++++++ .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 14 +- .../Formats/Png/PngEncoderTests.cs | 2 +- .../FeatureTesting/FeatureTestRunner.cs | 21 +-- .../Tests/FeatureTestRunnerTests.cs | 4 +- 9 files changed, 377 insertions(+), 19 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs create mode 100644 tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 2d788992e..aea04737d 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -18,6 +18,86 @@ namespace SixLabors.ImageSharp public static ReadOnlySpan PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; + /// + /// Shuffle single-precision (32-bit) floating-point elements in + /// using the control and store the results in . + /// + /// The source span of floats + /// The destination span of float + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4ChannelReduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Avx.IsSupported || Sse.IsSupported) + { + int remainder; + if (Avx.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle4Channel( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Avx.IsSupported) + { + int n = dest.Length / Vector256.Count; + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); + } + } + else + { + // Sse + int n = dest.Length / Vector128.Count; + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + Vector128 vs = Unsafe.Add(ref sourceBase, i); + Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); + } + } + } + /// /// Performs a multiplication and an addition of the . /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs new file mode 100644 index 000000000..fe7cbb72a --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -0,0 +1,131 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + /// + /// Shuffle single-precision (32-bit) floating-point elements in + /// using the control and store the results in . + /// + /// The source span of floats + /// The destination span of float + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyShuffleSpanInput(source, dest); + + // TODO: There doesn't seem to be any APIs for + // System.Numerics that allow shuffling. +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + ShuffleRemainder4Channel(source, dest, control); + } + } + + [MethodImpl(InliningOptions.ColdPath)] + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + ref float sBase = ref MemoryMarshal.GetReference(source); + ref float dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + + [Conditional("DEBUG")] + private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue( + source.Length == dest.Length, + nameof(source), + "Input spans must be of same length!"); + + DebugGuard.IsTrue( + source.Length % 4 == 0, + nameof(source), + "Input spans must be divisiable by 4!"); + } + + public static class Shuffle + { + public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3; + public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; + public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; + + public static ReadOnlySpan WXYZ_128 => MmShuffleByte128(2, 1, 0, 3); + + public static ReadOnlySpan XYZW_128 => MmShuffleByte128(3, 2, 1, 0); + + public static ReadOnlySpan ZYXW_128 => MmShuffleByte128(3, 0, 1, 2); + + public static ReadOnlySpan WXYZ_256 => MmShuffleByte256(2, 1, 0, 3); + + public static ReadOnlySpan XYZW_256 => MmShuffleByte256(3, 2, 1, 0); + + public static ReadOnlySpan ZYXW_256 => MmShuffleByte256(3, 0, 1, 2); + + private static byte[] MmShuffleByte128(int p3, int p2, int p1, int p0) + { + byte[] result = new byte[16]; + + for (int i = 0; i < result.Length; i += 4) + { + result[i] = (byte)(p0 + i); + result[i + 1] = (byte)(p1 + i); + result[i + 2] = (byte)(p2 + i); + result[i + 3] = (byte)(p3 + i); + } + + return result; + } + + private static byte[] MmShuffleByte256(int p3, int p2, int p1, int p0) + { + byte[] result = new byte[32]; + + for (int i = 0; i < result.Length; i += 4) + { + result[i] = (byte)(p0 + i); + result[i + 1] = (byte)(p1 + i); + result[i + 2] = (byte)(p2 + i); + result[i + 3] = (byte)(p3 + i); + } + + return result; + } + + public static void InverseMmShuffle(byte control, out int p3, out int p2, out int p1, out int p0) + { + p3 = control >> 6 & 0x3; + p2 = control >> 4 & 0x3; + p1 = control >> 2 & 0x3; + p0 = control >> 0 & 0x3; + } + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs new file mode 100644 index 000000000..36b9591d9 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Tests; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class ShuffleFloat4Channel + { + private float[] source; + private float[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new Random(this.Count).GenerateRandomFloatArray(this.Count, 0, 256); + this.destination = new float[this.Count]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + } + } + + // 2020-10-26 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK = 5.0.100-rc.2.20479.15 + // + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |---------------- |-------------------------------------------------- |------ |------------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | AVX | Empty | 128 | 14.49 ns | 0.244 ns | 0.217 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 87.74 ns | 0.524 ns | 0.490 ns | 6.06 | 0.09 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 23.65 ns | 0.101 ns | 0.094 ns | 1.63 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 256 | 25.87 ns | 0.492 ns | 0.673 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 159.52 ns | 0.901 ns | 0.843 ns | 6.12 | 0.12 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 45.47 ns | 0.404 ns | 0.378 ns | 1.75 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 512 | 49.51 ns | 0.088 ns | 0.083 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 297.96 ns | 0.926 ns | 0.821 ns | 6.02 | 0.02 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 90.77 ns | 0.191 ns | 0.169 ns | 1.83 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 1024 | 113.09 ns | 1.913 ns | 3.090 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 604.58 ns | 1.464 ns | 1.298 ns | 5.29 | 0.18 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 179.44 ns | 0.208 ns | 0.184 ns | 1.57 | 0.05 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 2048 | 217.95 ns | 1.314 ns | 1.165 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 1,152.04 ns | 3.941 ns | 3.494 ns | 5.29 | 0.03 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 349.52 ns | 0.587 ns | 0.520 ns | 1.60 | 0.01 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj index eaab162ff..4784a219b 100644 --- a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj +++ b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj @@ -17,6 +17,7 @@ + diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs new file mode 100644 index 000000000..04aab18e4 --- /dev/null +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -0,0 +1,75 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Common +{ + public partial class SimdUtilsTests + { + public static readonly TheoryData ShuffleControls = + new TheoryData + { + SimdUtils.Shuffle.WXYZ, + SimdUtils.Shuffle.XYZW, + SimdUtils.Shuffle.ZYXW + }; + + [Theory] + [MemberData(nameof(ShuffleControls))] + public void BulkShuffleFloat4Channel(byte control) + { + static void RunTest(string serialized) + { + byte ctrl = FeatureTestRunner.Deserialize(serialized); + foreach (var item in ArraySizesDivisibleBy4) + { + foreach (var count in item) + { + TestShuffle( + (int)count, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), + ctrl); + } + } + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + control, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + private static void TestShuffle( + int count, + Action, Memory> convert, + byte control) + { + float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256); + var result = new float[count]; + + float[] expected = new float[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 4) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + expected[i + 3] = source[p3 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5F)); + } + } +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 838db742a..bddadff4d 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -13,7 +13,7 @@ using Xunit.Abstractions; namespace SixLabors.ImageSharp.Tests.Common { - public class SimdUtilsTests + public partial class SimdUtilsTests { private ITestOutputHelper Output { get; } @@ -212,14 +212,14 @@ namespace SixLabors.ImageSharp.Tests.Common static void RunTest(string serialized) { TestImpl_BulkConvertByteToNormalizedFloat( - FeatureTestRunner.Deserialize(serialized), + FeatureTestRunner.Deserialize(serialized), (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41, - count); + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41); } #endif @@ -305,14 +305,14 @@ namespace SixLabors.ImageSharp.Tests.Common static void RunTest(string serialized) { TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( - FeatureTestRunner.Deserialize(serialized), + FeatureTestRunner.Deserialize(serialized), (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2, - count); + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2); } #endif diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index 465bed8a1..b4670cb5d 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -535,7 +535,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png static void RunTest(string serialized) { TestImageProvider provider = - FeatureTestRunner.Deserialize>(serialized); + FeatureTestRunner.DeserializeForXunit>(serialized); foreach (PngInterlaceMode interlaceMode in InterlaceMode) { diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index fdba9ce98..4720ea78a 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -29,17 +29,19 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities /// The type to deserialize to. /// The string value to deserialize. /// The value. - public static T Deserialize(string value) + public static T DeserializeForXunit(string value) where T : IXunitSerializable => BasicSerializer.Deserialize(value); /// - /// Allows the deserialization of integers passed to the feature test. + /// Allows the deserialization of types implementing + /// passed to the feature test. /// /// The string value to deserialize. - /// The value. - public static int Deserialize(string value) - => Convert.ToInt32(value); + /// The value. + public static T Deserialize(string value) + where T : IConvertible + => (T)Convert.ChangeType(value, typeof(T)); /// /// Runs the given test within an environment @@ -214,12 +216,13 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities /// where the given features. /// /// The test action to run. - /// The intrinsics features. /// The value to pass as a parameter to the test action. - public static void RunWithHwIntrinsicsFeature( + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( Action action, - HwIntrinsics intrinsics, - int serializable) + T serializable, + HwIntrinsics intrinsics) + where T : IConvertible { if (!RemoteExecutor.IsSupported) { diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs index 646000120..4cbbefe68 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests static void AssertHwIntrinsicsFeatureDisabled(string serializable) { Assert.NotNull(serializable); - Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + Assert.NotNull(FeatureTestRunner.DeserializeForXunit(serializable)); #if SUPPORTS_RUNTIME_INTRINSICS Assert.False(Sse.IsSupported); @@ -202,7 +202,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) { Assert.NotNull(serializable); - Assert.NotNull(FeatureTestRunner.Deserialize(serializable)); + Assert.NotNull(FeatureTestRunner.DeserializeForXunit(serializable)); switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) { From 99d0a3111d42eb975a7a253c3c0be8c35d1ba125 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 26 Oct 2020 20:04:32 +0000 Subject: [PATCH 077/131] Add 4 channel byte shuffling --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 122 +++++++++++++++++- .../Common/Helpers/SimdUtils.Shuffle.cs | 115 ++++++++++++----- .../Color/Bulk/ShuffleByte4Channel.cs | 68 ++++++++++ .../Common/SimdUtilsTests.Shuffle.cs | 65 +++++++++- 4 files changed, 330 insertions(+), 40 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index aea04737d..899ab7130 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -22,8 +22,8 @@ namespace SixLabors.ImageSharp /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . /// - /// The source span of floats - /// The destination span of float + /// The source span of floats. + /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle4ChannelReduce( @@ -58,6 +58,46 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffle 8-bit integers in a within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4ChannelReduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Avx2.IsSupported || Ssse3.IsSupported) + { + int remainder; + if (Avx.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle4Channel( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + [MethodImpl(InliningOptions.ShortMethod)] private static void Shuffle4Channel( ReadOnlySpan source, @@ -98,6 +138,84 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Avx2.IsSupported) + { + int n = dest.Length / Vector256.Count; + + Vector256 vcm; + switch (control) + { + case Shuffle.WXYZ: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.WXYZ_256)); + break; + case Shuffle.XYZW: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.XYZW_256)); + break; + case Shuffle.ZYXW: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.ZYXW_256)); + break; + default: + Span bytes = stackalloc byte[Vector256.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + break; + } + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + } + } + else + { + // Ssse3 + int n = dest.Length / Vector128.Count; + + Vector128 vcm; + switch (control) + { + case Shuffle.WXYZ: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.WXYZ_128)); + break; + case Shuffle.XYZW: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.XYZW_128)); + break; + case Shuffle.ZYXW: + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.ZYXW_128)); + break; + default: + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + break; + } + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + for (int i = 0; i < n; i++) + { + Vector128 vs = Unsafe.Add(ref sourceBase, i); + Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(vs, vcm); + } + } + } + /// /// Performs a multiplication and an addition of the . /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index fe7cbb72a..76746e4d2 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -14,8 +14,8 @@ namespace SixLabors.ImageSharp /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . /// - /// The source span of floats - /// The destination span of float + /// The source span of floats. + /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle4Channel( @@ -38,14 +38,43 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffle 8-bit integers in a within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyShuffleSpanInput(source, dest); + + // TODO: There doesn't seem to be any APIs for + // System.Numerics that allow shuffling. +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + ShuffleRemainder4Channel(source, dest, control); + } + } + [MethodImpl(InliningOptions.ColdPath)] - public static void ShuffleRemainder4Channel( - ReadOnlySpan source, - Span dest, + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, byte control) + where T : struct { - ref float sBase = ref MemoryMarshal.GetReference(source); - ref float dBase = ref MemoryMarshal.GetReference(dest); + ref T sBase = ref MemoryMarshal.GetReference(source); + ref T dBase = ref MemoryMarshal.GetReference(dest); Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) @@ -58,7 +87,8 @@ namespace SixLabors.ImageSharp } [Conditional("DEBUG")] - private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) + private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) + where T : struct { DebugGuard.IsTrue( source.Length == dest.Length, @@ -77,49 +107,64 @@ namespace SixLabors.ImageSharp public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; - public static ReadOnlySpan WXYZ_128 => MmShuffleByte128(2, 1, 0, 3); + public static ReadOnlySpan WXYZ_128 => MmShuffleSpan128(WXYZ); - public static ReadOnlySpan XYZW_128 => MmShuffleByte128(3, 2, 1, 0); + public static ReadOnlySpan XYZW_128 => MmShuffleSpan128(XYZW); - public static ReadOnlySpan ZYXW_128 => MmShuffleByte128(3, 0, 1, 2); + public static ReadOnlySpan ZYXW_128 => MmShuffleSpan128(ZYXW); - public static ReadOnlySpan WXYZ_256 => MmShuffleByte256(2, 1, 0, 3); + public static ReadOnlySpan WXYZ_256 => MmShuffleSpan256(WXYZ); - public static ReadOnlySpan XYZW_256 => MmShuffleByte256(3, 2, 1, 0); + public static ReadOnlySpan XYZW_256 => MmShuffleSpan256(XYZW); - public static ReadOnlySpan ZYXW_256 => MmShuffleByte256(3, 0, 1, 2); + public static ReadOnlySpan ZYXW_256 => MmShuffleSpan256(ZYXW); - private static byte[] MmShuffleByte128(int p3, int p2, int p1, int p0) + private static ReadOnlySpan MmShuffleSpan128(byte control) { - byte[] result = new byte[16]; - - for (int i = 0; i < result.Length; i += 4) - { - result[i] = (byte)(p0 + i); - result[i + 1] = (byte)(p1 + i); - result[i + 2] = (byte)(p2 + i); - result[i + 3] = (byte)(p3 + i); - } + Span buffer = new byte[16]; + MmShuffleSpan(ref buffer, control); + return buffer; + } - return result; + private static ReadOnlySpan MmShuffleSpan256(byte control) + { + Span buffer = new byte[32]; + MmShuffleSpan(ref buffer, control); + return buffer; } - private static byte[] MmShuffleByte256(int p3, int p2, int p1, int p0) + [MethodImpl(InliningOptions.ShortMethod)] + public static byte MmShuffle(int p3, int p2, int p1, int p0) + => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); + + [MethodImpl(InliningOptions.ShortMethod)] + public static void MmShuffleSpan(ref Span span, byte control) { - byte[] result = new byte[32]; + InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); - for (int i = 0; i < result.Length; i += 4) + ref byte spanBase = ref MemoryMarshal.GetReference(span); + + for (int i = 0; i < span.Length; i += 4) { - result[i] = (byte)(p0 + i); - result[i + 1] = (byte)(p1 + i); - result[i + 2] = (byte)(p2 + i); - result[i + 3] = (byte)(p3 + i); + Unsafe.Add(ref spanBase, i) = (byte)(p0 + i); + Unsafe.Add(ref spanBase, i + 1) = (byte)(p1 + i); + Unsafe.Add(ref spanBase, i + 2) = (byte)(p2 + i); + Unsafe.Add(ref spanBase, i + 3) = (byte)(p3 + i); } - - return result; } - public static void InverseMmShuffle(byte control, out int p3, out int p2, out int p1, out int p0) + [MethodImpl(InliningOptions.ShortMethod)] + public static void InverseMmShuffle( + byte control, + out int p3, + out int p2, + out int p1, + out int p0) { p3 = control >> 6 & 0x3; p2 = control >> 4 & 0x3; diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs new file mode 100644 index 000000000..baef86099 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class ShuffleByte4Channel + { + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + } + } + + // 2020-10-26 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK = 5.0.100-rc.2.20479.15 + // + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |---------------- |-------------------------------------------------- |------ |----------:|----------:|----------:|------:|--------:|-------:|------:|------:|----------:| + // | Shuffle4Channel | AVX | Empty | 128 | 33.57 ns | 0.694 ns | 1.268 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.97 ns | 0.940 ns | 1.045 ns | 1.94 | 0.10 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 27.23 ns | 0.338 ns | 0.300 ns | 0.84 | 0.04 | 0.0095 | - | - | 40 B | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 256 | 34.57 ns | 0.295 ns | 0.276 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.62 ns | 0.257 ns | 0.228 ns | 3.60 | 0.03 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 32.22 ns | 0.106 ns | 0.099 ns | 0.93 | 0.01 | 0.0095 | - | - | 40 B | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 512 | 40.41 ns | 0.826 ns | 0.848 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 251.65 ns | 0.440 ns | 0.412 ns | 6.23 | 0.13 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 41.54 ns | 0.128 ns | 0.114 ns | 1.03 | 0.02 | 0.0095 | - | - | 40 B | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 1024 | 51.54 ns | 0.156 ns | 0.121 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 493.66 ns | 1.316 ns | 1.231 ns | 9.58 | 0.04 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 61.45 ns | 0.216 ns | 0.181 ns | 1.19 | 0.00 | 0.0095 | - | - | 40 B | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 2048 | 76.85 ns | 0.176 ns | 0.138 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 985.64 ns | 11.396 ns | 10.103 ns | 12.84 | 0.15 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 106.13 ns | 0.335 ns | 0.297 ns | 1.38 | 0.01 | 0.0095 | - | - | 40 B | +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 04aab18e4..e07bcf257 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -14,7 +14,10 @@ namespace SixLabors.ImageSharp.Tests.Common { SimdUtils.Shuffle.WXYZ, SimdUtils.Shuffle.XYZW, - SimdUtils.Shuffle.ZYXW + SimdUtils.Shuffle.ZYXW, + SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0), + SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1), + SimdUtils.Shuffle.MmShuffle(3, 3, 3, 3) }; [Theory] @@ -28,7 +31,7 @@ namespace SixLabors.ImageSharp.Tests.Common { foreach (var count in item) { - TestShuffle( + TestShuffleFloat4Channel( (int)count, (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), ctrl); @@ -42,7 +45,32 @@ namespace SixLabors.ImageSharp.Tests.Common HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } - private static void TestShuffle( + [Theory] + [MemberData(nameof(ShuffleControls))] + public void BulkShuffleByte4Channel(byte control) + { + static void RunTest(string serialized) + { + byte ctrl = FeatureTestRunner.Deserialize(serialized); + foreach (var item in ArraySizesDivisibleBy4) + { + foreach (var count in item) + { + TestShuffleByte4Channel( + (int)count, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), + ctrl); + } + } + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + control, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + private static void TestShuffleFloat4Channel( int count, Action, Memory> convert, byte control) @@ -71,5 +99,36 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5F)); } + + private static void TestShuffleByte4Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 4) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + expected[i + 3] = source[p3 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } } } From 34963a7f7a40a1950376672e09deba5e794eeb7a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 26 Oct 2020 22:13:21 +0000 Subject: [PATCH 078/131] Don't use static spans for now. --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 45 ++++--------------- .../Common/Helpers/SimdUtils.Shuffle.cs | 26 ----------- .../Color/Bulk/ShuffleByte4Channel.cs | 42 ++++++++--------- 3 files changed, 30 insertions(+), 83 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 899ab7130..d68e16e23 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -148,24 +148,12 @@ namespace SixLabors.ImageSharp { int n = dest.Length / Vector256.Count; - Vector256 vcm; - switch (control) - { - case Shuffle.WXYZ: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.WXYZ_256)); - break; - case Shuffle.XYZW: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.XYZW_256)); - break; - case Shuffle.ZYXW: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.ZYXW_256)); - break; - default: - Span bytes = stackalloc byte[Vector256.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - break; - } + // I've chosen to do this for convenience while we determine what + // shuffle controls to add to the library. + // We can add static ROS instances if need be in the future. + Span bytes = stackalloc byte[Vector256.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector256 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -183,24 +171,9 @@ namespace SixLabors.ImageSharp // Ssse3 int n = dest.Length / Vector128.Count; - Vector128 vcm; - switch (control) - { - case Shuffle.WXYZ: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.WXYZ_128)); - break; - case Shuffle.XYZW: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.XYZW_128)); - break; - case Shuffle.ZYXW: - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(Shuffle.ZYXW_128)); - break; - default: - Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); - vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - break; - } + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 76746e4d2..6b766b88d 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -107,32 +107,6 @@ namespace SixLabors.ImageSharp public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; - public static ReadOnlySpan WXYZ_128 => MmShuffleSpan128(WXYZ); - - public static ReadOnlySpan XYZW_128 => MmShuffleSpan128(XYZW); - - public static ReadOnlySpan ZYXW_128 => MmShuffleSpan128(ZYXW); - - public static ReadOnlySpan WXYZ_256 => MmShuffleSpan256(WXYZ); - - public static ReadOnlySpan XYZW_256 => MmShuffleSpan256(XYZW); - - public static ReadOnlySpan ZYXW_256 => MmShuffleSpan256(ZYXW); - - private static ReadOnlySpan MmShuffleSpan128(byte control) - { - Span buffer = new byte[16]; - MmShuffleSpan(ref buffer, control); - return buffer; - } - - private static ReadOnlySpan MmShuffleSpan256(byte control) - { - Span buffer = new byte[32]; - MmShuffleSpan(ref buffer, control); - return buffer; - } - [MethodImpl(InliningOptions.ShortMethod)] public static byte MmShuffle(int p3, int p2, int p1, int p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index baef86099..c45b103e3 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -44,25 +44,25 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |---------------- |-------------------------------------------------- |------ |----------:|----------:|----------:|------:|--------:|-------:|------:|------:|----------:| - // | Shuffle4Channel | AVX | Empty | 128 | 33.57 ns | 0.694 ns | 1.268 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.97 ns | 0.940 ns | 1.045 ns | 1.94 | 0.10 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 27.23 ns | 0.338 ns | 0.300 ns | 0.84 | 0.04 | 0.0095 | - | - | 40 B | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 256 | 34.57 ns | 0.295 ns | 0.276 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.62 ns | 0.257 ns | 0.228 ns | 3.60 | 0.03 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 32.22 ns | 0.106 ns | 0.099 ns | 0.93 | 0.01 | 0.0095 | - | - | 40 B | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 512 | 40.41 ns | 0.826 ns | 0.848 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 251.65 ns | 0.440 ns | 0.412 ns | 6.23 | 0.13 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 41.54 ns | 0.128 ns | 0.114 ns | 1.03 | 0.02 | 0.0095 | - | - | 40 B | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 1024 | 51.54 ns | 0.156 ns | 0.121 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 493.66 ns | 1.316 ns | 1.231 ns | 9.58 | 0.04 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 61.45 ns | 0.216 ns | 0.181 ns | 1.19 | 0.00 | 0.0095 | - | - | 40 B | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 2048 | 76.85 ns | 0.176 ns | 0.138 ns | 1.00 | 0.00 | 0.0134 | - | - | 56 B | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 985.64 ns | 11.396 ns | 10.103 ns | 12.84 | 0.15 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 106.13 ns | 0.335 ns | 0.297 ns | 1.38 | 0.01 | 0.0095 | - | - | 40 B | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |---------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | AVX | Empty | 128 | 20.51 ns | 0.270 ns | 0.211 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.00 ns | 0.991 ns | 0.927 ns | 3.08 | 0.06 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 17.25 ns | 0.066 ns | 0.058 ns | 0.84 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 256 | 24.57 ns | 0.248 ns | 0.219 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.55 ns | 2.501 ns | 2.456 ns | 5.06 | 0.10 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 21.80 ns | 0.094 ns | 0.088 ns | 0.89 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 512 | 28.51 ns | 0.130 ns | 0.115 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 256.52 ns | 1.424 ns | 1.332 ns | 9.00 | 0.07 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 29.72 ns | 0.217 ns | 0.203 ns | 1.04 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 1024 | 36.40 ns | 0.357 ns | 0.334 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 492.71 ns | 1.498 ns | 1.251 ns | 13.52 | 0.12 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 44.71 ns | 0.264 ns | 0.234 ns | 1.23 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | AVX | Empty | 2048 | 59.38 ns | 0.180 ns | 0.159 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 975.05 ns | 2.043 ns | 1.811 ns | 16.42 | 0.05 | - | - | - | - | + // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 81.83 ns | 0.212 ns | 0.198 ns | 1.38 | 0.01 | - | - | - | - | } From 84a1d1a28bbc5fff9da861b129ddbcccadb33b8b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 27 Oct 2020 19:32:32 +0000 Subject: [PATCH 079/131] Add optimized fallback for existing shuffles. --- .../Common/Helpers/SimdUtils.Shuffle.cs | 133 +++++++++++++++++- .../Common/SimdUtilsTests.Shuffle.cs | 2 + 2 files changed, 129 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 6b766b88d..4d2678320 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Buffers.Binary; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -67,14 +68,53 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ColdPath)] - public static void ShuffleRemainder4Channel( - ReadOnlySpan source, - Span dest, + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + ref float sBase = ref MemoryMarshal.GetReference(source); + ref float dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + + [MethodImpl(InliningOptions.ColdPath)] + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, byte control) - where T : struct { - ref T sBase = ref MemoryMarshal.GetReference(source); - ref T dBase = ref MemoryMarshal.GetReference(dest); +#if NETCOREAPP + // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) + // and ROTR (Rotate Right) emitting efficient CPU instructions: + // https://github.com/dotnet/coreclr/pull/1830 + switch (control) + { + case Shuffle.WXYZ: + WXYZ(source, dest); + return; + case Shuffle.WZYX: + WZYX(source, dest); + return; + case Shuffle.YZWX: + YZWX(source, dest); + return; + case Shuffle.ZYXW: + ZYXW(source, dest); + return; + } +#endif + + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) @@ -86,6 +126,85 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + private static void WXYZ(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTL(8, packed) = [Z Y X W] + Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void ZYXW(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed & 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + + Unsafe.Add(ref dBase, i) = tmp1 + tmp3; + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void WZYX(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // REVERSE(packedArgb) = [X Y Z W] + Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void YZWX(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTR(8, packedArgb) = [Y Z W X] + Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); + } + } + [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct @@ -104,7 +223,9 @@ namespace SixLabors.ImageSharp public static class Shuffle { public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3; + public const byte WZYX = (0 << 6) | (1 << 4) | (2 << 2) | 3; public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; + public const byte YZWX = (0 << 6) | (3 << 4) | (2 << 2) | 1; public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; [MethodImpl(InliningOptions.ShortMethod)] diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index e07bcf257..cdb6a86df 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -13,7 +13,9 @@ namespace SixLabors.ImageSharp.Tests.Common new TheoryData { SimdUtils.Shuffle.WXYZ, + SimdUtils.Shuffle.WZYX, SimdUtils.Shuffle.XYZW, + SimdUtils.Shuffle.YZWX, SimdUtils.Shuffle.ZYXW, SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0), SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1), From 956d1a3c77c2a898f67dac2cccc04f526aa17f40 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 28 Oct 2020 00:35:10 +0000 Subject: [PATCH 080/131] Unroll loops --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 105 +++++++++++++++--- .../Common/Helpers/SimdUtils.Shuffle.cs | 2 +- 2 files changed, 88 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index d68e16e23..0ea17c770 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -106,34 +106,72 @@ namespace SixLabors.ImageSharp { if (Avx.IsSupported) { - int n = dest.Length / Vector256.Count; - ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + int n = dest.Length / Vector256.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + + vd0 = Avx.Permute(vs0, control); + Unsafe.Add(ref vd0, 1) = Avx.Permute(Unsafe.Add(ref vs0, 1), control); + Unsafe.Add(ref vd0, 2) = Avx.Permute(Unsafe.Add(ref vs0, 2), control); + Unsafe.Add(ref vd0, 3) = Avx.Permute(Unsafe.Add(ref vs0, 3), control); + } + + if (m > 0) { - Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); + } } } else { // Sse - int n = dest.Length / Vector128.Count; - ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref Vector128 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + int n = dest.Length / Vector128.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) { - Vector128 vs = Unsafe.Add(ref sourceBase, i); - Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); + ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + + vd0 = Sse.Shuffle(vs0, vs0, control); + + Vector128 vs1 = Unsafe.Add(ref vs0, 1); + Unsafe.Add(ref vd0, 1) = Sse.Shuffle(vs1, vs1, control); + + Vector128 vs2 = Unsafe.Add(ref vs0, 2); + Unsafe.Add(ref vd0, 2) = Sse.Shuffle(vs2, vs2, control); + + Vector128 vs3 = Unsafe.Add(ref vs0, 3); + Unsafe.Add(ref vd0, 3) = Sse.Shuffle(vs3, vs3, control); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Vector128 vs = Unsafe.Add(ref sourceBase, i); + Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); + } } } } @@ -146,8 +184,6 @@ namespace SixLabors.ImageSharp { if (Avx2.IsSupported) { - int n = dest.Length / Vector256.Count; - // I've chosen to do this for convenience while we determine what // shuffle controls to add to the library. // We can add static ROS instances if need be in the future. @@ -161,16 +197,32 @@ namespace SixLabors.ImageSharp ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + int n = dest.Length / Vector256.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) { - Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); + + vd0 = Avx2.Shuffle(vs0, vcm); + Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vcm); + Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vcm); + Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + } } } else { // Ssse3 - int n = dest.Length / Vector128.Count; - Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); @@ -181,10 +233,27 @@ namespace SixLabors.ImageSharp ref Vector128 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - for (int i = 0; i < n; i++) + int n = dest.Length / Vector128.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) { - Vector128 vs = Unsafe.Add(ref sourceBase, i); - Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(vs, vcm); + ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); + + vd0 = Ssse3.Shuffle(vs0, vcm); + Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vcm); + Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vcm); + Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + } } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 4d2678320..59b625419 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -229,7 +229,7 @@ namespace SixLabors.ImageSharp public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; [MethodImpl(InliningOptions.ShortMethod)] - public static byte MmShuffle(int p3, int p2, int p1, int p0) + public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); [MethodImpl(InliningOptions.ShortMethod)] From 28dc056d831adcec0d47a332bf638abf5f3ff1f3 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 28 Oct 2020 01:08:53 +0000 Subject: [PATCH 081/131] Fix coverage --- src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs | 2 +- tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 0ea17c770..367df03ec 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -74,7 +74,7 @@ namespace SixLabors.ImageSharp if (Avx2.IsSupported || Ssse3.IsSupported) { int remainder; - if (Avx.IsSupported) + if (Avx2.IsSupported) { remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index cdb6a86df..94298f94c 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp.Tests.Common FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, control, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); } [Theory] From 0f950a1e508b1db74cd8f757e164722ac1e0796a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 28 Oct 2020 19:33:49 +0000 Subject: [PATCH 082/131] Implement new optimized 4 channel shuffle methods. --- .../Argb32.PixelOperations.Generated.cs | 64 ++++++------- .../Bgra32.PixelOperations.Generated.cs | 64 ++++++------- .../Rgba32.PixelOperations.Generated.cs | 64 ++++++------- .../Generated/_Common.ttinclude | 32 +++---- .../PixelFormats/Utils/PixelConverter.cs | 89 +++++++------------ .../PixelConversion_ConvertFromRgba32.cs | 56 ++++-------- ...ConverterTests.ReferenceImplementations.cs | 60 ++++++++----- .../PixelFormats/PixelConverterTests.cs | 67 +++++++------- 8 files changed, 216 insertions(+), 280 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs index 0b1292b64..3f48d2acc 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs @@ -53,66 +53,58 @@ namespace SixLabors.ImageSharp.PixelFormats Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale)); } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgba32(source, dest); } /// - public override void FromRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToArgb32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgra32(source, dest); } /// - public override void FromBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToArgb32(source, dest); } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs index 5bdd10404..8cf2d5850 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs @@ -53,66 +53,58 @@ namespace SixLabors.ImageSharp.PixelFormats Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale)); } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgba32(source, dest); } /// - public override void FromRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgra32(source, dest); } /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToArgb32(source, dest); } /// - public override void FromArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgra32(source, dest); } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs index b05c62f1f..9a36ec29a 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs @@ -42,66 +42,58 @@ namespace SixLabors.ImageSharp.PixelFormats } /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToArgb32(source, dest); } /// - public override void FromArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgba32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgra32(source, dest); } /// - public override void FromBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgba32(source, dest); } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude index 5d56731ba..d8b5286cd 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude @@ -88,35 +88,31 @@ using System.Runtime.InteropServices; { #> /// - public override void To<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=thisPixelType#>> sourcePixels, Span<<#=otherPixelType#>> destinationPixels) + public override void To<#=otherPixelType#>( + Configuration configuration, + ReadOnlySpan<<#=thisPixelType#>> sourcePixels, + Span<<#=otherPixelType#>> destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As<<#=thisPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As<<#=otherPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(sourcePixels); + Span dest = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(destinationPixels); + PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(source, dest); } /// - public override void From<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=otherPixelType#>> sourcePixels, Span<<#=thisPixelType#>> destinationPixels) + public override void From<#=otherPixelType#>( + Configuration configuration, + ReadOnlySpan<<#=otherPixelType#>> sourcePixels, + Span<<#=thisPixelType#>> destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As<<#=otherPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As<<#=thisPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(sourcePixels); + Span dest = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(destinationPixels); + PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(source, dest); } <#+ } diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index 814264084..bc24258c9 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -1,6 +1,7 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using System.Buffers.Binary; using System.Runtime.CompilerServices; @@ -21,88 +22,64 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils public static class FromRgba32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToArgb32(uint packedRgba) - { - // packedRgba = [aa bb gg rr] - // ROTL(8, packedRgba) = [bb gg rr aa] - return (packedRgba << 8) | (packedRgba >> 24); - } + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WXYZ); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToBgra32(uint packedRgba) - { - // packedRgba = [aa bb gg rr] - // tmp1 = [aa 00 gg 00] - // tmp2 = [00 bb 00 rr] - // tmp3=ROTL(16, tmp2) = [00 rr 00 bb] - // tmp1 + tmp3 = [aa rr gg bb] - uint tmp1 = packedRgba & 0xFF00FF00; - uint tmp2 = packedRgba & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - return tmp1 + tmp3; - } + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); } public static class FromArgb32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToRgba32(uint packedArgb) - { - // packedArgb = [bb gg rr aa] - // ROTR(8, packedArgb) = [aa bb gg rr] - return (packedArgb >> 8) | (packedArgb << 24); - } + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.YZWX); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToBgra32(uint packedArgb) - { - // packedArgb = [bb gg rr aa] - // REVERSE(packedArgb) = [aa rr gg bb] - return BinaryPrimitives.ReverseEndianness(packedArgb); - } + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); } public static class FromBgra32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToArgb32(uint packedBgra) - { - // packedBgra = [aa rr gg bb] - // REVERSE(packedBgra) = [bb gg rr aa] - return BinaryPrimitives.ReverseEndianness(packedBgra); - } + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToRgba32(uint packedBgra) - { - // packedRgba = [aa rr gg bb] - // tmp1 = [aa 00 gg 00] - // tmp2 = [00 rr 00 bb] - // tmp3=ROTL(16, tmp2) = [00 bb 00 rr] - // tmp1 + tmp3 = [aa bb gg rr] - uint tmp1 = packedBgra & 0xFF00FF00; - uint tmp2 = packedBgra & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - return tmp1 + tmp3; - } + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); } } -} \ No newline at end of file +} diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs index 7d6c2efed..a933f890f 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs @@ -168,49 +168,27 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion [Benchmark] public void PixelConverter_Rgba32_ToArgb32() { - ref uint sBase = ref Unsafe.As(ref this.PermutedRunnerRgbaToArgb.Source[0]); - ref uint dBase = ref Unsafe.As(ref this.PermutedRunnerRgbaToArgb.Dest[0]); + Span source = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Source); + Span dest = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Dest); - for (int i = 0; i < this.Count; i++) - { - uint s = Unsafe.Add(ref sBase, i); - Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s); - } - } - - [Benchmark] - public void PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer() - { - Span source = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Source); - Span dest = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Dest); - source.CopyTo(dest); - - ref uint dBase = ref MemoryMarshal.GetReference(dest); - - for (int i = 0; i < this.Count; i++) - { - uint s = Unsafe.Add(ref dBase, i); - Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s); - } + PixelConverter.FromRgba32.ToArgb32(source, dest); } /* RESULTS: - Method | Count | Mean | Error | StdDev | Scaled | ScaledSD | - ---------------------------------------------------------- |------ |-----------:|-----------:|-----------:|-------:|---------:| - ByRef | 256 | 328.7 ns | 6.6141 ns | 6.1868 ns | 1.00 | 0.00 | - ByVal | 256 | 322.0 ns | 4.3541 ns | 4.0728 ns | 0.98 | 0.02 | - FromBytes | 256 | 321.5 ns | 3.3499 ns | 3.1335 ns | 0.98 | 0.02 | - InlineShuffle | 256 | 330.7 ns | 4.2525 ns | 3.9778 ns | 1.01 | 0.02 | - PixelConverter_Rgba32_ToArgb32 | 256 | 167.4 ns | 0.6357 ns | 0.5309 ns | 0.51 | 0.01 | - PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 256 | 196.6 ns | 0.8929 ns | 0.7915 ns | 0.60 | 0.01 | - | | | | | | | - ByRef | 2048 | 2,534.4 ns | 8.2947 ns | 6.9265 ns | 1.00 | 0.00 | - ByVal | 2048 | 2,638.5 ns | 52.6843 ns | 70.3320 ns | 1.04 | 0.03 | - FromBytes | 2048 | 2,517.2 ns | 40.8055 ns | 38.1695 ns | 0.99 | 0.01 | - InlineShuffle | 2048 | 2,546.5 ns | 21.2506 ns | 19.8778 ns | 1.00 | 0.01 | - PixelConverter_Rgba32_ToArgb32 | 2048 | 1,265.7 ns | 5.1397 ns | 4.5562 ns | 0.50 | 0.00 | - PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 2048 | 1,410.3 ns | 11.1939 ns | 9.9231 ns | 0.56 | 0.00 | - */ + | Method | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | + |------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:| + | ByRef | 256 | 288.84 ns | 19.601 ns | 52.319 ns | 268.10 ns | 1.00 | 0.00 | + | ByVal | 256 | 267.97 ns | 1.831 ns | 1.713 ns | 267.85 ns | 0.77 | 0.18 | + | FromBytes | 256 | 266.81 ns | 2.427 ns | 2.270 ns | 266.47 ns | 0.76 | 0.18 | + | InlineShuffle | 256 | 291.41 ns | 5.820 ns | 5.444 ns | 290.17 ns | 0.83 | 0.19 | + | PixelConverter_Rgba32_ToArgb32 | 256 | 38.62 ns | 0.431 ns | 0.403 ns | 38.68 ns | 0.11 | 0.03 | + | | | | | | | | | + | ByRef | 2048 | 2,197.69 ns | 15.826 ns | 14.804 ns | 2,197.25 ns | 1.00 | 0.00 | + | ByVal | 2048 | 2,226.81 ns | 44.266 ns | 62.054 ns | 2,197.17 ns | 1.03 | 0.04 | + | FromBytes | 2048 | 2,181.35 ns | 18.033 ns | 16.868 ns | 2,185.97 ns | 0.99 | 0.01 | + | InlineShuffle | 2048 | 2,233.10 ns | 27.673 ns | 24.531 ns | 2,229.78 ns | 1.02 | 0.01 | + | PixelConverter_Rgba32_ToArgb32 | 2048 | 139.90 ns | 2.152 ns | 3.825 ns | 138.70 ns | 0.06 | 0.00 | + */ } } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs index 6fda9dbba..9d0d09a98 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs @@ -13,34 +13,49 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats { public static class ReferenceImplementations { - public static Rgba32 MakeRgba32(byte r, byte g, byte b, byte a) + public static byte[] MakeRgba32ByteArray(byte r, byte g, byte b, byte a) { - Rgba32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = r; + buffer[i + 1] = g; + buffer[i + 2] = b; + buffer[i + 3] = a; + } + + return buffer; } - public static Argb32 MakeArgb32(byte r, byte g, byte b, byte a) + public static byte[] MakeArgb32ByteArray(byte r, byte g, byte b, byte a) { - Argb32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = a; + buffer[i + 1] = r; + buffer[i + 2] = g; + buffer[i + 3] = b; + } + + return buffer; } - public static Bgra32 MakeBgra32(byte r, byte g, byte b, byte a) + public static byte[] MakeBgra32ByteArray(byte r, byte g, byte b, byte a) { - Bgra32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = b; + buffer[i + 1] = g; + buffer[i + 2] = r; + buffer[i + 3] = a; + } + + return buffer; } internal static void To( @@ -83,8 +98,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats if (typeof(TDestinationPixel) == typeof(L8)) { - ref L8 l8Ref = ref MemoryMarshal.GetReference( - MemoryMarshal.Cast(destinationPixels)); + ref L8 l8Ref = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(destinationPixels)); for (int i = 0; i < count; i++) { ref TSourcePixel sp = ref Unsafe.Add(ref sourceRef, i); diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs index 3de6804dc..6eed875f3 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats.Utils; @@ -33,30 +34,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToArgb32(byte r, byte g, byte b, byte a) { - Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromRgba32.ToArgb32(s.PackedValue); + PixelConverter.FromRgba32.ToArgb32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToBgra32(byte r, byte g, byte b, byte a) { - Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromRgba32.ToBgra32(s.PackedValue); + PixelConverter.FromRgba32.ToBgra32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } @@ -66,30 +65,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToRgba32(byte r, byte g, byte b, byte a) { - Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromArgb32.ToRgba32(s.PackedValue); + PixelConverter.FromArgb32.ToRgba32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToBgra32(byte r, byte g, byte b, byte a) { - Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromArgb32.ToBgra32(s.PackedValue); + PixelConverter.FromArgb32.ToBgra32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } @@ -99,30 +96,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToArgb32(byte r, byte g, byte b, byte a) { - Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromBgra32.ToArgb32(s.PackedValue); + PixelConverter.FromBgra32.ToArgb32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToRgba32(byte r, byte g, byte b, byte a) { - Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromBgra32.ToRgba32(s.PackedValue); + PixelConverter.FromBgra32.ToRgba32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } } From aa20c09c4896738ba1df05ace362c2d08f64854d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 28 Oct 2020 22:07:54 +0000 Subject: [PATCH 083/131] Update based on feedback --- .../Common/Helpers/IComponentShuffle.cs | 165 ++++++++++++++++++ .../Common/Helpers/SimdUtils.Shuffle.cs | 142 +-------------- .../PixelFormats/Utils/PixelConverter.cs | 13 +- .../Color/Bulk/ShuffleByte4Channel.cs | 2 +- .../Color/Bulk/ShuffleFloat4Channel.cs | 2 +- .../Config.HwIntrinsics.cs | 11 +- .../Common/SimdUtilsTests.Shuffle.cs | 93 ++++++---- 7 files changed, 245 insertions(+), 183 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/IComponentShuffle.cs diff --git a/src/ImageSharp/Common/Helpers/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/IComponentShuffle.cs new file mode 100644 index 000000000..e354a57b0 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/IComponentShuffle.cs @@ -0,0 +1,165 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers.Binary; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + /// Defines the contract for methods that allow the shuffling of pixel components. + /// Used for shuffling on platforms that do not support Hardware Intrinsics. + /// + internal interface IComponentShuffle + { + /// + /// Gets the shuffle control. + /// + byte Control { get; } + + /// + /// Shuffle 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + void RunFallbackShuffle(ReadOnlySpan source, Span dest); + } + + internal readonly struct DefaultShuffle4 : IComponentShuffle + { + public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) + : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0)) + { + } + + public DefaultShuffle4(byte control) => this.Control = control; + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + SimdUtils.Shuffle.InverseMmShuffle( + this.Control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + } + + internal readonly struct WXYZShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) + // and ROTR (Rotate Right) emitting efficient CPU instructions: + // https://github.com/dotnet/coreclr/pull/1830 + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTL(8, packed) = [Z Y X W] + Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); + } + } + } + + internal readonly struct WZYXShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // REVERSE(packedArgb) = [X Y Z W] + Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); + } + } + } + + internal readonly struct YZWXShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTR(8, packedArgb) = [Y Z W X] + Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); + } + } + } + + internal readonly struct ZYXWShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed & 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + + Unsafe.Add(ref dBase, i) = tmp1 + tmp3; + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 59b625419..febb31c2f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Buffers.Binary; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -40,34 +39,32 @@ namespace SixLabors.ImageSharp } /// - /// Shuffle 8-bit integers in a within 128-bit lanes in + /// Shuffle 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// /// The source span of bytes. /// The destination span of bytes. - /// The byte control. + /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Channel( + public static void Shuffle4Channel( ReadOnlySpan source, Span dest, - byte control) + TShuffle shuffle) + where TShuffle : struct, IComponentShuffle { VerifyShuffleSpanInput(source, dest); - // TODO: There doesn't seem to be any APIs for - // System.Numerics that allow shuffling. #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); + HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: if (source.Length > 0) { - ShuffleRemainder4Channel(source, dest, control); + shuffle.RunFallbackShuffle(source, dest); } } - [MethodImpl(InliningOptions.ColdPath)] public static void ShuffleRemainder4Channel( ReadOnlySpan source, Span dest, @@ -86,125 +83,6 @@ namespace SixLabors.ImageSharp } } - [MethodImpl(InliningOptions.ColdPath)] - public static void ShuffleRemainder4Channel( - ReadOnlySpan source, - Span dest, - byte control) - { -#if NETCOREAPP - // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) - // and ROTR (Rotate Right) emitting efficient CPU instructions: - // https://github.com/dotnet/coreclr/pull/1830 - switch (control) - { - case Shuffle.WXYZ: - WXYZ(source, dest); - return; - case Shuffle.WZYX: - WZYX(source, dest); - return; - case Shuffle.YZWX: - YZWX(source, dest); - return; - case Shuffle.ZYXW: - ZYXW(source, dest); - return; - } -#endif - - ref byte sBase = ref MemoryMarshal.GetReference(source); - ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); - - for (int i = 0; i < source.Length; i += 4) - { - Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); - Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); - Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); - Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void WXYZ(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // ROTL(8, packed) = [Z Y X W] - Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void ZYXW(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // tmp1 = [W 0 Y 0] - // tmp2 = [0 Z 0 X] - // tmp3=ROTL(16, tmp2) = [0 X 0 Z] - // tmp1 + tmp3 = [W X Y Z] - uint tmp1 = packed & 0xFF00FF00; - uint tmp2 = packed & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - - Unsafe.Add(ref dBase, i) = tmp1 + tmp3; - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void WZYX(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // REVERSE(packedArgb) = [X Y Z W] - Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void YZWX(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // ROTR(8, packedArgb) = [Y Z W X] - Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); - } - } - [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct @@ -222,12 +100,6 @@ namespace SixLabors.ImageSharp public static class Shuffle { - public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3; - public const byte WZYX = (0 << 6) | (1 << 4) | (2 << 2) | 3; - public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; - public const byte YZWX = (0 << 6) | (3 << 4) | (2 << 2) | 1; - public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; - [MethodImpl(InliningOptions.ShortMethod)] public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index bc24258c9..ab9011a5c 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Buffers.Binary; using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.PixelFormats.Utils @@ -28,7 +27,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WXYZ); + => SimdUtils.Shuffle4Channel(source, dest, default); /// /// Converts a representing a collection of @@ -37,7 +36,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); + => SimdUtils.Shuffle4Channel(source, dest, default); } public static class FromArgb32 @@ -49,7 +48,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.YZWX); + => SimdUtils.Shuffle4Channel(source, dest, default); /// /// Converts a representing a collection of @@ -58,7 +57,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); + => SimdUtils.Shuffle4Channel(source, dest, default); } public static class FromBgra32 @@ -70,7 +69,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); + => SimdUtils.Shuffle4Channel(source, dest, default); /// /// Converts a representing a collection of @@ -79,7 +78,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); + => SimdUtils.Shuffle4Channel(source, dest, default); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index c45b103e3..bd4a8d534 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + SimdUtils.Shuffle4Channel(this.source, this.destination, default); } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 36b9591d9..04c6dbf21 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + SimdUtils.Shuffle4Channel(this.source, this.destination, default(WXYZShuffle4).Control); } } diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs index e8a06bf24..eacd36799 100644 --- a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -58,6 +58,12 @@ namespace SixLabors.ImageSharp.Benchmarks { public HwIntrinsics_SSE_AVX() { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables( + new EnvironmentVariable(EnableHWIntrinsic, Off), + new EnvironmentVariable(FeatureSIMD, Off)) + .WithId("No HwIntrinsics")); + #if SUPPORTS_RUNTIME_INTRINSICS if (Avx.IsSupported) { @@ -72,11 +78,6 @@ namespace SixLabors.ImageSharp.Benchmarks .WithId("SSE")); } #endif - this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) - .WithEnvironmentVariables( - new EnvironmentVariable(EnableHWIntrinsic, Off), - new EnvironmentVariable(FeatureSIMD, Off)) - .WithId("No HwIntrinsics")); } } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 94298f94c..06f61e617 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -9,66 +9,91 @@ namespace SixLabors.ImageSharp.Tests.Common { public partial class SimdUtilsTests { - public static readonly TheoryData ShuffleControls = - new TheoryData - { - SimdUtils.Shuffle.WXYZ, - SimdUtils.Shuffle.WZYX, - SimdUtils.Shuffle.XYZW, - SimdUtils.Shuffle.YZWX, - SimdUtils.Shuffle.ZYXW, - SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0), - SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1), - SimdUtils.Shuffle.MmShuffle(3, 3, 3, 3) - }; - [Theory] - [MemberData(nameof(ShuffleControls))] - public void BulkShuffleFloat4Channel(byte control) + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleFloat4Channel(int count) { static void RunTest(string serialized) { - byte ctrl = FeatureTestRunner.Deserialize(serialized); - foreach (var item in ArraySizesDivisibleBy4) - { - foreach (var count in item) - { - TestShuffleFloat4Channel( - (int)count, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), - ctrl); - } - } + // No need to test multiple shuffle controls as the + // pipeline is always the same. + int size = FeatureTestRunner.Deserialize(serialized); + byte control = default(WZYXShuffle4).Control; + + TestShuffleFloat4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control), + control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - control, + count, HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); } [Theory] - [MemberData(nameof(ShuffleControls))] - public void BulkShuffleByte4Channel(byte control) + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleByte4Channel(int count) { static void RunTest(string serialized) { - byte ctrl = FeatureTestRunner.Deserialize(serialized); + int size = FeatureTestRunner.Deserialize(serialized); foreach (var item in ArraySizesDivisibleBy4) { + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IComponentShuffle to the generic utils method. foreach (var count in item) { + WXYZShuffle4 wxyz = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz), + wxyz.Control); + + WZYXShuffle4 wzyx = default; TestShuffleByte4Channel( - (int)count, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), - ctrl); + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx), + wzyx.Control); + + YZWXShuffle4 yzwx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx), + yzwx.Control); + + ZYXWShuffle4 zyxw = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw), + zyxw.Control); + + var xwyz = new DefaultShuffle4(2, 1, 3, 0); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4(1, 1, 1, 1); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4(3, 3, 3, 3); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww), + wwww.Control); } } } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - control, + count, HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } From cdc1c0fce57544bae85a4f9766fcb3403976ed1a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 29 Oct 2020 01:48:46 +0000 Subject: [PATCH 084/131] Fix benchmarks, cleanup. --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 24 ++------ .../Common/Helpers/SimdUtils.Shuffle.cs | 2 - .../Color/Bulk/ShuffleByte4Channel.cs | 57 +++++++++--------- .../Color/Bulk/ShuffleFloat4Channel.cs | 60 +++++++++---------- .../Config.HwIntrinsics.cs | 6 +- 5 files changed, 67 insertions(+), 82 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 367df03ec..782328edd 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -33,15 +33,9 @@ namespace SixLabors.ImageSharp { if (Avx.IsSupported || Sse.IsSupported) { - int remainder; - if (Avx.IsSupported) - { - remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); - } - else - { - remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); - } + int remainder = Avx.IsSupported + ? ImageMaths.ModuloP2(source.Length, Vector256.Count) + : ImageMaths.ModuloP2(source.Length, Vector128.Count); int adjustedCount = source.Length - remainder; @@ -73,15 +67,9 @@ namespace SixLabors.ImageSharp { if (Avx2.IsSupported || Ssse3.IsSupported) { - int remainder; - if (Avx2.IsSupported) - { - remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); - } - else - { - remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); - } + int remainder = Avx2.IsSupported + ? ImageMaths.ModuloP2(source.Length, Vector256.Count) + : ImageMaths.ModuloP2(source.Length, Vector128.Count); int adjustedCount = source.Length - remainder; diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index febb31c2f..a4a40fb4f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -25,8 +25,6 @@ namespace SixLabors.ImageSharp { VerifyShuffleSpanInput(source, dest); - // TODO: There doesn't seem to be any APIs for - // System.Numerics that allow shuffling. #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); #endif diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index bd4a8d534..749859eac 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -30,39 +30,38 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk } } - // 2020-10-26 + // 2020-10-29 // ########## // // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) - // Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores - // .NET Core SDK = 5.0.100-rc.2.20479.15 - // - // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |---------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Shuffle4Channel | AVX | Empty | 128 | 20.51 ns | 0.270 ns | 0.211 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.00 ns | 0.991 ns | 0.927 ns | 3.08 | 0.06 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 17.25 ns | 0.066 ns | 0.058 ns | 0.84 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 256 | 24.57 ns | 0.248 ns | 0.219 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 124.55 ns | 2.501 ns | 2.456 ns | 5.06 | 0.10 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 21.80 ns | 0.094 ns | 0.088 ns | 0.89 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 512 | 28.51 ns | 0.130 ns | 0.115 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 256.52 ns | 1.424 ns | 1.332 ns | 9.00 | 0.07 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 29.72 ns | 0.217 ns | 0.203 ns | 1.04 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 1024 | 36.40 ns | 0.357 ns | 0.334 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 492.71 ns | 1.498 ns | 1.251 ns | 13.52 | 0.12 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 44.71 ns | 0.264 ns | 0.234 ns | 1.23 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 2048 | 59.38 ns | 0.180 ns | 0.159 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 975.05 ns | 2.043 ns | 1.811 ns | 16.42 | 0.05 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 81.83 ns | 0.212 ns | 0.198 ns | 1.38 | 0.01 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 04c6dbf21..6f5b5001b 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -10,6 +10,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class ShuffleFloat4Channel { + private static readonly byte control = default(WXYZShuffle4).Control; private float[] source; private float[] destination; @@ -26,43 +27,42 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, default(WXYZShuffle4).Control); + SimdUtils.Shuffle4Channel(this.source, this.destination, control); } } - // 2020-10-26 + // 2020-10-29 // ########## // // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) - // Intel Core i7-8650U CPU 1.90GHz(Kaby Lake R), 1 CPU, 8 logical and 4 physical cores - // .NET Core SDK = 5.0.100-rc.2.20479.15 - // - // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT - // SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |---------------- |-------------------------------------------------- |------ |------------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Shuffle4Channel | AVX | Empty | 128 | 14.49 ns | 0.244 ns | 0.217 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 87.74 ns | 0.524 ns | 0.490 ns | 6.06 | 0.09 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 128 | 23.65 ns | 0.101 ns | 0.094 ns | 1.63 | 0.03 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 256 | 25.87 ns | 0.492 ns | 0.673 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 159.52 ns | 0.901 ns | 0.843 ns | 6.12 | 0.12 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 256 | 45.47 ns | 0.404 ns | 0.378 ns | 1.75 | 0.03 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 512 | 49.51 ns | 0.088 ns | 0.083 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 297.96 ns | 0.926 ns | 0.821 ns | 6.02 | 0.02 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 512 | 90.77 ns | 0.191 ns | 0.169 ns | 1.83 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 1024 | 113.09 ns | 1.913 ns | 3.090 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 604.58 ns | 1.464 ns | 1.298 ns | 5.29 | 0.18 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 1024 | 179.44 ns | 0.208 ns | 0.184 ns | 1.57 | 0.05 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | AVX | Empty | 2048 | 217.95 ns | 1.314 ns | 1.165 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 1,152.04 ns | 3.941 ns | 3.494 ns | 5.29 | 0.03 | - | - | - | - | - // | Shuffle4Channel | SSE | COMPlus_EnableAVX=0 | 2048 | 349.52 ns | 0.587 ns | 0.520 ns | 1.60 | 0.01 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 9.818 ns | 0.1457 ns | 0.1292 ns | 0.15 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 15.878 ns | 0.1983 ns | 0.1758 ns | 0.13 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 29.452 ns | 0.3334 ns | 0.3118 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 53.757 ns | 0.3212 ns | 0.2847 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - | } diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs index eacd36799..5ceb4c8a0 100644 --- a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -62,20 +62,20 @@ namespace SixLabors.ImageSharp.Benchmarks .WithEnvironmentVariables( new EnvironmentVariable(EnableHWIntrinsic, Off), new EnvironmentVariable(FeatureSIMD, Off)) - .WithId("No HwIntrinsics")); + .WithId("1. No HwIntrinsics").AsBaseline()); #if SUPPORTS_RUNTIME_INTRINSICS if (Avx.IsSupported) { this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) - .WithId("AVX").AsBaseline()); + .WithId("2. AVX")); } if (Sse.IsSupported) { this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) .WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off)) - .WithId("SSE")); + .WithId("3. SSE")); } #endif } From 33df55aef25562c576568992a1ef439f81acfb4f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 29 Oct 2020 15:21:52 +0000 Subject: [PATCH 085/131] Don't use Linq and test for common path first. --- src/ImageSharp/Image{TPixel}.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Image{TPixel}.cs b/src/ImageSharp/Image{TPixel}.cs index 255193c8e..83ecc3753 100644 --- a/src/ImageSharp/Image{TPixel}.cs +++ b/src/ImageSharp/Image{TPixel}.cs @@ -201,14 +201,14 @@ namespace SixLabors.ImageSharp public bool TryGetSinglePixelSpan(out Span span) { IMemoryGroup mg = this.GetPixelMemoryGroup(); - if (mg.Count > 1) + if (mg.Count == 1) { - span = default; - return false; + span = mg[0].Span; + return true; } - span = mg.Single().Span; - return true; + span = default; + return false; } /// From c4f7f67fd5d18ccb8699392a6cdbf192990fe294 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 30 Oct 2020 20:38:46 +0000 Subject: [PATCH 086/131] Initial 3padshuffle4 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 84 +++++++++++++++++-- .../Common/Helpers/SimdUtils.Shuffle.cs | 63 ++++++++++++-- .../PixelFormats/Utils/PixelConverter.cs | 12 +-- .../Color/Bulk/Pad3Shuffle4Channel.cs | 67 +++++++++++++++ .../Color/Bulk/ShuffleByte4Channel.cs | 2 +- .../Color/Bulk/ShuffleFloat4Channel.cs | 2 +- .../Common/SimdUtilsTests.Shuffle.cs | 71 ++++++++++++++-- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 2 + 8 files changed, 274 insertions(+), 29 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 782328edd..8a0b5460c 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4ChannelReduce( + public static void Shuffle4Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -41,7 +41,7 @@ namespace SixLabors.ImageSharp if (adjustedCount > 0) { - Shuffle4Channel( + Shuffle4( source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount), control); @@ -53,14 +53,14 @@ namespace SixLabors.ImageSharp } /// - /// Shuffle 8-bit integers in a within 128-bit lanes in + /// Shuffle 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// /// The source span of bytes. /// The destination span of bytes. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4ChannelReduce( + public static void Shuffle4Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -75,7 +75,7 @@ namespace SixLabors.ImageSharp if (adjustedCount > 0) { - Shuffle4Channel( + Shuffle4( source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount), control); @@ -86,8 +86,41 @@ namespace SixLabors.ImageSharp } } + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static unsafe void Pad3Shuffle4Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + + int adjustedCount = source.Length - remainder; + int sourceSlice = (int)(adjustedCount * (3 / 4F)); + + if (adjustedCount > 0) + { + Pad3Shuffle4( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(sourceSlice); + dest = dest.Slice(adjustedCount); + } + } + } + [MethodImpl(InliningOptions.ShortMethod)] - private static void Shuffle4Channel( + private static void Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -165,7 +198,7 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ShortMethod)] - private static void Shuffle4Channel( + private static void Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -246,6 +279,43 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + private static unsafe void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + Vector128 wMask = Vector128.Create(0xff000000u).AsByte(); + Vector128 padMask = Vector128.Create(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + fixed (byte* sBase = &source.GetPinnableReference()) + fixed (byte* dBase = &dest.GetPinnableReference()) + { + byte* s = sBase; + byte* d = dBase; + + // TODO: Consider unrolling and shuffling 4 at a time using Ssse3.AlignRight + // See https://stackoverflow.com/questions/2973708/fast-24-bit-array-32-bit-array-conversion + for (int i = 0; i < source.Length; i += 16) + { + Vector128 vs0 = Sse2.LoadVector128(s); + Vector128 val = Sse2.Or(wMask, Ssse3.Shuffle(vs0, padMask)); + val = Ssse3.Shuffle(val, vcm); + Sse2.Store(d, val); + + s += 12; + d += 16; + } + } + } + } + /// /// Performs a multiplication and an addition of the . /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index a4a40fb4f..81d77d655 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Channel( + public static void Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -26,13 +26,13 @@ namespace SixLabors.ImageSharp VerifyShuffleSpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control); #endif // Deal with the remainder: if (source.Length > 0) { - ShuffleRemainder4Channel(source, dest, control); + Shuffle4Remainder(source, dest, control); } } @@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp /// The destination span of bytes. /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Channel( + public static void Shuffle4( ReadOnlySpan source, Span dest, TShuffle shuffle) @@ -53,7 +53,7 @@ namespace SixLabors.ImageSharp VerifyShuffleSpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control); + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: @@ -63,7 +63,26 @@ namespace SixLabors.ImageSharp } } - public static void ShuffleRemainder4Channel( + [MethodImpl(InliningOptions.ShortMethod)] + public static void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyPadShuffleSpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + Pad3Shuffle4Remainder(source, dest, control); + } + } + + public static void Shuffle4Remainder( ReadOnlySpan source, Span dest, byte control) @@ -81,6 +100,24 @@ namespace SixLabors.ImageSharp } } + public static void Pad3Shuffle4Remainder( + ReadOnlySpan source, + Span dest, + byte control) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + + for (int i = 0, j = 0; i < dest.Length; i += 4, j += 3) + { + Unsafe.Add(ref dBase, p0 + i) = Unsafe.Add(ref sBase, j); + Unsafe.Add(ref dBase, p1 + i) = Unsafe.Add(ref sBase, j + 1); + Unsafe.Add(ref dBase, p2 + i) = Unsafe.Add(ref sBase, j + 2); + Unsafe.Add(ref dBase, p3 + i) = byte.MaxValue; + } + } + [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct @@ -96,6 +133,20 @@ namespace SixLabors.ImageSharp "Input spans must be divisiable by 4!"); } + [Conditional("DEBUG")] + private static void VerifyPadShuffleSpanInput(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue( + source.Length == (int)(dest.Length * 3 / 4F), + nameof(source), + "Input spans must be 3/4 the length of the output span!"); + + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input spans must be divisiable by 3!"); + } + public static class Shuffle { [MethodImpl(InliningOptions.ShortMethod)] diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index ab9011a5c..5afd369be 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); /// /// Converts a representing a collection of @@ -36,7 +36,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); } public static class FromArgb32 @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); /// /// Converts a representing a collection of @@ -57,7 +57,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); } public static class FromBgra32 @@ -69,7 +69,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); /// /// Converts a representing a collection of @@ -78,7 +78,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs new file mode 100644 index 000000000..c529b2af1 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -0,0 +1,67 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Pad3Shuffle4Channel + { + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4(this.source, this.destination, default); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index 749859eac..db4947001 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, default); + SimdUtils.Shuffle4(this.source, this.destination, default); } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 6f5b5001b..4a2512fea 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, control); + SimdUtils.Shuffle4(this.source, this.destination, control); } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 06f61e617..1c456e5a2 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Common TestShuffleFloat4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, control), control); } @@ -49,43 +49,43 @@ namespace SixLabors.ImageSharp.Tests.Common WXYZShuffle4 wxyz = default; TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), wxyz.Control); WZYXShuffle4 wzyx = default; TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), wzyx.Control); YZWXShuffle4 yzwx = default; TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), yzwx.Control); ZYXWShuffle4 zyxw = default; TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), zyxw.Control); var xwyz = new DefaultShuffle4(2, 1, 3, 0); TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); var yyyy = new DefaultShuffle4(1, 1, 1, 1); TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); var wwww = new DefaultShuffle4(3, 3, 3, 3); TestShuffleByte4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), wwww.Control); } } @@ -97,6 +97,29 @@ namespace SixLabors.ImageSharp.Tests.Common HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkPad3Shuffle4Channel(int count) + { + static void RunTest(string serialized) + { + // No need to test multiple shuffle controls as the + // pipeline is always the same. + int size = FeatureTestRunner.Deserialize(serialized); + byte control = default(WZYXShuffle4).Control; + + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, control), + control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + } + private static void TestShuffleFloat4Channel( int count, Action, Memory> convert, @@ -157,5 +180,37 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(expected, result); } + + private static void TestPad3Shuffle4Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new byte[(int)(count * (4 / 3F))]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3) + { + expected[p0 + i] = source[j]; + expected[p1 + i] = source[j + 1]; + expected[p2 + i] = source[j + 2]; + expected[p3 + i] = byte.MaxValue; + } + + convert(source, result); + + Assert.Equal(expected, result); + } } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index bddadff4d..fe432107a 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -163,6 +163,8 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArraySizesDivisibleBy8 = new TheoryData { 0, 8, 16, 1024 }; public static readonly TheoryData ArraySizesDivisibleBy4 = new TheoryData { 0, 4, 8, 28, 1020 }; + public static readonly TheoryData ArraySizesDivisibleBy3 = new TheoryData { 0, 3, 9, 36, 957 }; + public static readonly TheoryData ArraySizesDivisibleBy32 = new TheoryData { 0, 32, 512 }; From aec8e5d690fa7ccb38fce72acc8d0bd9b26d9758 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 30 Oct 2020 23:03:47 +0000 Subject: [PATCH 087/131] Add Shuffle4Slice3 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 77 +++++++++++++++++++ .../Common/Helpers/SimdUtils.Shuffle.cs | 72 +++++++++++++++-- .../Color/Bulk/Pad3Shuffle4Channel.cs | 47 ++++++----- .../Color/Bulk/Shuffle4Slice3Channel.cs | 68 ++++++++++++++++ .../Color/Bulk/ShuffleFloat4Channel.cs | 4 +- .../Common/SimdUtilsTests.Shuffle.cs | 64 +++++++++++++++ 6 files changed, 299 insertions(+), 33 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 8a0b5460c..29b569a80 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -119,6 +119,39 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static unsafe void Shuffle4Slice3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = ImageMaths.ModuloP2(dest.Length, Vector128.Count); + + int adjustedCount = dest.Length - remainder; + int destSlice = (int)(adjustedCount * (3 / 4F)); + + if (adjustedCount > 0) + { + Shuffle4Slice3( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(destSlice); + } + } + } + [MethodImpl(InliningOptions.ShortMethod)] private static void Shuffle4( ReadOnlySpan source, @@ -316,6 +349,50 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + private static unsafe void Shuffle4Slice3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + Vector128 sliceMask = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, -1, -1, -1, -1).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + // var control = MmShuffle(3, 0, 1, 2); + // Span bytes = stackalloc byte[Vector128.Count]; + // MmShuffleSpan(ref bytes, control); + // Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + // + // Vector128 s0 = Vector128.Create((byte)1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1).Dump("s0"); + // Vector128 padded = Ssse3.Shuffle(s0, padMask).Dump("padded"); + // + // padded = Sse3.Or(Vector128.Create(0xff000000u).AsByte(), padded).Dump("0r"); + // + // var shuffled = Ssse3.Shuffle(padded, vcm).Dump("shuffled"); + // var d0 = Ssse3.Shuffle(shuffled, sliceMask).Dump("d0"); + fixed (byte* sBase = &source.GetPinnableReference()) + fixed (byte* dBase = &dest.GetPinnableReference()) + { + byte* s = sBase; + byte* d = dBase; + + for (int i = 0; i < source.Length; i += 16) + { + Vector128 vs0 = Ssse3.Shuffle(Sse2.LoadVector128(s), vcm); + Sse2.Store(d, Ssse3.Shuffle(vs0, sliceMask)); + + s += 16; + d += 12; + } + } + } + } + /// /// Performs a multiplication and an addition of the . /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 81d77d655..f3946361b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -69,7 +69,7 @@ namespace SixLabors.ImageSharp Span dest, byte control) { - VerifyPadShuffleSpanInput(source, dest); + VerifyPad3Shuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, control); @@ -82,6 +82,25 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Slice3( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyShuffle4Slice3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + Shuffle4Slice3Remainder(source, dest, control); + } + } + public static void Shuffle4Remainder( ReadOnlySpan source, Span dest, @@ -118,6 +137,23 @@ namespace SixLabors.ImageSharp } } + public static void Shuffle4Slice3Remainder( + ReadOnlySpan source, + Span dest, + byte control) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int _, out int p2, out int p1, out int p0); + + for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j); + } + } + [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct @@ -130,21 +166,45 @@ namespace SixLabors.ImageSharp DebugGuard.IsTrue( source.Length % 4 == 0, nameof(source), - "Input spans must be divisiable by 4!"); + "Input spans must be divisable by 4!"); } [Conditional("DEBUG")] - private static void VerifyPadShuffleSpanInput(ReadOnlySpan source, Span dest) + private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan source, Span dest) { + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input span must be divisable by 3!"); + + DebugGuard.IsTrue( + dest.Length % 4 == 0, + nameof(dest), + "Output span must be divisable by 4!"); + DebugGuard.IsTrue( source.Length == (int)(dest.Length * 3 / 4F), nameof(source), - "Input spans must be 3/4 the length of the output span!"); + "Input span must be 3/4 the length of the output span!"); + } + [Conditional("DEBUG")] + private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan source, Span dest) + { DebugGuard.IsTrue( - source.Length % 3 == 0, + source.Length % 4 == 0, + nameof(source), + "Input span must be divisable by 4!"); + + DebugGuard.IsTrue( + dest.Length % 3 == 0, + nameof(dest), + "Output span must be divisable by 3!"); + + DebugGuard.IsTrue( + source.Length == (int)(dest.Length * 4 / 3F), nameof(source), - "Input spans must be divisiable by 3!"); + "Output span must be 3/4 the length of the input span!"); } public static class Shuffle diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs index c529b2af1..8286fea0e 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -9,6 +9,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Pad3Shuffle4Channel { + private static readonly byte Control = default(WXYZShuffle4).Control; private byte[] source; private byte[] destination; @@ -17,20 +18,20 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { this.source = new byte[this.Count]; new Random(this.Count).NextBytes(this.source); - this.destination = new byte[this.Count]; + this.destination = new byte[(int)(this.Count * (4 / 3F))]; } [Params(96, 384, 768, 1536)] public int Count { get; set; } [Benchmark] - public void Shuffle4Channel() + public void Pad3Shuffle4() { - SimdUtils.Shuffle4(this.source, this.destination, default); + SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); } } - // 2020-10-29 + // 2020-10-30 // ########## // // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) @@ -43,25 +44,21 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 62.91 ns | 1.240 ns | 1.569 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 44.34 ns | 0.371 ns | 0.329 ns | 0.70 | 0.02 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 44.46 ns | 0.617 ns | 0.515 ns | 0.70 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 247.93 ns | 2.640 ns | 2.470 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 92.91 ns | 1.204 ns | 1.127 ns | 0.37 | 0.01 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 91.42 ns | 1.234 ns | 1.094 ns | 0.37 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 444.79 ns | 5.094 ns | 4.254 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 162.92 ns | 1.046 ns | 0.873 ns | 0.37 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 166.22 ns | 1.728 ns | 1.443 ns | 0.37 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 882.51 ns | 6.936 ns | 5.792 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 309.72 ns | 3.777 ns | 3.533 ns | 0.35 | 0.01 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 323.18 ns | 4.079 ns | 3.816 ns | 0.37 | 0.00 | - | - | - | - | } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs new file mode 100644 index 000000000..b64379959 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle4Slice3Channel + { + private static readonly byte Control = default(WXYZShuffle4).Control; + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[(int)(this.Count * (3 / 4F))]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Slice3() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 50.09 ns | 1.018 ns | 1.460 ns | 49.16 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 35.28 ns | 0.106 ns | 0.089 ns | 35.30 ns | 0.69 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 35.13 ns | 0.247 ns | 0.231 ns | 35.22 ns | 0.69 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 101.48 ns | 0.875 ns | 0.819 ns | 101.60 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 53.25 ns | 0.518 ns | 0.433 ns | 53.21 ns | 0.52 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 57.21 ns | 0.508 ns | 0.451 ns | 57.38 ns | 0.56 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 202.53 ns | 0.884 ns | 0.827 ns | 202.40 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 82.55 ns | 0.418 ns | 0.391 ns | 82.59 ns | 0.41 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 82.89 ns | 1.057 ns | 0.989 ns | 82.48 ns | 0.41 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 398.79 ns | 7.807 ns | 6.921 ns | 395.67 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 144.51 ns | 1.033 ns | 0.966 ns | 144.42 ns | 0.36 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 143.77 ns | 0.820 ns | 0.684 ns | 143.62 ns | 0.36 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 798.44 ns | 4.447 ns | 3.472 ns | 799.39 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 277.12 ns | 1.723 ns | 1.612 ns | 276.93 ns | 0.35 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 275.70 ns | 1.796 ns | 1.500 ns | 275.51 ns | 0.35 | 0.00 | - | - | - | - || +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 4a2512fea..86b1f766e 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class ShuffleFloat4Channel { - private static readonly byte control = default(WXYZShuffle4).Control; + private static readonly byte Control = default(WXYZShuffle4).Control; private float[] source; private float[] destination; @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4(this.source, this.destination, control); + SimdUtils.Shuffle4(this.source, this.destination, Control); } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 1c456e5a2..f801cd28b 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -120,6 +120,29 @@ namespace SixLabors.ImageSharp.Tests.Common HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); } + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffle4Slice3Channel(int count) + { + static void RunTest(string serialized) + { + // No need to test multiple shuffle controls as the + // pipeline is always the same. + int size = FeatureTestRunner.Deserialize(serialized); + byte control = default(WZYXShuffle4).Control; + + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, control), + control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + } + private static void TestShuffleFloat4Channel( int count, Action, Memory> convert, @@ -210,6 +233,47 @@ namespace SixLabors.ImageSharp.Tests.Common convert(source, result); + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + + Assert.Equal(expected, result); + } + + private static void TestShuffle4Slice3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new byte[(int)(count * (3 / 4F))]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < expected.Length; i += 3, j += 4) + { + expected[i] = source[p0 + j]; + expected[i + 1] = source[p1 + j]; + expected[i + 2] = source[p2 + j]; + } + + convert(source, result); + + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + Assert.Equal(expected, result); } } From 5d401ac902ef753316d0d404d2af69de84092d81 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 30 Oct 2020 23:22:17 +0000 Subject: [PATCH 088/131] Cleanup --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 29b569a80..5cba631ec 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -363,18 +363,6 @@ namespace SixLabors.ImageSharp Shuffle.MmShuffleSpan(ref bytes, control); Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - // var control = MmShuffle(3, 0, 1, 2); - // Span bytes = stackalloc byte[Vector128.Count]; - // MmShuffleSpan(ref bytes, control); - // Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - // - // Vector128 s0 = Vector128.Create((byte)1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1).Dump("s0"); - // Vector128 padded = Ssse3.Shuffle(s0, padMask).Dump("padded"); - // - // padded = Sse3.Or(Vector128.Create(0xff000000u).AsByte(), padded).Dump("0r"); - // - // var shuffled = Ssse3.Shuffle(padded, vcm).Dump("shuffled"); - // var d0 = Ssse3.Shuffle(shuffled, sliceMask).Dump("d0"); fixed (byte* sBase = &source.GetPinnableReference()) fixed (byte* dBase = &dest.GetPinnableReference()) { From bc647a402ace71e64a4c9b32944cafdd7359950f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 31 Oct 2020 00:23:38 +0000 Subject: [PATCH 089/131] fix spans directly --- src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 5cba631ec..d6e45026b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -327,8 +327,8 @@ namespace SixLabors.ImageSharp Shuffle.MmShuffleSpan(ref bytes, control); Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - fixed (byte* sBase = &source.GetPinnableReference()) - fixed (byte* dBase = &dest.GetPinnableReference()) + fixed (byte* sBase = source) + fixed (byte* dBase = dest) { byte* s = sBase; byte* d = dBase; @@ -363,8 +363,8 @@ namespace SixLabors.ImageSharp Shuffle.MmShuffleSpan(ref bytes, control); Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - fixed (byte* sBase = &source.GetPinnableReference()) - fixed (byte* dBase = &dest.GetPinnableReference()) + fixed (byte* sBase = source) + fixed (byte* dBase = dest) { byte* s = sBase; byte* d = dBase; From d933ed6480992a81744fdd8322f57adf0e0d173c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 31 Oct 2020 18:58:59 +0000 Subject: [PATCH 090/131] Faster Pad3Shuffle4 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 58 ++++++++++--------- .../Common/Helpers/SimdUtils.Shuffle.cs | 2 +- .../Color/Bulk/Pad3Shuffle4Channel.cs | 34 +++++------ .../Common/SimdUtilsTests.Shuffle.cs | 4 +- 4 files changed, 52 insertions(+), 46 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index d6e45026b..5083a3c03 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -101,20 +101,20 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - int remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + int remainder = source.Length % (Vector128.Count * 3); - int adjustedCount = source.Length - remainder; - int sourceSlice = (int)(adjustedCount * (3 / 4F)); + int sourceCount = source.Length - remainder; + int destCount = (int)(sourceCount * (4 / 3D)); - if (adjustedCount > 0) + if (sourceCount > 0) { Pad3Shuffle4( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount), + source.Slice(0, sourceCount), + dest.Slice(0, destCount), control); - source = source.Slice(sourceSlice); - dest = dest.Slice(adjustedCount); + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); } } } @@ -320,31 +320,37 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 wMask = Vector128.Create(0xff000000u).AsByte(); - Vector128 padMask = Vector128.Create(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1).AsByte(); + Vector128 vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80).AsByte(); + Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - fixed (byte* sBase = source) - fixed (byte* dBase = dest) + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 3, j += 4) { - byte* s = sBase; - byte* d = dBase; + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); - // TODO: Consider unrolling and shuffling 4 at a time using Ssse3.AlignRight - // See https://stackoverflow.com/questions/2973708/fast-24-bit-array-32-bit-array-conversion - for (int i = 0; i < source.Length; i += 16) - { - Vector128 vs0 = Sse2.LoadVector128(s); - Vector128 val = Sse2.Or(wMask, Ssse3.Shuffle(vs0, padMask)); - val = Ssse3.Shuffle(val, vcm); - Sse2.Store(d, val); + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); - s += 12; - d += 16; - } + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 1) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 2) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 3) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index f3946361b..54ca2a73e 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp "Output span must be divisable by 4!"); DebugGuard.IsTrue( - source.Length == (int)(dest.Length * 3 / 4F), + source.Length == (int)(dest.Length * 3 / 4D), nameof(source), "Input span must be 3/4 the length of the output span!"); } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs index 8286fea0e..9eb1e109b 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -44,21 +44,21 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 62.91 ns | 1.240 ns | 1.569 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 44.34 ns | 0.371 ns | 0.329 ns | 0.70 | 0.02 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 44.46 ns | 0.617 ns | 0.515 ns | 0.70 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 247.93 ns | 2.640 ns | 2.470 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 92.91 ns | 1.204 ns | 1.127 ns | 0.37 | 0.01 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 91.42 ns | 1.234 ns | 1.094 ns | 0.37 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 444.79 ns | 5.094 ns | 4.254 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 162.92 ns | 1.046 ns | 0.873 ns | 0.37 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 166.22 ns | 1.728 ns | 1.443 ns | 0.37 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 882.51 ns | 6.936 ns | 5.792 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 309.72 ns | 3.777 ns | 3.533 ns | 0.35 | 0.01 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 323.18 ns | 4.079 ns | 3.816 ns | 0.37 | 0.00 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |------------- |------------------- |-------------------------------------------------- |------ |----------:|----------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 71.84 ns | 1.491 ns | 3.146 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.14 ns | 0.186 ns | 0.165 ns | 0.33 | 0.02 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 22.94 ns | 0.127 ns | 0.112 ns | 0.33 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 230.90 ns | 0.877 ns | 0.777 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 39.54 ns | 0.601 ns | 0.533 ns | 0.17 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 39.88 ns | 0.657 ns | 0.548 ns | 0.17 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 451.67 ns | 2.932 ns | 2.448 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 60.79 ns | 1.200 ns | 1.561 ns | 0.13 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.30 ns | 0.469 ns | 0.416 ns | 0.14 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 896.11 ns | 10.358 ns | 9.689 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 109.72 ns | 2.149 ns | 2.300 ns | 0.12 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 108.70 ns | 0.994 ns | 0.881 ns | 0.12 | 0.00 | - | - | - | - | } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index f801cd28b..26f85dd76 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -212,7 +212,7 @@ namespace SixLabors.ImageSharp.Tests.Common byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[(int)(count * (4 / 3F))]; + var result = new byte[(int)(count * (4 / 3D))]; byte[] expected = new byte[result.Length]; @@ -249,7 +249,7 @@ namespace SixLabors.ImageSharp.Tests.Common byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[(int)(count * (3 / 4F))]; + var result = new byte[(int)(count * (3 / 4D))]; byte[] expected = new byte[result.Length]; From 0392e082ab8345614e8c39482f44e29130eff0e9 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 31 Oct 2020 21:35:23 +0000 Subject: [PATCH 091/131] Faster Shuffle4Slice3 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 88 ++++++++++++------- .../Common/Helpers/SimdUtils.Shuffle.cs | 4 +- 2 files changed, 56 insertions(+), 36 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 5083a3c03..abda6c4df 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -104,7 +104,7 @@ namespace SixLabors.ImageSharp int remainder = source.Length % (Vector128.Count * 3); int sourceCount = source.Length - remainder; - int destCount = (int)(sourceCount * (4 / 3D)); + int destCount = sourceCount * 4 / 3; if (sourceCount > 0) { @@ -134,20 +134,20 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - int remainder = ImageMaths.ModuloP2(dest.Length, Vector128.Count); + int remainder = source.Length % (Vector128.Count * 4); - int adjustedCount = dest.Length - remainder; - int destSlice = (int)(adjustedCount * (3 / 4F)); + int sourceCount = source.Length - remainder; + int destCount = sourceCount * 3 / 4; - if (adjustedCount > 0) + if (sourceCount > 0) { Shuffle4Slice3( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount), + source.Slice(0, sourceCount), + dest.Slice(0, destCount), control); - source = source.Slice(adjustedCount); - dest = dest.Slice(destSlice); + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); } } } @@ -243,7 +243,7 @@ namespace SixLabors.ImageSharp // We can add static ROS instances if need be in the future. Span bytes = stackalloc byte[Vector256.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector256 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -260,17 +260,17 @@ namespace SixLabors.ImageSharp ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); - vd0 = Avx2.Shuffle(vs0, vcm); - Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vcm); - Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vcm); - Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + vd0 = Avx2.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); } if (m > 0) { for (int i = u; i < n; i++) { - Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); } } } @@ -279,7 +279,7 @@ namespace SixLabors.ImageSharp // Ssse3 Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -296,17 +296,17 @@ namespace SixLabors.ImageSharp ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); - vd0 = Ssse3.Shuffle(vs0, vcm); - Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vcm); - Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vcm); - Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + vd0 = Ssse3.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); } if (m > 0) { for (int i = u; i < n; i++) { - Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); } } } @@ -363,26 +363,46 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 sliceMask = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, -1, -1, -1, -1).AsByte(); + Vector128 vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte(); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte(); Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - fixed (byte* sBase = source) - fixed (byte* dBase = dest) + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 4, j += 3) { - byte* s = sBase; - byte* d = dBase; + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Unsafe.Add(ref v0, 3); - for (int i = 0; i < source.Length; i += 16) - { - Vector128 vs0 = Ssse3.Shuffle(Sse2.LoadVector128(s), vcm); - Sse2.Store(d, Ssse3.Shuffle(vs0, sliceMask)); + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vshuffle), vmaske); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko); - s += 16; - d += 12; - } + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 54ca2a73e..61c1ce48e 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp "Output span must be divisable by 4!"); DebugGuard.IsTrue( - source.Length == (int)(dest.Length * 3 / 4D), + source.Length == dest.Length * 3 / 4, nameof(source), "Input span must be 3/4 the length of the output span!"); } @@ -202,7 +202,7 @@ namespace SixLabors.ImageSharp "Output span must be divisable by 3!"); DebugGuard.IsTrue( - source.Length == (int)(dest.Length * 4 / 3F), + source.Length == dest.Length * 4 / 3, nameof(source), "Output span must be 3/4 the length of the input span!"); } From becb9f11379bf54776341b3651a0f750ac4cfa8f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 31 Oct 2020 21:38:45 +0000 Subject: [PATCH 092/131] Update benchmark --- .../Color/Bulk/Shuffle4Slice3Channel.cs | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs index b64379959..e0fbe1c0b 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -44,25 +44,25 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 50.09 ns | 1.018 ns | 1.460 ns | 49.16 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 128 | 35.28 ns | 0.106 ns | 0.089 ns | 35.30 ns | 0.69 | 0.02 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 35.13 ns | 0.247 ns | 0.231 ns | 35.22 ns | 0.69 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 101.48 ns | 0.875 ns | 0.819 ns | 101.60 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 256 | 53.25 ns | 0.518 ns | 0.433 ns | 53.21 ns | 0.52 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 57.21 ns | 0.508 ns | 0.451 ns | 57.38 ns | 0.56 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 202.53 ns | 0.884 ns | 0.827 ns | 202.40 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 512 | 82.55 ns | 0.418 ns | 0.391 ns | 82.59 ns | 0.41 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 82.89 ns | 1.057 ns | 0.989 ns | 82.48 ns | 0.41 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 398.79 ns | 7.807 ns | 6.921 ns | 395.67 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 1024 | 144.51 ns | 1.033 ns | 0.966 ns | 144.42 ns | 0.36 | 0.01 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 143.77 ns | 0.820 ns | 0.684 ns | 143.62 ns | 0.36 | 0.01 | - | - | - | - | - // | | | | | | | | | | | | | | | - // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 798.44 ns | 4.447 ns | 3.472 ns | 799.39 ns | 1.00 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 2. AVX | Empty | 2048 | 277.12 ns | 1.723 ns | 1.612 ns | 276.93 ns | 0.35 | 0.00 | - | - | - | - | - // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 275.70 ns | 1.796 ns | 1.500 ns | 275.51 ns | 0.35 | 0.00 | - | - | - | - || + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 52.24 ns | 1.081 ns | 1.062 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 25.52 ns | 0.189 ns | 0.158 ns | 0.49 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.11 ns | 0.524 ns | 0.644 ns | 0.50 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 101.09 ns | 0.733 ns | 0.612 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.65 ns | 0.674 ns | 1.198 ns | 0.33 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 32.76 ns | 0.656 ns | 0.853 ns | 0.32 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 209.58 ns | 3.826 ns | 5.957 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 46.32 ns | 0.729 ns | 1.296 ns | 0.22 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 46.97 ns | 0.196 ns | 0.183 ns | 0.22 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 406.39 ns | 7.493 ns | 6.257 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 74.53 ns | 1.509 ns | 1.678 ns | 0.18 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 74.04 ns | 0.703 ns | 0.657 ns | 0.18 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 796.80 ns | 6.476 ns | 5.741 ns | 1.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 130.70 ns | 2.512 ns | 2.227 ns | 0.16 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 129.42 ns | 2.555 ns | 2.133 ns | 0.16 | - | - | - | - | } From 50e30c3c42dc9a24a37d4c07e341c633f3602352 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 1 Nov 2020 23:36:12 +0000 Subject: [PATCH 093/131] Fast fallbacks --- .../{ => Shuffle}/IComponentShuffle.cs | 40 ++-- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 96 ++++++++++ .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 74 ++++++++ .../Common/Helpers/SimdUtils.Shuffle.cs | 69 +++---- .../PixelFormats/Utils/PixelConverter.cs | 118 ++++++++++++ .../Color/Bulk/Pad3Shuffle4Channel.cs | 61 ++++-- .../Color/Bulk/Shuffle4Slice3Channel.cs | 71 ++++--- .../Common/SimdUtilsTests.Shuffle.cs | 173 +++++++++++------- 8 files changed, 541 insertions(+), 161 deletions(-) rename src/ImageSharp/Common/Helpers/{ => Shuffle}/IComponentShuffle.cs (85%) create mode 100644 src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs create mode 100644 src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs diff --git a/src/ImageSharp/Common/Helpers/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs similarity index 85% rename from src/ImageSharp/Common/Helpers/IComponentShuffle.cs rename to src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index e354a57b0..803321d06 100644 --- a/src/ImageSharp/Common/Helpers/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -30,13 +30,20 @@ namespace SixLabors.ImageSharp internal readonly struct DefaultShuffle4 : IComponentShuffle { + private readonly byte p3; + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) - : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0)) { + this.p3 = p3; + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); } - public DefaultShuffle4(byte control) => this.Control = control; - public byte Control { get; } [MethodImpl(InliningOptions.ShortMethod)] @@ -44,12 +51,11 @@ namespace SixLabors.ImageSharp { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - SimdUtils.Shuffle.InverseMmShuffle( - this.Control, - out int p3, - out int p2, - out int p1, - out int p0); + + int p3 = this.p3; + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; for (int i = 0; i < source.Length; i += 4) { @@ -63,7 +69,9 @@ namespace SixLabors.ImageSharp internal readonly struct WXYZShuffle4 : IComponentShuffle { - public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + private static readonly byte WXYZ = SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + + public byte Control => WXYZ; [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -89,7 +97,9 @@ namespace SixLabors.ImageSharp internal readonly struct WZYXShuffle4 : IComponentShuffle { - public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + private static readonly byte WZYX = SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + + public byte Control => WZYX; [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -112,7 +122,9 @@ namespace SixLabors.ImageSharp internal readonly struct YZWXShuffle4 : IComponentShuffle { - public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + private static readonly byte YZWX = SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + + public byte Control => YZWX; [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -135,7 +147,9 @@ namespace SixLabors.ImageSharp internal readonly struct ZYXWShuffle4 : IComponentShuffle { - public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + private static readonly byte ZYXW = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + + public byte Control => ZYXW; [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs new file mode 100644 index 000000000..97bd5aa72 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -0,0 +1,96 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IPad3Shuffle4 : IComponentShuffle + { + } + + internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 + { + private readonly byte p3; + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) + { + this.p3 = p3; + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p3 = this.p3; + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + Span temp = stackalloc byte[4]; + ref byte t = ref MemoryMarshal.GetReference(temp); + ref uint tu = ref Unsafe.As(ref t); + + for (int i = 0, j = 0; i < source.Length; i += 3, j += 4) + { + ref var s = ref Unsafe.Add(ref sBase, i); + tu = Unsafe.As(ref s) | 0xFF000000; + + Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0); + Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1); + Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2); + Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3); + } + } + } + + internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 + { + private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + + public byte Control => XYZW; + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte rs = ref MemoryMarshal.GetReference(source); + ref byte rd = ref MemoryMarshal.GetReference(dest); + + ref byte rsEnd = ref Unsafe.Add(ref rs, source.Length); + ref byte rsLoopEnd = ref Unsafe.Subtract(ref rsEnd, 4); + + while (Unsafe.IsAddressLessThan(ref rs, ref rsLoopEnd)) + { + Unsafe.As(ref rd) = Unsafe.As(ref rs) | 0xFF000000; + + rs = ref Unsafe.Add(ref rs, 3); + rd = ref Unsafe.Add(ref rd, 4); + } + + while (Unsafe.IsAddressLessThan(ref rs, ref rsEnd)) + { + Unsafe.Add(ref rd, 0) = Unsafe.Add(ref rs, 0); + Unsafe.Add(ref rd, 1) = Unsafe.Add(ref rs, 1); + Unsafe.Add(ref rd, 2) = Unsafe.Add(ref rs, 2); + Unsafe.Add(ref rd, 3) = byte.MaxValue; + + rs = ref Unsafe.Add(ref rs, 3); + rd = ref Unsafe.Add(ref rd, 4); + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs new file mode 100644 index 000000000..c65c50f68 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -0,0 +1,74 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IShuffle4Slice3 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 + { + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) + { + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j); + } + } + } + + internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 + { + private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + + public byte Control => XYZW; + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int n = source.Length / 4; + for (int i = 0, j = 0; i < n; i++, j += 3) + { + Unsafe.As(ref Unsafe.Add(ref dBase, j)) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + } + } + } + + [StructLayout(LayoutKind.Explicit, Size = 3)] + internal readonly struct Xyz24 + { + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 61c1ce48e..7ef3be6fe 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -63,45 +63,61 @@ namespace SixLabors.ImageSharp } } + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] - public static void Pad3Shuffle4( + public static void Pad3Shuffle4( ReadOnlySpan source, Span dest, - byte control) + TShuffle shuffle) + where TShuffle : struct, IPad3Shuffle4 { VerifyPad3Shuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, control); + HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: if (source.Length > 0) { - Pad3Shuffle4Remainder(source, dest, control); + shuffle.RunFallbackShuffle(source, dest); } } + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Slice3( + public static void Shuffle4Slice3( ReadOnlySpan source, Span dest, - byte control) + TShuffle shuffle) + where TShuffle : struct, IShuffle4Slice3 { VerifyShuffle4Slice3SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, control); + HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: if (source.Length > 0) { - Shuffle4Slice3Remainder(source, dest, control); + shuffle.RunFallbackShuffle(source, dest); } } - public static void Shuffle4Remainder( + private static void Shuffle4Remainder( ReadOnlySpan source, Span dest, byte control) @@ -119,41 +135,6 @@ namespace SixLabors.ImageSharp } } - public static void Pad3Shuffle4Remainder( - ReadOnlySpan source, - Span dest, - byte control) - { - ref byte sBase = ref MemoryMarshal.GetReference(source); - ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); - - for (int i = 0, j = 0; i < dest.Length; i += 4, j += 3) - { - Unsafe.Add(ref dBase, p0 + i) = Unsafe.Add(ref sBase, j); - Unsafe.Add(ref dBase, p1 + i) = Unsafe.Add(ref sBase, j + 1); - Unsafe.Add(ref dBase, p2 + i) = Unsafe.Add(ref sBase, j + 2); - Unsafe.Add(ref dBase, p3 + i) = byte.MaxValue; - } - } - - public static void Shuffle4Slice3Remainder( - ReadOnlySpan source, - Span dest, - byte control) - { - ref byte sBase = ref MemoryMarshal.GetReference(source); - ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int _, out int p2, out int p1, out int p0); - - for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) - { - Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j); - Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j); - Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j); - } - } - [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index 5afd369be..c5f92648c 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -37,6 +37,24 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); } public static class FromArgb32 @@ -58,6 +76,24 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); } public static class FromBgra32 @@ -79,6 +115,88 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + } + + public static class FromRgb24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + // TODO: Bgr24 + } + + public static class FromBgr24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); + + // TODO: Rgb24 } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs index 9eb1e109b..4af028605 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -9,7 +9,8 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Pad3Shuffle4Channel { - private static readonly byte Control = default(WXYZShuffle4).Control; + private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2); + private static readonly XYZWPad3Shuffle4 ControlFast = default; private byte[] source; private byte[] destination; @@ -18,7 +19,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { this.source = new byte[this.Count]; new Random(this.Count).NextBytes(this.source); - this.destination = new byte[(int)(this.Count * (4 / 3F))]; + this.destination = new byte[this.Count * 4 / 3]; } [Params(96, 384, 768, 1536)] @@ -29,6 +30,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); } + + [Benchmark] + public void Pad3Shuffle4FastFallback() + { + SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast); + } } // 2020-10-30 @@ -44,21 +51,37 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |------------- |------------------- |-------------------------------------------------- |------ |----------:|----------:|---------:|------:|--------:|------:|------:|------:|----------:| - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 71.84 ns | 1.491 ns | 3.146 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.14 ns | 0.186 ns | 0.165 ns | 0.33 | 0.02 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 22.94 ns | 0.127 ns | 0.112 ns | 0.33 | 0.02 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 230.90 ns | 0.877 ns | 0.777 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 39.54 ns | 0.601 ns | 0.533 ns | 0.17 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 39.88 ns | 0.657 ns | 0.548 ns | 0.17 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 451.67 ns | 2.932 ns | 2.448 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 60.79 ns | 1.200 ns | 1.561 ns | 0.13 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.30 ns | 0.469 ns | 0.416 ns | 0.14 | 0.00 | - | - | - | - | - // | | | | | | | | | | | | | | - // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 896.11 ns | 10.358 ns | 9.689 ns | 1.00 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 109.72 ns | 2.149 ns | 2.300 ns | 0.12 | 0.00 | - | - | - | - | - // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 108.70 ns | 0.994 ns | 0.881 ns | 0.12 | 0.00 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |------------------------- |------------------- |-------------------------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|------:|------:|------:|----------:| + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.63 ns | 0.175 ns | 0.155 ns | 23.65 ns | 0.15 | 0.01 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 96 | 24.84 ns | 0.376 ns | 0.333 ns | 24.74 ns | 1.57 | 0.06 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 41.41 ns | 0.859 ns | 1.204 ns | 41.33 ns | 0.16 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 384 | 40.74 ns | 0.624 ns | 0.584 ns | 40.72 ns | 0.55 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 62.86 ns | 0.332 ns | 0.277 ns | 62.80 ns | 0.12 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 768 | 64.72 ns | 1.306 ns | 1.090 ns | 64.51 ns | 0.59 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 110.05 ns | 0.256 ns | 0.214 ns | 110.04 ns | 0.11 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs index e0fbe1c0b..9cf24ccd6 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -9,7 +9,8 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Shuffle4Slice3Channel { - private static readonly byte Control = default(WXYZShuffle4).Control; + private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2); + private static readonly XYZWShuffle4Slice3 ControlFast = default; private byte[] source; private byte[] destination; @@ -29,6 +30,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); } + + [Benchmark] + public void Shuffle4Slice3FastFallback() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast); + } } // 2020-10-29 @@ -44,25 +51,45 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk // // Runtime=.NET Core 3.1 // - // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | - // |--------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 52.24 ns | 1.081 ns | 1.062 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 25.52 ns | 0.189 ns | 0.158 ns | 0.49 | - | - | - | - | - // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.11 ns | 0.524 ns | 0.644 ns | 0.50 | - | - | - | - | - // | | | | | | | | | | | | | - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 101.09 ns | 0.733 ns | 0.612 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.65 ns | 0.674 ns | 1.198 ns | 0.33 | - | - | - | - | - // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 32.76 ns | 0.656 ns | 0.853 ns | 0.32 | - | - | - | - | - // | | | | | | | | | | | | | - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 209.58 ns | 3.826 ns | 5.957 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 46.32 ns | 0.729 ns | 1.296 ns | 0.22 | - | - | - | - | - // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 46.97 ns | 0.196 ns | 0.183 ns | 0.22 | - | - | - | - | - // | | | | | | | | | | | | | - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 406.39 ns | 7.493 ns | 6.257 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 74.53 ns | 1.509 ns | 1.678 ns | 0.18 | - | - | - | - | - // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 74.04 ns | 0.703 ns | 0.657 ns | 0.18 | - | - | - | - | - // | | | | | | | | | | | | | - // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 796.80 ns | 6.476 ns | 5.741 ns | 1.00 | - | - | - | - | - // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 130.70 ns | 2.512 ns | 2.227 ns | 0.16 | - | - | - | - | - // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 129.42 ns | 2.555 ns | 2.133 ns | 0.16 | - | - | - | - | + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 27.15 ns | 0.556 ns | 0.762 ns | 27.34 ns | 0.41 | 0.03 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 128 | 26.15 ns | 0.113 ns | 0.106 ns | 26.16 ns | 1.01 | 0.02 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.61 ns | 0.107 ns | 0.095 ns | 32.62 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 256 | 32.16 ns | 0.111 ns | 0.104 ns | 32.16 ns | 0.61 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 51.03 ns | 0.535 ns | 0.501 ns | 51.18 ns | 0.24 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 512 | 50.33 ns | 0.382 ns | 0.339 ns | 50.41 ns | 0.42 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 77.13 ns | 1.355 ns | 2.264 ns | 76.19 ns | 0.19 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 1024 | 80.25 ns | 1.647 ns | 2.082 ns | 80.98 ns | 0.35 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 128.41 ns | 0.417 ns | 0.390 ns | 128.24 ns | 0.16 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 26f85dd76..29f3925fc 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -39,56 +39,51 @@ namespace SixLabors.ImageSharp.Tests.Common static void RunTest(string serialized) { int size = FeatureTestRunner.Deserialize(serialized); - foreach (var item in ArraySizesDivisibleBy4) - { - // These cannot be expressed as a theory as you cannot - // use RemoteExecutor within generic methods nor pass - // IComponentShuffle to the generic utils method. - foreach (var count in item) - { - WXYZShuffle4 wxyz = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), - wxyz.Control); - - WZYXShuffle4 wzyx = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), - wzyx.Control); - - YZWXShuffle4 yzwx = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), - yzwx.Control); - - ZYXWShuffle4 zyxw = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), - zyxw.Control); - - var xwyz = new DefaultShuffle4(2, 1, 3, 0); - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), - xwyz.Control); - - var yyyy = new DefaultShuffle4(1, 1, 1, 1); - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), - yyyy.Control); - - var wwww = new DefaultShuffle4(3, 3, 3, 3); - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), - wwww.Control); - } - } + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IComponentShuffle to the generic utils method. + WXYZShuffle4 wxyz = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), + wxyz.Control); + + WZYXShuffle4 wzyx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), + wzyx.Control); + + YZWXShuffle4 yzwx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), + yzwx.Control); + + ZYXWShuffle4 zyxw = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), + zyxw.Control); + + var xwyz = new DefaultShuffle4(2, 1, 3, 0); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4(1, 1, 1, 1); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4(3, 3, 3, 3); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), + wwww.Control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( @@ -103,21 +98,40 @@ namespace SixLabors.ImageSharp.Tests.Common { static void RunTest(string serialized) { - // No need to test multiple shuffle controls as the - // pipeline is always the same. int size = FeatureTestRunner.Deserialize(serialized); - byte control = default(WZYXShuffle4).Control; + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IComponentShuffle to the generic utils method. + XYZWPad3Shuffle4 xyzw = default; TestPad3Shuffle4Channel( size, - (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, control), - control); + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), + wwww.Control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, count, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } [Theory] @@ -126,15 +140,34 @@ namespace SixLabors.ImageSharp.Tests.Common { static void RunTest(string serialized) { - // No need to test multiple shuffle controls as the - // pipeline is always the same. int size = FeatureTestRunner.Deserialize(serialized); - byte control = default(WZYXShuffle4).Control; + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IComponentShuffle to the generic utils method. + XYZWShuffle4Slice3 xyzw = default; TestShuffle4Slice3Channel( size, - (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, control), - control); + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), + wwww.Control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( @@ -212,7 +245,7 @@ namespace SixLabors.ImageSharp.Tests.Common byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[(int)(count * (4 / 3D))]; + var result = new byte[count * 4 / 3]; byte[] expected = new byte[result.Length]; @@ -231,6 +264,20 @@ namespace SixLabors.ImageSharp.Tests.Common expected[p3 + i] = byte.MaxValue; } + Span temp = stackalloc byte[4]; + for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3) + { + temp[0] = source[j]; + temp[1] = source[j + 1]; + temp[2] = source[j + 2]; + temp[3] = byte.MaxValue; + + expected[i] = temp[p0]; + expected[i + 1] = temp[p1]; + expected[i + 2] = temp[p2]; + expected[i + 3] = temp[p3]; + } + convert(source, result); for (int i = 0; i < expected.Length; i++) @@ -249,7 +296,7 @@ namespace SixLabors.ImageSharp.Tests.Common byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[(int)(count * (3 / 4D))]; + var result = new byte[count * 3 / 4]; byte[] expected = new byte[result.Length]; From 6209c3c8cfb846a91105242253e5702acca0cc07 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sun, 1 Nov 2020 23:51:38 +0000 Subject: [PATCH 094/131] Don't cast full spans --- .../Helpers/Shuffle/IComponentShuffle.cs | 44 +++++++++---------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 803321d06..3f045a579 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -6,6 +6,9 @@ using System.Buffers.Binary; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +// The JIT can detect and optimize rotation idioms ROTL (Rotate Left) +// and ROTR (Rotate Right) emitting efficient CPU instructions: +// https://github.com/dotnet/coreclr/pull/1830 namespace SixLabors.ImageSharp { /// @@ -76,15 +79,11 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) - // and ROTR (Rotate Right) emitting efficient CPU instructions: - // https://github.com/dotnet/coreclr/pull/1830 - for (int i = 0; i < s.Length; i++) + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); @@ -104,12 +103,11 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); @@ -129,12 +127,11 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); @@ -154,12 +151,11 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); From b010a15012c267c7bb7c5f9b75d3c5844751e21b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 2 Nov 2020 01:52:06 +0000 Subject: [PATCH 095/131] Shuffle3 + Tests --- .../Helpers/Shuffle/IComponentShuffle.cs | 5 + .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 5 + .../Common/Helpers/Shuffle/IShuffle3.cs | 90 ++++++++++++++++ .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 12 +-- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 102 +++++++++++++++++- .../Common/Helpers/SimdUtils.Shuffle.cs | 48 ++++++++- .../Common/SimdUtilsTests.Shuffle.cs | 76 ++++++++++++- 7 files changed, 323 insertions(+), 15 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 3f045a579..1a4c6ab44 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -40,6 +40,11 @@ namespace SixLabors.ImageSharp public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) { + Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + this.p3 = p3; this.p2 = p2; this.p1 = p1; diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 97bd5aa72..b223a6bc2 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -21,6 +21,11 @@ namespace SixLabors.ImageSharp public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) { + Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + this.p3 = p3; this.p2 = p2; this.p1 = p1; diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs new file mode 100644 index 000000000..fa4260e63 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -0,0 +1,90 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IShuffle3 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle3 : IShuffle3 + { + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle3(byte p2, byte p1, byte p0) + { + Guard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); + + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0; i < source.Length; i += 3) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + } + } + } + + internal readonly struct ZYXShuffle3 : IShuffle3 + { + private static readonly byte ZYX = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + + public byte Control => ZYX; + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref Byte3 sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 3; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed & 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + packed = tmp1 + tmp3; + + Unsafe.Add(ref dBase, i) = Unsafe.As(ref packed); + } + } + } + + [StructLayout(LayoutKind.Explicit, Size = 3)] + internal readonly struct Byte3 + { + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index c65c50f68..1ceb38f1a 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -20,6 +20,11 @@ namespace SixLabors.ImageSharp public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) { + Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + this.p2 = p2; this.p1 = p1; this.p0 = p0; @@ -62,13 +67,8 @@ namespace SixLabors.ImageSharp int n = source.Length / 4; for (int i = 0, j = 0; i < n; i++, j += 3) { - Unsafe.As(ref Unsafe.Add(ref dBase, j)) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + Unsafe.As(ref Unsafe.Add(ref dBase, j)) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); } } } - - [StructLayout(LayoutKind.Explicit, Size = 3)] - internal readonly struct Xyz24 - { - } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index abda6c4df..974516c3e 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -86,6 +86,38 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffles 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 3); + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle3( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + /// /// Pads then shuffles 8-bit integers within 128-bit lanes in /// using the control and store the results in . @@ -94,7 +126,7 @@ namespace SixLabors.ImageSharp /// The destination span of bytes. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static unsafe void Pad3Shuffle4Reduce( + public static void Pad3Shuffle4Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -127,7 +159,7 @@ namespace SixLabors.ImageSharp /// The destination span of bytes. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static unsafe void Shuffle4Slice3Reduce( + public static void Shuffle4Slice3Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -313,7 +345,69 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ShortMethod)] - private static unsafe void Pad3Shuffle4( + private static void Shuffle3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + Vector128 vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80).AsByte(); + Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); + Vector128 vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte(); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0; i < n; i += 3) + { + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); + + v0 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle); + v1 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); + v2 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); + v3 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); + + v0 = Ssse3.Shuffle(v0, vmaske); + v1 = Ssse3.Shuffle(v1, vmasko); + v2 = Ssse3.Shuffle(v2, vmaske); + v3 = Ssse3.Shuffle(v3, vmasko); + + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, i); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Pad3Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -356,7 +450,7 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ShortMethod)] - private static unsafe void Shuffle4Slice3( + private static void Shuffle4Slice3( ReadOnlySpan source, Span dest, byte control) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 7ef3be6fe..79cb0da37 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -23,7 +23,7 @@ namespace SixLabors.ImageSharp Span dest, byte control) { - VerifyShuffleSpanInput(source, dest); + VerifyShuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control); @@ -50,7 +50,7 @@ namespace SixLabors.ImageSharp TShuffle shuffle) where TShuffle : struct, IComponentShuffle { - VerifyShuffleSpanInput(source, dest); + VerifyShuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); @@ -63,6 +63,33 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffle 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle3 + { + VerifyShuffle3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + /// /// Pads then shuffles 8-bit integers within 128-bit lanes in /// using the control and store the results in . @@ -136,7 +163,7 @@ namespace SixLabors.ImageSharp } [Conditional("DEBUG")] - private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) + private static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) where T : struct { DebugGuard.IsTrue( @@ -150,6 +177,21 @@ namespace SixLabors.ImageSharp "Input spans must be divisable by 4!"); } + [Conditional("DEBUG")] + private static void VerifyShuffle3SpanInput(ReadOnlySpan source, Span dest) + where T : struct + { + DebugGuard.IsTrue( + source.Length == dest.Length, + nameof(source), + "Input spans must be of same length!"); + + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input spans must be divisable by 3!"); + } + [Conditional("DEBUG")] private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan source, Span dest) { diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 29f3925fc..75d7c8729 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -92,6 +92,48 @@ namespace SixLabors.ImageSharp.Tests.Common HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkShuffleByte3Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle3 to the generic utils method. + ZYXShuffle3 zyx = default; + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), + zyx.Control); + + var xyz = new DefaultShuffle3(2, 1, 0); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), + xyz.Control); + + var yyy = new DefaultShuffle3(1, 1, 1); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), + yyy.Control); + + var zzz = new DefaultShuffle3(2, 2, 2); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), + zzz.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + [Theory] [MemberData(nameof(ArraySizesDivisibleBy3))] public void BulkPad3Shuffle4Channel(int count) @@ -102,7 +144,7 @@ namespace SixLabors.ImageSharp.Tests.Common // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass - // IComponentShuffle to the generic utils method. + // IPad3Shuffle4 to the generic utils method. XYZWPad3Shuffle4 xyzw = default; TestPad3Shuffle4Channel( size, @@ -144,7 +186,7 @@ namespace SixLabors.ImageSharp.Tests.Common // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass - // IComponentShuffle to the generic utils method. + // IShuffle4Slice3 to the generic utils method. XYZWShuffle4Slice3 xyzw = default; TestShuffle4Slice3Channel( size, @@ -237,6 +279,36 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(expected, result); } + private static void TestShuffleByte3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 3) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } + private static void TestPad3Shuffle4Channel( int count, Action, Memory> convert, From 466048ef0d9b43a380cc11ce5e1d948d3cda7c48 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 2 Nov 2020 11:46:49 +0000 Subject: [PATCH 096/131] Cleanup and fix tests --- .../Helpers/Shuffle/IComponentShuffle.cs | 23 ++++--- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 8 +-- .../Common/Helpers/Shuffle/IShuffle3.cs | 43 +------------ .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 13 ++-- .../Common/Helpers/SimdUtils.Shuffle.cs | 2 +- .../PixelFormats/Utils/PixelConverter.cs | 18 +++++- .../Color/Bulk/Shuffle3Channel.cs | 64 +++++++++++++++++++ .../Common/SimdUtilsTests.Shuffle.cs | 4 +- 8 files changed, 113 insertions(+), 62 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 1a4c6ab44..2056075e7 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -31,7 +31,12 @@ namespace SixLabors.ImageSharp void RunFallbackShuffle(ReadOnlySpan source, Span dest); } - internal readonly struct DefaultShuffle4 : IComponentShuffle + /// + internal interface IShuffle4 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle4 : IShuffle4 { private readonly byte p3; private readonly byte p2; @@ -40,10 +45,10 @@ namespace SixLabors.ImageSharp public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) { - Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); this.p3 = p3; this.p2 = p2; @@ -75,7 +80,7 @@ namespace SixLabors.ImageSharp } } - internal readonly struct WXYZShuffle4 : IComponentShuffle + internal readonly struct WXYZShuffle4 : IShuffle4 { private static readonly byte WXYZ = SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); @@ -99,7 +104,7 @@ namespace SixLabors.ImageSharp } } - internal readonly struct WZYXShuffle4 : IComponentShuffle + internal readonly struct WZYXShuffle4 : IShuffle4 { private static readonly byte WZYX = SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); @@ -123,7 +128,7 @@ namespace SixLabors.ImageSharp } } - internal readonly struct YZWXShuffle4 : IComponentShuffle + internal readonly struct YZWXShuffle4 : IShuffle4 { private static readonly byte YZWX = SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); @@ -147,7 +152,7 @@ namespace SixLabors.ImageSharp } } - internal readonly struct ZYXWShuffle4 : IComponentShuffle + internal readonly struct ZYXWShuffle4 : IShuffle4 { private static readonly byte ZYXW = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index b223a6bc2..1f3fce541 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -21,10 +21,10 @@ namespace SixLabors.ImageSharp public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) { - Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); this.p3 = p3; this.p2 = p2; diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index fa4260e63..61e99890e 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -20,9 +20,9 @@ namespace SixLabors.ImageSharp public DefaultShuffle3(byte p2, byte p1, byte p0) { - Guard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); - Guard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); - Guard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); this.p2 = p2; this.p1 = p1; @@ -50,41 +50,4 @@ namespace SixLabors.ImageSharp } } } - - internal readonly struct ZYXShuffle3 : IShuffle3 - { - private static readonly byte ZYX = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); - - public byte Control => ZYX; - - [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) - { - ref Byte3 sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); - ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); - int n = source.Length / 3; - - for (int i = 0; i < n; i++) - { - uint packed = Unsafe.As(ref Unsafe.Add(ref sBase, i)); - - // packed = [W Z Y X] - // tmp1 = [W 0 Y 0] - // tmp2 = [0 Z 0 X] - // tmp3=ROTL(16, tmp2) = [0 X 0 Z] - // tmp1 + tmp3 = [W X Y Z] - uint tmp1 = packed & 0xFF00FF00; - uint tmp2 = packed & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - packed = tmp1 + tmp3; - - Unsafe.Add(ref dBase, i) = Unsafe.As(ref packed); - } - } - } - - [StructLayout(LayoutKind.Explicit, Size = 3)] - internal readonly struct Byte3 - { - } } diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 1ceb38f1a..c9b51aba2 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -20,10 +20,10 @@ namespace SixLabors.ImageSharp public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) { - Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); this.p2 = p2; this.p1 = p1; @@ -71,4 +71,9 @@ namespace SixLabors.ImageSharp } } } + + [StructLayout(LayoutKind.Explicit, Size = 3)] + internal readonly struct Byte3 + { + } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 79cb0da37..8fd6bcce6 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp ReadOnlySpan source, Span dest, TShuffle shuffle) - where TShuffle : struct, IComponentShuffle + where TShuffle : struct, IShuffle4 { VerifyShuffle4SpanInput(source, dest); diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index c5f92648c..7215fa860 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -164,7 +164,14 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); - // TODO: Bgr24 + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); } public static class FromBgr24 @@ -196,7 +203,14 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, default); - // TODO: Rgb24 + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs new file mode 100644 index 000000000..3667b973e --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs @@ -0,0 +1,64 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle3Channel + { + private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2); + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle3() + { + SimdUtils.Shuffle3(this.source, this.destination, Control); + } + } + + // 2020-11-02 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 48.46 ns | 1.034 ns | 2.438 ns | 47.46 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 96 | 32.42 ns | 0.537 ns | 0.476 ns | 32.34 ns | 0.66 | 0.04 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 32.51 ns | 0.373 ns | 0.349 ns | 32.56 ns | 0.66 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 199.04 ns | 1.512 ns | 1.180 ns | 199.17 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 384 | 71.20 ns | 2.654 ns | 7.784 ns | 69.60 ns | 0.41 | 0.02 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 63.23 ns | 0.569 ns | 0.505 ns | 63.21 ns | 0.32 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 391.28 ns | 5.087 ns | 3.972 ns | 391.22 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 768 | 109.12 ns | 2.149 ns | 2.010 ns | 108.66 ns | 0.28 | 0.01 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 106.51 ns | 0.734 ns | 0.613 ns | 106.56 ns | 0.27 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 75d7c8729..f1bfaa4ad 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -42,7 +42,7 @@ namespace SixLabors.ImageSharp.Tests.Common // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass - // IComponentShuffle to the generic utils method. + // IShuffle4 to the generic utils method. WXYZShuffle4 wxyz = default; TestShuffleByte4Channel( size, @@ -103,7 +103,7 @@ namespace SixLabors.ImageSharp.Tests.Common // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass // IShuffle3 to the generic utils method. - ZYXShuffle3 zyx = default; + var zyx = new DefaultShuffle3(0, 1, 2); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), From f7b5a0ff79b028de37cace21774bb19eb58b1f67 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 2 Nov 2020 16:24:04 +0000 Subject: [PATCH 097/131] Fix Shuffle4Slice3, wire up shuffles. --- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 6 +- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 18 ++- .../Argb32.PixelOperations.Generated.cs | 72 +++++---- .../Bgr24.PixelOperations.Generated.cs | 140 +++++++++++------- .../Bgra32.PixelOperations.Generated.cs | 72 +++++---- .../Rgb24.PixelOperations.Generated.cs | 129 ++++++++++------ .../Rgba32.PixelOperations.Generated.cs | 72 +++++---- .../Generated/_Common.ttinclude | 6 +- 8 files changed, 323 insertions(+), 192 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index c9b51aba2..70800f9de 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -62,12 +62,12 @@ namespace SixLabors.ImageSharp public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); - ref byte dBase = ref MemoryMarshal.GetReference(dest); + ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); int n = source.Length / 4; - for (int i = 0, j = 0; i < n; i++, j += 3) + for (int i = 0; i < n; i++) { - Unsafe.As(ref Unsafe.Add(ref dBase, j)) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + Unsafe.Add(ref dBase, i) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 974516c3e..296970ddc 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -371,9 +371,11 @@ namespace SixLabors.ImageSharp for (int i = 0; i < n; i += 3) { - ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); - Vector128 v1 = Unsafe.Add(ref v0, 1); - Vector128 v2 = Unsafe.Add(ref v0, 2); + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); v2 = Ssse3.AlignRight(v2, v1, 8); @@ -474,10 +476,12 @@ namespace SixLabors.ImageSharp for (int i = 0, j = 0; i < n; i += 4, j += 3) { - ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); - Vector128 v1 = Unsafe.Add(ref v0, 1); - Vector128 v2 = Unsafe.Add(ref v0, 2); - Vector128 v3 = Unsafe.Add(ref v0, 3); + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); + Vector128 v3 = Unsafe.Add(ref vs, 3); v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske); v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko); diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs index 3f48d2acc..d30616997 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs @@ -106,23 +106,59 @@ namespace SixLabors.ImageSharp.PixelFormats Span dest = MemoryMarshal.Cast(destinationPixels); PixelConverter.FromBgra32.ToArgb32(source, dest); } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, dest); + } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); + } - dp.FromArgb32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); } /// @@ -197,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromArgb32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs index b73bb8b83..50d4942ec 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs @@ -52,146 +52,182 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); } - /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); } - /// - public override void ToL8(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L8 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); } - /// - public override void ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L16 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); } /// - public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToL8(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La16 dp = ref Unsafe.Add(ref destRef, i); + ref L8 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La32 dp = ref Unsafe.Add(ref destRef, i); + ref L16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); + ref La16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); + ref La32 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs index 8cf2d5850..b38e5f19d 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs @@ -106,23 +106,59 @@ namespace SixLabors.ImageSharp.PixelFormats Span dest = MemoryMarshal.Cast(destinationPixels); PixelConverter.FromArgb32.ToBgra32(source, dest); } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); + } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgra32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); + } - dp.FromBgra32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); } /// @@ -197,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromBgra32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs index 332683fc7..9a4173892 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs @@ -52,59 +52,114 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, dest); + } + /// + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgra32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); + } - dp.FromRgb24(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); } /// @@ -179,24 +234,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgb24(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs index 9a36ec29a..5b60ec10e 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs @@ -95,23 +95,59 @@ namespace SixLabors.ImageSharp.PixelFormats Span dest = MemoryMarshal.Cast(destinationPixels); PixelConverter.FromBgra32.ToRgba32(source, dest); } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, dest); + } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); + } - dp.FromRgba32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); } /// @@ -186,24 +222,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgba32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude index d8b5286cd..b728b0115 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude @@ -17,7 +17,7 @@ using System.Runtime.InteropServices; <#+ static readonly string[] CommonPixelTypes = { "Argb32", "Bgr24", "Bgra32", "L8", "L16", "La16", "La32", "Rgb24", "Rgba32", "Rgb48", "Rgba64", "Bgra5551" }; - static readonly string[] Optimized32BitTypes = { "Rgba32", "Argb32", "Bgra32" }; + static readonly string[] OptimizedPixelTypes = { "Rgba32", "Argb32", "Bgra32", "Rgb24", "Bgr24" }; // Types with Rgba32-combatible to/from Vector4 conversion static readonly string[] Rgba32CompatibleTypes = { "Argb32", "Bgra32", "Rgb24", "Bgr24" }; @@ -148,8 +148,8 @@ using System.Runtime.InteropServices; GenerateRgba32CompatibleVector4ConversionMethods(pixelType, pixelType.EndsWith("32")); } - var matching32BitTypes = Optimized32BitTypes.Contains(pixelType) ? - Optimized32BitTypes.Where(p => p != pixelType) : + var matching32BitTypes = OptimizedPixelTypes.Contains(pixelType) ? + OptimizedPixelTypes.Where(p => p != pixelType) : Enumerable.Empty(); foreach (string destPixelType in matching32BitTypes) From 7e57ebd57451081d3f08974c8525694487709b0d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 2 Nov 2020 18:14:11 +0000 Subject: [PATCH 098/131] Add Rgb24 <==> Vector4 benchmarks --- .../Color/Bulk/FromVector4.cs | 4 +- .../Color/Bulk/FromVector4_Rgb24.cs | 55 ++++++++++++++++ .../Color/Bulk/ToVector4_Rgb24.cs | 65 +++++++++++++++++++ 3 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs create mode 100644 tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index dc030e07a..1db9147ad 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -30,7 +30,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk protected Configuration Configuration => Configuration.Default; // [Params(64, 2048)] - [Params(1024)] + [Params(64, 256, 2048)] public int Count { get; set; } [GlobalSetup] @@ -58,7 +58,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk } } - [Benchmark] + [Benchmark(Baseline = true)] public void PixelOperations_Base() { new PixelOperations().FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan()); diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs new file mode 100644 index 000000000..5da6edc6b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs @@ -0,0 +1,55 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class FromVector4_Rgb24 : FromVector4 + { + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 64 | 343.2 ns | 305.91 ns | 16.77 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 64 | 320.8 ns | 19.93 ns | 1.09 ns | 0.94 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 64 | 234.3 ns | 17.98 ns | 0.99 ns | 1.00 | 0.00 | 0.0052 | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 64 | 246.0 ns | 82.34 ns | 4.51 ns | 1.05 | 0.02 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 64 | 222.3 ns | 39.46 ns | 2.16 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 64 | 243.4 ns | 33.58 ns | 1.84 ns | 1.09 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 256 | 824.9 ns | 32.77 ns | 1.80 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 256 | 967.0 ns | 39.09 ns | 2.14 ns | 1.17 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 256 | 756.9 ns | 94.43 ns | 5.18 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 256 | 1,003.3 ns | 3,192.09 ns | 174.97 ns | 1.32 | 0.22 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 256 | 748.6 ns | 248.03 ns | 13.60 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 256 | 437.0 ns | 36.48 ns | 2.00 ns | 0.58 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 2048 | 5,751.6 ns | 704.24 ns | 38.60 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 2048 | 4,391.6 ns | 718.17 ns | 39.37 ns | 0.76 | 0.00 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 2048 | 6,202.0 ns | 1,815.18 ns | 99.50 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 2048 | 4,225.6 ns | 1,004.03 ns | 55.03 ns | 0.68 | 0.01 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 2048 | 6,157.1 ns | 2,516.98 ns | 137.96 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 2048 | 1,822.7 ns | 1,764.43 ns | 96.71 ns | 0.30 | 0.02 | 0.0172 | - | - | 72 B | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs new file mode 100644 index 000000000..aecd41831 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs @@ -0,0 +1,65 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; + +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class ToVector4_Rgb24 : ToVector4 + { + [Benchmark(Baseline = true)] + public void PixelOperations_Base() + { + new PixelOperations().ToVector4( + this.Configuration, + this.source.GetSpan(), + this.destination.GetSpan()); + } + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 64 | 298.4 ns | 33.63 ns | 1.84 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 64 | 355.5 ns | 908.51 ns | 49.80 ns | 1.19 | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 64 | 220.1 ns | 13.77 ns | 0.75 ns | 1.00 | 0.00 | 0.0055 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 64 | 228.5 ns | 41.41 ns | 2.27 ns | 1.04 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 64 | 213.6 ns | 12.47 ns | 0.68 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 64 | 217.0 ns | 9.95 ns | 0.55 ns | 1.02 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 256 | 829.0 ns | 242.93 ns | 13.32 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 256 | 448.9 ns | 4.04 ns | 0.22 ns | 0.54 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 256 | 863.0 ns | 1,253.26 ns | 68.70 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 256 | 309.2 ns | 66.16 ns | 3.63 ns | 0.36 | 0.03 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 256 | 737.0 ns | 253.90 ns | 13.92 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 256 | 212.3 ns | 1.07 ns | 0.06 ns | 0.29 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 2048 | 5,625.6 ns | 404.35 ns | 22.16 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 2048 | 1,974.1 ns | 229.84 ns | 12.60 ns | 0.35 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 2048 | 5,467.2 ns | 537.29 ns | 29.45 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 2048 | 1,985.5 ns | 4,714.23 ns | 258.40 ns | 0.36 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 2048 | 5,888.2 ns | 1,622.23 ns | 88.92 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 2048 | 1,165.0 ns | 191.71 ns | 10.51 ns | 0.20 | 0.00 | - | - | - | - | From 1e615c286a7b938fb4f3b39d2d38b2463e9c981c Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Tue, 3 Nov 2020 20:25:21 +0100 Subject: [PATCH 099/131] Add initial vectorized color converter implementation --- ...cs => JpegColorConverter.FromCmykBasic.cs} | 16 +- .../JpegColorConverter.FromCmykSimdAvx2.cs | 145 +++++++++++++ ... JpegColorConverter.FromGrayScaleBasic.cs} | 13 +- ...pegColorConverter.FromGrayScaleSimdAvx2.cs | 109 ++++++++++ ....cs => JpegColorConverter.FromRgbBasic.cs} | 14 +- .../JpegColorConverter.FromRgbSimdAvx2.cs | 132 ++++++++++++ .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 5 +- ...cs => JpegColorConverter.FromYccKBasic.cs} | 25 ++- .../JpegColorConverter.FromYccKSimdAvx2.cs | 193 ++++++++++++++++++ .../ColorConverters/JpegColorConverter.cs | 86 +++++++- .../Formats/Jpg/JpegColorConverterTests.cs | 2 +- 11 files changed, 704 insertions(+), 36 deletions(-) rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromCmyk.cs => JpegColorConverter.FromCmykBasic.cs} (75%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromGrayScale.cs => JpegColorConverter.FromGrayScaleBasic.cs} (74%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromRgb.cs => JpegColorConverter.FromRgbBasic.cs} (76%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromYccK.cs => JpegColorConverter.FromYccKBasic.cs} (57%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs similarity index 75% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs index 7b257b37d..117bb2c3f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmyk.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs @@ -8,16 +8,20 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromCmyk : JpegColorConverter + internal sealed class FromCmykBasic : JpegColorConverter { - public FromCmyk(int precision) + public FromCmykBasic(int precision) : base(JpegColorSpace.Cmyk, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { ReadOnlySpan cVals = values.Component0; ReadOnlySpan mVals = values.Component1; ReadOnlySpan yVals = values.Component2; @@ -25,7 +29,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters var v = new Vector4(0, 0, 0, 1F); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) @@ -33,7 +37,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters float c = cVals[i]; float m = mVals[i]; float y = yVals[i]; - float k = kVals[i] / this.MaximumValue; + float k = kVals[i] / maxValue; v.X = c * k; v.Y = m * k; @@ -47,4 +51,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs new file mode 100644 index 000000000..afd36073a --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs @@ -0,0 +1,145 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykVector8 : JpegColorConverter + { + public FromCmykVector8(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); + } + + FromCmykBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / maxValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); + Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); + Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); + Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); + + k = Avx.Multiply(k, scale); + + c = Avx.Multiply(Avx.Multiply(c, k), scale); + m = Avx.Multiply(Avx.Multiply(m, k), scale); + y = Avx.Multiply(Avx.Multiply(y, k), scale); + + Vector256 cmLo = Avx.UnpackLow(c, m); + Vector256 yoLo = Avx.UnpackLow(y, one); + Vector256 cmHi = Avx.UnpackHigh(c, m); + Vector256 yoHi = Avx.UnpackHigh(y, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); + } +#else + ref Vector cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair cc = default; + Vector4Pair mm = default; + Vector4Pair yy = default; + ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); + ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); + ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); + + var scale = new Vector(1 / maxValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector c = Unsafe.Add(ref cBase, i); + Vector m = Unsafe.Add(ref mBase, i); + Vector y = Unsafe.Add(ref yBase, i); + Vector k = Unsafe.Add(ref kBase, i) * scale; + + c = (c * k) * scale; + m = (m * k) * scale; + y = (y * k) * scale; + + ccRefAsVector = c; + mmRefAsVector = m; + yyRefAsVector = y; + + // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref cc, ref mm, ref yy); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs similarity index 74% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs index cf0bc2c92..459a33a96 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScale.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs @@ -10,16 +10,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromGrayscale : JpegColorConverter + internal sealed class FromGrayscaleBasic : JpegColorConverter { - public FromGrayscale(int precision) + public FromGrayscaleBasic(int precision) : base(JpegColorSpace.Grayscale, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - var maximum = 1 / this.MaximumValue; + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); ref float sBase = ref MemoryMarshal.GetReference(values.Component0); @@ -35,4 +40,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs new file mode 100644 index 000000000..c25057c6f --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs @@ -0,0 +1,109 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleVector8 : JpegColorConverter + { + public FromGrayscaleVector8(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); + } + + FromGrayscaleBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / maxValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); + + g = Avx.Multiply(g, scale); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); + Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); + Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); + Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); + } +#else + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair gg = default; + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + + var scale = new Vector(1 / maxValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector g = Unsafe.Add(ref gBase, i); + g *= scale; + + ggRefAsVector = g; + + // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref gg); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs similarity index 76% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs index 25889a6df..f9faf1435 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgb.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs @@ -8,23 +8,27 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromRgb : JpegColorConverter + internal sealed class FromRgbBasic : JpegColorConverter { - public FromRgb(int precision) + public FromRgbBasic(int precision) : base(JpegColorSpace.RGB, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) { - // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! + ConvertCore(values, result, this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { ReadOnlySpan rVals = values.Component0; ReadOnlySpan gVals = values.Component1; ReadOnlySpan bVals = values.Component2; var v = new Vector4(0, 0, 0, 1); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) @@ -44,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs new file mode 100644 index 000000000..ebaa51211 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs @@ -0,0 +1,132 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbVector8 : JpegColorConverter + { + public FromRgbVector8(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); + } + + FromRgbBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / maxValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); + Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); + Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); + + Vector256 rgLo = Avx.UnpackLow(r, g); + Vector256 boLo = Avx.UnpackLow(b, one); + Vector256 rgHi = Avx.UnpackHigh(r, g); + Vector256 boHi = Avx.UnpackHigh(b, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); + } +#else + ref Vector rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / maxValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector r = Unsafe.Add(ref rBase, i); + Vector g = Unsafe.Add(ref gBase, i); + Vector b = Unsafe.Add(ref bBase, i); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index 1319b56ee..05258fb9b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -9,8 +9,9 @@ using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using static SixLabors.ImageSharp.SimdUtils; -#endif +#else using SixLabors.ImageSharp.Tuples; +#endif // ReSharper disable ImpureMethodCallOnReadonlyValueField namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters @@ -98,7 +99,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); - // TODO: We should be savving to RGBA not Vector4 + // TODO: We should be saving to RGBA not Vector4 r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs similarity index 57% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs index 1137cdc0e..100888956 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccK.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs @@ -8,14 +8,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYccK : JpegColorConverter + internal sealed class FromYccKBasic : JpegColorConverter { - public FromYccK(int precision) + public FromYccKBasic(int precision) : base(JpegColorSpace.Ycck, precision) { } public override void ConvertToRgba(in ComponentValues values, Span result) + { + ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) { // TODO: We can optimize a lot here with Vector and SRCS.Unsafe()! ReadOnlySpan yVals = values.Component0; @@ -25,19 +30,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters var v = new Vector4(0, 0, 0, 1F); - var maximum = 1 / this.MaximumValue; + var maximum = 1 / maxValue; var scale = new Vector4(maximum, maximum, maximum, 1F); for (int i = 0; i < result.Length; i++) { float y = yVals[i]; - float cb = cbVals[i] - this.HalfValue; - float cr = crVals[i] - this.HalfValue; - float k = kVals[i] / this.MaximumValue; + float cb = cbVals[i] - halfValue; + float cr = crVals[i] - halfValue; + float k = kVals[i] / maxValue; - v.X = (this.MaximumValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; - v.Y = (this.MaximumValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k; - v.Z = (this.MaximumValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; + v.X = (maxValue - MathF.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; + v.Y = (maxValue - MathF.Round(y - (0.344136F * cb) - (0.714136F * cr), MidpointRounding.AwayFromZero)) * k; + v.Z = (maxValue - MathF.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; v.W = 1F; v *= scale; @@ -47,4 +52,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs new file mode 100644 index 000000000..8645fb8fa --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs @@ -0,0 +1,193 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#else +using SixLabors.ImageSharp.Tuples; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKVector8 : JpegColorConverter + { + public FromYccKVector8(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; + + public override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % 8; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); + } + + FromYccKBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); + } + + internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!IsAvailable) + { + throw new InvalidOperationException( + "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-halfValue); + var scale = Vector256.Create(1 / maxValue); + var max = Vector256.Create(maxValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + k = Avx2.PermuteVar8x32(k, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); + g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); + b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); + + r = Avx.Multiply(Avx.Multiply(r, k), scale); + g = Avx.Multiply(Avx.Multiply(g, k), scale); + b = Avx.Multiply(Avx.Multiply(b, k), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#else + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-halfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / maxValue); + var max = new Vector(maxValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + Vector k = Unsafe.Add(ref kBase, i) / max; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = (max - r.FastRound()) * k; + g = (max - g.FastRound()) * k; + b = (max - b.FastRound()) * k; + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } +#endif + } + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 7c780700c..c49ec7e38 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -21,17 +21,17 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { // 8-bit converters GetYCbCrConverter(8), - new FromYccK(8), - new FromCmyk(8), - new FromGrayscale(8), - new FromRgb(8), + GetYccKConverter(8), + GetCmykConverter(8), + GetGrayScaleConverter(8), + GetRgbConverter(8), // 12-bit converters GetYCbCrConverter(12), - new FromYccK(12), - new FromCmyk(12), - new FromGrayscale(12), - new FromRgb(12), + GetYccKConverter(12), + GetCmykConverter(12), + GetGrayScaleConverter(12), + GetRgbConverter(12), }; /// @@ -94,6 +94,30 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters private static JpegColorConverter GetYCbCrConverter(int precision) => FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision); + /// + /// Returns the for the YccK colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetYccKConverter(int precision) => + FromYccKVector8.IsAvailable ? (JpegColorConverter)new FromYccKVector8(precision) : new FromYccKBasic(precision); + + /// + /// Returns the for the CMYK colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetCmykConverter(int precision) => + FromCmykVector8.IsAvailable ? (JpegColorConverter)new FromCmykVector8(precision) : new FromCmykBasic(precision); + + /// + /// Returns the for the gray scale colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetGrayScaleConverter(int precision) => + FromGrayscaleVector8.IsAvailable ? (JpegColorConverter)new FromGrayscaleVector8(precision) : new FromGrayscaleBasic(precision); + + /// + /// Returns the for the RGB colorspace that matches the current CPU architecture. + /// + private static JpegColorConverter GetRgbConverter(int precision) => + FromRgbVector8.IsAvailable ? (JpegColorConverter)new FromRgbVector8(precision) : new FromRgbBasic(precision); + /// /// A stack-only struct to reference the input buffers using -s. /// @@ -229,6 +253,52 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters this.V7.Z = b.B.W; this.V7.W = 1f; } + + /// + /// Pack (g0,g1...g7) vector values as (g0,g0,g0,1), (g1,g1,g1,1) ... + /// + public void Pack(ref Vector4Pair g) + { + this.V0.X = g.A.X; + this.V0.Y = g.A.X; + this.V0.Z = g.A.X; + this.V0.W = 1f; + + this.V1.X = g.A.Y; + this.V1.Y = g.A.Y; + this.V1.Z = g.A.Y; + this.V1.W = 1f; + + this.V2.X = g.A.Z; + this.V2.Y = g.A.Z; + this.V2.Z = g.A.Z; + this.V2.W = 1f; + + this.V3.X = g.A.W; + this.V3.Y = g.A.W; + this.V3.Z = g.A.W; + this.V3.W = 1f; + + this.V4.X = g.B.X; + this.V4.Y = g.B.X; + this.V4.Z = g.B.X; + this.V4.W = 1f; + + this.V5.X = g.B.Y; + this.V5.Y = g.B.Y; + this.V5.Z = g.B.Y; + this.V5.W = 1f; + + this.V6.X = g.B.Z; + this.V6.Y = g.B.Z; + this.V6.Z = g.B.Z; + this.V6.W = 1f; + + this.V7.X = g.B.W; + this.V7.Y = g.B.W; + this.V7.Z = g.B.W; + this.V7.W = 1f; + } } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 860f9c396..9abe31884 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -183,7 +183,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); var expected = new Rgb(v.X, v.Y, v.Z); - Assert.Equal(expected, actual); + Assert.Equal(expected, actual, ColorSpaceComparer); Assert.Equal(1, rgba.W); } } From da4764645eb21f2a27953b1af765f09ca612b12b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 4 Nov 2020 14:10:18 +0000 Subject: [PATCH 100/131] Unroll XYZWShuffle4Slice3 --- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 70800f9de..083b0b2a9 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -65,9 +65,27 @@ namespace SixLabors.ImageSharp ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); int n = source.Length / 4; - for (int i = 0; i < n; i++) + int m = ImageMaths.Modulo4(n); + int u = n - m; + + ref Byte3 dEnd = ref Unsafe.Add(ref dBase, u); + + while (Unsafe.IsAddressLessThan(ref dBase, ref dEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + Unsafe.Add(ref dBase, 1) = Unsafe.As(ref Unsafe.Add(ref sBase, 1)); + Unsafe.Add(ref dBase, 2) = Unsafe.As(ref Unsafe.Add(ref sBase, 2)); + Unsafe.Add(ref dBase, 3) = Unsafe.As(ref Unsafe.Add(ref sBase, 3)); + dBase = ref Unsafe.Add(ref dBase, 4); + sBase = ref Unsafe.Add(ref sBase, 4); + } + + if (m > 0) { - Unsafe.Add(ref dBase, i) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + for (int i = u; i < n; i++) + { + Unsafe.Add(ref dBase, i) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); + } } } } From 863c672be1499dff4986e5ba2a517de4f241b566 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 4 Nov 2020 16:55:17 +0000 Subject: [PATCH 101/131] Fix shuffle +m slice fallback --- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 30 +++++++++---------- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 18 ++++++----- .../Color/Bulk/FromVector4.cs | 2 +- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 1f3fce541..fbd4a343d 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -72,29 +72,29 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ref byte rs = ref MemoryMarshal.GetReference(source); - ref byte rd = ref MemoryMarshal.GetReference(dest); + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); - ref byte rsEnd = ref Unsafe.Add(ref rs, source.Length); - ref byte rsLoopEnd = ref Unsafe.Subtract(ref rsEnd, 4); + ref byte sEnd = ref Unsafe.Add(ref sBase, source.Length); + ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4); - while (Unsafe.IsAddressLessThan(ref rs, ref rsLoopEnd)) + while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd)) { - Unsafe.As(ref rd) = Unsafe.As(ref rs) | 0xFF000000; + Unsafe.As(ref dBase) = Unsafe.As(ref sBase) | 0xFF000000; - rs = ref Unsafe.Add(ref rs, 3); - rd = ref Unsafe.Add(ref rd, 4); + sBase = ref Unsafe.Add(ref sBase, 3); + dBase = ref Unsafe.Add(ref dBase, 4); } - while (Unsafe.IsAddressLessThan(ref rs, ref rsEnd)) + while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd)) { - Unsafe.Add(ref rd, 0) = Unsafe.Add(ref rs, 0); - Unsafe.Add(ref rd, 1) = Unsafe.Add(ref rs, 1); - Unsafe.Add(ref rd, 2) = Unsafe.Add(ref rs, 2); - Unsafe.Add(ref rd, 3) = byte.MaxValue; + Unsafe.Add(ref dBase, 0) = Unsafe.Add(ref sBase, 0); + Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1); + Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2); + Unsafe.Add(ref dBase, 3) = byte.MaxValue; - rs = ref Unsafe.Add(ref rs, 3); - rd = ref Unsafe.Add(ref rd, 4); + sBase = ref Unsafe.Add(ref sBase, 3); + dBase = ref Unsafe.Add(ref dBase, 4); } } } diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 083b0b2a9..e11956c10 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -68,24 +68,26 @@ namespace SixLabors.ImageSharp int m = ImageMaths.Modulo4(n); int u = n - m; - ref Byte3 dEnd = ref Unsafe.Add(ref dBase, u); + ref uint sLoopEnd = ref Unsafe.Add(ref sBase, u); + ref uint sEnd = ref Unsafe.Add(ref sBase, n); - while (Unsafe.IsAddressLessThan(ref dBase, ref dEnd)) + while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd)) { Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); Unsafe.Add(ref dBase, 1) = Unsafe.As(ref Unsafe.Add(ref sBase, 1)); Unsafe.Add(ref dBase, 2) = Unsafe.As(ref Unsafe.Add(ref sBase, 2)); Unsafe.Add(ref dBase, 3) = Unsafe.As(ref Unsafe.Add(ref sBase, 3)); - dBase = ref Unsafe.Add(ref dBase, 4); + sBase = ref Unsafe.Add(ref sBase, 4); + dBase = ref Unsafe.Add(ref dBase, 4); } - if (m > 0) + while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd)) { - for (int i = u; i < n; i++) - { - Unsafe.Add(ref dBase, i) = Unsafe.As(ref Unsafe.Add(ref sBase, i)); - } + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + + sBase = ref Unsafe.Add(ref sBase, 1); + dBase = ref Unsafe.Add(ref dBase, 1); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index 1db9147ad..04ca8cd65 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -91,7 +91,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk SimdUtils.BasicIntrinsics256.NormalizedFloatToByteSaturate(sBytes, dFloats); } - [Benchmark(Baseline = true)] + [Benchmark] public void ExtendedIntrinsic() { Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); From e550caaed7dda9f59df810bf44358757b9dc5acc Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Wed, 4 Nov 2020 18:03:36 +0100 Subject: [PATCH 102/131] Refactor JpegColorConverters --- src/ImageSharp/Common/Helpers/SimdUtils.cs | 12 ++ ...egColorConverter.Avx2JpegColorConverter.cs | 22 ++ ...gColorConverter.BasicJpegColorConverter.cs | 18 ++ .../JpegColorConverter.FromCmykAvx2.cs | 81 ++++++++ .../JpegColorConverter.FromCmykBasic.cs | 2 +- .../JpegColorConverter.FromCmykSimdAvx2.cs | 145 ------------- .../JpegColorConverter.FromCmykVector8.cs | 71 +++++++ .../JpegColorConverter.FromGrayScaleAvx2.cs | 63 ++++++ .../JpegColorConverter.FromGrayScaleBasic.cs | 2 +- ...pegColorConverter.FromGrayScaleSimdAvx2.cs | 109 ---------- ...JpegColorConverter.FromGrayScaleVector8.cs | 53 +++++ .../JpegColorConverter.FromRgbAvx2.cs | 72 +++++++ .../JpegColorConverter.FromRgbBasic.cs | 2 +- .../JpegColorConverter.FromRgbSimdAvx2.cs | 132 ------------ .../JpegColorConverter.FromRgbVector8.cs | 67 ++++++ .../JpegColorConverter.FromYCbCrAvx2.cs | 101 +++++++++ .../JpegColorConverter.FromYCbCrBasic.cs | 4 +- .../JpegColorConverter.FromYCbCrSimdAvx2.cs | 183 ----------------- ... => JpegColorConverter.FromYCbCrVector.cs} | 30 +-- .../JpegColorConverter.FromYCbCrVector8.cs | 87 ++++++++ .../JpegColorConverter.FromYccKAvx2.cs | 110 ++++++++++ .../JpegColorConverter.FromYccKBasic.cs | 2 +- .../JpegColorConverter.FromYccKSimdAvx2.cs | 193 ------------------ .../JpegColorConverter.FromYccKVector8.cs | 91 +++++++++ ...olorConverter.Vector8JpegColorConverter.cs | 18 ++ ...rConverter.VectorizedJpegColorConverter.cs | 46 +++++ .../ColorConverters/JpegColorConverter.cs | 99 ++++++--- .../Codecs/Jpeg/YCbCrColorConversion.cs | 14 +- .../Formats/Jpg/JpegColorConverterTests.cs | 68 +++--- 29 files changed, 1042 insertions(+), 855 deletions(-) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromYCbCrSimd.cs => JpegColorConverter.FromYCbCrVector.cs} (78%) create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs create mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index df533cedf..073a5a9f6 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -25,6 +25,18 @@ namespace SixLabors.ImageSharp public static bool HasVector8 { get; } = Vector.IsHardwareAccelerated && Vector.Count == 8 && Vector.Count == 8; + public static bool HasAvx2 + { + get + { +#if SUPPORTS_RUNTIME_INTRINSICS + return Avx2.IsSupported; +#else + return false; +#endif + } + } + /// /// Transform all scalars in 'v' in a way that converting them to would have rounding semantics. /// diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs new file mode 100644 index 000000000..ef144fc1c --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs @@ -0,0 +1,22 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class Avx2JpegColorConverter : VectorizedJpegColorConverter + { + protected Avx2JpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision, 8) + { + } + + protected sealed override bool IsAvailable => SimdUtils.HasAvx2; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs new file mode 100644 index 000000000..ed2e2cd76 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.BasicJpegColorConverter.cs @@ -0,0 +1,18 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class BasicJpegColorConverter : JpegColorConverter + { + protected BasicJpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision) + { + } + + protected override bool IsAvailable => true; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs new file mode 100644 index 000000000..a9e3e5b51 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs @@ -0,0 +1,81 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykAvx2 : Avx2JpegColorConverter + { + public FromCmykAvx2(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); + Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); + Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); + Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); + + k = Avx.Multiply(k, scale); + + c = Avx.Multiply(Avx.Multiply(c, k), scale); + m = Avx.Multiply(Avx.Multiply(m, k), scale); + y = Avx.Multiply(Avx.Multiply(y, k), scale); + + Vector256 cmLo = Avx.UnpackLow(c, m); + Vector256 yoLo = Avx.UnpackLow(y, one); + Vector256 cmHi = Avx.UnpackHigh(c, m); + Vector256 yoHi = Avx.UnpackHigh(y, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromCmykBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs index 117bb2c3f..6cbd52ec3 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromCmykBasic : JpegColorConverter + internal sealed class FromCmykBasic : BasicJpegColorConverter { public FromCmykBasic(int precision) : base(JpegColorSpace.Cmyk, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs deleted file mode 100644 index afd36073a..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykSimdAvx2.cs +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromCmykVector8 : JpegColorConverter - { - public FromCmykVector8(int precision) - : base(JpegColorSpace.Cmyk, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); - } - - FromCmykBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 cBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 mBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector256 kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var scale = Vector256.Create(1 / maxValue); - var one = Vector256.Create(1F); - - // Used for packing - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector256 k = Avx2.PermuteVar8x32(Unsafe.Add(ref kBase, i), vcontrol); - Vector256 c = Avx2.PermuteVar8x32(Unsafe.Add(ref cBase, i), vcontrol); - Vector256 m = Avx2.PermuteVar8x32(Unsafe.Add(ref mBase, i), vcontrol); - Vector256 y = Avx2.PermuteVar8x32(Unsafe.Add(ref yBase, i), vcontrol); - - k = Avx.Multiply(k, scale); - - c = Avx.Multiply(Avx.Multiply(c, k), scale); - m = Avx.Multiply(Avx.Multiply(m, k), scale); - y = Avx.Multiply(Avx.Multiply(y, k), scale); - - Vector256 cmLo = Avx.UnpackLow(c, m); - Vector256 yoLo = Avx.UnpackLow(y, one); - Vector256 cmHi = Avx.UnpackHigh(c, m); - Vector256 yoHi = Avx.UnpackHigh(y, one); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.Shuffle(cmLo, yoLo, 0b01_00_01_00); - Unsafe.Add(ref destination, 1) = Avx.Shuffle(cmLo, yoLo, 0b11_10_11_10); - Unsafe.Add(ref destination, 2) = Avx.Shuffle(cmHi, yoHi, 0b01_00_01_00); - Unsafe.Add(ref destination, 3) = Avx.Shuffle(cmHi, yoHi, 0b11_10_11_10); - } -#else - ref Vector cBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector mBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair cc = default; - Vector4Pair mm = default; - Vector4Pair yy = default; - ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); - ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); - ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); - - var scale = new Vector(1 / maxValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector c = Unsafe.Add(ref cBase, i); - Vector m = Unsafe.Add(ref mBase, i); - Vector y = Unsafe.Add(ref yBase, i); - Vector k = Unsafe.Add(ref kBase, i) * scale; - - c = (c * k) * scale; - m = (m * k) * scale; - y = (y * k) * scale; - - ccRefAsVector = c; - mmRefAsVector = m; - yyRefAsVector = y; - - // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref cc, ref mm, ref yy); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs new file mode 100644 index 000000000..e75634b0f --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykVector8.cs @@ -0,0 +1,71 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromCmykVector8 : Vector8JpegColorConverter + { + public FromCmykVector8(int precision) + : base(JpegColorSpace.Cmyk, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector cBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector mBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair cc = default; + Vector4Pair mm = default; + Vector4Pair yy = default; + ref Vector ccRefAsVector = ref Unsafe.As>(ref cc); + ref Vector mmRefAsVector = ref Unsafe.As>(ref mm); + ref Vector yyRefAsVector = ref Unsafe.As>(ref yy); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector c = Unsafe.Add(ref cBase, i); + Vector m = Unsafe.Add(ref mBase, i); + Vector y = Unsafe.Add(ref yBase, i); + Vector k = Unsafe.Add(ref kBase, i) * scale; + + c = (c * k) * scale; + m = (m * k) * scale; + y = (y * k) * scale; + + ccRefAsVector = c; + mmRefAsVector = m; + yyRefAsVector = y; + + // Collect (c0,c1...c8) (m0,m1...m8) (y0,y1...y8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref cc, ref mm, ref yy); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromCmykBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs new file mode 100644 index 000000000..aebffc3df --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs @@ -0,0 +1,63 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleAvx2 : Avx2JpegColorConverter + { + public FromGrayscaleAvx2(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); + + g = Avx.Multiply(g, scale); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); + Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); + Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); + Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs index 459a33a96..0b7a220d9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleBasic.cs @@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromGrayscaleBasic : JpegColorConverter + internal sealed class FromGrayscaleBasic : BasicJpegColorConverter { public FromGrayscaleBasic(int precision) : base(JpegColorSpace.Grayscale, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs deleted file mode 100644 index c25057c6f..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleSimdAvx2.cs +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromGrayscaleVector8 : JpegColorConverter - { - public FromGrayscaleVector8(int precision) - : base(JpegColorSpace.Grayscale, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); - } - - FromGrayscaleBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var scale = Vector256.Create(1 / maxValue); - var one = Vector256.Create(1F); - - // Used for packing - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); - - g = Avx.Multiply(g, scale); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); - Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); - Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); - Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); - } -#else - ref Vector gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair gg = default; - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - - var scale = new Vector(1 / maxValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector g = Unsafe.Add(ref gBase, i); - g *= scale; - - ggRefAsVector = g; - - // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref gg); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs new file mode 100644 index 000000000..75ea60101 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs @@ -0,0 +1,53 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromGrayscaleVector8 : Vector8JpegColorConverter + { + public FromGrayscaleVector8(int precision) + : base(JpegColorSpace.Grayscale, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair gg = default; + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector g = Unsafe.Add(ref gBase, i); + g *= scale; + + ggRefAsVector = g; + + // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref gg); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs new file mode 100644 index 000000000..8f04c9152 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbAvx2.cs @@ -0,0 +1,72 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbAvx2 : Avx2JpegColorConverter + { + public FromRgbAvx2(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var scale = Vector256.Create(1 / this.MaximumValue); + var one = Vector256.Create(1F); + + // Used for packing + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); + Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); + Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); + + Vector256 rgLo = Avx.UnpackLow(r, g); + Vector256 boLo = Avx.UnpackLow(b, one); + Vector256 rgHi = Avx.UnpackHigh(r, g); + Vector256 boHi = Avx.UnpackHigh(b, one); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); + Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); + Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); + Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromRgbBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs index f9faf1435..ddca3fe2f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromRgbBasic : JpegColorConverter + internal sealed class FromRgbBasic : BasicJpegColorConverter { public FromRgbBasic(int precision) : base(JpegColorSpace.RGB, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs deleted file mode 100644 index ebaa51211..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbSimdAvx2.cs +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromRgbVector8 : JpegColorConverter - { - public FromRgbVector8(int precision) - : base(JpegColorSpace.RGB, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue); - } - - FromRgbBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromGrayscaleVector8 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 rBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 bBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var scale = Vector256.Create(1 / maxValue); - var one = Vector256.Create(1F); - - // Used for packing - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector256 r = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref rBase, i), vcontrol), scale); - Vector256 g = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol), scale); - Vector256 b = Avx.Multiply(Avx2.PermuteVar8x32(Unsafe.Add(ref bBase, i), vcontrol), scale); - - Vector256 rgLo = Avx.UnpackLow(r, g); - Vector256 boLo = Avx.UnpackLow(b, one); - Vector256 rgHi = Avx.UnpackHigh(r, g); - Vector256 boHi = Avx.UnpackHigh(b, one); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.Shuffle(rgLo, boLo, 0b01_00_01_00); - Unsafe.Add(ref destination, 1) = Avx.Shuffle(rgLo, boLo, 0b11_10_11_10); - Unsafe.Add(ref destination, 2) = Avx.Shuffle(rgHi, boHi, 0b01_00_01_00); - Unsafe.Add(ref destination, 3) = Avx.Shuffle(rgHi, boHi, 0b11_10_11_10); - } -#else - ref Vector rBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector bBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - - var scale = new Vector(1 / maxValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector r = Unsafe.Add(ref rBase, i); - Vector g = Unsafe.Add(ref gBase, i); - Vector b = Unsafe.Add(ref bBase, i); - r *= scale; - g *= scale; - b *= scale; - - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref rr, ref gg, ref bb); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs new file mode 100644 index 000000000..763064d1e --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromRgbVector8.cs @@ -0,0 +1,67 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromRgbVector8 : Vector8JpegColorConverter + { + public FromRgbVector8(int precision) + : base(JpegColorSpace.RGB, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector rBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector gBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector bBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + Vector r = Unsafe.Add(ref rBase, i); + Vector g = Unsafe.Add(ref gBase, i); + Vector b = Unsafe.Add(ref bBase, i); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromRgbBasic.ConvertCore(values, result, this.MaximumValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs new file mode 100644 index 000000000..f3a063620 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrAvx2.cs @@ -0,0 +1,101 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +// ReSharper disable ImpureMethodCallOnReadonlyValueField +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYCbCrAvx2 : Avx2JpegColorConverter + { + public FromYCbCrAvx2(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + #if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-this.HalfValue); + var scale = Vector256.Create(1 / this.MaximumValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + // TODO: We should be saving to RGBA not Vector4 + r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); + g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); + b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs index 31fc05461..352e4acb7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrBasic : JpegColorConverter + internal sealed class FromYCbCrBasic : BasicJpegColorConverter { public FromYCbCrBasic(int precision) : base(JpegColorSpace.YCbCr, precision) @@ -48,4 +48,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters } } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs deleted file mode 100644 index 05258fb9b..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -// ReSharper disable ImpureMethodCallOnReadonlyValueField -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromYCbCrSimdVector8 : JpegColorConverter - { - public FromYCbCrSimdVector8(int precision) - : base(JpegColorSpace.YCbCr, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } - - FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - /// - /// SIMD convert using buffers of sizes divisible by 8. - /// - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var chromaOffset = Vector256.Create(-halfValue); - var scale = Vector256.Create(1 / maxValue); - var rCrMult = Vector256.Create(1.402F); - var gCbMult = Vector256.Create(-0.344136F); - var gCrMult = Vector256.Create(-0.714136F); - var bCbMult = Vector256.Create(1.772F); - - // Used for packing. - var va = Vector256.Create(1F); - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - Vector256 y = Unsafe.Add(ref yBase, i); - Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); - Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); - - y = Avx2.PermuteVar8x32(y, vcontrol); - cb = Avx2.PermuteVar8x32(cb, vcontrol); - cr = Avx2.PermuteVar8x32(cr, vcontrol); - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); - Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); - Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); - - // TODO: We should be saving to RGBA not Vector4 - r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); - g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); - b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); - - Vector256 vte = Avx.UnpackLow(r, b); - Vector256 vto = Avx.UnpackLow(g, va); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); - - vte = Avx.UnpackHigh(r, b); - vto = Avx.UnpackHigh(g, va); - - Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); - } -#else - ref Vector yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - var chromaOffset = new Vector(-halfValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - - ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - - var scale = new Vector(1 / maxValue); - - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - Vector y = Unsafe.Add(ref yBase, i); - Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; - Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector r = y + (cr * new Vector(1.402F)); - Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); - Vector b = y + (cb * new Vector(1.772F)); - - r = r.FastRound(); - g = g.FastRound(); - b = b.FastRound(); - r *= scale; - g *= scale; - b *= scale; - - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref rr, ref gg, ref bb); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs similarity index 78% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs index 541a03615..65a7c42d8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs @@ -5,36 +5,22 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; - using SixLabors.ImageSharp.Tuples; namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrSimd : JpegColorConverter + internal sealed class FromYCbCrVector : VectorizedJpegColorConverter { - public FromYCbCrSimd(int precision) - : base(JpegColorSpace.YCbCr, precision) + public FromYCbCrVector(int precision) + : base(JpegColorSpace.YCbCr, precision, 8) { } - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } + protected override bool IsAvailable => true; - FromYCbCrBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - /// - /// SIMD convert using buffers of sizes divisible by 8. - /// - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!"); @@ -48,7 +34,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector4Octet resultBase = ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - var chromaOffset = new Vector4(-halfValue); + var chromaOffset = new Vector4(-this.HalfValue); + var maxValue = this.MaximumValue; // Walking 8 elements at one step: int n = result.Length / 8; @@ -112,6 +99,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters destination.Pack(ref r, ref g, ref b); } } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs new file mode 100644 index 000000000..abacf7161 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector8.cs @@ -0,0 +1,87 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +// ReSharper disable ImpureMethodCallOnReadonlyValueField +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYCbCrVector8 : Vector8JpegColorConverter + { + public FromYCbCrVector8(int precision) + : base(JpegColorSpace.YCbCr, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-this.HalfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = r.FastRound(); + g = g.FastRound(); + b = b.FastRound(); + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYCbCrBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs new file mode 100644 index 000000000..ea0132e1e --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKAvx2.cs @@ -0,0 +1,110 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKAvx2 : Avx2JpegColorConverter + { + public FromYccKAvx2(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector256 kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-this.HalfValue); + var scale = Vector256.Create(1 / this.MaximumValue); + var max = Vector256.Create(this.MaximumValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + k = Avx2.PermuteVar8x32(k, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = + HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); + g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); + b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); + + r = Avx.Multiply(Avx.Multiply(r, k), scale); + g = Avx.Multiply(Avx.Multiply(g, k), scale); + b = Avx.Multiply(Avx.Multiply(b, k), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#endif + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs index 100888956..778e5325f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKBasic.cs @@ -8,7 +8,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYccKBasic : JpegColorConverter + internal sealed class FromYccKBasic : BasicJpegColorConverter { public FromYccKBasic(int precision) : base(JpegColorSpace.Ycck, precision) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs deleted file mode 100644 index 8645fb8fa..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKSimdAvx2.cs +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static SixLabors.ImageSharp.SimdUtils; -#else -using SixLabors.ImageSharp.Tuples; -#endif - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromYccKVector8 : JpegColorConverter - { - public FromYccKVector8(int precision) - : base(JpegColorSpace.Ycck, precision) - { - } - - public static bool IsAvailable => Vector.IsHardwareAccelerated && SimdUtils.HasVector8; - - public override void ConvertToRgba(in ComponentValues values, Span result) - { - int remainder = result.Length % 8; - int simdCount = result.Length - remainder; - if (simdCount > 0) - { - ConvertCore(values.Slice(0, simdCount), result.Slice(0, simdCount), this.MaximumValue, this.HalfValue); - } - - FromYccKBasic.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder), this.MaximumValue, this.HalfValue); - } - - internal static void ConvertCore(in ComponentValues values, Span result, float maxValue, float halfValue) - { - // This implementation is actually AVX specific. - // An AVX register is capable of storing 8 float-s. - if (!IsAvailable) - { - throw new InvalidOperationException( - "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); - } - -#if SUPPORTS_RUNTIME_INTRINSICS - ref Vector256 yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector256 cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector256 crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector256 kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector256 resultBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); - - // Used for the color conversion - var chromaOffset = Vector256.Create(-halfValue); - var scale = Vector256.Create(1 / maxValue); - var max = Vector256.Create(maxValue); - var rCrMult = Vector256.Create(1.402F); - var gCbMult = Vector256.Create(-0.344136F); - var gCrMult = Vector256.Create(-0.714136F); - var bCbMult = Vector256.Create(1.772F); - - // Used for packing. - var va = Vector256.Create(1F); - ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 vcontrol = Unsafe.As>(ref control); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - // k = kVals[i] / 256F; - Vector256 y = Unsafe.Add(ref yBase, i); - Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); - Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); - Vector256 k = Avx.Divide(Unsafe.Add(ref kBase, i), max); - - y = Avx2.PermuteVar8x32(y, vcontrol); - cb = Avx2.PermuteVar8x32(cb, vcontrol); - cr = Avx2.PermuteVar8x32(cr, vcontrol); - k = Avx2.PermuteVar8x32(k, vcontrol); - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); - Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); - Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); - - r = Avx.Subtract(max, Avx.RoundToNearestInteger(r)); - g = Avx.Subtract(max, Avx.RoundToNearestInteger(g)); - b = Avx.Subtract(max, Avx.RoundToNearestInteger(b)); - - r = Avx.Multiply(Avx.Multiply(r, k), scale); - g = Avx.Multiply(Avx.Multiply(g, k), scale); - b = Avx.Multiply(Avx.Multiply(b, k), scale); - - Vector256 vte = Avx.UnpackLow(r, b); - Vector256 vto = Avx.UnpackLow(g, va); - - ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); - - destination = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); - - vte = Avx.UnpackHigh(r, b); - vto = Avx.UnpackHigh(g, va); - - Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); - Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); - } -#else - ref Vector yBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - ref Vector cbBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); - ref Vector crBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); - ref Vector kBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - var chromaOffset = new Vector(-halfValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - - Vector4Pair rr = default; - Vector4Pair gg = default; - Vector4Pair bb = default; - - ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); - - var scale = new Vector(1 / maxValue); - var max = new Vector(maxValue); - - for (int i = 0; i < n; i++) - { - // y = yVals[i]; - // cb = cbVals[i] - 128F; - // cr = crVals[i] - 128F; - // k = kVals[i] / 256F; - Vector y = Unsafe.Add(ref yBase, i); - Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; - Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; - Vector k = Unsafe.Add(ref kBase, i) / max; - - // r = y + (1.402F * cr); - // g = y - (0.344136F * cb) - (0.714136F * cr); - // b = y + (1.772F * cb); - // Adding & multiplying 8 elements at one time: - Vector r = y + (cr * new Vector(1.402F)); - Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); - Vector b = y + (cb * new Vector(1.772F)); - - r = (max - r.FastRound()) * k; - g = (max - g.FastRound()) * k; - b = (max - b.FastRound()) * k; - r *= scale; - g *= scale; - b *= scale; - - rrRefAsVector = r; - ggRefAsVector = g; - bbRefAsVector = b; - - // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref rr, ref gg, ref bb); - } -#endif - } - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs new file mode 100644 index 000000000..c360392de --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYccKVector8.cs @@ -0,0 +1,91 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Tuples; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal sealed class FromYccKVector8 : Vector8JpegColorConverter + { + public FromYccKVector8(int precision) + : base(JpegColorSpace.Ycck, precision) + { + } + + protected override void ConvertCoreVectorized(in ComponentValues values, Span result) + { + ref Vector yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + ref Vector kBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component3)); + + ref Vector4Octet resultBase = + ref Unsafe.As(ref MemoryMarshal.GetReference(result)); + + var chromaOffset = new Vector(-this.HalfValue); + + // Walking 8 elements at one step: + int n = result.Length / 8; + + Vector4Pair rr = default; + Vector4Pair gg = default; + Vector4Pair bb = default; + + ref Vector rrRefAsVector = ref Unsafe.As>(ref rr); + ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); + ref Vector bbRefAsVector = ref Unsafe.As>(ref bb); + + var scale = new Vector(1 / this.MaximumValue); + var max = new Vector(this.MaximumValue); + + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + // k = kVals[i] / 256F; + Vector y = Unsafe.Add(ref yBase, i); + Vector cb = Unsafe.Add(ref cbBase, i) + chromaOffset; + Vector cr = Unsafe.Add(ref crBase, i) + chromaOffset; + Vector k = Unsafe.Add(ref kBase, i) / max; + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector r = y + (cr * new Vector(1.402F)); + Vector g = y - (cb * new Vector(0.344136F)) - (cr * new Vector(0.714136F)); + Vector b = y + (cb * new Vector(1.772F)); + + r = (max - r.FastRound()) * k; + g = (max - g.FastRound()) * k; + b = (max - b.FastRound()) * k; + r *= scale; + g *= scale; + b *= scale; + + rrRefAsVector = r; + ggRefAsVector = g; + bbRefAsVector = b; + + // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: + ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); + destination.Pack(ref rr, ref gg, ref bb); + } + } + + protected override void ConvertCore(in ComponentValues values, Span result) => + FromYccKBasic.ConvertCore(values, result, this.MaximumValue, this.HalfValue); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs new file mode 100644 index 000000000..3e9b889db --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Vector8JpegColorConverter.cs @@ -0,0 +1,18 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class Vector8JpegColorConverter : VectorizedJpegColorConverter + { + protected Vector8JpegColorConverter(JpegColorSpace colorSpace, int precision) + : base(colorSpace, precision, 8) + { + } + + protected sealed override bool IsAvailable => SimdUtils.HasVector8; + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs new file mode 100644 index 000000000..522be82c2 --- /dev/null +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.VectorizedJpegColorConverter.cs @@ -0,0 +1,46 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; + +namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters +{ + internal abstract partial class JpegColorConverter + { + internal abstract class VectorizedJpegColorConverter : JpegColorConverter + { + private readonly int vectorSize; + + protected VectorizedJpegColorConverter(JpegColorSpace colorSpace, int precision, int vectorSize) + : base(colorSpace, precision) + { + this.vectorSize = vectorSize; + } + + public sealed override void ConvertToRgba(in ComponentValues values, Span result) + { + int remainder = result.Length % this.vectorSize; + int simdCount = result.Length - remainder; + if (simdCount > 0) + { + // This implementation is actually AVX specific. + // An AVX register is capable of storing 8 float-s. + if (!this.IsAvailable) + { + throw new InvalidOperationException( + "This converter can be used only on architecture having 256 byte floating point SIMD registers!"); + } + + this.ConvertCoreVectorized(values.Slice(0, simdCount), result.Slice(0, simdCount)); + } + + this.ConvertCore(values.Slice(simdCount, remainder), result.Slice(simdCount, remainder)); + } + + protected abstract void ConvertCoreVectorized(in ComponentValues values, Span result); + + protected abstract void ConvertCore(in ComponentValues values, Span result); + } + } +} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index c49ec7e38..7b3c11938 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Numerics; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; @@ -17,22 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// /// The available converters /// - private static readonly JpegColorConverter[] Converters = - { - // 8-bit converters - GetYCbCrConverter(8), - GetYccKConverter(8), - GetCmykConverter(8), - GetGrayScaleConverter(8), - GetRgbConverter(8), - - // 12-bit converters - GetYCbCrConverter(12), - GetYccKConverter(12), - GetCmykConverter(12), - GetGrayScaleConverter(12), - GetRgbConverter(12), - }; + private static readonly JpegColorConverter[] Converters = CreateConverters(); /// /// Initializes a new instance of the class. @@ -45,6 +31,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters this.HalfValue = MathF.Ceiling(this.MaximumValue / 2); } + /// + /// Gets a value indicating whether this is available + /// on the current runtime and CPU architecture. + /// + protected abstract bool IsAvailable { get; } + /// /// Gets the of this converter. /// @@ -89,34 +81,79 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters public abstract void ConvertToRgba(in ComponentValues values, Span result); /// - /// Returns the for the YCbCr colorspace that matches the current CPU architecture. + /// Returns the s for all supported colorspaces and precisions. /// - private static JpegColorConverter GetYCbCrConverter(int precision) => - FromYCbCrSimdVector8.IsAvailable ? (JpegColorConverter)new FromYCbCrSimdVector8(precision) : new FromYCbCrSimd(precision); + private static JpegColorConverter[] CreateConverters() + { + var converters = new List(); + + // 8-bit converters + converters.AddRange(GetYCbCrConverters(8)); + converters.AddRange(GetYccKConverters(8)); + converters.AddRange(GetCmykConverters(8)); + converters.AddRange(GetGrayScaleConverters(8)); + converters.AddRange(GetRgbConverters(8)); + + // 8-bit converters + converters.AddRange(GetYCbCrConverters(12)); + converters.AddRange(GetYccKConverters(12)); + converters.AddRange(GetCmykConverters(12)); + converters.AddRange(GetGrayScaleConverters(12)); + converters.AddRange(GetRgbConverters(12)); + + return converters.Where(x => x.IsAvailable).ToArray(); + } /// - /// Returns the for the YccK colorspace that matches the current CPU architecture. + /// Returns the s for the YCbCr colorspace. /// - private static JpegColorConverter GetYccKConverter(int precision) => - FromYccKVector8.IsAvailable ? (JpegColorConverter)new FromYccKVector8(precision) : new FromYccKBasic(precision); + private static IEnumerable GetYCbCrConverters(int precision) + { + yield return new FromYCbCrAvx2(precision); + yield return new FromYCbCrVector8(precision); + yield return new FromYCbCrVector(precision); + yield return new FromYCbCrBasic(precision); + } /// - /// Returns the for the CMYK colorspace that matches the current CPU architecture. + /// Returns the s for the YccK colorspace. /// - private static JpegColorConverter GetCmykConverter(int precision) => - FromCmykVector8.IsAvailable ? (JpegColorConverter)new FromCmykVector8(precision) : new FromCmykBasic(precision); + private static IEnumerable GetYccKConverters(int precision) + { + yield return new FromYccKAvx2(precision); + yield return new FromYccKVector8(precision); + yield return new FromYccKBasic(precision); + } /// - /// Returns the for the gray scale colorspace that matches the current CPU architecture. + /// Returns the s for the CMYK colorspace. /// - private static JpegColorConverter GetGrayScaleConverter(int precision) => - FromGrayscaleVector8.IsAvailable ? (JpegColorConverter)new FromGrayscaleVector8(precision) : new FromGrayscaleBasic(precision); + private static IEnumerable GetCmykConverters(int precision) + { + yield return new FromCmykAvx2(precision); + yield return new FromCmykVector8(precision); + yield return new FromCmykBasic(precision); + } /// - /// Returns the for the RGB colorspace that matches the current CPU architecture. + /// Returns the s for the gray scale colorspace. /// - private static JpegColorConverter GetRgbConverter(int precision) => - FromRgbVector8.IsAvailable ? (JpegColorConverter)new FromRgbVector8(precision) : new FromRgbBasic(precision); + private static IEnumerable GetGrayScaleConverters(int precision) + { + yield return new FromGrayscaleAvx2(precision); + yield return new FromGrayscaleVector8(precision); + yield return new FromGrayscaleBasic(precision); + } + + /// + /// Returns the s for the RGB colorspace. + /// + private static IEnumerable GetRgbConverters(int precision) + { + yield return new FromRgbAvx2(precision); + yield return new FromRgbVector8(precision); + yield return new FromRgbBasic(precision); + } /// /// A stack-only struct to reference the input buffers using -s. diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs index 7b47cf94a..9b4b4568f 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs @@ -41,7 +41,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrBasic.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrBasic(8).ConvertToRgba(values, this.output); } [Benchmark(Baseline = true)] @@ -49,7 +49,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrSimd.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrVector(8).ConvertToRgba(values, this.output); } [Benchmark] @@ -57,7 +57,15 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { var values = new JpegColorConverter.ComponentValues(this.input, 0); - JpegColorConverter.FromYCbCrSimdVector8.ConvertCore(values, this.output, 255F, 128F); + new JpegColorConverter.FromYCbCrVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYCbCrAvx2(8).ConvertToRgba(values, this.output); } private static Buffer2D[] CreateRandomValues( diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 9abe31884..50513b289 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -22,6 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg private static readonly ApproximateColorSpaceComparer ColorSpaceComparer = new ApproximateColorSpaceComparer(Precision); + // int inputBufferLength, int resultBufferLength, int seed public static readonly TheoryData CommonConversionData = new TheoryData { @@ -51,44 +52,30 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed); } - private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYCbCrVector(int inputBufferLength, int resultBufferLength, int seed) { - float y = values.Component0[i]; - float cb = values.Component1[i]; - float cr = values.Component2[i]; - var ycbcr = new YCbCr(y, cb, cr); - - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = ColorSpaceConverter.ToRgb(ycbcr); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + ValidateRgbToYCbCrConversion( + new JpegColorConverter.FromYCbCrVector(8), + 3, + inputBufferLength, + resultBufferLength, + seed); } [Theory] - [InlineData(64, 1)] - [InlineData(16, 2)] - [InlineData(8, 3)] - public void FromYCbCrSimd_ConvertCore(int size, int seed) + [MemberData(nameof(CommonConversionData))] + public void FromYCbCrVector8(int inputBufferLength, int resultBufferLength, int seed) { - JpegColorConverter.ComponentValues values = CreateRandomValues(3, size, seed); - var result = new Vector4[size]; - - JpegColorConverter.FromYCbCrSimd.ConvertCore(values, result, 255, 128); - - for (int i = 0; i < size; i++) + if (!SimdUtils.HasVector8) { - ValidateYCbCr(values, result, i); + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } - } - [Theory] - [MemberData(nameof(CommonConversionData))] - public void FromYCbCrSimd(int inputBufferLength, int resultBufferLength, int seed) - { ValidateRgbToYCbCrConversion( - new JpegColorConverter.FromYCbCrSimd(8), + new JpegColorConverter.FromYCbCrVector8(8), 3, inputBufferLength, resultBufferLength, @@ -97,9 +84,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [MemberData(nameof(CommonConversionData))] - public void FromYCbCrSimdAvx2(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrAvx2(int inputBufferLength, int resultBufferLength, int seed) { - if (!SimdUtils.HasVector8) + if (!SimdUtils.HasAvx2) { this.Output.WriteLine("No AVX2 present, skipping test!"); return; @@ -107,7 +94,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg // JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s); ValidateRgbToYCbCrConversion( - new JpegColorConverter.FromYCbCrSimdVector8(8), + new JpegColorConverter.FromYCbCrAvx2(8), 3, inputBufferLength, resultBufferLength, @@ -138,7 +125,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); var result = new Vector4[count]; - JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrSimd(8) : new JpegColorConverter.FromYCbCrBasic(8); + JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8); // Warm up: converter.ConvertToRgba(values, result); @@ -331,5 +318,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ValidateYCbCr(values, result, i); } } + + private static void ValidateYCbCr(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float y = values.Component0[i]; + float cb = values.Component1[i]; + float cr = values.Component2[i]; + var ycbcr = new YCbCr(y, cb, cr); + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = ColorSpaceConverter.ToRgb(ycbcr); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } } } From 382b5bc1f155a0e2f9dbc68b52704e22d3aa704b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 4 Nov 2020 17:31:27 +0000 Subject: [PATCH 103/131] Inline controls as constants --- .../Helpers/Shuffle/IComponentShuffle.cs | 32 ++++++++++++------- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 8 +++-- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 8 +++-- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 2056075e7..7687a5b95 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -82,9 +82,11 @@ namespace SixLabors.ImageSharp internal readonly struct WXYZShuffle4 : IShuffle4 { - private static readonly byte WXYZ = SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); - - public byte Control => WXYZ; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -106,9 +108,11 @@ namespace SixLabors.ImageSharp internal readonly struct WZYXShuffle4 : IShuffle4 { - private static readonly byte WZYX = SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); - - public byte Control => WZYX; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -130,9 +134,11 @@ namespace SixLabors.ImageSharp internal readonly struct YZWXShuffle4 : IShuffle4 { - private static readonly byte YZWX = SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); - - public byte Control => YZWX; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -154,9 +160,11 @@ namespace SixLabors.ImageSharp internal readonly struct ZYXWShuffle4 : IShuffle4 { - private static readonly byte ZYXW = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); - - public byte Control => ZYXW; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index fbd4a343d..0c2b1d508 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -65,9 +65,11 @@ namespace SixLabors.ImageSharp internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 { - private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - - public byte Control => XYZW; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index e11956c10..86e4174f1 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -54,9 +54,11 @@ namespace SixLabors.ImageSharp internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 { - private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - - public byte Control => XYZW; + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) From 328b37af4bb0058afa7ba2004e64ba028760e79c Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Wed, 4 Nov 2020 18:57:41 +0100 Subject: [PATCH 104/131] Refactor and add tests --- .../Formats/Jpg/JpegColorConverterTests.cs | 438 +++++++++++++----- 1 file changed, 331 insertions(+), 107 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 50513b289..8662a6192 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -42,9 +42,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrBasic(int inputBufferLength, int resultBufferLength, int seed) { - ValidateRgbToYCbCrConversion( + ValidateConversion( new JpegColorConverter.FromYCbCrBasic(8), 3, inputBufferLength, @@ -56,7 +56,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [MemberData(nameof(CommonConversionData))] public void FromYCbCrVector(int inputBufferLength, int resultBufferLength, int seed) { - ValidateRgbToYCbCrConversion( + ValidateConversion( new JpegColorConverter.FromYCbCrVector(8), 3, inputBufferLength, @@ -74,7 +74,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg return; } - ValidateRgbToYCbCrConversion( + ValidateConversion( new JpegColorConverter.FromYCbCrVector8(8), 3, inputBufferLength, @@ -92,8 +92,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg return; } - // JpegColorConverter.FromYCbCrSimdAvx2.LogPlz = s => this.Output.WriteLine(s); - ValidateRgbToYCbCrConversion( + ValidateConversion( new JpegColorConverter.FromYCbCrAvx2(8), 3, inputBufferLength, @@ -103,7 +102,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYCbCr_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCr_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { ValidateConversion( JpegColorSpace.YCbCr, @@ -113,149 +112,269 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed); } - // Benchmark, for local execution only - // [Theory] - // [InlineData(false)] - // [InlineData(true)] - public void BenchmarkYCbCr(bool simd) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromCmykBasic(int inputBufferLength, int resultBufferLength, int seed) { - int count = 2053; - int times = 50000; - - JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); - var result = new Vector4[count]; + ValidateConversion( + new JpegColorConverter.FromCmykBasic(8), + 4, + inputBufferLength, + resultBufferLength, + seed); + } - JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromCmykVector8(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - // Warm up: - converter.ConvertToRgba(values, result); + ValidateConversion( + new JpegColorConverter.FromCmykVector8(8), + 4, + inputBufferLength, + resultBufferLength, + seed); + } - using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}")) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromCmykAvx2(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasAvx2) { - for (int i = 0; i < times; i++) - { - converter.ConvertToRgba(values, result); - } + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromCmykAvx2(8), + 4, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromCmyk(int inputBufferLength, int resultBufferLength, int seed) + public void FromCmyk_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { - var v = new Vector4(0, 0, 0, 1F); - var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + ValidateConversion( + JpegColorSpace.Cmyk, + 4, + inputBufferLength, + resultBufferLength, + seed); + } - var converter = JpegColorConverter.GetConverter(JpegColorSpace.Cmyk, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(4, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGrayscaleBasic(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + new JpegColorConverter.FromGrayscaleBasic(8), + 1, + inputBufferLength, + resultBufferLength, + seed); + } - converter.ConvertToRgba(values, result); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGrayscaleVector8(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - for (int i = 0; i < resultBufferLength; i++) + ValidateConversion( + new JpegColorConverter.FromGrayscaleVector8(8), + 1, + inputBufferLength, + resultBufferLength, + seed); + } + + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGrayscaleAvx2(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasAvx2) { - float c = values.Component0[i]; - float m = values.Component1[i]; - float y = values.Component2[i]; - float k = values.Component3[i] / 255F; + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - v.X = c * k; - v.Y = m * k; - v.Z = y * k; - v.W = 1F; + ValidateConversion( + new JpegColorConverter.FromGrayscaleAvx2(8), + 1, + inputBufferLength, + resultBufferLength, + seed); + } - v *= scale; + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromGraysacle_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + JpegColorSpace.Grayscale, + 1, + inputBufferLength, + resultBufferLength, + seed); + } - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(v.X, v.Y, v.Z); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromRgbBasic(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + new JpegColorConverter.FromRgbBasic(8), + 3, + inputBufferLength, + resultBufferLength, + seed); + } - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromRgbVector8(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromRgbVector8(8), + 3, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromGrayScale(int inputBufferLength, int resultBufferLength, int seed) + public void FromRgbAvx2(int inputBufferLength, int resultBufferLength, int seed) { - var converter = JpegColorConverter.GetConverter(JpegColorSpace.Grayscale, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(1, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + if (!SimdUtils.HasAvx2) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - converter.ConvertToRgba(values, result); + ValidateConversion( + new JpegColorConverter.FromRgbAvx2(8), + 3, + inputBufferLength, + resultBufferLength, + seed); + } - for (int i = 0; i < resultBufferLength; i++) - { - float y = values.Component0[i]; - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(y / 255F, y / 255F, y / 255F); + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromRgb_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + JpegColorSpace.RGB, + 3, + inputBufferLength, + resultBufferLength, + seed); + } - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); - } + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYccKBasic(int inputBufferLength, int resultBufferLength, int seed) + { + ValidateConversion( + new JpegColorConverter.FromYccKBasic(8), + 4, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromRgb(int inputBufferLength, int resultBufferLength, int seed) + public void FromYccKVector8(int inputBufferLength, int resultBufferLength, int seed) { - var converter = JpegColorConverter.GetConverter(JpegColorSpace.RGB, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(3, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + if (!SimdUtils.HasVector8) + { + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; + } - converter.ConvertToRgba(values, result); + ValidateConversion( + new JpegColorConverter.FromYccKVector8(8), + 4, + inputBufferLength, + resultBufferLength, + seed); + } - for (int i = 0; i < resultBufferLength; i++) + [Theory] + [MemberData(nameof(CommonConversionData))] + public void FromYccKAvx2(int inputBufferLength, int resultBufferLength, int seed) + { + if (!SimdUtils.HasAvx2) { - float r = values.Component0[i]; - float g = values.Component1[i]; - float b = values.Component2[i]; - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(r / 255F, g / 255F, b / 255F); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + this.Output.WriteLine("No AVX2 present, skipping test!"); + return; } + + ValidateConversion( + new JpegColorConverter.FromYccKAvx2(8), + 4, + inputBufferLength, + resultBufferLength, + seed); } [Theory] [MemberData(nameof(CommonConversionData))] - public void ConvertFromYcck(int inputBufferLength, int resultBufferLength, int seed) + public void FromYcck_WithDefaultConverter(int inputBufferLength, int resultBufferLength, int seed) { - var v = new Vector4(0, 0, 0, 1F); - var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + ValidateConversion( + JpegColorSpace.Ycck, + 4, + inputBufferLength, + resultBufferLength, + seed); + } - var converter = JpegColorConverter.GetConverter(JpegColorSpace.Ycck, 8); - JpegColorConverter.ComponentValues values = CreateRandomValues(4, inputBufferLength, seed); - var result = new Vector4[resultBufferLength]; + // Benchmark, for local execution only + // [Theory] + // [InlineData(false)] + // [InlineData(true)] + public void BenchmarkYCbCr(bool simd) + { + int count = 2053; + int times = 50000; + JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); + var result = new Vector4[count]; + + JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8); + + // Warm up: converter.ConvertToRgba(values, result); - for (int i = 0; i < resultBufferLength; i++) + using (new MeasureGuard(this.Output, $"{converter.GetType().Name} x {times}")) { - float y = values.Component0[i]; - float cb = values.Component1[i] - 128F; - float cr = values.Component2[i] - 128F; - float k = values.Component3[i] / 255F; - - v.X = (255F - (float)Math.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; - v.Y = (255F - (float)Math.Round( - y - (0.344136F * cb) - (0.714136F * cr), - MidpointRounding.AwayFromZero)) * k; - v.Z = (255F - (float)Math.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; - v.W = 1F; - - v *= scale; - - Vector4 rgba = result[i]; - var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); - var expected = new Rgb(v.X, v.Y, v.Z); - - Assert.Equal(expected, actual, ColorSpaceComparer); - Assert.Equal(1, rgba.W); + for (int i = 0; i < times; i++) + { + converter.ConvertToRgba(values, result); + } } } @@ -270,7 +389,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg var buffers = new Buffer2D[componentCount]; for (int i = 0; i < componentCount; i++) { - float[] values = new float[inputBufferLength]; + var values = new float[inputBufferLength]; for (int j = 0; j < inputBufferLength; j++) { @@ -293,7 +412,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg int resultBufferLength, int seed) { - ValidateRgbToYCbCrConversion( + ValidateConversion( JpegColorConverter.GetConverter(colorSpace, 8), componentCount, inputBufferLength, @@ -301,7 +420,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed); } - private static void ValidateRgbToYCbCrConversion( + private static void ValidateConversion( JpegColorConverter converter, int componentCount, int inputBufferLength, @@ -315,7 +434,36 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg for (int i = 0; i < resultBufferLength; i++) { - ValidateYCbCr(values, result, i); + Validate(converter.ColorSpace, values, result, i); + } + } + + private static void Validate( + JpegColorSpace colorSpace, + in JpegColorConverter.ComponentValues values, + Vector4[] result, + int i) + { + switch (colorSpace) + { + case JpegColorSpace.Grayscale: + ValidateGrayScale(values, result, i); + break; + case JpegColorSpace.Ycck: + ValidateCyyK(values, result, i); + break; + case JpegColorSpace.Cmyk: + ValidateCmyk(values, result, i); + break; + case JpegColorSpace.RGB: + ValidateRgb(values, result, i); + break; + case JpegColorSpace.YCbCr: + ValidateYCbCr(values, result, i); + break; + default: + Assert.True(false, $"Colorspace {colorSpace} not supported!"); + break; } } @@ -333,5 +481,81 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Assert.Equal(expected, actual, ColorSpaceComparer); Assert.Equal(1, rgba.W); } + + private static void ValidateCyyK(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + var v = new Vector4(0, 0, 0, 1F); + var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + + float y = values.Component0[i]; + float cb = values.Component1[i] - 128F; + float cr = values.Component2[i] - 128F; + float k = values.Component3[i] / 255F; + + v.X = (255F - (float)Math.Round(y + (1.402F * cr), MidpointRounding.AwayFromZero)) * k; + v.Y = (255F - (float)Math.Round( + y - (0.344136F * cb) - (0.714136F * cr), + MidpointRounding.AwayFromZero)) * k; + v.Z = (255F - (float)Math.Round(y + (1.772F * cb), MidpointRounding.AwayFromZero)) * k; + v.W = 1F; + + v *= scale; + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(v.X, v.Y, v.Z); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateRgb(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float r = values.Component0[i]; + float g = values.Component1[i]; + float b = values.Component2[i]; + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(r / 255F, g / 255F, b / 255F); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateGrayScale(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + float y = values.Component0[i]; + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(y / 255F, y / 255F, y / 255F); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } + + private static void ValidateCmyk(in JpegColorConverter.ComponentValues values, Vector4[] result, int i) + { + var v = new Vector4(0, 0, 0, 1F); + var scale = new Vector4(1 / 255F, 1 / 255F, 1 / 255F, 1F); + + float c = values.Component0[i]; + float m = values.Component1[i]; + float y = values.Component2[i]; + float k = values.Component3[i] / 255F; + + v.X = c * k; + v.Y = m * k; + v.Z = y * k; + v.W = 1F; + + v *= scale; + + Vector4 rgba = result[i]; + var actual = new Rgb(rgba.X, rgba.Y, rgba.Z); + var expected = new Rgb(v.X, v.Y, v.Z); + + Assert.Equal(expected, actual, ColorSpaceComparer); + Assert.Equal(1, rgba.W); + } } } From 126247422d620e8ce236c501fdd288851768fece Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Wed, 4 Nov 2020 21:14:54 +0100 Subject: [PATCH 105/131] Add benchmarks --- .../Codecs/Jpeg/CmykColorConversion.cs | 38 ++++++++++++ .../Codecs/Jpeg/ColorConversionBenchmark.cs | 61 +++++++++++++++++++ .../Codecs/Jpeg/GrayscaleColorConversion.cs | 38 ++++++++++++ .../Codecs/Jpeg/RgbColorConversion.cs | 38 ++++++++++++ .../Codecs/Jpeg/YCbCrColorConversion.cs | 53 ++-------------- .../Codecs/Jpeg/YccKColorConverter.cs | 38 ++++++++++++ 6 files changed, 217 insertions(+), 49 deletions(-) create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs create mode 100644 tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs new file mode 100644 index 000000000..64947408b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs @@ -0,0 +1,38 @@ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class CmykColorConversion : ColorConversionBenchmark + { + public CmykColorConversion() + : base(4) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromCmykBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromCmykVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromCmykAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs new file mode 100644 index 000000000..046159128 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs @@ -0,0 +1,61 @@ +using System; +using System.Numerics; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Memory; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + public abstract class ColorConversionBenchmark + { + private readonly int componentCount; + protected Buffer2D[] input; + protected Vector4[] output; + + protected ColorConversionBenchmark(int componentCount) + { + this.componentCount = componentCount; + } + + public const int Count = 128; + + [GlobalSetup] + public void Setup() + { + this.input = CreateRandomValues(componentCount, Count); + this.output = new Vector4[Count]; + } + + [GlobalCleanup] + public void Cleanup() + { + foreach (Buffer2D buffer in this.input) + { + buffer.Dispose(); + } + } + + private static Buffer2D[] CreateRandomValues( + int componentCount, + int inputBufferLength, + float minVal = 0f, + float maxVal = 255f) + { + var rnd = new Random(42); + var buffers = new Buffer2D[componentCount]; + for (int i = 0; i < componentCount; i++) + { + var values = new float[inputBufferLength]; + + for (int j = 0; j < inputBufferLength; j++) + { + values[j] = ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } + + // no need to dispose when buffer is not array owner + buffers[i] = Configuration.Default.MemoryAllocator.Allocate2D(values.Length, 1); + } + + return buffers; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs new file mode 100644 index 000000000..ee4d7b1bd --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs @@ -0,0 +1,38 @@ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class GrayscaleColorConversion : ColorConversionBenchmark + { + public GrayscaleColorConversion() + : base(1) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromGrayscaleBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromGrayscaleVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromGrayscaleAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs new file mode 100644 index 000000000..6f20ffda2 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs @@ -0,0 +1,38 @@ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class RgbColorConversion : ColorConversionBenchmark + { + public RgbColorConversion() + : base(3) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromRgbBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromRgbVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromRgbAvx2(8).ConvertToRgba(values, this.output); + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs index 9b4b4568f..c17f14fe6 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs @@ -1,39 +1,18 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System; -using System.Numerics; - using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; -using SixLabors.ImageSharp.Memory; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { [Config(typeof(Config.ShortClr))] - public class YCbCrColorConversion + public class YCbCrColorConversion : ColorConversionBenchmark { - private Buffer2D[] input; - - private Vector4[] output; - - public const int Count = 128; - - [GlobalSetup] - public void Setup() - { - this.input = CreateRandomValues(3, Count); - this.output = new Vector4[Count]; - } - - [GlobalCleanup] - public void Cleanup() + public YCbCrColorConversion() + : base(3) { - foreach (Buffer2D buffer in this.input) - { - buffer.Dispose(); - } } [Benchmark] @@ -45,7 +24,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg } [Benchmark(Baseline = true)] - public void SimdVector4() + public void SimdVector() { var values = new JpegColorConverter.ComponentValues(this.input, 0); @@ -67,29 +46,5 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg new JpegColorConverter.FromYCbCrAvx2(8).ConvertToRgba(values, this.output); } - - private static Buffer2D[] CreateRandomValues( - int componentCount, - int inputBufferLength, - float minVal = 0f, - float maxVal = 255f) - { - var rnd = new Random(42); - var buffers = new Buffer2D[componentCount]; - for (int i = 0; i < componentCount; i++) - { - var values = new float[inputBufferLength]; - - for (int j = 0; j < inputBufferLength; j++) - { - values[j] = ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; - } - - // no need to dispose when buffer is not array owner - buffers[i] = Configuration.Default.MemoryAllocator.Allocate2D(values.Length, 1); - } - - return buffers; - } } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs new file mode 100644 index 000000000..5357cecd2 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs @@ -0,0 +1,38 @@ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg +{ + [Config(typeof(Config.ShortClr))] + public class YccKColorConverter : ColorConversionBenchmark + { + public YccKColorConverter() + : base(4) + { + } + + [Benchmark(Baseline = true)] + public void Scalar() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYccKBasic(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVector8() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYccKVector8(8).ConvertToRgba(values, this.output); + } + + [Benchmark] + public void SimdVectorAvx2() + { + var values = new JpegColorConverter.ComponentValues(this.input, 0); + + new JpegColorConverter.FromYccKAvx2(8).ConvertToRgba(values, this.output); + } + } +} From 680459ca546a2e44827f8b813893292d56833a91 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Fri, 6 Nov 2020 18:05:39 +0100 Subject: [PATCH 106/131] Drop FromGrayscaleVector8 --- ...JpegColorConverter.FromGrayScaleVector8.cs | 53 ------------------- .../ColorConverters/JpegColorConverter.cs | 1 - .../Codecs/Jpeg/GrayscaleColorConversion.cs | 8 --- .../Formats/Jpg/JpegColorConverterTests.cs | 18 ------- 4 files changed, 80 deletions(-) delete mode 100644 src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs deleted file mode 100644 index 75ea60101..000000000 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleVector8.cs +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Tuples; - -namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters -{ - internal abstract partial class JpegColorConverter - { - internal sealed class FromGrayscaleVector8 : Vector8JpegColorConverter - { - public FromGrayscaleVector8(int precision) - : base(JpegColorSpace.Grayscale, precision) - { - } - - protected override void ConvertCoreVectorized(in ComponentValues values, Span result) - { - ref Vector gBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); - - ref Vector4Octet resultBase = - ref Unsafe.As(ref MemoryMarshal.GetReference(result)); - - Vector4Pair gg = default; - ref Vector ggRefAsVector = ref Unsafe.As>(ref gg); - - var scale = new Vector(1 / this.MaximumValue); - - // Walking 8 elements at one step: - int n = result.Length / 8; - for (int i = 0; i < n; i++) - { - Vector g = Unsafe.Add(ref gBase, i); - g *= scale; - - ggRefAsVector = g; - - // Collect (g0,g1...g7) vector values in the expected (g0,g0,g0,1), (g1,g1,g1,1) ... order: - ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); - destination.Pack(ref gg); - } - } - - protected override void ConvertCore(in ComponentValues values, Span result) => - FromGrayscaleBasic.ConvertCore(values, result, this.MaximumValue); - } - } -} diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 7b3c11938..7afc09ff5 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -141,7 +141,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters private static IEnumerable GetGrayScaleConverters(int precision) { yield return new FromGrayscaleAvx2(precision); - yield return new FromGrayscaleVector8(precision); yield return new FromGrayscaleBasic(precision); } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs index ee4d7b1bd..3e8da6fb8 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs @@ -19,14 +19,6 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg new JpegColorConverter.FromGrayscaleBasic(8).ConvertToRgba(values, this.output); } - [Benchmark] - public void SimdVector8() - { - var values = new JpegColorConverter.ComponentValues(this.input, 0); - - new JpegColorConverter.FromGrayscaleVector8(8).ConvertToRgba(values, this.output); - } - [Benchmark] public void SimdVectorAvx2() { diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index 8662a6192..bcc52de14 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -184,24 +184,6 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg seed); } - [Theory] - [MemberData(nameof(CommonConversionData))] - public void FromGrayscaleVector8(int inputBufferLength, int resultBufferLength, int seed) - { - if (!SimdUtils.HasVector8) - { - this.Output.WriteLine("No AVX2 present, skipping test!"); - return; - } - - ValidateConversion( - new JpegColorConverter.FromGrayscaleVector8(8), - 1, - inputBufferLength, - resultBufferLength, - seed); - } - [Theory] [MemberData(nameof(CommonConversionData))] public void FromGrayscaleAvx2(int inputBufferLength, int resultBufferLength, int seed) From 330db036950bb29a541bd62a639684e484c58288 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Fri, 6 Nov 2020 18:08:31 +0100 Subject: [PATCH 107/131] Polish benchmarks (fix new warnings) --- .../Codecs/Jpeg/CmykColorConversion.cs | 5 ++++- .../Codecs/Jpeg/ColorConversionBenchmark.cs | 7 +++++-- .../Codecs/Jpeg/GrayscaleColorConversion.cs | 5 ++++- .../Codecs/Jpeg/RgbColorConversion.cs | 5 ++++- .../Codecs/Jpeg/YccKColorConverter.cs | 5 ++++- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs index 64947408b..1e6b9fe92 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/CmykColorConversion.cs @@ -1,4 +1,7 @@ -using BenchmarkDotNet.Attributes; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs index 046159128..da8c51735 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/ColorConversionBenchmark.cs @@ -1,4 +1,7 @@ -using System; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; using System.Numerics; using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Memory; @@ -21,7 +24,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg [GlobalSetup] public void Setup() { - this.input = CreateRandomValues(componentCount, Count); + this.input = CreateRandomValues(this.componentCount, Count); this.output = new Vector4[Count]; } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs index 3e8da6fb8..74b3e6db6 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/GrayscaleColorConversion.cs @@ -1,4 +1,7 @@ -using BenchmarkDotNet.Attributes; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs index 6f20ffda2..68dc0f04e 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/RgbColorConversion.cs @@ -1,4 +1,7 @@ -using BenchmarkDotNet.Attributes; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs index 5357cecd2..ed8758131 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YccKColorConverter.cs @@ -1,4 +1,7 @@ -using BenchmarkDotNet.Attributes; +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters; namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg From 4812a8d0a5f139ec9fd7e4280f60af49776c5920 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Fri, 6 Nov 2020 19:30:49 +0100 Subject: [PATCH 108/131] Fix #1414 --- .../Formats/Jpg/JpegDecoderTests.cs | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs index 78218aec9..d233a8eda 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs @@ -141,17 +141,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg TestEnvironment.InputImagesDirectoryFullPath, fileName); - var cts = new CancellationTokenSource(); - if (cancellationDelayMs == 0) - { - cts.Cancel(); - } - else + const int NumberOfRuns = 5; + + for (int i = 0; i < NumberOfRuns; i++) { - cts.CancelAfter(cancellationDelayMs); + var cts = new CancellationTokenSource(); + if (cancellationDelayMs == 0) + { + cts.Cancel(); + } + else + { + cts.CancelAfter(cancellationDelayMs); + } + + try + { + using var image = await Image.LoadAsync(hugeFile, cts.Token); + } + catch (TaskCanceledException) + { + // Succesfully observed a cancellation + return; + } } - await Assert.ThrowsAsync(() => Image.LoadAsync(hugeFile, cts.Token)); + throw new Exception($"No cancellation happened out of {NumberOfRuns} runs!"); } [Theory(Skip = "Identify is too fast, doesn't work reliably.")] From ded92172541f2ef6af3c44c250743218840ef70b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 19:49:52 +0000 Subject: [PATCH 109/131] Handle Bmp encoder padding. --- src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs | 4 ++-- tests/ImageSharp.Tests/Common/SimdUtilsTests.cs | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 8fd6bcce6..07744566a 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -225,9 +225,9 @@ namespace SixLabors.ImageSharp "Output span must be divisable by 3!"); DebugGuard.IsTrue( - source.Length == dest.Length * 4 / 3, + dest.Length >= source.Length * 3 / 4, nameof(source), - "Output span must be 3/4 the length of the input span!"); + "Output span must be at least 3/4 the length of the input span!"); } public static class Shuffle diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index fe432107a..ec09e43e5 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -164,8 +164,6 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArraySizesDivisibleBy8 = new TheoryData { 0, 8, 16, 1024 }; public static readonly TheoryData ArraySizesDivisibleBy4 = new TheoryData { 0, 4, 8, 28, 1020 }; public static readonly TheoryData ArraySizesDivisibleBy3 = new TheoryData { 0, 3, 9, 36, 957 }; - - public static readonly TheoryData ArraySizesDivisibleBy32 = new TheoryData { 0, 32, 512 }; public static readonly TheoryData ArbitraryArraySizes = From 090158ea335e3b28f8a8d0e3c1b6a617d081a51c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 19:50:29 +0000 Subject: [PATCH 110/131] Update src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs Co-authored-by: Clinton Ingram --- src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 296970ddc..86ba07428 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -489,12 +489,8 @@ namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v3 = Ssse3.AlignRight(v3, v2, 12); - - v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); - v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); - - v1 = Ssse3.AlignRight(v2, v1, 8); + v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); + v2 = Ssse3.AlignRight(v3, v2, 12); ref Vector128 vd = ref Unsafe.Add(ref destBase, j); From 699d8ff01d36bb1e337762501e4fe4adfbe48e71 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 20:25:28 +0000 Subject: [PATCH 111/131] Use ROS trick all round and optimize Shuffle3 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 86ba07428..51c81be06 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -18,6 +18,10 @@ namespace SixLabors.ImageSharp public static ReadOnlySpan PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; + private static ReadOnlySpan ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 }; + + private static ReadOnlySpan ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 }; + /// /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . @@ -352,10 +356,12 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80).AsByte(); + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); - Vector128 vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte(); - Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte(); + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); @@ -381,10 +387,10 @@ namespace SixLabors.ImageSharp v2 = Ssse3.AlignRight(v2, v1, 8); v1 = Ssse3.AlignRight(v1, v0, 12); - v0 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle); - v1 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); - v2 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); - v3 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle); v0 = Ssse3.Shuffle(v0, vmaske); v1 = Ssse3.Shuffle(v1, vmasko); @@ -392,12 +398,8 @@ namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(v3, vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v3 = Ssse3.AlignRight(v3, v2, 12); - - v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); - v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); - - v1 = Ssse3.AlignRight(v2, v1, 8); + v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); + v2 = Ssse3.AlignRight(v3, v2, 12); ref Vector128 vd = ref Unsafe.Add(ref destBase, i); @@ -416,7 +418,8 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80).AsByte(); + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); Span bytes = stackalloc byte[Vector128.Count]; @@ -459,8 +462,9 @@ namespace SixLabors.ImageSharp { if (Ssse3.IsSupported) { - Vector128 vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte(); - Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte(); + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); From 3ee5a388022178397161ff50ffa97819c2d4af34 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 20:45:39 +0000 Subject: [PATCH 112/131] Fix shuffle --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 51c81be06..2ea7f2c9b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -358,7 +358,6 @@ namespace SixLabors.ImageSharp { ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); Vector128 vmask = Unsafe.As>(ref vmaskBase); - Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); Vector128 vmasko = Unsafe.As>(ref vmaskoBase); Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); @@ -398,8 +397,12 @@ namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(v3, vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); - v2 = Ssse3.AlignRight(v3, v2, 12); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); ref Vector128 vd = ref Unsafe.Add(ref destBase, i); @@ -493,8 +496,12 @@ namespace SixLabors.ImageSharp v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko); v0 = Ssse3.AlignRight(v1, v0, 4); - v1 = Sse2.Or(Sse2.ShiftRightLogical128BitLane(v1, 4), Sse2.ShiftLeftLogical128BitLane(v2, 4)); - v2 = Ssse3.AlignRight(v3, v2, 12); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); ref Vector128 vd = ref Unsafe.Add(ref destBase, j); From e3e8656d8f23216a3c180393b81a1959d97ae93b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 21:23:51 +0000 Subject: [PATCH 113/131] Respond to Anton fedback --- src/ImageSharp/Common/Helpers/SimdUtils.cs | 7 +++++++ ...pegColorConverter.Avx2JpegColorConverter.cs | 6 +----- .../JpegColorConverter.FromCmykAvx2.cs | 2 +- ... => JpegColorConverter.FromYCbCrVector4.cs} | 8 ++++---- .../ColorConverters/JpegColorConverter.cs | 18 ++++++++++++++---- .../Codecs/Jpeg/YCbCrColorConversion.cs | 2 +- .../Formats/Jpg/JpegColorConverterTests.cs | 12 +++++++++--- 7 files changed, 37 insertions(+), 18 deletions(-) rename src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/{JpegColorConverter.FromYCbCrVector.cs => JpegColorConverter.FromYCbCrVector4.cs} (94%) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 073a5a9f6..7cbb5bfe3 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -25,6 +25,13 @@ namespace SixLabors.ImageSharp public static bool HasVector8 { get; } = Vector.IsHardwareAccelerated && Vector.Count == 8 && Vector.Count == 8; + /// + /// Gets a value indicating whether code is being JIT-ed to SSE instructions + /// where float and integer registers are of size 128 byte. + /// + public static bool HasVector4 { get; } = + Vector.IsHardwareAccelerated && Vector.Count == 4; + public static bool HasAvx2 { get diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs index ef144fc1c..90ebce3b8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.Avx2JpegColorConverter.cs @@ -1,10 +1,6 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif - namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs index a9e3e5b51..f9334de73 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromCmykAvx2.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs similarity index 94% rename from src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs rename to src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs index 65a7c42d8..193d7e71e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs @@ -11,14 +11,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters { internal abstract partial class JpegColorConverter { - internal sealed class FromYCbCrVector : VectorizedJpegColorConverter + internal sealed class FromYCbCrVector4 : VectorizedJpegColorConverter { - public FromYCbCrVector(int precision) + public FromYCbCrVector4(int precision) : base(JpegColorSpace.YCbCr, precision, 8) { } - protected override bool IsAvailable => true; + protected override bool IsAvailable => SimdUtils.HasVector4; protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { @@ -74,7 +74,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters tmp.MultiplyInplace(1.772F); b.AddInplace(ref tmp); - if (Vector.Count == 4) + if (SimdUtils.HasVector4) { // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector is terrible?) r.RoundAndDownscalePreVector8(maxValue); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index 7afc09ff5..b794e3a26 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -62,8 +62,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// public static JpegColorConverter GetConverter(JpegColorSpace colorSpace, int precision) { - JpegColorConverter converter = Array.Find(Converters, c => c.ColorSpace == colorSpace - && c.Precision == precision); + JpegColorConverter converter = Array.Find( + Converters, + c => c.ColorSpace == colorSpace + && c.Precision == precision); if (converter is null) { @@ -94,7 +96,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters converters.AddRange(GetGrayScaleConverters(8)); converters.AddRange(GetRgbConverters(8)); - // 8-bit converters + // 12-bit converters converters.AddRange(GetYCbCrConverters(12)); converters.AddRange(GetYccKConverters(12)); converters.AddRange(GetCmykConverters(12)); @@ -109,9 +111,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// private static IEnumerable GetYCbCrConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromYCbCrAvx2(precision); +#endif yield return new FromYCbCrVector8(precision); - yield return new FromYCbCrVector(precision); + yield return new FromYCbCrVector4(precision); yield return new FromYCbCrBasic(precision); } @@ -120,7 +124,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// private static IEnumerable GetYccKConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromYccKAvx2(precision); +#endif yield return new FromYccKVector8(precision); yield return new FromYccKBasic(precision); } @@ -130,7 +136,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// private static IEnumerable GetCmykConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromCmykAvx2(precision); +#endif yield return new FromCmykVector8(precision); yield return new FromCmykBasic(precision); } @@ -140,7 +148,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// private static IEnumerable GetGrayScaleConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromGrayscaleAvx2(precision); +#endif yield return new FromGrayscaleBasic(precision); } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs index c17f14fe6..94b28e4d9 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/YCbCrColorConversion.cs @@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg { var values = new JpegColorConverter.ComponentValues(this.input, 0); - new JpegColorConverter.FromYCbCrVector(8).ConvertToRgba(values, this.output); + new JpegColorConverter.FromYCbCrVector4(8).ConvertToRgba(values, this.output); } [Benchmark] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs index bcc52de14..68210caea 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegColorConverterTests.cs @@ -54,10 +54,16 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [MemberData(nameof(CommonConversionData))] - public void FromYCbCrVector(int inputBufferLength, int resultBufferLength, int seed) + public void FromYCbCrVector4(int inputBufferLength, int resultBufferLength, int seed) { + if (!SimdUtils.HasVector4) + { + this.Output.WriteLine("No SSE present, skipping test!"); + return; + } + ValidateConversion( - new JpegColorConverter.FromYCbCrVector(8), + new JpegColorConverter.FromYCbCrVector4(8), 3, inputBufferLength, resultBufferLength, @@ -346,7 +352,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg JpegColorConverter.ComponentValues values = CreateRandomValues(3, count, 1); var result = new Vector4[count]; - JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector(8) : new JpegColorConverter.FromYCbCrBasic(8); + JpegColorConverter converter = simd ? (JpegColorConverter)new JpegColorConverter.FromYCbCrVector4(8) : new JpegColorConverter.FromYCbCrBasic(8); // Warm up: converter.ConvertToRgba(values, result); From 27de70a9cf504f38d1e17885e981f395ff94677e Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Fri, 6 Nov 2020 21:48:26 +0000 Subject: [PATCH 114/131] Add missing conditional --- .../Components/Decoder/ColorConverters/JpegColorConverter.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index b794e3a26..2d24f01dd 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -159,7 +159,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters /// private static IEnumerable GetRgbConverters(int precision) { +#if SUPPORTS_RUNTIME_INTRINSICS yield return new FromRgbAvx2(precision); +#endif yield return new FromRgbVector8(precision); yield return new FromRgbBasic(precision); } From eba801542c0d2f9f4b8500d426cf2b7104a943c7 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Sat, 7 Nov 2020 10:44:36 +0100 Subject: [PATCH 115/131] Micro-optimize grayscale converter --- .../JpegColorConverter.FromGrayScaleAvx2.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs index aebffc3df..45846a6b5 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromGrayScaleAvx2.cs @@ -42,16 +42,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters int n = result.Length / 8; for (int i = 0; i < n; i++) { - Vector256 g = Avx2.PermuteVar8x32(Unsafe.Add(ref gBase, i), vcontrol); + Vector256 g = Avx.Multiply(Unsafe.Add(ref gBase, i), scale); - g = Avx.Multiply(g, scale); + g = Avx2.PermuteVar8x32(g, vcontrol); ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); destination = Avx.Blend(Avx.Permute(g, 0b00_00_00_00), one, 0b1000_1000); - Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Permute(g, 0b01_01_01_01), one, 0b1000_1000); - Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Permute(g, 0b10_10_10_10), one, 0b1000_1000); - Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Permute(g, 0b11_11_11_11), one, 0b1000_1000); + Unsafe.Add(ref destination, 1) = Avx.Blend(Avx.Shuffle(g, g, 0b01_01_01_01), one, 0b1000_1000); + Unsafe.Add(ref destination, 2) = Avx.Blend(Avx.Shuffle(g, g, 0b10_10_10_10), one, 0b1000_1000); + Unsafe.Add(ref destination, 3) = Avx.Blend(Avx.Shuffle(g, g, 0b11_11_11_11), one, 0b1000_1000); } #endif } From 963e5c6fd4271825e1739551ff9a97d86cc365e8 Mon Sep 17 00:00:00 2001 From: Nicolas Portmann Date: Sat, 7 Nov 2020 11:35:21 +0100 Subject: [PATCH 116/131] Remove useless branch --- .../JpegColorConverter.FromYCbCrVector4.cs | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs index 193d7e71e..42f8eef5a 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrVector4.cs @@ -22,6 +22,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters protected override void ConvertCoreVectorized(in ComponentValues values, Span result) { + // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector is terrible?) DebugGuard.IsTrue(result.Length % 8 == 0, nameof(result), "result.Length should be divisible by 8!"); ref Vector4Pair yBase = @@ -74,25 +75,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters tmp.MultiplyInplace(1.772F); b.AddInplace(ref tmp); - if (SimdUtils.HasVector4) - { - // TODO: Find a way to properly run & test this path on AVX2 PC-s! (Have I already mentioned that Vector is terrible?) - r.RoundAndDownscalePreVector8(maxValue); - g.RoundAndDownscalePreVector8(maxValue); - b.RoundAndDownscalePreVector8(maxValue); - } - else if (SimdUtils.HasVector8) - { - r.RoundAndDownscaleVector8(maxValue); - g.RoundAndDownscaleVector8(maxValue); - b.RoundAndDownscaleVector8(maxValue); - } - else - { - // TODO: Run fallback scalar code here - // However, no issues expected before someone implements this: https://github.com/dotnet/coreclr/issues/12007 - JpegThrowHelper.ThrowNotImplementedException("Your CPU architecture is too modern!"); - } + r.RoundAndDownscalePreVector8(maxValue); + g.RoundAndDownscalePreVector8(maxValue); + b.RoundAndDownscalePreVector8(maxValue); // Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order: ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); From bc7ea4f8bffeb21e4f39e9cdebc42b8c67aa39ae Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 8 Nov 2020 12:41:38 +0100 Subject: [PATCH 117/131] Use Interlocked.Increment during histogram calculation, fixes issue #1416 --- .../GlobalHistogramEqualizationProcessor{TPixel}.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs index 274376671..0c5a109a6 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs @@ -6,6 +6,7 @@ using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Threading; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -51,7 +52,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization using IMemoryOwner histogramBuffer = memoryAllocator.Allocate(this.LuminanceLevels, AllocationOptions.Clean); - // Build the histogram of the grayscale levels + // Build the histogram of the grayscale levels. var grayscaleOperation = new GrayscaleLevelsRowOperation(interest, histogramBuffer, source, this.LuminanceLevels); ParallelRowIterator.IterateRows( this.Configuration, @@ -114,7 +115,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization #endif public void Invoke(int y) { - ref int histogramBase = ref MemoryMarshal.GetReference(this.histogramBuffer.GetSpan()); ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); int levels = this.luminanceLevels; @@ -123,7 +123,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization // TODO: We should bulk convert here. var vector = Unsafe.Add(ref pixelBase, x).ToVector4(); int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); - Unsafe.Add(ref histogramBase, luminance)++; + ref int histogramAtLuminance = ref MemoryMarshal.GetReference(this.histogramBuffer.Slice(luminance)); + Interlocked.Increment(ref histogramAtLuminance); } } } From 82a5a61f3d1ba11fdc6fae5cbcfebe379bc4686c Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 8 Nov 2020 12:55:53 +0100 Subject: [PATCH 118/131] Add global histogram equalization test which compares result to reference output --- .../HistogramEqualizationTests.cs | 39 ++++++++++++++----- tests/ImageSharp.Tests/TestImages.cs | 1 + tests/Images/External | 2 +- .../640px-Unequalized_Hawkes_Bay_NZ.jpg | 3 ++ 4 files changed, 34 insertions(+), 11 deletions(-) create mode 100644 tests/Images/Input/Jpg/baseline/640px-Unequalized_Hawkes_Bay_NZ.jpg diff --git a/tests/ImageSharp.Tests/Processing/Normalization/HistogramEqualizationTests.cs b/tests/ImageSharp.Tests/Processing/Normalization/HistogramEqualizationTests.cs index 1c1da6f19..4460f04fb 100644 --- a/tests/ImageSharp.Tests/Processing/Normalization/HistogramEqualizationTests.cs +++ b/tests/ImageSharp.Tests/Processing/Normalization/HistogramEqualizationTests.cs @@ -17,7 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Normalization [Theory] [InlineData(256)] [InlineData(65536)] - public void HistogramEqualizationTest(int luminanceLevels) + public void GlobalHistogramEqualization_WithDifferentLumanceLevels(int luminanceLevels) { // Arrange var pixels = new byte[] @@ -45,20 +45,21 @@ namespace SixLabors.ImageSharp.Tests.Processing.Normalization var expected = new byte[] { - 0, 12, 53, 32, 146, 53, 174, 53, - 57, 32, 12, 227, 219, 202, 32, 154, - 65, 85, 93, 239, 251, 227, 65, 158, - 73, 146, 146, 247, 255, 235, 154, 130, - 97, 166, 117, 231, 243, 210, 117, 117, - 117, 190, 36, 190, 178, 93, 20, 170, - 130, 202, 73, 20, 12, 53, 85, 194, - 146, 206, 130, 117, 85, 166, 182, 215 + 0, 12, 53, 32, 146, 53, 174, 53, + 57, 32, 12, 227, 219, 202, 32, 154, + 65, 85, 93, 239, 251, 227, 65, 158, + 73, 146, 146, 247, 255, 235, 154, 130, + 97, 166, 117, 231, 243, 210, 117, 117, + 117, 190, 36, 190, 178, 93, 20, 170, + 130, 202, 73, 20, 12, 53, 85, 194, + 146, 206, 130, 117, 85, 166, 182, 215 }; // Act image.Mutate(x => x.HistogramEqualization(new HistogramEqualizationOptions { - LuminanceLevels = luminanceLevels + LuminanceLevels = luminanceLevels, + Method = HistogramEqualizationMethod.Global })); // Assert @@ -75,6 +76,24 @@ namespace SixLabors.ImageSharp.Tests.Processing.Normalization } } + [Theory] + [WithFile(TestImages.Jpeg.Baseline.HistogramEqImage, PixelTypes.Rgba32)] + public void GlobalHistogramEqualization_CompareToReferenceOutput(TestImageProvider provider) + where TPixel : unmanaged, IPixel + { + using (Image image = provider.GetImage()) + { + var options = new HistogramEqualizationOptions + { + Method = HistogramEqualizationMethod.Global, + LuminanceLevels = 256, + }; + image.Mutate(x => x.HistogramEqualization(options)); + image.DebugSave(provider); + image.CompareToReferenceOutput(ValidatorComparer, provider, extension: "png"); + } + } + [Theory] [WithFile(TestImages.Jpeg.Baseline.LowContrast, PixelTypes.Rgba32)] public void Adaptive_SlidingWindow_15Tiles_WithClipping(TestImageProvider provider) diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs index dce36bb0f..e4c73928d 100644 --- a/tests/ImageSharp.Tests/TestImages.cs +++ b/tests/ImageSharp.Tests/TestImages.cs @@ -196,6 +196,7 @@ namespace SixLabors.ImageSharp.Tests public const string YcckSubsample1222 = "Jpg/baseline/ycck-subsample-1222.jpg"; public const string Iptc = "Jpg/baseline/iptc.jpg"; public const string App13WithEmptyIptc = "Jpg/baseline/iptc-psAPP13-wIPTCempty.jpg"; + public const string HistogramEqImage = "Jpg/baseline/640px-Unequalized_Hawkes_Bay_NZ.jpg"; public static readonly string[] All = { diff --git a/tests/Images/External b/tests/Images/External index cc6465910..8b43d14d2 160000 --- a/tests/Images/External +++ b/tests/Images/External @@ -1 +1 @@ -Subproject commit cc6465910d092319ef9bf4e99698a0649996d3c5 +Subproject commit 8b43d14d21ce9b436af3d12a70d38402cdba176b diff --git a/tests/Images/Input/Jpg/baseline/640px-Unequalized_Hawkes_Bay_NZ.jpg b/tests/Images/Input/Jpg/baseline/640px-Unequalized_Hawkes_Bay_NZ.jpg new file mode 100644 index 000000000..bb89de589 --- /dev/null +++ b/tests/Images/Input/Jpg/baseline/640px-Unequalized_Hawkes_Bay_NZ.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1fafc61231325c42d94fe163486a6c5144fb6211ccdceb902d5cb4ddebda9e1 +size 32428 From 2222db8e0baf52f46623b0668b356b8e42c37bb9 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 8 Nov 2020 17:45:29 +0100 Subject: [PATCH 119/131] Use Interlocked.Increment(ref Unsafe.Add(ref histogramBase, luminance)); --- .../GlobalHistogramEqualizationProcessor{TPixel}.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs index 0c5a109a6..488426f93 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs @@ -115,6 +115,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization #endif public void Invoke(int y) { + ref int histogramBase = ref MemoryMarshal.GetReference(this.histogramBuffer.GetSpan()); ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); int levels = this.luminanceLevels; @@ -123,8 +124,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization // TODO: We should bulk convert here. var vector = Unsafe.Add(ref pixelBase, x).ToVector4(); int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); - ref int histogramAtLuminance = ref MemoryMarshal.GetReference(this.histogramBuffer.Slice(luminance)); - Interlocked.Increment(ref histogramAtLuminance); + Interlocked.Increment(ref Unsafe.Add(ref histogramBase, luminance)); } } } From 2871e8ed2e695779aa30b59bd8d3f3204a7a401b Mon Sep 17 00:00:00 2001 From: AlexNDRmac Date: Mon, 26 Oct 2020 22:17:59 +0200 Subject: [PATCH 120/131] Add support using libgdiplus on macOS --- src/ImageSharp/ImageSharp.csproj | 8 ++++++++ tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj | 4 +++- tests/ImageSharp.Tests/ImageSharp.Tests.csproj | 2 ++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index c3d9618c8..2a9b53cba 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -28,6 +28,14 @@ + + + + + + + + diff --git a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj index 4784a219b..627e281f6 100644 --- a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj +++ b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj @@ -26,7 +26,9 @@ - + + + diff --git a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj index 07ade97d5..d96d33a5a 100644 --- a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj +++ b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj @@ -22,6 +22,8 @@ + + From 8276bda4f2107da08cd32364d4e70ab07d0098c8 Mon Sep 17 00:00:00 2001 From: AlexNDRmac Date: Mon, 26 Oct 2020 22:31:16 +0200 Subject: [PATCH 121/131] Fix incompatible package reference for OSX --- src/ImageSharp/ImageSharp.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index 2a9b53cba..a5e6bbbc7 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -32,7 +32,7 @@ - + From 2749a25d78c02c2931552766cbb3549c3d81a55f Mon Sep 17 00:00:00 2001 From: AlexNDRmac Date: Thu, 12 Nov 2020 16:52:56 +0200 Subject: [PATCH 122/131] Move package to build props --- .github/workflows/build-and-test.yml | 4 ++++ src/ImageSharp/ImageSharp.csproj | 8 -------- tests/Directory.Build.targets | 2 ++ tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj | 2 -- tests/ImageSharp.Tests/ImageSharp.Tests.csproj | 2 -- 5 files changed, 6 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c8f399794..b618e1e65 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -18,6 +18,10 @@ jobs: framework: netcoreapp3.1 runtime: -x64 codecov: true + - os: macos-latest + framework: netcoreapp3.1 + runtime: -x64 + codecov: false - os: windows-latest framework: netcoreapp3.1 runtime: -x64 diff --git a/src/ImageSharp/ImageSharp.csproj b/src/ImageSharp/ImageSharp.csproj index a5e6bbbc7..c3d9618c8 100644 --- a/src/ImageSharp/ImageSharp.csproj +++ b/src/ImageSharp/ImageSharp.csproj @@ -28,14 +28,6 @@ - - - - - - - - diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index 1f699c9dd..76759948b 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -35,6 +35,8 @@ + + diff --git a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj index 627e281f6..e8ad660ad 100644 --- a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj +++ b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj @@ -26,8 +26,6 @@ - - diff --git a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj index d96d33a5a..07ade97d5 100644 --- a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj +++ b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj @@ -22,8 +22,6 @@ - - From c00ca80e199aa4b4a043211ee6215ea22822b3bf Mon Sep 17 00:00:00 2001 From: AlexNDRmac Date: Fri, 30 Oct 2020 11:48:24 +0200 Subject: [PATCH 123/131] Do not run windows specific tests on macOS --- .../TestUtilities/Tests/TestEnvironmentTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs index e72d953ac..e3418220b 100644 --- a/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs @@ -67,7 +67,7 @@ namespace SixLabors.ImageSharp.Tests [InlineData("lol/Baz.gif", typeof(GifEncoder))] public void GetReferenceEncoder_ReturnsCorrectEncoders_Windows(string fileName, Type expectedEncoderType) { - if (TestEnvironment.IsLinux) + if (!TestEnvironment.IsWindows) { return; } @@ -83,7 +83,7 @@ namespace SixLabors.ImageSharp.Tests [InlineData("lol/Baz.gif", typeof(GifDecoder))] public void GetReferenceDecoder_ReturnsCorrectDecoders_Windows(string fileName, Type expectedDecoderType) { - if (TestEnvironment.IsLinux) + if (!TestEnvironment.IsWindows) { return; } From 7bbd678541894263413ec8634d4b20fcd2fd2d8b Mon Sep 17 00:00:00 2001 From: AlexNDRmac Date: Fri, 30 Oct 2020 12:15:34 +0200 Subject: [PATCH 124/131] Decorate OSX specific tests --- .../Processing/Processors/Transforms/ResizeTests.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs b/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs index f40b8d11a..47d951837 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeTests.cs @@ -355,6 +355,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms } [Theory] + [PlatformSpecific(~TestPlatforms.OSX)] [WithFileCollection(nameof(CommonTestImages), DefaultPixelType)] public void ResizeFromSourceRectangle(TestImageProvider provider) where TPixel : unmanaged, IPixel @@ -437,6 +438,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms } [Theory] + [PlatformSpecific(~TestPlatforms.OSX)] [WithFileCollection(nameof(CommonTestImages), DefaultPixelType)] public void ResizeWithBoxPadMode(TestImageProvider provider) where TPixel : unmanaged, IPixel @@ -547,6 +549,7 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Transforms } [Theory] + [PlatformSpecific(~TestPlatforms.OSX)] [WithFileCollection(nameof(CommonTestImages), DefaultPixelType)] public void ResizeWithPadMode(TestImageProvider provider) where TPixel : unmanaged, IPixel From 7de5c591021cc1589b612d2390a256e8edbe5890 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 15 Nov 2020 18:07:37 +0100 Subject: [PATCH 125/131] Use bulk conversion to rgba in Write8BitColor --- src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs index 454440f63..322d08e4c 100644 --- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs +++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs @@ -6,7 +6,6 @@ using System.Buffers; using System.IO; using System.Runtime.InteropServices; using System.Threading; -using System.Threading.Tasks; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Memory; @@ -340,15 +339,15 @@ namespace SixLabors.ImageSharp.Formats.Bmp { using IQuantizer frameQuantizer = this.quantizer.CreatePixelSpecificQuantizer(this.configuration); using IndexedImageFrame quantized = frameQuantizer.BuildPaletteAndQuantizeFrame(image, image.Bounds()); + using IMemoryOwner rgbColorsBuffer = this.memoryAllocator.Allocate(quantized.Palette.Length); + Span rgbColors = rgbColorsBuffer.GetSpan(); ReadOnlySpan quantizedColors = quantized.Palette.Span; - var color = default(Rgba32); + PixelOperations.Instance.ToRgba32(Configuration.Default, quantizedColors, rgbColors); - // TODO: Use bulk conversion here for better perf int idx = 0; - foreach (TPixel quantizedColor in quantizedColors) + foreach (Rgba32 color in rgbColors) { - quantizedColor.ToRgba32(ref color); colorPalette[idx] = color.B; colorPalette[idx + 1] = color.G; colorPalette[idx + 2] = color.R; From 9449f947e2edc88c5a7361b2d774587a3f1e836a Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 15 Nov 2020 18:51:34 +0100 Subject: [PATCH 126/131] Dont use the default config in ToRgba32 --- src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs index 322d08e4c..c7e7a534b 100644 --- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs +++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs @@ -343,7 +343,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp Span rgbColors = rgbColorsBuffer.GetSpan(); ReadOnlySpan quantizedColors = quantized.Palette.Span; - PixelOperations.Instance.ToRgba32(Configuration.Default, quantizedColors, rgbColors); + PixelOperations.Instance.ToRgba32(this.configuration, quantizedColors, rgbColors); int idx = 0; foreach (Rgba32 color in rgbColors) From 85b65d393ab131ca9afc1e439729ff9d07a8cdff Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 15 Nov 2020 19:20:59 +0100 Subject: [PATCH 127/131] Use colorPalette span as destination of bulk conversion --- src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs index c7e7a534b..01bdbd1c0 100644 --- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs +++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs @@ -339,22 +339,13 @@ namespace SixLabors.ImageSharp.Formats.Bmp { using IQuantizer frameQuantizer = this.quantizer.CreatePixelSpecificQuantizer(this.configuration); using IndexedImageFrame quantized = frameQuantizer.BuildPaletteAndQuantizeFrame(image, image.Bounds()); - using IMemoryOwner rgbColorsBuffer = this.memoryAllocator.Allocate(quantized.Palette.Length); - Span rgbColors = rgbColorsBuffer.GetSpan(); ReadOnlySpan quantizedColors = quantized.Palette.Span; - PixelOperations.Instance.ToRgba32(this.configuration, quantizedColors, rgbColors); - - int idx = 0; - foreach (Rgba32 color in rgbColors) + PixelOperations.Instance.ToBgra32(this.configuration, quantizedColors, MemoryMarshal.Cast(colorPalette)); + Span colorPaletteAsUInt = MemoryMarshal.Cast(colorPalette); + for (int i = 0; i < colorPaletteAsUInt.Length; i++) { - colorPalette[idx] = color.B; - colorPalette[idx + 1] = color.G; - colorPalette[idx + 2] = color.R; - - // Padding byte, always 0. - colorPalette[idx + 3] = 0; - idx += 4; + colorPaletteAsUInt[i] = colorPaletteAsUInt[i] & 0x00FFFFFF; // Padding byte, always 0. } stream.Write(colorPalette); From 440d8cf11633164995870127abe3b27845325937 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 16 Nov 2020 14:56:11 +0100 Subject: [PATCH 128/131] Use GetPixelRowSpan to access pixels, fixes #1429 --- ...eHistogramEqualizationProcessor{TPixel}.cs | 70 ++++++++----------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs index b5b07d7a8..95626ce1e 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs @@ -86,42 +86,39 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization new Rectangle(0, 0, sourceWidth, tileYStartPositions.Count), in operation); - ref TPixel pixelsBase = ref source.GetPixelReference(0, 0); - // Fix left column - ProcessBorderColumn(ref pixelsBase, cdfData, 0, sourceWidth, sourceHeight, this.Tiles, tileHeight, xStart: 0, xEnd: halfTileWidth, luminanceLevels); + ProcessBorderColumn(source, cdfData, 0, sourceHeight, this.Tiles, tileHeight, xStart: 0, xEnd: halfTileWidth, luminanceLevels); // Fix right column int rightBorderStartX = ((this.Tiles - 1) * tileWidth) + halfTileWidth; - ProcessBorderColumn(ref pixelsBase, cdfData, this.Tiles - 1, sourceWidth, sourceHeight, this.Tiles, tileHeight, xStart: rightBorderStartX, xEnd: sourceWidth, luminanceLevels); + ProcessBorderColumn(source, cdfData, this.Tiles - 1, sourceHeight, this.Tiles, tileHeight, xStart: rightBorderStartX, xEnd: sourceWidth, luminanceLevels); // Fix top row - ProcessBorderRow(ref pixelsBase, cdfData, 0, sourceWidth, this.Tiles, tileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); + ProcessBorderRow(source, cdfData, 0, sourceWidth, this.Tiles, tileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); // Fix bottom row int bottomBorderStartY = ((this.Tiles - 1) * tileHeight) + halfTileHeight; - ProcessBorderRow(ref pixelsBase, cdfData, this.Tiles - 1, sourceWidth, this.Tiles, tileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); + ProcessBorderRow(source, cdfData, this.Tiles - 1, sourceWidth, this.Tiles, tileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); // Left top corner - ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, 0, 0, xStart: 0, xEnd: halfTileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); + ProcessCornerTile(source, cdfData, 0, 0, xStart: 0, xEnd: halfTileWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); // Left bottom corner - ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, 0, this.Tiles - 1, xStart: 0, xEnd: halfTileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); + ProcessCornerTile(source, cdfData, 0, this.Tiles - 1, xStart: 0, xEnd: halfTileWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); // Right top corner - ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, this.Tiles - 1, 0, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); + ProcessCornerTile(source, cdfData, this.Tiles - 1, 0, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: 0, yEnd: halfTileHeight, luminanceLevels); // Right bottom corner - ProcessCornerTile(ref pixelsBase, cdfData, sourceWidth, this.Tiles - 1, this.Tiles - 1, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); + ProcessCornerTile(source, cdfData, this.Tiles - 1, this.Tiles - 1, xStart: rightBorderStartX, xEnd: sourceWidth, yStart: bottomBorderStartY, yEnd: sourceHeight, luminanceLevels); } } /// /// Processes the part of a corner tile which was previously left out. It consists of 1 / 4 of a tile and does not need interpolation. /// - /// The output pixels base reference. + /// The source image. /// The lookup table to remap the grey values. - /// The source image width. /// The x-position in the CDF lookup map. /// The y-position in the CDF lookup map. /// X start position. @@ -133,9 +130,8 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// or 65536 for 16-bit grayscale images. /// private static void ProcessCornerTile( - ref TPixel pixelsBase, + ImageFrame source, CdfTileData cdfData, - int sourceWidth, int cdfX, int cdfY, int xStart, @@ -146,12 +142,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization { for (int dy = yStart; dy < yEnd; dy++) { - int dyOffSet = dy * sourceWidth; + Span rowSpan = source.GetPixelRowSpan(dy); for (int dx = xStart; dx < xEnd; dx++) { - ref TPixel pixel = ref Unsafe.Add(ref pixelsBase, dyOffSet + dx); + TPixel pixel = rowSpan[dx]; float luminanceEqualized = cdfData.RemapGreyValue(cdfX, cdfY, GetLuminance(pixel, luminanceLevels)); - pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + rowSpan[dx].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); } } } @@ -159,10 +155,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// /// Processes a border column of the image which is half the size of the tile width. /// - /// The output pixels reference. + /// The source image. /// The pre-computed lookup tables to remap the grey values for each tiles. /// The X index of the lookup table to use. - /// The source image width. /// The source image height. /// The number of vertical tiles. /// The height of a tile. @@ -173,10 +168,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// or 65536 for 16-bit grayscale images. /// private static void ProcessBorderColumn( - ref TPixel pixelBase, + ImageFrame source, CdfTileData cdfData, int cdfX, - int sourceWidth, int sourceHeight, int tileCount, int tileHeight, @@ -194,12 +188,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization int tileY = 0; for (int dy = y; dy < yLimit; dy++) { - int dyOffSet = dy * sourceWidth; + Span rowSpan = source.GetPixelRowSpan(dy); for (int dx = xStart; dx < xEnd; dx++) { - ref TPixel pixel = ref Unsafe.Add(ref pixelBase, dyOffSet + dx); + TPixel pixel = rowSpan[dx]; float luminanceEqualized = InterpolateBetweenTwoTiles(pixel, cdfData, cdfX, cdfY, cdfX, cdfY + 1, tileY, tileHeight, luminanceLevels); - pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + rowSpan[dx].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); } tileY++; @@ -213,7 +207,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// /// Processes a border row of the image which is half of the size of the tile height. /// - /// The output pixels base reference. + /// The source image. /// The pre-computed lookup tables to remap the grey values for each tiles. /// The Y index of the lookup table to use. /// The source image width. @@ -226,7 +220,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization /// or 65536 for 16-bit grayscale images. /// private static void ProcessBorderRow( - ref TPixel pixelBase, + ImageFrame source, CdfTileData cdfData, int cdfY, int sourceWidth, @@ -244,14 +238,14 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization { for (int dy = yStart; dy < yEnd; dy++) { - int dyOffSet = dy * sourceWidth; + Span rowSpan = source.GetPixelRowSpan(dy); int tileX = 0; int xLimit = Math.Min(x + tileWidth, sourceWidth - 1); for (int dx = x; dx < xLimit; dx++) { - ref TPixel pixel = ref Unsafe.Add(ref pixelBase, dyOffSet + dx); + TPixel pixel = rowSpan[dx]; float luminanceEqualized = InterpolateBetweenTwoTiles(pixel, cdfData, cdfX, cdfY, cdfX + 1, cdfY, tileX, tileWidth, luminanceLevels); - pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + rowSpan[dx].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); tileX++; } } @@ -410,8 +404,6 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(in RowInterval rows) { - ref TPixel sourceBase = ref this.source.GetPixelReference(0, 0); - for (int index = rows.Min; index < rows.Max; index++) { (int y, int cdfY) tileYStartPosition = this.tileYStartPositions[index]; @@ -427,11 +419,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization int xEnd = Math.Min(x + this.tileWidth, this.sourceWidth); for (int dy = y; dy < yEnd; dy++) { - int dyOffSet = dy * this.sourceWidth; + Span rowSpan = this.source.GetPixelRowSpan(dy); int tileX = 0; for (int dx = x; dx < xEnd; dx++) { - ref TPixel pixel = ref Unsafe.Add(ref sourceBase, dyOffSet + dx); + TPixel pixel = rowSpan[dx]; float luminanceEqualized = InterpolateBetweenFourTiles( pixel, this.cdfData, @@ -445,7 +437,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization this.tileHeight, this.luminanceLevels); - pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + rowSpan[dx].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); tileX++; } @@ -597,15 +589,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization [MethodImpl(InliningOptions.ShortMethod)] public void Invoke(in RowInterval rows) { - ref TPixel sourceBase = ref this.source.GetPixelReference(0, 0); - for (int index = rows.Min; index < rows.Max; index++) { int cdfX = 0; int cdfY = this.tileYStartPositions[index].cdfY; int y = this.tileYStartPositions[index].y; int endY = Math.Min(y + this.tileHeight, this.sourceHeight); - ref int cdfMinBase = ref MemoryMarshal.GetReference(this.cdfMinBuffer2D.GetRowSpan(cdfY)); + Span cdfMinSpan = this.cdfMinBuffer2D.GetRowSpan(cdfY); using IMemoryOwner histogramBuffer = this.allocator.Allocate(this.luminanceLevels); Span histogram = histogramBuffer.GetSpan(); @@ -620,10 +610,10 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization int xlimit = Math.Min(x + this.tileWidth, this.sourceWidth); for (int dy = y; dy < endY; dy++) { - int dyOffset = dy * this.sourceWidth; + Span rowSpan = this.source.GetPixelRowSpan(dy); for (int dx = x; dx < xlimit; dx++) { - int luminance = GetLuminance(Unsafe.Add(ref sourceBase, dyOffset + dx), this.luminanceLevels); + int luminance = GetLuminance(rowSpan[dx], this.luminanceLevels); histogram[luminance]++; } } @@ -633,7 +623,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization this.processor.ClipHistogram(histogram, this.processor.ClipLimit); } - Unsafe.Add(ref cdfMinBase, cdfX) = this.processor.CalculateCdf(ref cdfBase, ref histogramBase, histogram.Length - 1); + cdfMinSpan[cdfX] += this.processor.CalculateCdf(ref cdfBase, ref histogramBase, histogram.Length - 1); cdfX++; } From 9d8ed6c852a45f07c5c8121b701cc1b53e8a927b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 16 Nov 2020 15:06:11 +0100 Subject: [PATCH 129/131] Use GetPixelRowSpan to access pixel data in global hist equalization --- .../GlobalHistogramEqualizationProcessor{TPixel}.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs index 488426f93..2aaf8cdcd 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs @@ -116,13 +116,13 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization public void Invoke(int y) { ref int histogramBase = ref MemoryMarshal.GetReference(this.histogramBuffer.GetSpan()); - ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + Span pixelRow = this.source.GetPixelRowSpan(y); int levels = this.luminanceLevels; for (int x = 0; x < this.bounds.Width; x++) { // TODO: We should bulk convert here. - var vector = Unsafe.Add(ref pixelBase, x).ToVector4(); + var vector = pixelRow[x].ToVector4(); int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); Interlocked.Increment(ref Unsafe.Add(ref histogramBase, luminance)); } @@ -165,18 +165,18 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization public void Invoke(int y) { ref int cdfBase = ref MemoryMarshal.GetReference(this.cdfBuffer.GetSpan()); - ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + Span pixelRow = this.source.GetPixelRowSpan(y); int levels = this.luminanceLevels; float noOfPixelsMinusCdfMin = this.numberOfPixelsMinusCdfMin; for (int x = 0; x < this.bounds.Width; x++) { // TODO: We should bulk convert here. - ref TPixel pixel = ref Unsafe.Add(ref pixelBase, x); + TPixel pixel = pixelRow[x]; var vector = pixel.ToVector4(); int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / noOfPixelsMinusCdfMin; - pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, vector.W)); + pixelRow[x].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, vector.W)); } } } From 3558e7ef97ddc1e3f8a1b8d7a43241799c97d779 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 16 Nov 2020 16:04:13 +0100 Subject: [PATCH 130/131] Use ref TPixel pixel = ref rowSpan[dx]; --- ...tiveHistogramEqualizationProcessor{TPixel}.cs | 16 ++++++++-------- ...obalHistogramEqualizationProcessor{TPixel}.cs | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs index 95626ce1e..14687426d 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/AdaptiveHistogramEqualizationProcessor{TPixel}.cs @@ -145,9 +145,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization Span rowSpan = source.GetPixelRowSpan(dy); for (int dx = xStart; dx < xEnd; dx++) { - TPixel pixel = rowSpan[dx]; + ref TPixel pixel = ref rowSpan[dx]; float luminanceEqualized = cdfData.RemapGreyValue(cdfX, cdfY, GetLuminance(pixel, luminanceLevels)); - rowSpan[dx].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); } } } @@ -191,9 +191,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization Span rowSpan = source.GetPixelRowSpan(dy); for (int dx = xStart; dx < xEnd; dx++) { - TPixel pixel = rowSpan[dx]; + ref TPixel pixel = ref rowSpan[dx]; float luminanceEqualized = InterpolateBetweenTwoTiles(pixel, cdfData, cdfX, cdfY, cdfX, cdfY + 1, tileY, tileHeight, luminanceLevels); - rowSpan[dx].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); } tileY++; @@ -243,9 +243,9 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization int xLimit = Math.Min(x + tileWidth, sourceWidth - 1); for (int dx = x; dx < xLimit; dx++) { - TPixel pixel = rowSpan[dx]; + ref TPixel pixel = ref rowSpan[dx]; float luminanceEqualized = InterpolateBetweenTwoTiles(pixel, cdfData, cdfX, cdfY, cdfX + 1, cdfY, tileX, tileWidth, luminanceLevels); - rowSpan[dx].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); tileX++; } } @@ -423,7 +423,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization int tileX = 0; for (int dx = x; dx < xEnd; dx++) { - TPixel pixel = rowSpan[dx]; + ref TPixel pixel = ref rowSpan[dx]; float luminanceEqualized = InterpolateBetweenFourTiles( pixel, this.cdfData, @@ -437,7 +437,7 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization this.tileHeight, this.luminanceLevels); - rowSpan[dx].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); tileX++; } diff --git a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs index 2aaf8cdcd..74d293566 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs @@ -172,11 +172,11 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization for (int x = 0; x < this.bounds.Width; x++) { // TODO: We should bulk convert here. - TPixel pixel = pixelRow[x]; + ref TPixel pixel = ref pixelRow[x]; var vector = pixel.ToVector4(); int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / noOfPixelsMinusCdfMin; - pixelRow[x].FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, vector.W)); + pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, vector.W)); } } } From fc4944a2c09f66df6e4eb2727c07b9ed1efd6acf Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 16 Nov 2020 16:36:25 +0100 Subject: [PATCH 131/131] Add histogram equalization benchmark --- .../Processing/HistogramEqualization.cs | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tests/ImageSharp.Benchmarks/Processing/HistogramEqualization.cs diff --git a/tests/ImageSharp.Benchmarks/Processing/HistogramEqualization.cs b/tests/ImageSharp.Benchmarks/Processing/HistogramEqualization.cs new file mode 100644 index 000000000..081d3e8e3 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Processing/HistogramEqualization.cs @@ -0,0 +1,53 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System.IO; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; +using SixLabors.ImageSharp.Processing; +using SixLabors.ImageSharp.Processing.Processors.Normalization; +using SixLabors.ImageSharp.Tests; + +namespace SixLabors.ImageSharp.Benchmarks.Processing +{ + [Config(typeof(Config.ShortClr))] + public class HistogramEqualization : BenchmarkBase + { + private Image image; + + [GlobalSetup] + public void ReadImages() + { + if (this.image == null) + { + this.image = Image.Load(File.OpenRead(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, TestImages.Jpeg.Baseline.HistogramEqImage))); + } + } + + [GlobalCleanup] + public void Cleanup() + { + this.image.Dispose(); + } + + [Benchmark(Description = "Global Histogram Equalization")] + public void GlobalHistogramEqualization() + { + this.image.Mutate(img => img.HistogramEqualization(new HistogramEqualizationOptions() + { + LuminanceLevels = 256, + Method = HistogramEqualizationMethod.Global + })); + } + + [Benchmark(Description = "AdaptiveHistogramEqualization (Tile interpolation)")] + public void AdaptiveHistogramEqualization() + { + this.image.Mutate(img => img.HistogramEqualization(new HistogramEqualizationOptions() + { + LuminanceLevels = 256, + Method = HistogramEqualizationMethod.AdaptiveTileInterpolation + })); + } + } +}