diff --git a/src/ImageSharp/Common/Helpers/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/IComponentShuffle.cs new file mode 100644 index 000000000..e354a57b0 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/IComponentShuffle.cs @@ -0,0 +1,165 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers.Binary; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + /// Defines the contract for methods that allow the shuffling of pixel components. + /// Used for shuffling on platforms that do not support Hardware Intrinsics. + /// + internal interface IComponentShuffle + { + /// + /// Gets the shuffle control. + /// + byte Control { get; } + + /// + /// Shuffle 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + void RunFallbackShuffle(ReadOnlySpan source, Span dest); + } + + internal readonly struct DefaultShuffle4 : IComponentShuffle + { + public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) + : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0)) + { + } + + public DefaultShuffle4(byte control) => this.Control = control; + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + SimdUtils.Shuffle.InverseMmShuffle( + this.Control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + } + + internal readonly struct WXYZShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) + // and ROTR (Rotate Right) emitting efficient CPU instructions: + // https://github.com/dotnet/coreclr/pull/1830 + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTL(8, packed) = [Z Y X W] + Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); + } + } + } + + internal readonly struct WZYXShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // REVERSE(packedArgb) = [X Y Z W] + Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); + } + } + } + + internal readonly struct YZWXShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTR(8, packedArgb) = [Y Z W X] + Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); + } + } + } + + internal readonly struct ZYXWShuffle4 : IComponentShuffle + { + public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed & 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + + Unsafe.Add(ref dBase, i) = tmp1 + tmp3; + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 59b625419..febb31c2f 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Buffers.Binary; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -40,34 +39,32 @@ namespace SixLabors.ImageSharp } /// - /// Shuffle 8-bit integers in a within 128-bit lanes in + /// Shuffle 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// /// The source span of bytes. /// The destination span of bytes. - /// The byte control. + /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Channel( + public static void Shuffle4Channel( ReadOnlySpan source, Span dest, - byte control) + TShuffle shuffle) + where TShuffle : struct, IComponentShuffle { VerifyShuffleSpanInput(source, dest); - // TODO: There doesn't seem to be any APIs for - // System.Numerics that allow shuffling. #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); + HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: if (source.Length > 0) { - ShuffleRemainder4Channel(source, dest, control); + shuffle.RunFallbackShuffle(source, dest); } } - [MethodImpl(InliningOptions.ColdPath)] public static void ShuffleRemainder4Channel( ReadOnlySpan source, Span dest, @@ -86,125 +83,6 @@ namespace SixLabors.ImageSharp } } - [MethodImpl(InliningOptions.ColdPath)] - public static void ShuffleRemainder4Channel( - ReadOnlySpan source, - Span dest, - byte control) - { -#if NETCOREAPP - // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) - // and ROTR (Rotate Right) emitting efficient CPU instructions: - // https://github.com/dotnet/coreclr/pull/1830 - switch (control) - { - case Shuffle.WXYZ: - WXYZ(source, dest); - return; - case Shuffle.WZYX: - WZYX(source, dest); - return; - case Shuffle.YZWX: - YZWX(source, dest); - return; - case Shuffle.ZYXW: - ZYXW(source, dest); - return; - } -#endif - - ref byte sBase = ref MemoryMarshal.GetReference(source); - ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); - - for (int i = 0; i < source.Length; i += 4) - { - Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); - Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); - Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); - Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void WXYZ(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // ROTL(8, packed) = [Z Y X W] - Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void ZYXW(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // tmp1 = [W 0 Y 0] - // tmp2 = [0 Z 0 X] - // tmp3=ROTL(16, tmp2) = [0 X 0 Z] - // tmp1 + tmp3 = [W X Y Z] - uint tmp1 = packed & 0xFF00FF00; - uint tmp2 = packed & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - - Unsafe.Add(ref dBase, i) = tmp1 + tmp3; - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void WZYX(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // REVERSE(packedArgb) = [X Y Z W] - Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); - } - } - - [MethodImpl(InliningOptions.ShortMethod)] - private static void YZWX(ReadOnlySpan source, Span dest) - { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - for (int i = 0; i < s.Length; i++) - { - uint packed = Unsafe.Add(ref sBase, i); - - // packed = [W Z Y X] - // ROTR(8, packedArgb) = [Y Z W X] - Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); - } - } - [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct @@ -222,12 +100,6 @@ namespace SixLabors.ImageSharp public static class Shuffle { - public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3; - public const byte WZYX = (0 << 6) | (1 << 4) | (2 << 2) | 3; - public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; - public const byte YZWX = (0 << 6) | (3 << 4) | (2 << 2) | 1; - public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; - [MethodImpl(InliningOptions.ShortMethod)] public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index bc24258c9..ab9011a5c 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Buffers.Binary; using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.PixelFormats.Utils @@ -28,7 +27,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WXYZ); + => SimdUtils.Shuffle4Channel(source, dest, default); /// /// Converts a representing a collection of @@ -37,7 +36,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); + => SimdUtils.Shuffle4Channel(source, dest, default); } public static class FromArgb32 @@ -49,7 +48,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.YZWX); + => SimdUtils.Shuffle4Channel(source, dest, default); /// /// Converts a representing a collection of @@ -58,7 +57,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); + => SimdUtils.Shuffle4Channel(source, dest, default); } public static class FromBgra32 @@ -70,7 +69,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.WZYX); + => SimdUtils.Shuffle4Channel(source, dest, default); /// /// Converts a representing a collection of @@ -79,7 +78,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, SimdUtils.Shuffle.ZYXW); + => SimdUtils.Shuffle4Channel(source, dest, default); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index c45b103e3..bd4a8d534 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + SimdUtils.Shuffle4Channel(this.source, this.destination, default); } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 36b9591d9..04c6dbf21 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, SimdUtils.Shuffle.WXYZ); + SimdUtils.Shuffle4Channel(this.source, this.destination, default(WXYZShuffle4).Control); } } diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs index e8a06bf24..eacd36799 100644 --- a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -58,6 +58,12 @@ namespace SixLabors.ImageSharp.Benchmarks { public HwIntrinsics_SSE_AVX() { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables( + new EnvironmentVariable(EnableHWIntrinsic, Off), + new EnvironmentVariable(FeatureSIMD, Off)) + .WithId("No HwIntrinsics")); + #if SUPPORTS_RUNTIME_INTRINSICS if (Avx.IsSupported) { @@ -72,11 +78,6 @@ namespace SixLabors.ImageSharp.Benchmarks .WithId("SSE")); } #endif - this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) - .WithEnvironmentVariables( - new EnvironmentVariable(EnableHWIntrinsic, Off), - new EnvironmentVariable(FeatureSIMD, Off)) - .WithId("No HwIntrinsics")); } } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 94298f94c..06f61e617 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -9,66 +9,91 @@ namespace SixLabors.ImageSharp.Tests.Common { public partial class SimdUtilsTests { - public static readonly TheoryData ShuffleControls = - new TheoryData - { - SimdUtils.Shuffle.WXYZ, - SimdUtils.Shuffle.WZYX, - SimdUtils.Shuffle.XYZW, - SimdUtils.Shuffle.YZWX, - SimdUtils.Shuffle.ZYXW, - SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0), - SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1), - SimdUtils.Shuffle.MmShuffle(3, 3, 3, 3) - }; - [Theory] - [MemberData(nameof(ShuffleControls))] - public void BulkShuffleFloat4Channel(byte control) + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleFloat4Channel(int count) { static void RunTest(string serialized) { - byte ctrl = FeatureTestRunner.Deserialize(serialized); - foreach (var item in ArraySizesDivisibleBy4) - { - foreach (var count in item) - { - TestShuffleFloat4Channel( - (int)count, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), - ctrl); - } - } + // No need to test multiple shuffle controls as the + // pipeline is always the same. + int size = FeatureTestRunner.Deserialize(serialized); + byte control = default(WZYXShuffle4).Control; + + TestShuffleFloat4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control), + control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - control, + count, HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); } [Theory] - [MemberData(nameof(ShuffleControls))] - public void BulkShuffleByte4Channel(byte control) + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleByte4Channel(int count) { static void RunTest(string serialized) { - byte ctrl = FeatureTestRunner.Deserialize(serialized); + int size = FeatureTestRunner.Deserialize(serialized); foreach (var item in ArraySizesDivisibleBy4) { + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IComponentShuffle to the generic utils method. foreach (var count in item) { + WXYZShuffle4 wxyz = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz), + wxyz.Control); + + WZYXShuffle4 wzyx = default; TestShuffleByte4Channel( - (int)count, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, ctrl), - ctrl); + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx), + wzyx.Control); + + YZWXShuffle4 yzwx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx), + yzwx.Control); + + ZYXWShuffle4 zyxw = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw), + zyxw.Control); + + var xwyz = new DefaultShuffle4(2, 1, 3, 0); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4(1, 1, 1, 1); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4(3, 3, 3, 3); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww), + wwww.Control); } } } FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - control, + count, HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); }