diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 3f045a5799..1a4c6ab446 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -40,6 +40,11 @@ namespace SixLabors.ImageSharp public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) { + Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + this.p3 = p3; this.p2 = p2; this.p1 = p1; diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 97bd5aa725..b223a6bc27 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -21,6 +21,11 @@ namespace SixLabors.ImageSharp public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) { + Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + this.p3 = p3; this.p2 = p2; this.p1 = p1; diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs new file mode 100644 index 0000000000..fa4260e63d --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -0,0 +1,90 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace SixLabors.ImageSharp
+{
+    /// <inheritdoc/>
+    internal interface IShuffle3 : IComponentShuffle
+    {
+    }
+
+    internal readonly struct DefaultShuffle3 : IShuffle3
+    {
+        private readonly byte p2;
+        private readonly byte p1;
+        private readonly byte p0;
+
+        public DefaultShuffle3(byte p2, byte p1, byte p0)
+        {
+            Guard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2));
+            Guard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1));
+            Guard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0));
+
+            this.p2 = p2;
+            this.p1 = p1;
+            this.p0 = p0;
+            this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0);
+        }
+
+        public byte Control { get; }
+
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref byte sBase = ref MemoryMarshal.GetReference(source);
+            ref byte dBase = ref MemoryMarshal.GetReference(dest);
+
+            int p2 = this.p2;
+            int p1 = this.p1;
+            int p0 = this.p0;
+
+            for (int i = 0; i < source.Length; i += 3)
+            {
+                Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
+                Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
+                Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
+            }
+        }
+    }
+
+    internal readonly struct ZYXShuffle3 : IShuffle3
+    {
+        private static readonly byte ZYX = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
+
+        public byte Control => ZYX;
+
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            ref byte sBase = ref MemoryMarshal.GetReference(source);
+            ref byte dBase = ref MemoryMarshal.GetReference(dest);
+            int n = source.Length / 3;
+
+            for (int i = 0; i < n; i++)
+            {
+                int offset = i * 3;
+
+                // Swap the triplet component-wise rather than through a 4-byte
+                // load: reading a uint at the last triplet would touch one
+                // byte past the end of the source span.
+                // All three components are loaded before any store so the
+                // swap stays correct when source and dest share memory.
+                byte x = Unsafe.Add(ref sBase, offset);
+                byte y = Unsafe.Add(ref sBase, offset + 1);
+                byte z = Unsafe.Add(ref sBase, offset + 2);
+                Unsafe.Add(ref dBase, offset) = z;
+                Unsafe.Add(ref dBase, offset + 1) = y;
+                Unsafe.Add(ref dBase, offset + 2) = x;
+            }
+        }
+    }
+
+    [StructLayout(LayoutKind.Explicit, Size = 3)]
+    internal readonly struct Byte3
+    {
+    }
+}
diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
index c65c50f684..1ceb38f1a4 100644
--- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
@@ -20,6 +20,11 @@ namespace SixLabors.ImageSharp
 
         public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0)
         {
+            Guard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3));
+            Guard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2));
+            Guard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1));
+            Guard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0));
+
             this.p2 = p2;
             this.p1 = p1;
             this.p0 = p0;
@@ -62,13 +67,8 @@ namespace SixLabors.ImageSharp
             int n = source.Length / 4;
             for (int i = 0, j = 0; i < n; i++, j += 3)
             {
-                Unsafe.As<byte, Xyz24>(ref Unsafe.Add(ref dBase, j)) = Unsafe.As<uint, Xyz24>(ref Unsafe.Add(ref sBase, i));
+                Unsafe.As<byte, Byte3>(ref Unsafe.Add(ref dBase, j)) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref sBase, i));
             }
         }
     }
-
-    [StructLayout(LayoutKind.Explicit, Size = 3)]
-    internal readonly struct Xyz24
-    {
-    }
 }
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index abda6c4df6..974516c3e5 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -86,6 +86,38 @@ namespace SixLabors.ImageSharp
                 }
             }
 
+            /// <summary>
+            /// Shuffles 8-bit integer triplets within 128-bit lanes in <paramref name="source"/>
+            /// using the control and store the results in <paramref name="dest"/>.
+            /// </summary>
+            /// <param name="source">The source span of bytes.</param>
+            /// <param name="dest">The destination span of bytes.</param>
+            /// <param name="control">The byte control.</param>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static void Shuffle3Reduce(
+                ref ReadOnlySpan<byte> source,
+                ref Span<byte> dest,
+                byte control)
+            {
+                if (Ssse3.IsSupported)
+                {
+                    // Vectorize whole blocks of three 128-bit vectors; the advanced spans leave the tail to the caller.
+                    int remainder = source.Length % (Vector128<byte>.Count * 3);
+                    int adjustedCount = source.Length - remainder;
+
+                    if (adjustedCount > 0)
+                    {
+                        Shuffle3(
+                            source.Slice(0, adjustedCount),
+                            dest.Slice(0, adjustedCount),
+                            control);
+
+                        source = source.Slice(adjustedCount);
+                        dest = dest.Slice(adjustedCount);
+                    }
+                }
+            }
+
             /// <summary>
             /// Pads then shuffles 8-bit integers within 128-bit lanes in <paramref name="source"/>
             /// using the control and store the results in <paramref name="dest"/>.
@@ -94,7 +126,7 @@ namespace SixLabors.ImageSharp
             /// <param name="dest">The destination span of bytes.</param>
             /// <param name="control">The byte control.</param>
             [MethodImpl(InliningOptions.ShortMethod)]
-            public static unsafe void Pad3Shuffle4Reduce(
+            public static void Pad3Shuffle4Reduce(
                 ref ReadOnlySpan<byte> source,
                 ref Span<byte> dest,
                 byte control)
@@ -127,7 +159,7 @@ namespace SixLabors.ImageSharp
             /// <param name="dest">The destination span of bytes.</param>
             /// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)] - public static unsafe void Shuffle4Slice3Reduce( + public static void Shuffle4Slice3Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -313,7 +345,69 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ShortMethod)] - private static unsafe void Pad3Shuffle4( + private static void Shuffle3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + Vector128 vmask = Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80).AsByte(); + Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); + Vector128 vmasko = Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15).AsByte(); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0; i < n; i += 3) + { + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); + + v0 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle); + v1 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); + v2 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); + v3 = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); + + v0 = Ssse3.Shuffle(v0, vmaske); + v1 = Ssse3.Shuffle(v1, vmasko); + v2 = Ssse3.Shuffle(v2, vmaske); + v3 = Ssse3.Shuffle(v3, vmasko); + + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 
12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, i); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Pad3Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -356,7 +450,7 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ShortMethod)] - private static unsafe void Shuffle4Slice3( + private static void Shuffle4Slice3( ReadOnlySpan source, Span dest, byte control) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 7ef3be6fe3..79cb0da372 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -23,7 +23,7 @@ namespace SixLabors.ImageSharp Span dest, byte control) { - VerifyShuffleSpanInput(source, dest); + VerifyShuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control); @@ -50,7 +50,7 @@ namespace SixLabors.ImageSharp TShuffle shuffle) where TShuffle : struct, IComponentShuffle { - VerifyShuffleSpanInput(source, dest); + VerifyShuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); @@ -63,6 +63,33 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffle 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. 
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static void Shuffle3<TShuffle>(
+            ReadOnlySpan<byte> source,
+            Span<byte> dest,
+            TShuffle shuffle)
+            where TShuffle : struct, IShuffle3
+        {
+            VerifyShuffle3SpanInput(source, dest);
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control);
+#endif
+
+            // Scalar fallback for whatever the vectorized path left unprocessed:
+            if (source.Length > 0)
+            {
+                shuffle.RunFallbackShuffle(source, dest);
+            }
+        }
+
         /// <summary>
         /// Pads then shuffles 8-bit integers within 128-bit lanes in <paramref name="source"/>
         /// using the control and store the results in <paramref name="dest"/>.
@@ -136,7 +163,7 @@ namespace SixLabors.ImageSharp
         }
 
         [Conditional("DEBUG")]
-        private static void VerifyShuffleSpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
+        private static void VerifyShuffle4SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
             where T : struct
         {
             DebugGuard.IsTrue(
@@ -150,6 +177,21 @@ namespace SixLabors.ImageSharp
                 "Input spans must be divisable by 4!");
         }
 
+        [Conditional("DEBUG")]
+        private static void VerifyShuffle3SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
+            where T : struct
+        {
+            DebugGuard.IsTrue(
+                source.Length == dest.Length,
+                nameof(source),
+                "Input spans must be of same length!");
+
+            DebugGuard.IsTrue(
+                source.Length % 3 == 0,
+                nameof(source),
+                "Input spans must be divisable by 3!");
+        }
+
         [Conditional("DEBUG")]
         private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan<byte> source, Span<byte> dest)
         {
diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs
index 29f3925fc9..75d7c87299 100644
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs
@@ -92,6 +92,48 @@ namespace SixLabors.ImageSharp.Tests.Common
                 HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
         }
 
+        [Theory]
+        [MemberData(nameof(ArraySizesDivisibleBy3))]
+        public void BulkShuffleByte3Channel(int count)
+        {
+            static void RunTest(string serialized)
+            {
+                int size = FeatureTestRunner.Deserialize<int>(serialized);
+
+                // These cannot be expressed as a theory as you cannot
+                // use RemoteExecutor within generic methods nor pass
+                // IShuffle3 to the generic utils method.
+                ZYXShuffle3 zyx = default;
+                TestShuffleByte3Channel(
+                    size,
+                    (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx),
+                    zyx.Control);
+
+                var xyz = new DefaultShuffle3(2, 1, 0);
+                TestShuffleByte3Channel(
+                    size,
+                    (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz),
+                    xyz.Control);
+
+                var yyy = new DefaultShuffle3(1, 1, 1);
+                TestShuffleByte3Channel(
+                    size,
+                    (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy),
+                    yyy.Control);
+
+                var zzz = new DefaultShuffle3(2, 2, 2);
+                TestShuffleByte3Channel(
+                    size,
+                    (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz),
+                    zzz.Control);
+            }
+
+            FeatureTestRunner.RunWithHwIntrinsicsFeature(
+                RunTest,
+                count,
+                HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
+        }
+
         [Theory]
         [MemberData(nameof(ArraySizesDivisibleBy3))]
         public void BulkPad3Shuffle4Channel(int count)
@@ -102,7 +144,7 @@ namespace SixLabors.ImageSharp.Tests.Common
 
                 // These cannot be expressed as a theory as you cannot
                 // use RemoteExecutor within generic methods nor pass
-                // IComponentShuffle to the generic utils method.
+                // IPad3Shuffle4 to the generic utils method.
                 XYZWPad3Shuffle4 xyzw = default;
                 TestPad3Shuffle4Channel(
                     size,
@@ -144,7 +186,7 @@ namespace SixLabors.ImageSharp.Tests.Common
 
                 // These cannot be expressed as a theory as you cannot
                 // use RemoteExecutor within generic methods nor pass
-                // IComponentShuffle to the generic utils method.
+                // IShuffle4Slice3 to the generic utils method.
XYZWShuffle4Slice3 xyzw = default; TestShuffle4Slice3Channel( size, @@ -237,6 +279,36 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(expected, result); } + private static void TestShuffleByte3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 3) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } + private static void TestPad3Shuffle4Channel( int count, Action, Memory> convert,