diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 6b766b88de..4d2678320b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Buffers.Binary; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -67,14 +68,53 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ColdPath)] - public static void ShuffleRemainder4Channel( - ReadOnlySpan source, - Span dest, + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, + byte control) + { + ref float sBase = ref MemoryMarshal.GetReference(source); + ref float dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + + [MethodImpl(InliningOptions.ColdPath)] + public static void ShuffleRemainder4Channel( + ReadOnlySpan source, + Span dest, byte control) - where T : struct { - ref T sBase = ref MemoryMarshal.GetReference(source); - ref T dBase = ref MemoryMarshal.GetReference(dest); +#if NETCOREAPP + // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) + // and ROTR (Rotate Right) emitting efficient CPU instructions: + // https://github.com/dotnet/coreclr/pull/1830 + switch (control) + { + case Shuffle.WXYZ: + WXYZ(source, dest); + return; + case Shuffle.WZYX: + WZYX(source, dest); + return; + case Shuffle.YZWX: + YZWX(source, dest); + return; + case Shuffle.ZYXW: + ZYXW(source, dest); + return; + } +#endif + + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) @@ -86,6 +126,85 @@ namespace SixLabors.ImageSharp } } + [MethodImpl(InliningOptions.ShortMethod)] + private static void WXYZ(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTL(8, packed) = [Z Y X W] + Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void ZYXW(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed & 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + + Unsafe.Add(ref dBase, i) = tmp1 + tmp3; + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void WZYX(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // REVERSE(packedArgb) = [X Y Z W] + Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void YZWX(ReadOnlySpan source, Span dest) + { + ReadOnlySpan s = MemoryMarshal.Cast(source); + Span d = MemoryMarshal.Cast(dest); + ref uint sBase = ref MemoryMarshal.GetReference(s); + ref uint dBase = ref MemoryMarshal.GetReference(d); + + for (int i = 0; i < s.Length; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTR(8, packedArgb) = [Y Z W X] + Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); + } + } + [Conditional("DEBUG")] private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) where T : struct @@ -104,7 +223,9 @@ namespace SixLabors.ImageSharp public static class Shuffle { public const byte WXYZ = (2 << 6) | (1 << 4) | (0 << 2) | 3; + public const byte WZYX = (0 << 6) | (1 << 4) | (2 << 2) | 3; public const byte XYZW = (3 << 6) | (2 << 4) | (1 << 2) | 0; + public const byte YZWX = (0 << 6) | (3 << 4) | (2 << 2) | 1; public const byte ZYXW = (3 << 6) | (0 << 4) | (1 << 2) | 2; [MethodImpl(InliningOptions.ShortMethod)] diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index e07bcf257f..cdb6a86dfc 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -13,7 +13,9 @@ namespace SixLabors.ImageSharp.Tests.Common new TheoryData { SimdUtils.Shuffle.WXYZ, + SimdUtils.Shuffle.WZYX, SimdUtils.Shuffle.XYZW, + SimdUtils.Shuffle.YZWX, SimdUtils.Shuffle.ZYXW, SimdUtils.Shuffle.MmShuffle(2, 1, 3, 0), SimdUtils.Shuffle.MmShuffle(1, 1, 1, 1),