diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs index 4f3d732b49..2810a212c6 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -3,10 +3,29 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.PixelFormats; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif + namespace SixLabors.ImageSharp { internal static partial class SimdUtils { + private static ReadOnlySpan ShuffleMaskShiftAlpha => + new byte[] + { + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15, + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15 + }; + + public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => + new byte[] + { + 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, + 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 + }; + [MethodImpl(InliningOptions.ShortMethod)] internal static void PackFromRgbPlanes( Configuration configuration, @@ -15,7 +34,17 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { - PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + else +#endif + { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); } @@ -29,6 +58,81 @@ namespace SixLabors.ImageSharp { } +#if SUPPORTS_RUNTIME_INTRINSICS + internal static void PackFromRgbPlanesAvx2Reduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); + ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); + ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); + ref byte dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + + int count = redChannel.Length / Vector256.Count; + + ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 control1 = Unsafe.As>(ref control1Bytes); + + ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); + Vector256 control2 = Unsafe.As>(ref control2Bytes); + + Vector256 a = Vector256.Create((byte)255); + + Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + + for (int i = 0; i < count; i++) + { + Vector256 r0 = Unsafe.Add(ref rBase, i); + Vector256 g0 = Unsafe.Add(ref gBase, i); + Vector256 b0 = Unsafe.Add(ref bBase, i); + + r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); + g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); + b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); + + Vector256 rg = Avx2.UnpackLow(r0, g0); + Vector256 b1 = Avx2.UnpackLow(b0, a); + + Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rg = Avx2.UnpackHigh(r0, g0); + b1 = Avx2.UnpackHigh(b0, a); + + Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha); + rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha); + rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha); + rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha); + + rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte(); + rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte(); + rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte(); + rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte(); + + ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i); + ref byte d2 = ref Unsafe.Add(ref d1, 24); + ref byte d3 = ref Unsafe.Add(ref d2, 24); + ref byte d4 = ref Unsafe.Add(ref d3, 24); + + Unsafe.As>(ref d1) = rgb1; + Unsafe.As>(ref d2) = rgb2; + Unsafe.As>(ref d3) = rgb3; + Unsafe.As>(ref d4) = rgb4; + } + + int slice = count * Vector256.Count; + redChannel = redChannel.Slice(slice); + greenChannel = greenChannel.Slice(slice); + blueChannel = blueChannel.Slice(slice); + destination = destination.Slice(slice); + } +#endif + private static void PackFromRgbPlanesScalarBatchedReduce( ref ReadOnlySpan redChannel, ref ReadOnlySpan greenChannel, diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 38b4e2e8c6..878e55c873 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -6,6 +6,7 @@ using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; using SixLabors.ImageSharp.Common.Tuples; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -170,7 +171,7 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArbitraryArraySizes = new TheoryData { - 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 520, + 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, }; [Theory] @@ -357,6 +358,42 @@ namespace SixLabors.ImageSharp.Tests.Common SimdUtils.PackFromRgbPlanes(Configuration.Default, r, g, b, actual)); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void PackFromRgbPlanesAvx2Reduce_Rgb24() + { + if (!Avx2.IsSupported) + { + return; + } + + byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray(); + byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray(); + byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray(); + const int padding = 4; + Rgb24[] d = new Rgb24[32 + padding]; + + ReadOnlySpan rr = r.AsSpan(); + ReadOnlySpan gg = g.AsSpan(); + ReadOnlySpan bb = b.AsSpan(); + Span dd = d.AsSpan(); + + SimdUtils.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); + + for (int i = 0; i < 32; i++) + { + Assert.Equal(i, d[i].R); + Assert.Equal(i + 100, d[i].G); + Assert.Equal(i + 200, d[i].B); + } + + Assert.Equal(0, rr.Length); + Assert.Equal(0, gg.Length); + Assert.Equal(0, bb.Length); + Assert.Equal(padding, dd.Length); + } +#endif + internal static void TestPackFromRgbPlanes(int count, Action packMethod) where TPixel : unmanaged, IPixel {