From 9e0b7fc87464cc7a031ae6ba61f2312efc69e275 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 20:38:47 +0100 Subject: [PATCH] started Rgba32 --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 88 +++++++++++ .../Common/Helpers/SimdUtils.Pack.cs | 147 ++++++++---------- .../PixelFormats/PixelOperations{TPixel}.cs | 5 +- .../PixelConversion_PackFromRgbPlanes.cs | 22 ++- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 2 +- 5 files changed, 176 insertions(+), 88 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index b760301167..13effce3e0 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -7,6 +7,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp { @@ -22,6 +23,20 @@ namespace SixLabors.ImageSharp private static ReadOnlySpan ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 }; + private static ReadOnlySpan ShuffleMaskShiftAlpha => + new byte[] + { + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15, + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15 + }; + + public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => + new byte[] + { + 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, + 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 + }; + /// /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . @@ -789,6 +804,79 @@ namespace SixLabors.ImageSharp } } } + + internal static void PackFromRgbPlanesAvx2Reduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); + ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); + ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); + ref byte dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); + + int count = redChannel.Length / Vector256.Count; + + ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 control1 = Unsafe.As>(ref control1Bytes); + + ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); + Vector256 control2 = Unsafe.As>(ref control2Bytes); + + Vector256 a = Vector256.Create((byte)255); + + Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + + for (int i = 0; i < count; i++) + { + Vector256 r0 = Unsafe.Add(ref rBase, i); + Vector256 g0 = Unsafe.Add(ref gBase, i); + Vector256 b0 = Unsafe.Add(ref bBase, i); + + r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); + g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); + b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); + + Vector256 rg = Avx2.UnpackLow(r0, g0); + Vector256 b1 = Avx2.UnpackLow(b0, a); + + Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rg = Avx2.UnpackHigh(r0, g0); + b1 = Avx2.UnpackHigh(b0, a); + + Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha); + rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha); + rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha); + rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha); + + rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte(); + rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte(); + rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte(); + rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte(); + + ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i); + ref byte d2 = ref Unsafe.Add(ref d1, 24); + ref byte d3 = ref Unsafe.Add(ref d2, 24); + ref byte d4 = ref Unsafe.Add(ref d3, 24); + + Unsafe.As>(ref d1) = rgb1; + Unsafe.As>(ref d2) = rgb2; + Unsafe.As>(ref d3) = rgb3; + Unsafe.As>(ref d4) = rgb4; + } + + int slice = count * Vector256.Count; + redChannel = redChannel.Slice(slice); + greenChannel = greenChannel.Slice(slice); + blueChannel = blueChannel.Slice(slice); + destination = destination.Slice(slice); + } } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs index 2810a212c6..db88ef3d91 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -12,20 +12,6 @@ namespace SixLabors.ImageSharp { internal static partial class SimdUtils { - private static ReadOnlySpan ShuffleMaskShiftAlpha => - new byte[] - { - 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15, - 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15 - }; - - public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => - new byte[] - { - 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, - 5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 - }; - [MethodImpl(InliningOptions.ShortMethod)] internal static void PackFromRgbPlanes( Configuration configuration, @@ -34,10 +20,17 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { + int count = redChannel.Length; + DebugGuard.IsTrue(greenChannel.Length == count, "Channels must be of same size!"); + DebugGuard.IsTrue(blueChannel.Length == count, "Channels must be of same size!"); + + // To avoid overflows, this check is not debug-only: + Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) { - PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); } else #endif @@ -56,101 +49,76 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); } -#if SUPPORTS_RUNTIME_INTRINSICS - internal static void PackFromRgbPlanesAvx2Reduce( + private static void PackFromRgbPlanesScalarBatchedReduce( ref ReadOnlySpan redChannel, ref ReadOnlySpan greenChannel, ref ReadOnlySpan blueChannel, ref Span destination) { - ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); - ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); - ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); - ref byte dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); - - int count = redChannel.Length / Vector256.Count; - - ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); - Vector256 control1 = Unsafe.As>(ref control1Bytes); - - ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); - Vector256 control2 = Unsafe.As>(ref control2Bytes); - - Vector256 a = Vector256.Create((byte)255); - - Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); + ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel)); + ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); + ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + int count = destination.Length / 4; for (int i = 0; i < count; i++) { - Vector256 r0 = Unsafe.Add(ref rBase, i); - Vector256 g0 = Unsafe.Add(ref gBase, i); - Vector256 b0 = Unsafe.Add(ref bBase, i); - - r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); - g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); - b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); - - Vector256 rg = Avx2.UnpackLow(r0, g0); - Vector256 b1 = Avx2.UnpackLow(b0, a); - - Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); - Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); - - rg = Avx2.UnpackHigh(r0, g0); - b1 = Avx2.UnpackHigh(b0, a); + ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); - Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); - Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); + ref ByteTuple4 gg = ref Unsafe.Add(ref g, i); + ref ByteTuple4 bb = ref Unsafe.Add(ref b, i); - rgb1 = Avx2.Shuffle(rgb1, shuffleAlpha); - rgb2 = Avx2.Shuffle(rgb2, shuffleAlpha); - rgb3 = Avx2.Shuffle(rgb3, shuffleAlpha); - rgb4 = Avx2.Shuffle(rgb4, shuffleAlpha); + d0.R = rr.V0; + d0.G = gg.V0; + d0.B = bb.V0; - rgb1 = Avx2.PermuteVar8x32(rgb1.AsUInt32(), control2).AsByte(); - rgb2 = Avx2.PermuteVar8x32(rgb2.AsUInt32(), control2).AsByte(); - rgb3 = Avx2.PermuteVar8x32(rgb3.AsUInt32(), control2).AsByte(); - rgb4 = Avx2.PermuteVar8x32(rgb4.AsUInt32(), control2).AsByte(); + d1.R = rr.V1; + d1.G = gg.V1; + d1.B = bb.V1; - ref byte d1 = ref Unsafe.Add(ref dBase, 24 * 4 * i); - ref byte d2 = ref Unsafe.Add(ref d1, 24); - ref byte d3 = ref Unsafe.Add(ref d2, 24); - ref byte d4 = ref Unsafe.Add(ref d3, 24); + d2.R = rr.V2; + d2.G = gg.V2; + d2.B = bb.V2; - Unsafe.As>(ref d1) = rgb1; - Unsafe.As>(ref d2) = rgb2; - Unsafe.As>(ref d3) = rgb3; - Unsafe.As>(ref d4) = rgb4; + d3.R = rr.V3; + d3.G = gg.V3; + d3.B = bb.V3; } - int slice = count * Vector256.Count; - redChannel = redChannel.Slice(slice); - greenChannel = greenChannel.Slice(slice); - blueChannel = blueChannel.Slice(slice); - destination = destination.Slice(slice); + int finished = count * 4; + redChannel = redChannel.Slice(finished); + greenChannel = greenChannel.Slice(finished); + blueChannel = blueChannel.Slice(finished); + destination = destination.Slice(finished); } -#endif private static void PackFromRgbPlanesScalarBatchedReduce( ref ReadOnlySpan redChannel, ref ReadOnlySpan greenChannel, ref ReadOnlySpan blueChannel, - ref Span destination) + ref Span destination) { ref ByteTuple4 r = ref Unsafe.As(ref MemoryMarshal.GetReference(redChannel)); ref ByteTuple4 g = ref Unsafe.As(ref MemoryMarshal.GetReference(greenChannel)); ref ByteTuple4 b = ref Unsafe.As(ref MemoryMarshal.GetReference(blueChannel)); - ref Rgb24 rgb = ref MemoryMarshal.GetReference(destination); + ref Rgba32 rgb = ref MemoryMarshal.GetReference(destination); int count = destination.Length / 4; + destination.Fill(new Rgba32(0, 0, 0, 255)); for (int i = 0; i < count; i++) { - ref Rgb24 d0 = ref Unsafe.Add(ref rgb, i * 4); - ref Rgb24 d1 = ref Unsafe.Add(ref d0, 1); - ref Rgb24 d2 = ref Unsafe.Add(ref d0, 2); - ref Rgb24 d3 = ref Unsafe.Add(ref d0, 3); + ref Rgba32 d0 = ref Unsafe.Add(ref rgb, i * 4); + ref Rgba32 d1 = ref Unsafe.Add(ref d0, 1); + ref Rgba32 d2 = ref Unsafe.Add(ref d0, 2); + ref Rgba32 d3 = ref Unsafe.Add(ref d0, 3); ref ByteTuple4 rr = ref Unsafe.Add(ref r, i); ref ByteTuple4 gg = ref Unsafe.Add(ref g, i); @@ -199,5 +167,26 @@ namespace SixLabors.ImageSharp d.B = Unsafe.Add(ref b, i); } } + + private static void PackFromRgbPlanesRemainder( + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + ref byte r = ref MemoryMarshal.GetReference(redChannel); + ref byte g = ref MemoryMarshal.GetReference(greenChannel); + ref byte b = ref MemoryMarshal.GetReference(blueChannel); + ref Rgba32 rgba = ref MemoryMarshal.GetReference(destination); + + for (int i = 0; i < destination.Length; i++) + { + ref Rgba32 d = ref Unsafe.Add(ref rgba, i); + d.R = Unsafe.Add(ref r, i); + d.G = Unsafe.Add(ref g, i); + d.B = Unsafe.Add(ref b, i); + d.A = 255; + } + } } } \ No newline at end of file diff --git a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs index e562f333c6..57e5e85828 100644 --- a/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs +++ b/src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs @@ -161,14 +161,15 @@ namespace SixLabors.ImageSharp.PixelFormats } /// - /// Bulk operation that converts 3 seperate RGB channels to + /// Bulk operation that packs 3 seperate RGB channels to . + /// The destination must have a padding of 3. /// /// A to configure internal operations. /// A to the red values. /// A to the green values. /// A to the blue values. /// A to the destination pixels. - public virtual void PackFromRgbPlanes( + internal virtual void PackFromRgbPlanes( Configuration configuration, ReadOnlySpan redChannel, ReadOnlySpan greenChannel, diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs index db66ae941d..6a41c4bf44 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion private float[] rgbaFloat; - [Params(512)] + [Params(1024)] public int Count { get; set; } [GlobalSetup] @@ -36,7 +36,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion this.rBuf = new byte[this.Count]; this.gBuf = new byte[this.Count]; this.bBuf = new byte[this.Count]; - this.rgbBuf = new Rgb24[this.Count]; + this.rgbBuf = new Rgb24[this.Count + 3]; // padded this.rgbaBuf = new Rgba32[this.Count]; this.rFloat = new float[this.Count]; @@ -46,7 +46,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion this.rgbaFloat = new float[this.Count * 4]; } - // [Benchmark(Baseline = true)] + // [Benchmark] public void Rgb24_Scalar_PerElement_Pinned() { fixed (byte* r = &this.rBuf[0]) @@ -72,7 +72,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion Span b = this.rBuf; Span rgb = this.rgbBuf; - for (int i = 0; i < rgb.Length; i++) + for (int i = 0; i < r.Length; i++) { ref Rgb24 d = ref rgb[i]; d.R = r[i]; @@ -81,7 +81,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion } } - [Benchmark(Baseline = true)] + [Benchmark] public void Rgb24_Scalar_PerElement_Unsafe() { ref byte r = ref this.rBuf[0]; @@ -195,7 +195,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion } #if SUPPORTS_RUNTIME_INTRINSICS - [Benchmark] + [Benchmark(Baseline = true)] public void Rgba32_Vector_Float() { ref Vector256 rBase = ref Unsafe.As>(ref this.rFloat[0]); @@ -235,6 +235,16 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); } } + + [Benchmark] + public void Rgba32_Vector_Bytes() + { + ReadOnlySpan r = this.rBuf; + ReadOnlySpan g = this.rBuf; + ReadOnlySpan b = this.rBuf; + Span rgb = this.rgbBuf; + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb); + } #endif #pragma warning disable SA1132 diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 878e55c873..ae1b5c9e32 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -378,7 +378,7 @@ namespace SixLabors.ImageSharp.Tests.Common ReadOnlySpan bb = b.AsSpan(); Span dd = d.AsSpan(); - SimdUtils.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); for (int i = 0; i < 32; i++) {