From 2ff0cb93dce87a52bca81d1b877b7eb5dd84feb6 Mon Sep 17 00:00:00 2001 From: Anton Firszov Date: Sat, 5 Dec 2020 21:02:28 +0100 Subject: [PATCH] polishing --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 61 ++++++++++++++++++- .../Common/Helpers/SimdUtils.Pack.cs | 28 ++++++--- .../PixelOperations/Rgb24.PixelOperations.cs | 16 +++++ .../PixelOperations/Rgba32.PixelOperations.cs | 16 +++++ .../PixelConversion_PackFromRgbPlanes.cs | 14 ++++- .../ImageSharp.Tests/Common/SimdUtilsTests.cs | 35 +++++++++++ 6 files changed, 160 insertions(+), 10 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 13effce3e..475d64bc4 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -30,7 +30,7 @@ namespace SixLabors.ImageSharp 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15 }; - public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => + public static ReadOnlySpan PermuteMaskShiftAlpha8x32 => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, @@ -877,6 +877,65 @@ namespace SixLabors.ImageSharp blueChannel = blueChannel.Slice(slice); destination = destination.Slice(slice); } + + internal static void PackFromRgbPlanesAvx2Reduce( + ref ReadOnlySpan redChannel, + ref ReadOnlySpan greenChannel, + ref ReadOnlySpan blueChannel, + ref Span destination) + { + ref Vector256 rBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(redChannel)); + ref Vector256 gBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(greenChannel)); + ref Vector256 bBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(blueChannel)); + ref Vector256 dBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + + int count = redChannel.Length / Vector256.Count; + + ref byte control1Bytes = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 control1 = Unsafe.As>(ref control1Bytes); + + ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32); + Vector256 control2 = Unsafe.As>(ref control2Bytes); + + Vector256 a = Vector256.Create((byte)255); + + Vector256 shuffleAlpha = Unsafe.As>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha)); + + for (int i = 0; i < count; i++) + { + Vector256 r0 = Unsafe.Add(ref rBase, i); + Vector256 g0 = Unsafe.Add(ref gBase, i); + Vector256 b0 = Unsafe.Add(ref bBase, i); + + r0 = Avx2.PermuteVar8x32(r0.AsUInt32(), control1).AsByte(); + g0 = Avx2.PermuteVar8x32(g0.AsUInt32(), control1).AsByte(); + b0 = Avx2.PermuteVar8x32(b0.AsUInt32(), control1).AsByte(); + + Vector256 rg = Avx2.UnpackLow(r0, g0); + Vector256 b1 = Avx2.UnpackLow(b0, a); + + Vector256 rgb1 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb2 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + rg = Avx2.UnpackHigh(r0, g0); + b1 = Avx2.UnpackHigh(b0, a); + + Vector256 rgb3 = Avx2.UnpackLow(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + Vector256 rgb4 = Avx2.UnpackHigh(rg.AsUInt16(), b1.AsUInt16()).AsByte(); + + ref Vector256 d0 = ref Unsafe.Add(ref dBase, i * 4); + d0 = rgb1; + Unsafe.Add(ref d0, 1) = rgb2; + Unsafe.Add(ref d0, 2) = rgb3; + Unsafe.Add(ref d0, 3) = rgb4; + } + + int slice = count * Vector256.Count; + redChannel = redChannel.Slice(slice); + greenChannel = greenChannel.Slice(slice); + blueChannel = blueChannel.Slice(slice); + destination = destination.Slice(slice); + } } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs index db88ef3d9..8cd15e01b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Pack.cs @@ -1,3 +1,6 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -20,12 +23,9 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { - int count = redChannel.Length; - DebugGuard.IsTrue(greenChannel.Length == count, "Channels must be of same size!"); - DebugGuard.IsTrue(blueChannel.Length == count, "Channels must be of same size!"); - - // To avoid overflows, this check is not debug-only: - Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(destination.Length > redChannel.Length + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) @@ -49,7 +49,21 @@ namespace SixLabors.ImageSharp ReadOnlySpan blueChannel, Span destination) { - PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + DebugGuard.IsTrue(greenChannel.Length == redChannel.Length, nameof(greenChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(blueChannel.Length == redChannel.Length, nameof(blueChannel), "Channels must be of same size!"); + DebugGuard.IsTrue(destination.Length > redChannel.Length, nameof(destination), "'destination' span should not be shorter than the source channels!"); + +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) + { + HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + else +#endif + { + PackFromRgbPlanesScalarBatchedReduce(ref redChannel, ref greenChannel, ref blueChannel, ref destination); + } + PackFromRgbPlanesRemainder(redChannel, greenChannel, blueChannel, destination); } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs index 73b656f36..ebed7aadd 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgb24.PixelOperations.cs @@ -21,6 +21,22 @@ namespace SixLabors.ImageSharp.PixelFormats /// public override PixelTypeInfo GetPixelTypeInfo() => LazyInfo.Value; + + /// + internal override void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + int count = redChannel.Length; + Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); + Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); + Guard.IsTrue(destination.Length > count + 2, nameof(destination), "'destination' must contain a padding of 3 elements!"); + + SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination); + } } } } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs index d8322e37d..0ddc2f8b1 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs @@ -56,6 +56,22 @@ namespace SixLabors.ImageSharp.PixelFormats MemoryMarshal.Cast(sourceVectors), MemoryMarshal.Cast(destinationPixels)); } + + /// + internal override void PackFromRgbPlanes( + Configuration configuration, + ReadOnlySpan redChannel, + ReadOnlySpan greenChannel, + ReadOnlySpan blueChannel, + Span destination) + { + int count = redChannel.Length; + Guard.IsTrue(greenChannel.Length == count, nameof(greenChannel), "Channels must be of same size!"); + Guard.IsTrue(blueChannel.Length == count, nameof(blueChannel), "Channels must be of same size!"); + Guard.IsTrue(destination.Length > count, nameof(destination), "'destination' span should not be shorter than the source channels!"); + + SimdUtils.PackFromRgbPlanes(configuration, redChannel, greenChannel, blueChannel, destination); + } } } } diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs index 6a41c4bf4..11714027a 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs @@ -196,7 +196,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion #if SUPPORTS_RUNTIME_INTRINSICS [Benchmark(Baseline = true)] - public void Rgba32_Vector_Float() + public void Rgba32_Avx2_Float() { ref Vector256 rBase = ref Unsafe.As>(ref this.rFloat[0]); ref Vector256 gBase = ref Unsafe.As>(ref this.gFloat[0]); @@ -237,7 +237,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion } [Benchmark] - public void Rgba32_Vector_Bytes() + public void Rgba24_Avx2_Bytes() { ReadOnlySpan r = this.rBuf; ReadOnlySpan g = this.rBuf; @@ -245,6 +245,16 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion Span rgb = this.rgbBuf; SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb); } + + [Benchmark] + public void Rgba32_Avx2_Bytes() + { + ReadOnlySpan r = this.rBuf; + ReadOnlySpan g = this.rBuf; + ReadOnlySpan b = this.rBuf; + Span rgb = this.rgbaBuf; + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref r, ref g, ref b, ref rgb); + } #endif #pragma warning disable SA1132 diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index ae1b5c9e3..565ea5f6d 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -392,6 +392,41 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(0, bb.Length); Assert.Equal(padding, dd.Length); } + + [Fact] + public void PackFromRgbPlanesAvx2Reduce_Rgba32() + { + if (!Avx2.IsSupported) + { + return; + } + + byte[] r = Enumerable.Range(0, 32).Select(x => (byte)x).ToArray(); + byte[] g = Enumerable.Range(100, 32).Select(x => (byte)x).ToArray(); + byte[] b = Enumerable.Range(200, 32).Select(x => (byte)x).ToArray(); + + Rgba32[] d = new Rgba32[32]; + + ReadOnlySpan rr = r.AsSpan(); + ReadOnlySpan gg = g.AsSpan(); + ReadOnlySpan bb = b.AsSpan(); + Span dd = d.AsSpan(); + + SimdUtils.HwIntrinsics.PackFromRgbPlanesAvx2Reduce(ref rr, ref gg, ref bb, ref dd); + + for (int i = 0; i < 32; i++) + { + Assert.Equal(i, d[i].R); + Assert.Equal(i + 100, d[i].G); + Assert.Equal(i + 200, d[i].B); + Assert.Equal(255, d[i].A); + } + + Assert.Equal(0, rr.Length); + Assert.Equal(0, gg.Length); + Assert.Equal(0, bb.Length); + Assert.Equal(0, dd.Length); + } #endif internal static void TestPackFromRgbPlanes(int count, Action packMethod)