From 3969525d2ae9ee89055ab220528d1423da156b60 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 13 Jan 2024 10:13:44 +1000 Subject: [PATCH] Port shuffle4 --- src/ImageSharp/Common/Helpers/Numerics.cs | 7 +- .../Helpers/Shuffle/IComponentShuffle.cs | 77 +++-- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 8 +- .../Common/Helpers/Shuffle/IShuffle3.cs | 4 +- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 8 +- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 264 +++++++++++------- .../Common/Helpers/SimdUtils.Shuffle.cs | 125 +++++---- .../Formats/Webp/BitWriter/BitWriterBase.cs | 1 - .../Webp/Chunks/WebpAnimationParameter.cs | 1 - .../Formats/Webp/Chunks/WebpFrameData.cs | 2 - .../Formats/Webp/Chunks/WebpVp8X.cs | 2 - .../Formats/Webp/Lossless/Vp8LEncoder.cs | 1 - .../Formats/Webp/Lossy/Vp8Encoder.cs | 1 - .../Helpers => Formats/Webp}/RiffHelper.cs | 2 +- .../Utils/Vector4Converters.RgbaCompatible.cs | 18 +- 15 files changed, 302 insertions(+), 219 deletions(-) rename src/ImageSharp/{Common/Helpers => Formats/Webp}/RiffHelper.cs (98%) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 293997c4d..ca28a7aab 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -5,7 +5,6 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; namespace SixLabors.ImageSharp; @@ -61,6 +60,12 @@ internal static class Numerics [MethodImpl(MethodImplOptions.AggressiveInlining)] public static nint Modulo4(nint x) => x & 3; + /// + /// Calculates % 4 + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static nuint Modulo4(nuint x) => x & 3; + /// /// Calculates % 8 /// diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 683ac518b..d4ab8c618 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -19,24 +19,26 @@ namespace SixLabors.ImageSharp; internal interface IComponentShuffle { /// - /// Shuffles then slices 8-bit integers within 128-bit lanes in - /// using the control and store the results in . + /// Shuffles then slices 8-bit integers in + /// using the control and store the results in . + /// If successful, this method will reduce the length of length + /// by the shuffle amount. /// /// The source span of bytes. - /// The destination span of bytes. - void ShuffleReduce(ref ReadOnlySpan source, ref Span dest); + /// The destination span of bytes. + void ShuffleReduce(ref ReadOnlySpan source, ref Span destination); /// - /// Shuffle 8-bit integers within 128-bit lanes in - /// using the control and store the results in . + /// Shuffle 8-bit integers in + /// using the control and store the results in . /// /// The source span of bytes. - /// The destination span of bytes. + /// The destination span of bytes. /// - /// Implementation can assume that source.Length is less or equal than dest.Length. + /// Implementation can assume that source.Length is less or equal than destination.Length. /// Loops should iterate using source.Length. /// - void RunFallbackShuffle(ReadOnlySpan source, Span dest); + void Shuffle(ReadOnlySpan source, Span destination); } /// @@ -44,24 +46,21 @@ internal interface IShuffle4 : IComponentShuffle { } -internal readonly struct DefaultShuffle4 : IShuffle4 +internal readonly struct DefaultShuffle4(byte control) : IShuffle4 { - public DefaultShuffle4(byte control) - => this.Control = control; - - public byte Control { get; } + public byte Control { get; } = control; [MethodImpl(InliningOptions.ShortMethod)] - public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) - => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, this.Control); + public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination) + => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, this.Control); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span destination) { ref byte sBase = ref MemoryMarshal.GetReference(source); - ref byte dBase = ref MemoryMarshal.GetReference(dest); + ref byte dBase = ref MemoryMarshal.GetReference(destination); - Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0); + SimdUtils.Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0); for (nuint i = 0; i < (uint)source.Length; i += 4) { @@ -76,14 +75,14 @@ internal readonly struct DefaultShuffle4 : IShuffle4 internal readonly struct WXYZShuffle4 : IShuffle4 { [MethodImpl(InliningOptions.ShortMethod)] - public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) - => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle2103); + public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination) + => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle2103); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span destination) { ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); - ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); uint n = (uint)source.Length / 4; for (nuint i = 0; i < n; i++) @@ -100,14 +99,14 @@ internal readonly struct WXYZShuffle4 : IShuffle4 internal readonly struct WZYXShuffle4 : IShuffle4 { [MethodImpl(InliningOptions.ShortMethod)] - public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) - => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0123); + public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination) + => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle0123); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span destination) { ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); - ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); uint n = (uint)source.Length / 4; for (nuint i = 0; i < n; i++) @@ -124,14 +123,14 @@ internal readonly struct WZYXShuffle4 : IShuffle4 internal readonly struct YZWXShuffle4 : IShuffle4 { [MethodImpl(InliningOptions.ShortMethod)] - public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) - => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0321); + public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination) + => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle0321); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span destination) { ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); - ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); uint n = (uint)source.Length / 4; for (nuint i = 0; i < n; i++) @@ -148,14 +147,14 @@ internal readonly struct YZWXShuffle4 : IShuffle4 internal readonly struct ZYXWShuffle4 : IShuffle4 { [MethodImpl(InliningOptions.ShortMethod)] - public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) - => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3012); + public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination) + => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle3012); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span destination) { ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); - ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); uint n = (uint)source.Length / 4; for (nuint i = 0; i < n; i++) @@ -179,14 +178,14 @@ internal readonly struct ZYXWShuffle4 : IShuffle4 internal readonly struct XWZYShuffle4 : IShuffle4 { [MethodImpl(InliningOptions.ShortMethod)] - public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) - => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle1230); + public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination) + => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle1230); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span destination) { ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); - ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination)); uint n = (uint)source.Length / 4; for (nuint i = 0; i < n; i++) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 6cf6eef08..255448d61 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -24,12 +24,12 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, this.Control); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span dest) { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0); + SimdUtils.Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0); Span temp = stackalloc byte[4]; ref byte t = ref MemoryMarshal.GetReference(temp); @@ -52,10 +52,10 @@ internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 { [MethodImpl(InliningOptions.ShortMethod)] public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) - => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3210); + => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, SimdUtils.Shuffle.MMShuffle3210); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span dest) { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index 2cd586212..89faca243 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -24,12 +24,12 @@ internal readonly struct DefaultShuffle3 : IShuffle3 => HwIntrinsics.Shuffle3Reduce(ref source, ref dest, this.Control); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span dest) { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0); + SimdUtils.Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0); for (nuint i = 0; i < (uint)source.Length; i += 3) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 5e82973e3..30fda7a8e 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -24,12 +24,12 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, this.Control); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span dest) { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0); + SimdUtils.Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0); for (nuint i = 0, j = 0; i < (uint)dest.Length; i += 3, j += 4) { @@ -44,10 +44,10 @@ internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 { [MethodImpl(InliningOptions.ShortMethod)] public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) - => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, Shuffle.MMShuffle3210); + => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, SimdUtils.Shuffle.MMShuffle3210); [MethodImpl(InliningOptions.ShortMethod)] - public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + public void Shuffle(ReadOnlySpan source, Span dest) { ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index ad079b52e..4732effd4 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -51,22 +51,32 @@ internal static partial class SimdUtils /// /// Shuffle single-precision (32-bit) floating-point elements in - /// using the control and store the results in . + /// using the control and store the results in . /// /// The source span of floats. - /// The destination span of floats. + /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle4Reduce( ref ReadOnlySpan source, - ref Span dest, + ref Span destination, [ConstantExpected] byte control) { - if (Avx.IsSupported || Sse.IsSupported) + if (Vector512.IsHardwareAccelerated || Vector256.IsHardwareAccelerated || Vector128.IsHardwareAccelerated) { - int remainder = Avx.IsSupported - ? Numerics.ModuloP2(source.Length, Vector256.Count) - : Numerics.ModuloP2(source.Length, Vector128.Count); + int remainder = 0; + if (Vector512.IsHardwareAccelerated) + { + remainder = Numerics.ModuloP2(source.Length, Vector512.Count); + } + else if (Vector256.IsHardwareAccelerated) + { + remainder = Numerics.ModuloP2(source.Length, Vector256.Count); + } + else if (Vector128.IsHardwareAccelerated) + { + remainder = Numerics.ModuloP2(source.Length, Vector128.Count); + } int adjustedCount = source.Length - remainder; @@ -74,17 +84,17 @@ internal static partial class SimdUtils { Shuffle4( source[..adjustedCount], - dest[..adjustedCount], + destination[..adjustedCount], control); source = source[adjustedCount..]; - dest = dest[adjustedCount..]; + destination = destination[adjustedCount..]; } } } /// - /// Shuffle 8-bit integers within 128-bit lanes in + /// Shuffle 8-bit integers /// using the control and store the results in . /// /// The source span of bytes. @@ -96,11 +106,21 @@ internal static partial class SimdUtils ref Span dest, byte control) { - if (Avx2.IsSupported || Ssse3.IsSupported) + if (Vector512.IsHardwareAccelerated || Vector256.IsHardwareAccelerated || Vector128.IsHardwareAccelerated) { - int remainder = Avx2.IsSupported - ? Numerics.ModuloP2(source.Length, Vector256.Count) - : Numerics.ModuloP2(source.Length, Vector128.Count); + int remainder = 0; + if (Vector512.IsHardwareAccelerated) + { + remainder = Numerics.ModuloP2(source.Length, Vector512.Count); + } + else if (Vector256.IsHardwareAccelerated) + { + remainder = Numerics.ModuloP2(source.Length, Vector256.Count); + } + else if (Vector128.IsHardwareAccelerated) + { + remainder = Numerics.ModuloP2(source.Length, Vector128.Count); + } int adjustedCount = source.Length - remainder; @@ -218,76 +238,102 @@ internal static partial class SimdUtils [MethodImpl(InliningOptions.ShortMethod)] private static void Shuffle4( ReadOnlySpan source, - Span dest, + Span destination, [ConstantExpected] byte control) { - if (Avx.IsSupported) + if (Vector512.IsHardwareAccelerated) { - ref Vector256 sourceBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Span temp = stackalloc int[Vector512.Count]; + Shuffle.MMShuffleSpan(ref temp, control); + Vector512 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); - ref Vector256 destBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); - nint n = (nint)dest.Vector256Count(); - nint m = Numerics.Modulo4(n); - nint u = n - m; + nuint n = (uint)destination.Length / (uint)Vector512.Count; + nuint m = Numerics.Modulo4(n); + nuint u = n - m; - for (nint i = 0; i < u; i += 4) + for (nuint i = 0; i < u; i += 4) { - ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); - ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector512 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector512 vd0 = ref Unsafe.Add(ref destinationBase, i); - vd0 = Avx.Permute(vs0, control); - Unsafe.Add(ref vd0, 1) = Avx.Permute(Unsafe.Add(ref vs0, 1), control); - Unsafe.Add(ref vd0, 2) = Avx.Permute(Unsafe.Add(ref vs0, 2), control); - Unsafe.Add(ref vd0, 3) = Avx.Permute(Unsafe.Add(ref vs0, 3), control); + vd0 = Vector512.Shuffle(vs0, mask); + Unsafe.Add(ref vd0, (nuint)1) = Vector512.Shuffle(Unsafe.Add(ref vs0, (nuint)1), mask); + Unsafe.Add(ref vd0, (nuint)2) = Vector512.Shuffle(Unsafe.Add(ref vs0, (nuint)2), mask); + Unsafe.Add(ref vd0, (nuint)3) = Vector512.Shuffle(Unsafe.Add(ref vs0, (nuint)3), mask); } if (m > 0) { - for (nint i = u; i < n; i++) + for (nuint i = u; i < n; i++) { - Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); + Unsafe.Add(ref destinationBase, i) = Vector512.Shuffle(Unsafe.Add(ref sourceBase, i), mask); } } } - else + else if (Vector256.IsHardwareAccelerated) { - // Sse - ref Vector128 sourceBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Span temp = stackalloc int[Vector256.Count]; + Shuffle.MMShuffleSpan(ref temp, control); + Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); - ref Vector128 destBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); - nint n = (nint)((uint)dest.Length / (uint)Vector128.Count); - nint m = Numerics.Modulo4(n); - nint u = n - m; + nuint n = (uint)destination.Length / (uint)Vector256.Count; + nuint m = Numerics.Modulo4(n); + nuint u = n - m; - for (nint i = 0; i < u; i += 4) + for (nuint i = 0; i < u; i += 4) { - ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); - ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector256 vd0 = ref Unsafe.Add(ref destinationBase, i); - vd0 = Sse.Shuffle(vs0, vs0, control); + vd0 = Vector256.Shuffle(vs0, mask); + Unsafe.Add(ref vd0, (nuint)1) = Vector256.Shuffle(Unsafe.Add(ref vs0, (nuint)1), mask); + Unsafe.Add(ref vd0, (nuint)2) = Vector256.Shuffle(Unsafe.Add(ref vs0, (nuint)2), mask); + Unsafe.Add(ref vd0, (nuint)3) = Vector256.Shuffle(Unsafe.Add(ref vs0, (nuint)3), mask); + } - Vector128 vs1 = Unsafe.Add(ref vs0, 1); - Unsafe.Add(ref vd0, 1) = Sse.Shuffle(vs1, vs1, control); + if (m > 0) + { + for (nuint i = u; i < n; i++) + { + Unsafe.Add(ref destinationBase, i) = Vector256.Shuffle(Unsafe.Add(ref sourceBase, i), mask); + } + } + } + else if (Vector128.IsHardwareAccelerated) + { + Span temp = stackalloc int[Vector128.Count]; + Shuffle.MMShuffleSpan(ref temp, control); + Vector128 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); + + ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector128 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); - Vector128 vs2 = Unsafe.Add(ref vs0, 2); - Unsafe.Add(ref vd0, 2) = Sse.Shuffle(vs2, vs2, control); + nuint n = (uint)destination.Length / (uint)Vector128.Count; + nuint m = Numerics.Modulo4(n); + nuint u = n - m; + + for (nuint i = 0; i < u; i += 4) + { + ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector128 vd0 = ref Unsafe.Add(ref destinationBase, i); - Vector128 vs3 = Unsafe.Add(ref vs0, 3); - Unsafe.Add(ref vd0, 3) = Sse.Shuffle(vs3, vs3, control); + vd0 = Vector128.Shuffle(vs0, mask); + Unsafe.Add(ref vd0, (nuint)1) = Vector128.Shuffle(Unsafe.Add(ref vs0, (nuint)1), mask); + Unsafe.Add(ref vd0, (nuint)2) = Vector128.Shuffle(Unsafe.Add(ref vs0, (nuint)2), mask); + Unsafe.Add(ref vd0, (nuint)3) = Vector128.Shuffle(Unsafe.Add(ref vs0, (nuint)3), mask); } if (m > 0) { - for (nint i = u; i < n; i++) + for (nuint i = u; i < n; i++) { - Vector128 vs = Unsafe.Add(ref sourceBase, i); - Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); + Unsafe.Add(ref destinationBase, i) = Vector128.Shuffle(Unsafe.Add(ref sourceBase, i), mask); } } } @@ -296,80 +342,102 @@ internal static partial class SimdUtils [MethodImpl(InliningOptions.ShortMethod)] private static void Shuffle4( ReadOnlySpan source, - Span dest, + Span destination, byte control) { - if (Avx2.IsSupported) + if (Vector512.IsHardwareAccelerated) { - // I've chosen to do this for convenience while we determine what - // shuffle controls to add to the library. - // We can add static ROS instances if need be in the future. - Span bytes = stackalloc byte[Vector256.Count]; - Shuffle.MMShuffleSpan(ref bytes, control); - Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Span temp = stackalloc byte[Vector512.Count]; + Shuffle.MMShuffleSpan(ref temp, control); + Vector512 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); - ref Vector256 sourceBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); - ref Vector256 destBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + nuint n = (uint)destination.Length / (uint)Vector512.Count; + nuint m = Numerics.Modulo4(n); + nuint u = n - m; + + for (nuint i = 0; i < u; i += 4) + { + ref Vector512 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector512 vd0 = ref Unsafe.Add(ref destinationBase, i); + + vd0 = Vector512.Shuffle(vs0, mask); + Unsafe.Add(ref vd0, (nuint)1) = Vector512.Shuffle(Unsafe.Add(ref vs0, (nuint)1), mask); + Unsafe.Add(ref vd0, (nuint)2) = Vector512.Shuffle(Unsafe.Add(ref vs0, (nuint)2), mask); + Unsafe.Add(ref vd0, (nuint)3) = Vector512.Shuffle(Unsafe.Add(ref vs0, (nuint)3), mask); + } + + if (m > 0) + { + for (nuint i = u; i < n; i++) + { + Unsafe.Add(ref destinationBase, i) = Vector512.Shuffle(Unsafe.Add(ref sourceBase, i), mask); + } + } + } + else if (Vector256.IsHardwareAccelerated) + { + Span temp = stackalloc byte[Vector256.Count]; + Shuffle.MMShuffleSpan(ref temp, control); + Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); - nint n = (nint)((uint)dest.Length / (uint)Vector256.Count); - nint m = Numerics.Modulo4(n); - nint u = n - m; + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); - for (nint i = 0; i < u; i += 4) + nuint n = (uint)destination.Length / (uint)Vector256.Count; + nuint m = Numerics.Modulo4(n); + nuint u = n - m; + + for (nuint i = 0; i < u; i += 4) { ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); - ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector256 vd0 = ref Unsafe.Add(ref destinationBase, i); - vd0 = Avx2.Shuffle(vs0, vshuffle); - Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); - Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); - Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); + vd0 = Vector256.Shuffle(vs0, mask); + Unsafe.Add(ref vd0, (nuint)1) = Vector256.Shuffle(Unsafe.Add(ref vs0, (nuint)1), mask); + Unsafe.Add(ref vd0, (nuint)2) = Vector256.Shuffle(Unsafe.Add(ref vs0, (nuint)2), mask); + Unsafe.Add(ref vd0, (nuint)3) = Vector256.Shuffle(Unsafe.Add(ref vs0, (nuint)3), mask); } if (m > 0) { - for (nint i = u; i < n; i++) + for (nuint i = u; i < n; i++) { - Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); + Unsafe.Add(ref destinationBase, i) = Vector256.Shuffle(Unsafe.Add(ref sourceBase, i), mask); } } } - else + else if (Vector128.IsHardwareAccelerated) { - // Ssse3 - Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MMShuffleSpan(ref bytes, control); - Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - - ref Vector128 sourceBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Span temp = stackalloc byte[Vector128.Count]; + Shuffle.MMShuffleSpan(ref temp, control); + Vector128 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); - ref Vector128 destBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector128 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); - nint n = (nint)((uint)dest.Length / (uint)Vector128.Count); - nint m = Numerics.Modulo4(n); - nint u = n - m; + nuint n = (uint)destination.Length / (uint)Vector128.Count; + nuint m = Numerics.Modulo4(n); + nuint u = n - m; - for (nint i = 0; i < u; i += 4) + for (nuint i = 0; i < u; i += 4) { ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); - ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector128 vd0 = ref Unsafe.Add(ref destinationBase, i); - vd0 = Ssse3.Shuffle(vs0, vshuffle); - Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); - Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); - Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); + vd0 = Vector128.Shuffle(vs0, mask); + Unsafe.Add(ref vd0, (nuint)1) = Vector128.Shuffle(Unsafe.Add(ref vs0, (nuint)1), mask); + Unsafe.Add(ref vd0, (nuint)2) = Vector128.Shuffle(Unsafe.Add(ref vs0, (nuint)2), mask); + Unsafe.Add(ref vd0, (nuint)3) = Vector128.Shuffle(Unsafe.Add(ref vs0, (nuint)3), mask); } if (m > 0) { - for (nint i = u; i < n; i++) + for (nuint i = u; i < n; i++) { - Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); + Unsafe.Add(ref destinationBase, i) = Vector128.Shuffle(Unsafe.Add(ref sourceBase, i), mask); } } } diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 83cd3d246..dbeb54a80 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -12,140 +12,140 @@ internal static partial class SimdUtils { /// /// Shuffle single-precision (32-bit) floating-point elements in - /// using the control and store the results in . + /// using the control and store the results in . /// /// The source span of floats. - /// The destination span of floats. + /// The destination span of floats. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle4( ReadOnlySpan source, - Span dest, + Span destination, [ConstantExpected] byte control) { - VerifyShuffle4SpanInput(source, dest); + VerifyShuffle4SpanInput(source, destination); - HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control); + HwIntrinsics.Shuffle4Reduce(ref source, ref destination, control); // Deal with the remainder: if (source.Length > 0) { - Shuffle4Remainder(source, dest, control); + Shuffle4Remainder(source, destination, control); } } /// /// Shuffle 8-bit integers within 128-bit lanes in - /// using the control and store the results in . + /// using the control and store the results in . /// /// The type of shuffle struct. /// The source span of bytes. - /// The destination span of bytes. + /// The destination span of bytes. /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle4( ReadOnlySpan source, - Span dest, + Span destination, TShuffle shuffle) where TShuffle : struct, IShuffle4 { - VerifyShuffle4SpanInput(source, dest); + VerifyShuffle4SpanInput(source, destination); - shuffle.ShuffleReduce(ref source, ref dest); + shuffle.ShuffleReduce(ref source, ref destination); // Deal with the remainder: if (source.Length > 0) { - shuffle.RunFallbackShuffle(source, dest); + shuffle.Shuffle(source, destination); } } /// /// Shuffle 8-bit integer triplets within 128-bit lanes in - /// using the control and store the results in . + /// using the control and store the results in . /// /// The type of shuffle struct. /// The source span of bytes. - /// The destination span of bytes. + /// The destination span of bytes. /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle3( ReadOnlySpan source, - Span dest, + Span destination, TShuffle shuffle) where TShuffle : struct, IShuffle3 { - // Source length should be smaller than dest length, and divisible by 3. - VerifyShuffle3SpanInput(source, dest); + // Source length should be smaller than destination length, and divisible by 3. + VerifyShuffle3SpanInput(source, destination); - shuffle.ShuffleReduce(ref source, ref dest); + shuffle.ShuffleReduce(ref source, ref destination); // Deal with the remainder: if (source.Length > 0) { - shuffle.RunFallbackShuffle(source, dest); + shuffle.Shuffle(source, destination); } } /// /// Pads then shuffles 8-bit integers within 128-bit lanes in - /// using the control and store the results in . + /// using the control and store the results in . /// /// The type of shuffle struct. /// The source span of bytes. - /// The destination span of bytes. + /// The destination span of bytes. /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] public static void Pad3Shuffle4( ReadOnlySpan source, - Span dest, + Span destination, TShuffle shuffle) where TShuffle : struct, IPad3Shuffle4 { - VerifyPad3Shuffle4SpanInput(source, dest); + VerifyPad3Shuffle4SpanInput(source, destination); - shuffle.ShuffleReduce(ref source, ref dest); + shuffle.ShuffleReduce(ref source, ref destination); // Deal with the remainder: if (source.Length > 0) { - shuffle.RunFallbackShuffle(source, dest); + shuffle.Shuffle(source, destination); } } /// /// Shuffles then slices 8-bit integers within 128-bit lanes in - /// using the control and store the results in . + /// using the control and store the results in . /// /// The type of shuffle struct. /// The source span of bytes. - /// The destination span of bytes. + /// The destination span of bytes. /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle4Slice3( ReadOnlySpan source, - Span dest, + Span destination, TShuffle shuffle) where TShuffle : struct, IShuffle4Slice3 { - VerifyShuffle4Slice3SpanInput(source, dest); + VerifyShuffle4Slice3SpanInput(source, destination); - shuffle.ShuffleReduce(ref source, ref dest); + shuffle.ShuffleReduce(ref source, ref destination); // Deal with the remainder: if (source.Length > 0) { - shuffle.RunFallbackShuffle(source, dest); + shuffle.Shuffle(source, destination); } } private static void Shuffle4Remainder( ReadOnlySpan source, - Span dest, + Span destination, byte control) { ref float sBase = ref MemoryMarshal.GetReference(source); - ref float dBase = ref MemoryMarshal.GetReference(dest); + ref float dBase = ref MemoryMarshal.GetReference(destination); Shuffle.InverseMMShuffle(control, out uint p3, out uint p2, out uint p1, out uint p0); for (nuint i = 0; i < (uint)source.Length; i += 4) @@ -158,69 +158,69 @@ internal static partial class SimdUtils } [Conditional("DEBUG")] - internal static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) + internal static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span destination) where T : struct { DebugGuard.IsTrue( - source.Length == dest.Length, + source.Length == destination.Length, nameof(source), "Input spans must be of same length!"); DebugGuard.IsTrue( source.Length % 4 == 0, nameof(source), - "Input spans must be divisable by 4!"); + "Input spans must be divisible by 4!"); } [Conditional("DEBUG")] - private static void VerifyShuffle3SpanInput(ReadOnlySpan source, Span dest) + private static void VerifyShuffle3SpanInput(ReadOnlySpan source, Span destination) where T : struct { DebugGuard.IsTrue( - source.Length <= dest.Length, + source.Length <= destination.Length, nameof(source), - "Source should fit into dest!"); + "Source should fit into destination!"); DebugGuard.IsTrue( source.Length % 3 == 0, nameof(source), - "Input spans must be divisable by 3!"); + "Input spans must be divisible by 3!"); } [Conditional("DEBUG")] - private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan source, Span dest) + private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan source, Span destination) { DebugGuard.IsTrue( source.Length % 3 == 0, nameof(source), - "Input span must be divisable by 3!"); + "Input span must be divisible by 3!"); DebugGuard.IsTrue( - dest.Length % 4 == 0, - nameof(dest), - "Output span must be divisable by 4!"); + destination.Length % 4 == 0, + nameof(destination), + "Output span must be divisible by 4!"); DebugGuard.IsTrue( - source.Length == dest.Length * 3 / 4, + source.Length == destination.Length * 3 / 4, nameof(source), "Input span must be 3/4 the length of the output span!"); } [Conditional("DEBUG")] - private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan source, Span dest) + private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan source, Span destination) { DebugGuard.IsTrue( source.Length % 4 == 0, nameof(source), - "Input span must be divisable by 4!"); + "Input span must be divisible by 4!"); DebugGuard.IsTrue( - dest.Length % 3 == 0, - nameof(dest), - "Output span must be divisable by 3!"); + destination.Length % 3 == 0, + nameof(destination), + "Output span must be divisible by 3!"); DebugGuard.IsTrue( - dest.Length >= source.Length * 3 / 4, + destination.Length >= source.Length * 3 / 4, nameof(source), "Output span must be at least 3/4 the length of the input span!"); } @@ -509,6 +509,27 @@ internal static partial class SimdUtils } } + [MethodImpl(InliningOptions.ShortMethod)] + public static void MMShuffleSpan(ref Span span, byte control) + { + InverseMMShuffle( + control, + out uint p3, + out uint p2, + out uint p1, + out uint p0); + + ref int spanBase = ref MemoryMarshal.GetReference(span); + + for (nuint i = 0; i < (uint)span.Length; i += 4) + { + Unsafe.Add(ref spanBase, i + 0) = (int)(p0 + i); + Unsafe.Add(ref spanBase, i + 1) = (int)(p1 + i); + Unsafe.Add(ref spanBase, i + 2) = (int)(p2 + i); + Unsafe.Add(ref spanBase, i + 3) = (int)(p3 + i); + } + } + [MethodImpl(InliningOptions.ShortMethod)] public static void InverseMMShuffle( byte control, diff --git a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs index c98be1fcd..927992686 100644 --- a/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs +++ b/src/ImageSharp/Formats/Webp/BitWriter/BitWriterBase.cs @@ -1,7 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Formats.Webp.Chunks; using SixLabors.ImageSharp.Metadata.Profiles.Exif; using SixLabors.ImageSharp.Metadata.Profiles.Icc; diff --git a/src/ImageSharp/Formats/Webp/Chunks/WebpAnimationParameter.cs b/src/ImageSharp/Formats/Webp/Chunks/WebpAnimationParameter.cs index 3855a293c..cff9f47af 100644 --- a/src/ImageSharp/Formats/Webp/Chunks/WebpAnimationParameter.cs +++ b/src/ImageSharp/Formats/Webp/Chunks/WebpAnimationParameter.cs @@ -2,7 +2,6 @@ // Licensed under the Six Labors Split License. using System.Buffers.Binary; -using SixLabors.ImageSharp.Common.Helpers; namespace SixLabors.ImageSharp.Formats.Webp.Chunks; diff --git a/src/ImageSharp/Formats/Webp/Chunks/WebpFrameData.cs b/src/ImageSharp/Formats/Webp/Chunks/WebpFrameData.cs index 5ed7aab1e..c8ff579a8 100644 --- a/src/ImageSharp/Formats/Webp/Chunks/WebpFrameData.cs +++ b/src/ImageSharp/Formats/Webp/Chunks/WebpFrameData.cs @@ -1,8 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using SixLabors.ImageSharp.Common.Helpers; - namespace SixLabors.ImageSharp.Formats.Webp.Chunks; internal readonly struct WebpFrameData diff --git a/src/ImageSharp/Formats/Webp/Chunks/WebpVp8X.cs b/src/ImageSharp/Formats/Webp/Chunks/WebpVp8X.cs index 70d6870ce..f781d6114 100644 --- a/src/ImageSharp/Formats/Webp/Chunks/WebpVp8X.cs +++ b/src/ImageSharp/Formats/Webp/Chunks/WebpVp8X.cs @@ -1,8 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using SixLabors.ImageSharp.Common.Helpers; - namespace SixLabors.ImageSharp.Formats.Webp.Chunks; internal readonly struct WebpVp8X diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 518c09ff4..f15cb3eb5 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -6,7 +6,6 @@ using System.Buffers; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Formats.Webp.BitWriter; using SixLabors.ImageSharp.Formats.Webp.Chunks; using SixLabors.ImageSharp.Memory; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 2b74c300a..6e9e4f9cd 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -4,7 +4,6 @@ using System.Buffers; using System.Runtime.CompilerServices; -using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Formats.Webp.BitWriter; using SixLabors.ImageSharp.Formats.Webp.Chunks; using SixLabors.ImageSharp.Memory; diff --git a/src/ImageSharp/Common/Helpers/RiffHelper.cs b/src/ImageSharp/Formats/Webp/RiffHelper.cs similarity index 98% rename from src/ImageSharp/Common/Helpers/RiffHelper.cs rename to src/ImageSharp/Formats/Webp/RiffHelper.cs index 8f06e5886..d3862ea8b 100644 --- a/src/ImageSharp/Common/Helpers/RiffHelper.cs +++ b/src/ImageSharp/Formats/Webp/RiffHelper.cs @@ -4,7 +4,7 @@ using System.Buffers.Binary; using System.Text; -namespace SixLabors.ImageSharp.Common.Helpers; +namespace SixLabors.ImageSharp.Formats.Webp; internal static class RiffHelper { diff --git a/src/ImageSharp/PixelFormats/Utils/Vector4Converters.RgbaCompatible.cs b/src/ImageSharp/PixelFormats/Utils/Vector4Converters.RgbaCompatible.cs index 3442a0807..8616ecb3b 100644 --- a/src/ImageSharp/PixelFormats/Utils/Vector4Converters.RgbaCompatible.cs +++ b/src/ImageSharp/PixelFormats/Utils/Vector4Converters.RgbaCompatible.cs @@ -28,7 +28,7 @@ internal static partial class Vector4Converters private static readonly int Vector4ConversionThreshold = CalculateVector4ConversionThreshold(); /// - /// Provides an efficient default implementation for + /// Provides an efficient default implementation for /// The method works by internally converting to a therefore it's not applicable for that type! /// [MethodImpl(InliningOptions.ShortMethod)] @@ -72,7 +72,7 @@ internal static partial class Vector4Converters } /// - /// Provides an efficient default implementation for + /// Provides an efficient default implementation for /// The method is works by internally converting to a therefore it's not applicable for that type! /// [MethodImpl(InliningOptions.ShortMethod)] @@ -102,16 +102,14 @@ internal static partial class Vector4Converters // For the opposite direction it's not easy to implement the trick used in RunRgba32CompatibleToVector4Conversion, // so let's allocate a temporary buffer as usually: - using (IMemoryOwner tempBuffer = configuration.MemoryAllocator.Allocate(count)) - { - Span tempSpan = tempBuffer.Memory.Span; + using IMemoryOwner tempBuffer = configuration.MemoryAllocator.Allocate(count); + Span tempSpan = tempBuffer.Memory.Span; - SimdUtils.NormalizedFloatToByteSaturate( - MemoryMarshal.Cast(sourceVectors), - MemoryMarshal.Cast(tempSpan)); + SimdUtils.NormalizedFloatToByteSaturate( + MemoryMarshal.Cast(sourceVectors), + MemoryMarshal.Cast(tempSpan)); - pixelOperations.FromRgba32(configuration, tempSpan, destPixels); - } + pixelOperations.FromRgba32(configuration, tempSpan, destPixels); } private static int CalculateVector4ConversionThreshold()