diff --git a/.editorconfig b/.editorconfig
index 2e3045fb1..c28089d72 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -172,6 +172,8 @@ dotnet_diagnostic.IDE0063.severity = suggestion
csharp_using_directive_placement = outside_namespace:warning
# Modifier preferences
csharp_prefer_static_local_function = true:warning
+# Primary constructor preferences
+csharp_style_prefer_primary_constructors = false:none
##########################################
# Unnecessary Code Rules
diff --git a/shared-infrastructure b/shared-infrastructure
index d65232bbb..1dbfb576c 160000
--- a/shared-infrastructure
+++ b/shared-infrastructure
@@ -1 +1 @@
-Subproject commit d65232bbbfe55a9a153b4058139dda5230e6eb4f
+Subproject commit 1dbfb576c83507645265c79e03369b66cdc0379f
diff --git a/src/ImageSharp.ruleset b/src/ImageSharp.ruleset
index f29278c95..b60989020 100644
--- a/src/ImageSharp.ruleset
+++ b/src/ImageSharp.ruleset
@@ -1,6 +1,4 @@
-
-
-
+
\ No newline at end of file
diff --git a/src/ImageSharp/Color/Color.cs b/src/ImageSharp/Color/Color.cs
index e61abf86f..82ecab390 100644
--- a/src/ImageSharp/Color/Color.cs
+++ b/src/ImageSharp/Color/Color.cs
@@ -98,10 +98,8 @@ public readonly partial struct Color : IEquatable
{
return new(pixel.ToScaledVector4());
}
- else
- {
- return new(pixel);
- }
+
+ return new(pixel);
}
///
@@ -250,15 +248,12 @@ public readonly partial struct Color : IEquatable
[MethodImpl(InliningOptions.ShortMethod)]
public string ToHex()
{
- Rgba32 rgba = default;
if (this.boxedHighPrecisionPixel is not null)
{
- this.boxedHighPrecisionPixel.ToRgba32(ref rgba);
- return rgba.ToHex();
+ return this.boxedHighPrecisionPixel.ToRgba32().ToHex();
}
- rgba.FromScaledVector4(this.data);
- return rgba.ToHex();
+ return Rgba32.FromScaledVector4(this.data).ToHex();
}
///
@@ -280,14 +275,10 @@ public readonly partial struct Color : IEquatable
if (this.boxedHighPrecisionPixel is null)
{
- pixel = default;
- pixel.FromScaledVector4(this.data);
- return pixel;
+ return TPixel.FromScaledVector4(this.data);
}
- pixel = default;
- pixel.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
- return pixel;
+ return TPixel.FromScaledVector4(this.boxedHighPrecisionPixel.ToScaledVector4());
}
///
diff --git a/src/ImageSharp/Common/Helpers/ColorNumerics.cs b/src/ImageSharp/Common/Helpers/ColorNumerics.cs
index 553a7c2e8..1c30d857f 100644
--- a/src/ImageSharp/Common/Helpers/ColorNumerics.cs
+++ b/src/ImageSharp/Common/Helpers/ColorNumerics.cs
@@ -41,6 +41,34 @@ internal static class ColorNumerics
public static byte Get8BitBT709Luminance(byte r, byte g, byte b)
=> (byte)((r * .2126F) + (g * .7152F) + (b * .0722F) + 0.5F);
+ /// <summary>
+ /// Gets the 8-bit luminance from 16-bit rgb components using the formula
+ /// specified by ITU-R Recommendation BT.709. Each component is reduced to 8 bits before weighting.
+ /// </summary>
+ /// <param name="r">The red component.</param>
+ /// <param name="g">The green component.</param>
+ /// <param name="b">The blue component.</param>
+ /// <returns>The <see cref="byte"/> luminance; 0.5F is added before the truncating cast.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte Get8BitBT709Luminance(ushort r, ushort g, ushort b)
+ => (byte)((From16BitTo8Bit(r) * .2126F) +
+ (From16BitTo8Bit(g) * .7152F) +
+ (From16BitTo8Bit(b) * .0722F) + 0.5F);
+
+ /// <summary>
+ /// Gets the 16-bit luminance from 8-bit rgb components using the formula
+ /// specified by ITU-R Recommendation BT.709. Each component is widened to 16 bits before weighting.
+ /// </summary>
+ /// <param name="r">The red component.</param>
+ /// <param name="g">The green component.</param>
+ /// <param name="b">The blue component.</param>
+ /// <returns>The <see cref="ushort"/> luminance; 0.5F is added before the truncating cast.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort Get16BitBT709Luminance(byte r, byte g, byte b)
+ => (ushort)((From8BitTo16Bit(r) * .2126F) +
+ (From8BitTo16Bit(g) * .7152F) +
+ (From8BitTo16Bit(b) * .0722F) + 0.5F);
+
///
/// Gets the luminance from the rgb components using the formula as
/// specified by ITU-R Recommendation BT.709.
@@ -72,8 +100,8 @@ internal static class ColorNumerics
/// The 8 bit component value.
/// The
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static byte DownScaleFrom16BitTo8Bit(ushort component)
- {
+ public static byte From16BitTo8Bit(ushort component) =>
+
// To scale to 8 bits From a 16-bit value V the required value (from the PNG specification) is:
//
// (V * 255) / 65535
@@ -102,8 +130,7 @@ internal static class ColorNumerics
// An alternative arithmetic calculation which also gives no errors is:
//
// (V * 255 + 32895) >> 16
- return (byte)(((component * 255) + 32895) >> 16);
- }
+ (byte)(((component * 255) + 32895) >> 16);
///
/// Scales a value from an 8 bit to
@@ -112,7 +139,7 @@ internal static class ColorNumerics
/// The 8 bit component value.
/// The
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static ushort UpscaleFrom8BitTo16Bit(byte component)
+ public static ushort From8BitTo16Bit(byte component)
=> (ushort)(component * 257);
///
diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs
index 293997c4d..ca28a7aab 100644
--- a/src/ImageSharp/Common/Helpers/Numerics.cs
+++ b/src/ImageSharp/Common/Helpers/Numerics.cs
@@ -5,7 +5,6 @@ using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp;
@@ -61,6 +60,12 @@ internal static class Numerics
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static nint Modulo4(nint x) => x & 3;
+ /// <summary>
+ /// Calculates <paramref name="x"/> % 4 by masking the two lowest bits.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static nuint Modulo4(nuint x) => x & 3;
+
///
/// Calculates % 8
///
diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
index 683ac518b..c856267db 100644
--- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
@@ -1,12 +1,6 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
-using System.Buffers.Binary;
-using System.Numerics;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-using static SixLabors.ImageSharp.SimdUtils;
-
// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
@@ -19,190 +13,24 @@ namespace SixLabors.ImageSharp;
internal interface IComponentShuffle
{
///
- /// Shuffles then slices 8-bit integers within 128-bit lanes in
- /// using the control and store the results in .
+ /// Shuffles then slices 8-bit integers in <paramref name="source"/>
+ /// using a byte control and stores the results in <paramref name="destination"/>.
+ /// If successful, this method will reduce the length of <paramref name="source"/>
+ /// by the shuffle amount.
///
/// The source span of bytes.
- /// The destination span of bytes.
- void ShuffleReduce(ref ReadOnlySpan source, ref Span dest);
+ /// <param name="destination">The destination span of bytes.</param>
+ void ShuffleReduce(ref ReadOnlySpan source, ref Span destination);
///
- /// Shuffle 8-bit integers within 128-bit lanes in
- /// using the control and store the results in .
+ /// Shuffle 8-bit integers in <paramref name="source"/>
+ /// using the control and store the results in <paramref name="destination"/>.
///
/// The source span of bytes.
- /// The destination span of bytes.
+ /// <param name="destination">The destination span of bytes.</param>
///
- /// Implementation can assume that source.Length is less or equal than dest.Length.
+ /// Implementations can assume that source.Length is less than or equal to destination.Length.
/// Loops should iterate using source.Length.
///
- void RunFallbackShuffle(ReadOnlySpan source, Span dest);
-}
-
-///
-internal interface IShuffle4 : IComponentShuffle
-{
-}
-
-internal readonly struct DefaultShuffle4 : IShuffle4
-{
- public DefaultShuffle4(byte control)
- => this.Control = control;
-
- public byte Control { get; }
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, this.Control);
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
- {
- ref byte sBase = ref MemoryMarshal.GetReference(source);
- ref byte dBase = ref MemoryMarshal.GetReference(dest);
-
- Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0);
-
- for (nuint i = 0; i < (uint)source.Length; i += 4)
- {
- Unsafe.Add(ref dBase, i + 0) = Unsafe.Add(ref sBase, p0 + i);
- Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
- Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
- Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
- }
- }
-}
-
-internal readonly struct WXYZShuffle4 : IShuffle4
-{
- [MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle2103);
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
- {
- ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
- ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
- uint n = (uint)source.Length / 4;
-
- for (nuint i = 0; i < n; i++)
- {
- uint packed = Unsafe.Add(ref sBase, i);
-
- // packed = [W Z Y X]
- // ROTL(8, packed) = [Z Y X W]
- Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24);
- }
- }
-}
-
-internal readonly struct WZYXShuffle4 : IShuffle4
-{
- [MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0123);
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
- {
- ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
- ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
- uint n = (uint)source.Length / 4;
-
- for (nuint i = 0; i < n; i++)
- {
- uint packed = Unsafe.Add(ref sBase, i);
-
- // packed = [W Z Y X]
- // REVERSE(packedArgb) = [X Y Z W]
- Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed);
- }
- }
-}
-
-internal readonly struct YZWXShuffle4 : IShuffle4
-{
- [MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0321);
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
- {
- ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
- ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
- uint n = (uint)source.Length / 4;
-
- for (nuint i = 0; i < n; i++)
- {
- uint packed = Unsafe.Add(ref sBase, i);
-
- // packed = [W Z Y X]
- // ROTR(8, packedArgb) = [Y Z W X]
- Unsafe.Add(ref dBase, i) = BitOperations.RotateRight(packed, 8);
- }
- }
-}
-
-internal readonly struct ZYXWShuffle4 : IShuffle4
-{
- [MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3012);
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
- {
- ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
- ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
- uint n = (uint)source.Length / 4;
-
- for (nuint i = 0; i < n; i++)
- {
- uint packed = Unsafe.Add(ref sBase, i);
-
- // packed = [W Z Y X]
- // tmp1 = [W 0 Y 0]
- // tmp2 = [0 Z 0 X]
- // tmp3=ROTL(16, tmp2) = [0 X 0 Z]
- // tmp1 + tmp3 = [W X Y Z]
- uint tmp1 = packed & 0xFF00FF00;
- uint tmp2 = packed & 0x00FF00FF;
- uint tmp3 = BitOperations.RotateLeft(tmp2, 16);
-
- Unsafe.Add(ref dBase, i) = tmp1 + tmp3;
- }
- }
-}
-
-internal readonly struct XWZYShuffle4 : IShuffle4
-{
- [MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle1230);
-
- [MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
- {
- ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
- ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
- uint n = (uint)source.Length / 4;
-
- for (nuint i = 0; i < n; i++)
- {
- uint packed = Unsafe.Add(ref sBase, i);
-
- // packed = [W Z Y X]
- // tmp1 = [0 Z 0 X]
- // tmp2 = [W 0 Y 0]
- // tmp3=ROTL(16, tmp2) = [Y 0 W 0]
- // tmp1 + tmp3 = [Y Z W X]
- uint tmp1 = packed & 0x00FF00FF;
- uint tmp2 = packed & 0xFF00FF00;
- uint tmp3 = BitOperations.RotateLeft(tmp2, 16);
-
- Unsafe.Add(ref dBase, i) = tmp1 + tmp3;
- }
- }
+ void Shuffle(ReadOnlySpan source, Span destination);
}
diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
index 6cf6eef08..0f282c7f9 100644
--- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
@@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
+using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static SixLabors.ImageSharp.SimdUtils;
@@ -12,24 +13,23 @@ internal interface IPad3Shuffle4 : IComponentShuffle
{
}
-internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
+internal readonly struct DefaultPad3Shuffle4([ConstantExpected] byte control) : IPad3Shuffle4
{
- public DefaultPad3Shuffle4(byte control)
- => this.Control = control;
-
- public byte Control { get; }
+ public byte Control { get; } = control;
[MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, this.Control);
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+#pragma warning disable CA1857 // A constant is expected for the parameter
+ => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref destination, this.Control);
+#pragma warning restore CA1857 // A constant is expected for the parameter
[MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
+ public void Shuffle(ReadOnlySpan source, Span destination)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
- ref byte dBase = ref MemoryMarshal.GetReference(dest);
+ ref byte dBase = ref MemoryMarshal.GetReference(destination);
- Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0);
+ SimdUtils.Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0);
Span temp = stackalloc byte[4];
ref byte t = ref MemoryMarshal.GetReference(temp);
@@ -51,14 +51,14 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
{
[MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3210);
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+ => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle3210);
[MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
+ public void Shuffle(ReadOnlySpan source, Span destination)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
- ref byte dBase = ref MemoryMarshal.GetReference(dest);
+ ref byte dBase = ref MemoryMarshal.GetReference(destination);
ref byte sEnd = ref Unsafe.Add(ref sBase, (uint)source.Length);
ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4);
diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
index 2cd586212..3c0973ad6 100644
--- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
@@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
+using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static SixLabors.ImageSharp.SimdUtils;
@@ -12,24 +13,23 @@ internal interface IShuffle3 : IComponentShuffle
{
}
-internal readonly struct DefaultShuffle3 : IShuffle3
+internal readonly struct DefaultShuffle3([ConstantExpected] byte control) : IShuffle3
{
- public DefaultShuffle3(byte control)
- => this.Control = control;
-
- public byte Control { get; }
+ public byte Control { get; } = control;
[MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Shuffle3Reduce(ref source, ref dest, this.Control);
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+#pragma warning disable CA1857 // A constant is expected for the parameter
+ => HwIntrinsics.Shuffle3Reduce(ref source, ref destination, this.Control);
+#pragma warning restore CA1857 // A constant is expected for the parameter
[MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
+ public void Shuffle(ReadOnlySpan source, Span destination)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
- ref byte dBase = ref MemoryMarshal.GetReference(dest);
+ ref byte dBase = ref MemoryMarshal.GetReference(destination);
- Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0);
+ SimdUtils.Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0);
for (nuint i = 0; i < (uint)source.Length; i += 3)
{
diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4.cs
new file mode 100644
index 000000000..d5c6df2c8
--- /dev/null
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4.cs
@@ -0,0 +1,178 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Buffers.Binary;
+using System.Diagnostics.CodeAnalysis;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using static SixLabors.ImageSharp.SimdUtils;
+
+namespace SixLabors.ImageSharp;
+
+/// <inheritdoc cref="IComponentShuffle"/>
+internal interface IShuffle4 : IComponentShuffle
+{
+}
+
+internal readonly struct DefaultShuffle4([ConstantExpected] byte control) : IShuffle4
+{
+ public byte Control { get; } = control; // The shuffle control byte captured from the primary constructor.
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+#pragma warning disable CA1857 // A constant is expected for the parameter
+ => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, this.Control); // CA1857 is suppressed: this.Control is a property read, not a literal.
+#pragma warning restore CA1857 // A constant is expected for the parameter
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void Shuffle(ReadOnlySpan source, Span destination)
+ {
+ ref byte sBase = ref MemoryMarshal.GetReference(source);
+ ref byte dBase = ref MemoryMarshal.GetReference(destination);
+
+ SimdUtils.Shuffle.InverseMMShuffle(this.Control, out uint p3, out uint p2, out uint p1, out uint p0); // NOTE(review): presumably yields per-component source offsets in 0..3 - confirm against InverseMMShuffle.
+
+ for (nuint i = 0; i < (uint)source.Length; i += 4) // Advances four bytes per iteration; Unsafe.Add performs no bounds checks.
+ {
+ Unsafe.Add(ref dBase, i + 0) = Unsafe.Add(ref sBase, p0 + i);
+ Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
+ Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
+ Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
+ }
+ }
+}
+
+internal readonly struct WXYZShuffle4 : IShuffle4
+{
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+ => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle2103);
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void Shuffle(ReadOnlySpan source, Span destination)
+ {
+ ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
+ ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination));
+ uint n = (uint)source.Length / 4; // Number of whole 4-element groups processed as one uint each.
+
+ for (nuint i = 0; i < n; i++)
+ {
+ uint packed = Unsafe.Add(ref sBase, i);
+
+ // packed = [W Z Y X] (bytes listed most significant first)
+ // ROTL(8, packed) = [Z Y X W], written below as an explicit shift/or pair.
+ Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24);
+ }
+ }
+}
+
+internal readonly struct WZYXShuffle4 : IShuffle4
+{
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+ => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle0123);
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void Shuffle(ReadOnlySpan source, Span destination)
+ {
+ ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
+ ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination));
+ uint n = (uint)source.Length / 4; // Number of whole 4-element groups processed as one uint each.
+
+ for (nuint i = 0; i < n; i++)
+ {
+ uint packed = Unsafe.Add(ref sBase, i);
+
+ // packed = [W Z Y X] (bytes listed most significant first)
+ // REVERSE(packed) = [X Y Z W], i.e. the byte order is fully reversed.
+ Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed);
+ }
+ }
+}
+
+internal readonly struct YZWXShuffle4 : IShuffle4
+{
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+ => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle0321);
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void Shuffle(ReadOnlySpan source, Span destination)
+ {
+ ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
+ ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination));
+ uint n = (uint)source.Length / 4; // Number of whole 4-element groups processed as one uint each.
+
+ for (nuint i = 0; i < n; i++)
+ {
+ uint packed = Unsafe.Add(ref sBase, i);
+
+ // packed = [W Z Y X] (bytes listed most significant first)
+ // ROTR(8, packed) = [X W Z Y]; read least significant first that is Y Z W X.
+ Unsafe.Add(ref dBase, i) = BitOperations.RotateRight(packed, 8);
+ }
+ }
+}
+
+internal readonly struct ZYXWShuffle4 : IShuffle4
+{
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+ => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle3012);
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void Shuffle(ReadOnlySpan source, Span destination)
+ {
+ ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
+ ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination));
+ uint n = (uint)source.Length / 4; // Number of whole 4-element groups processed as one uint each.
+
+ for (nuint i = 0; i < n; i++)
+ {
+ uint packed = Unsafe.Add(ref sBase, i);
+
+ // packed = [W Z Y X] (bytes listed most significant first)
+ // tmp1 = packed & 0xFF00FF00 = [W 0 Y 0] (bytes that stay in place)
+ // tmp2 = packed & 0x00FF00FF = [0 Z 0 X] (bytes that swap)
+ // tmp3 = ROTL(16, tmp2) = [0 X 0 Z]
+ // tmp1 + tmp3 = [W X Y Z]; the two operands share no set bytes, so + merges them.
+ uint tmp1 = packed & 0xFF00FF00;
+ uint tmp2 = packed & 0x00FF00FF;
+ uint tmp3 = BitOperations.RotateLeft(tmp2, 16);
+
+ Unsafe.Add(ref dBase, i) = tmp1 + tmp3;
+ }
+ }
+}
+
+internal readonly struct XWZYShuffle4 : IShuffle4
+{
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+ => HwIntrinsics.Shuffle4Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle1230);
+
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public void Shuffle(ReadOnlySpan source, Span destination)
+ {
+ ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
+ ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination));
+ uint n = (uint)source.Length / 4; // Number of whole 4-element groups processed as one uint each.
+
+ for (nuint i = 0; i < n; i++)
+ {
+ uint packed = Unsafe.Add(ref sBase, i);
+
+ // packed = [W Z Y X] (bytes listed most significant first)
+ // tmp1 = packed & 0x00FF00FF = [0 Z 0 X] (bytes that stay in place)
+ // tmp2 = packed & 0xFF00FF00 = [W 0 Y 0] (bytes that swap)
+ // tmp3 = ROTL(16, tmp2) = [Y 0 W 0]
+ // tmp1 + tmp3 = [Y Z W X]; the two operands share no set bytes, so + merges them.
+ uint tmp1 = packed & 0x00FF00FF;
+ uint tmp2 = packed & 0xFF00FF00;
+ uint tmp3 = BitOperations.RotateLeft(tmp2, 16);
+
+ Unsafe.Add(ref dBase, i) = tmp1 + tmp3;
+ }
+ }
+}
diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
index 5e82973e3..3e7e44066 100644
--- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
+++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
@@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
+using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static SixLabors.ImageSharp.SimdUtils;
@@ -12,26 +13,25 @@ internal interface IShuffle4Slice3 : IComponentShuffle
{
}
-internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3
+internal readonly struct DefaultShuffle4Slice3([ConstantExpected] byte control) : IShuffle4Slice3
{
- public DefaultShuffle4Slice3(byte control)
- => this.Control = control;
-
- public byte Control { get; }
+ public byte Control { get; } = control;
[MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, this.Control);
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+#pragma warning disable CA1857 // A constant is expected for the parameter
+ => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref destination, this.Control);
+#pragma warning restore CA1857 // A constant is expected for the parameter
[MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
+ public void Shuffle(ReadOnlySpan source, Span destination)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
- ref byte dBase = ref MemoryMarshal.GetReference(dest);
+ ref byte dBase = ref MemoryMarshal.GetReference(destination);
- Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0);
+ SimdUtils.Shuffle.InverseMMShuffle(this.Control, out _, out uint p2, out uint p1, out uint p0);
- for (nuint i = 0, j = 0; i < (uint)dest.Length; i += 3, j += 4)
+ for (nuint i = 0, j = 0; i < (uint)destination.Length; i += 3, j += 4)
{
Unsafe.Add(ref dBase, i + 0) = Unsafe.Add(ref sBase, p0 + j);
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j);
@@ -43,14 +43,14 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3
internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3
{
[MethodImpl(InliningOptions.ShortMethod)]
- public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest)
- => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, Shuffle.MMShuffle3210);
+ public void ShuffleReduce(ref ReadOnlySpan source, ref Span destination)
+ => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref destination, SimdUtils.Shuffle.MMShuffle3210);
[MethodImpl(InliningOptions.ShortMethod)]
- public void RunFallbackShuffle(ReadOnlySpan source, Span dest)
+ public void Shuffle(ReadOnlySpan source, Span destination)
{
ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source));
- ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest));
+ ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(destination));
nint n = (nint)(uint)source.Length / 4;
nint m = Numerics.Modulo4(n);
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index ad079b52e..f27852a82 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -8,6 +8,7 @@ using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
+using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp;
@@ -51,22 +52,34 @@ internal static partial class SimdUtils
///
/// Shuffle single-precision (32-bit) floating-point elements in
- /// using the control and store the results in .
+ /// using the control and store the results in <paramref name="destination"/>.
///
/// The source span of floats.
- /// The destination span of floats.
+ /// <param name="destination">The destination span of floats.</param>
/// The byte control.
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Reduce(
ref ReadOnlySpan source,
- ref Span dest,
+ ref Span destination,
[ConstantExpected] byte control)
{
- if (Avx.IsSupported || Sse.IsSupported)
+ if ((Vector512.IsHardwareAccelerated && Vector512Utilities.SupportsShuffleFloat) ||
+ (Vector256.IsHardwareAccelerated && Vector256Utilities.SupportsShuffleFloat) ||
+ (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsShuffleFloat))
{
- int remainder = Avx.IsSupported
- ? Numerics.ModuloP2(source.Length, Vector256.Count)
- : Numerics.ModuloP2(source.Length, Vector128.Count);
+ int remainder = 0;
+ if (Vector512.IsHardwareAccelerated)
+ {
+ remainder = Numerics.ModuloP2(source.Length, Vector512.Count);
+ }
+ else if (Vector256.IsHardwareAccelerated)
+ {
+ remainder = Numerics.ModuloP2(source.Length, Vector256.Count);
+ }
+ else if (Vector128.IsHardwareAccelerated)
+ {
+ remainder = Numerics.ModuloP2(source.Length, Vector128.Count);
+ }
int adjustedCount = source.Length - remainder;
@@ -74,33 +87,45 @@ internal static partial class SimdUtils
{
Shuffle4(
source[..adjustedCount],
- dest[..adjustedCount],
+ destination[..adjustedCount],
control);
source = source[adjustedCount..];
- dest = dest[adjustedCount..];
+ destination = destination[adjustedCount..];
}
}
}
///
- /// Shuffle 8-bit integers within 128-bit lanes in
- /// using the control and store the results in .
+ /// Shuffle 8-bit integers in <paramref name="source"/>
+ /// using the control and store the results in <paramref name="destination"/>.
///
/// The source span of bytes.
- /// The destination span of bytes.
+ /// <param name="destination">The destination span of bytes.</param>
/// The byte control.
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Reduce(
ref ReadOnlySpan source,
- ref Span dest,
- byte control)
+ ref Span destination,
+ [ConstantExpected] byte control)
{
- if (Avx2.IsSupported || Ssse3.IsSupported)
+ if ((Vector512.IsHardwareAccelerated && Vector512Utilities.SupportsShuffleByte) ||
+ (Vector256.IsHardwareAccelerated && Vector256Utilities.SupportsShuffleByte) ||
+ (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsShuffleByte))
{
- int remainder = Avx2.IsSupported
- ? Numerics.ModuloP2(source.Length, Vector256.Count)
- : Numerics.ModuloP2(source.Length, Vector128.Count);
+ int remainder = 0;
+ if (Vector512.IsHardwareAccelerated)
+ {
+ remainder = Numerics.ModuloP2(source.Length, Vector512.Count);
+ }
+ else if (Vector256.IsHardwareAccelerated)
+ {
+ remainder = Numerics.ModuloP2(source.Length, Vector256.Count);
+ }
+ else if (Vector128.IsHardwareAccelerated)
+ {
+ remainder = Numerics.ModuloP2(source.Length, Vector128.Count);
+ }
int adjustedCount = source.Length - remainder;
@@ -108,29 +133,29 @@ internal static partial class SimdUtils
{
Shuffle4(
source[..adjustedCount],
- dest[..adjustedCount],
+ destination[..adjustedCount],
control);
source = source[adjustedCount..];
- dest = dest[adjustedCount..];
+ destination = destination[adjustedCount..];
}
}
}
///
- /// Shuffles 8-bit integer triplets within 128-bit lanes in
- /// using the control and store the results in .
+ /// Shuffles 8-bit integer triplets in <paramref name="source"/>
+ /// using the control and stores the results in <paramref name="destination"/>.
///
/// The source span of bytes.
- /// The destination span of bytes.
+ /// <param name="destination">The destination span of bytes.</param>
/// The byte control.
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle3Reduce(
ref ReadOnlySpan source,
- ref Span dest,
- byte control)
+ ref Span destination,
+ [ConstantExpected] byte control)
{
- if (Ssse3.IsSupported)
+ if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsShuffleByte && Vector128Utilities.SupportsRightAlign)
{
int remainder = source.Length % (Vector128.Count * 3);
@@ -140,77 +165,77 @@ internal static partial class SimdUtils
{
Shuffle3(
source[..adjustedCount],
- dest[..adjustedCount],
+ destination[..adjustedCount],
control);
source = source[adjustedCount..];
- dest = dest[adjustedCount..];
+ destination = destination[adjustedCount..];
}
}
}
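/// <summary>
- /// Pads then shuffles 8-bit integers within 128-bit lanes in <paramref name="source"/>
- /// using the control and store the results in <paramref name="dest"/>.
+ /// Pads then shuffles 8-bit integers in <paramref name="source"/>
+ /// using the control and store the results in <paramref name="destination"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
- /// <param name="dest">The destination span of bytes.</param>
+ /// <param name="destination">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>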
[MethodImpl(InliningOptions.ShortMethod)]
public static void Pad3Shuffle4Reduce(
ref ReadOnlySpan source,
- ref Span dest,
- byte control)
+ ref Span destination,
+ [ConstantExpected] byte control)
{
- if (Ssse3.IsSupported)
+ if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsShuffleByte && Vector128Utilities.SupportsShiftByte)
{
int remainder = source.Length % (Vector128.Count * 3);
int sourceCount = source.Length - remainder;
- int destCount = (int)((uint)sourceCount * 4 / 3);
+ int destinationCount = (int)((uint)sourceCount * 4 / 3);
if (sourceCount > 0)
{
Pad3Shuffle4(
source[..sourceCount],
- dest[..destCount],
+ destination[..destinationCount],
control);
source = source[sourceCount..];
- dest = dest[destCount..];
+ destination = destination[destinationCount..];
}
}
}
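/// <summary>
- /// Shuffles then slices 8-bit integers within 128-bit lanes in <paramref name="source"/>
- /// using the control and store the results in <paramref name="dest"/>.
+ /// Shuffles then slices 8-bit integers in <paramref name="source"/>
+ /// using the control and store the results in <paramref name="destination"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
- /// <param name="dest">The destination span of bytes.</param>
+ /// <param name="destination">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>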
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Slice3Reduce(
ref ReadOnlySpan source,
- ref Span dest,
- byte control)
+ ref Span destination,
+ [ConstantExpected] byte control)
{
- if (Ssse3.IsSupported)
+ if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsShuffleByte && Vector128Utilities.SupportsShiftByte)
{
int remainder = source.Length & ((Vector128.Count * 4) - 1); // bit-hack for modulo
int sourceCount = source.Length - remainder;
- int destCount = (int)((uint)sourceCount * 3 / 4);
+ int destinationCount = (int)((uint)sourceCount * 3 / 4);
if (sourceCount > 0)
{
Shuffle4Slice3(
source[..sourceCount],
- dest[..destCount],
+ destination[..destinationCount],
control);
source = source[sourceCount..];
- dest = dest[destCount..];
+ destination = destination[destinationCount..];
}
}
}
@@ -218,76 +243,90 @@ internal static partial class SimdUtils
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4(
ReadOnlySpan source,
- Span dest,
+ Span destination,
[ConstantExpected] byte control)
{
- if (Avx.IsSupported)
+ if (Vector512.IsHardwareAccelerated && Vector512Utilities.SupportsShuffleFloat)
{
- ref Vector256 sourceBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
- ref Vector256 destBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+ nuint n = (uint)destination.Length / (uint)Vector512.Count;
+ nuint m = Numerics.Modulo4(n);
+ nuint u = n - m;
- nint n = (nint)dest.Vector256Count();
- nint m = Numerics.Modulo4(n);
- nint u = n - m;
-
- for (nint i = 0; i < u; i += 4)
+ for (nuint i = 0; i < u; i += 4)
{
- ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i);
- ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i);
+ ref Vector512 vs0 = ref Unsafe.Add(ref sourceBase, i);
+ ref Vector512 vd0 = ref Unsafe.Add(ref destinationBase, i);
- vd0 = Avx.Permute(vs0, control);
- Unsafe.Add(ref vd0, 1) = Avx.Permute(Unsafe.Add(ref vs0, 1), control);
- Unsafe.Add(ref vd0, 2) = Avx.Permute(Unsafe.Add(ref vs0, 2), control);
- Unsafe.Add(ref vd0, 3) = Avx.Permute(Unsafe.Add(ref vs0, 3), control);
+ vd0 = Vector512Utilities.Shuffle(vs0, control);
+ Unsafe.Add(ref vd0, (nuint)1) = Vector512Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)1), control);
+ Unsafe.Add(ref vd0, (nuint)2) = Vector512Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)2), control);
+ Unsafe.Add(ref vd0, (nuint)3) = Vector512Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)3), control);
}
if (m > 0)
{
- for (nint i = u; i < n; i++)
+ for (nuint i = u; i < n; i++)
{
- Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control);
+ Unsafe.Add(ref destinationBase, i) = Vector512Utilities.Shuffle(Unsafe.Add(ref sourceBase, i), control);
}
}
}
- else
+ else if (Vector256.IsHardwareAccelerated && Vector256Utilities.SupportsShuffleFloat)
{
- // Sse
- ref Vector128 sourceBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
- ref Vector128 destBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+ nuint n = (uint)destination.Length / (uint)Vector256.Count;
+ nuint m = Numerics.Modulo4(n);
+ nuint u = n - m;
- nint n = (nint)((uint)dest.Length / (uint)Vector128.Count);
- nint m = Numerics.Modulo4(n);
- nint u = n - m;
-
- for (nint i = 0; i < u; i += 4)
+ for (nuint i = 0; i < u; i += 4)
{
- ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i);
- ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i);
+ ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i);
+ ref Vector256 vd0 = ref Unsafe.Add(ref destinationBase, i);
+
+ vd0 = Vector256Utilities.Shuffle(vs0, control);
+ Unsafe.Add(ref vd0, (nuint)1) = Vector256Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)1), control);
+ Unsafe.Add(ref vd0, (nuint)2) = Vector256Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)2), control);
+ Unsafe.Add(ref vd0, (nuint)3) = Vector256Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)3), control);
+ }
- vd0 = Sse.Shuffle(vs0, vs0, control);
+ if (m > 0)
+ {
+ for (nuint i = u; i < n; i++)
+ {
+ Unsafe.Add(ref destinationBase, i) = Vector256Utilities.Shuffle(Unsafe.Add(ref sourceBase, i), control);
+ }
+ }
+ }
+ else if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsShuffleFloat)
+ {
+ ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector128 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
- Vector128 vs1 = Unsafe.Add(ref vs0, 1);
- Unsafe.Add(ref vd0, 1) = Sse.Shuffle(vs1, vs1, control);
+ nuint n = (uint)destination.Length / (uint)Vector128.Count;
+ nuint m = Numerics.Modulo4(n);
+ nuint u = n - m;
- Vector128 vs2 = Unsafe.Add(ref vs0, 2);
- Unsafe.Add(ref vd0, 2) = Sse.Shuffle(vs2, vs2, control);
+ for (nuint i = 0; i < u; i += 4)
+ {
+ ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i);
+ ref Vector128 vd0 = ref Unsafe.Add(ref destinationBase, i);
- Vector128 vs3 = Unsafe.Add(ref vs0, 3);
- Unsafe.Add(ref vd0, 3) = Sse.Shuffle(vs3, vs3, control);
+ vd0 = Vector128Utilities.Shuffle(vs0, control);
+ Unsafe.Add(ref vd0, (nuint)1) = Vector128Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)1), control);
+ Unsafe.Add(ref vd0, (nuint)2) = Vector128Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)2), control);
+ Unsafe.Add(ref vd0, (nuint)3) = Vector128Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)3), control);
}
if (m > 0)
{
- for (nint i = u; i < n; i++)
+ for (nuint i = u; i < n; i++)
{
- Vector128 vs = Unsafe.Add(ref sourceBase, i);
- Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control);
+ Unsafe.Add(ref destinationBase, i) = Vector128Utilities.Shuffle(Unsafe.Add(ref sourceBase, i), control);
}
}
}
@@ -296,80 +335,102 @@ internal static partial class SimdUtils
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4(
ReadOnlySpan source,
- Span dest,
- byte control)
+ Span destination,
+ [ConstantExpected] byte control)
{
- if (Avx2.IsSupported)
+ if (Vector512.IsHardwareAccelerated && Vector512Utilities.SupportsShuffleByte)
{
- // I've chosen to do this for convenience while we determine what
- // shuffle controls to add to the library.
- // We can add static ROS instances if need be in the future.
- Span bytes = stackalloc byte[Vector256.Count];
- Shuffle.MMShuffleSpan(ref bytes, control);
- Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes));
+ Span temp = stackalloc byte[Vector512.Count];
+ Shuffle.MMShuffleSpan(ref temp, control);
+ Vector512 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp));
- ref Vector256 sourceBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
- ref Vector256 destBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+ nuint n = (uint)destination.Length / (uint)Vector512.Count;
+ nuint m = Numerics.Modulo4(n);
+ nuint u = n - m;
+
+ for (nuint i = 0; i < u; i += 4)
+ {
+ ref Vector512 vs0 = ref Unsafe.Add(ref sourceBase, i);
+ ref Vector512 vd0 = ref Unsafe.Add(ref destinationBase, i);
+
+ vd0 = Vector512Utilities.Shuffle(vs0, mask);
+ Unsafe.Add(ref vd0, (nuint)1) = Vector512Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)1), mask);
+ Unsafe.Add(ref vd0, (nuint)2) = Vector512Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)2), mask);
+ Unsafe.Add(ref vd0, (nuint)3) = Vector512Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)3), mask);
+ }
+
+ if (m > 0)
+ {
+ for (nuint i = u; i < n; i++)
+ {
+ Unsafe.Add(ref destinationBase, i) = Vector512Utilities.Shuffle(Unsafe.Add(ref sourceBase, i), mask);
+ }
+ }
+ }
+ else if (Vector256.IsHardwareAccelerated && Vector256Utilities.SupportsShuffleByte)
+ {
+ Span temp = stackalloc byte[Vector256.Count];
+ Shuffle.MMShuffleSpan(ref temp, control);
+ Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp));
+
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
- nint n = (nint)((uint)dest.Length / (uint)Vector256.Count);
- nint m = Numerics.Modulo4(n);
- nint u = n - m;
+ nuint n = (uint)destination.Length / (uint)Vector256.Count;
+ nuint m = Numerics.Modulo4(n);
+ nuint u = n - m;
- for (nint i = 0; i < u; i += 4)
+ for (nuint i = 0; i < u; i += 4)
{
ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i);
- ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i);
+ ref Vector256 vd0 = ref Unsafe.Add(ref destinationBase, i);
- vd0 = Avx2.Shuffle(vs0, vshuffle);
- Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle);
- Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle);
- Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle);
+ vd0 = Vector256Utilities.Shuffle(vs0, mask);
+ Unsafe.Add(ref vd0, (nuint)1) = Vector256Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)1), mask);
+ Unsafe.Add(ref vd0, (nuint)2) = Vector256Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)2), mask);
+ Unsafe.Add(ref vd0, (nuint)3) = Vector256Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)3), mask);
}
if (m > 0)
{
- for (nint i = u; i < n; i++)
+ for (nuint i = u; i < n; i++)
{
- Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle);
+ Unsafe.Add(ref destinationBase, i) = Vector256Utilities.Shuffle(Unsafe.Add(ref sourceBase, i), mask);
}
}
}
- else
+ else if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsShuffleByte)
{
- // Ssse3
- Span bytes = stackalloc byte[Vector128.Count];
- Shuffle.MMShuffleSpan(ref bytes, control);
- Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes));
-
- ref Vector128 sourceBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Span temp = stackalloc byte[Vector128.Count];
+ Shuffle.MMShuffleSpan(ref temp, control);
+ Vector128 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp));
- ref Vector128 destBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+ ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector128 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
- nint n = (nint)((uint)dest.Length / (uint)Vector128.Count);
- nint m = Numerics.Modulo4(n);
- nint u = n - m;
+ nuint n = (uint)destination.Length / (uint)Vector128.Count;
+ nuint m = Numerics.Modulo4(n);
+ nuint u = n - m;
- for (nint i = 0; i < u; i += 4)
+ for (nuint i = 0; i < u; i += 4)
{
ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i);
- ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i);
+ ref Vector128 vd0 = ref Unsafe.Add(ref destinationBase, i);
- vd0 = Ssse3.Shuffle(vs0, vshuffle);
- Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle);
- Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle);
- Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle);
+ vd0 = Vector128Utilities.Shuffle(vs0, mask);
+ Unsafe.Add(ref vd0, (nuint)1) = Vector128Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)1), mask);
+ Unsafe.Add(ref vd0, (nuint)2) = Vector128Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)2), mask);
+ Unsafe.Add(ref vd0, (nuint)3) = Vector128Utilities.Shuffle(Unsafe.Add(ref vs0, (nuint)3), mask);
}
if (m > 0)
{
- for (nint i = u; i < n; i++)
+ for (nuint i = u; i < n; i++)
{
- Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle);
+ Unsafe.Add(ref destinationBase, i) = Vector128Utilities.Shuffle(Unsafe.Add(ref sourceBase, i), mask);
}
}
}
@@ -378,24 +439,21 @@ internal static partial class SimdUtils
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle3(
ReadOnlySpan