diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index 4732effd4a..4b9a90a952 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -8,6 +8,7 @@ using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
+using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp;
@@ -95,15 +96,15 @@ internal static partial class SimdUtils
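/// <summary>
/// Shuffle 8-bit integers in <paramref name="source"/>
- /// using the control and store the results in <paramref name="dest"/>.
+ /// using the control and store the results in <paramref name="destination"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
- /// <param name="dest">The destination span of bytes.</param>
+ /// <param name="destination">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>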
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Reduce(
ref ReadOnlySpan source,
- ref Span dest,
+ ref Span destination,
byte control)
{
if (Vector512.IsHardwareAccelerated || Vector256.IsHardwareAccelerated || Vector128.IsHardwareAccelerated)
@@ -128,29 +129,29 @@ internal static partial class SimdUtils
{
Shuffle4(
source[..adjustedCount],
- dest[..adjustedCount],
+ destination[..adjustedCount],
control);
source = source[adjustedCount..];
- dest = dest[adjustedCount..];
+ destination = destination[adjustedCount..];
}
}
}
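/// <summary>
/// Shuffles 8-bit integer triplets within 128-bit lanes in <paramref name="source"/>
- /// using the control and store the results in <paramref name="dest"/>.
+ /// using the control and store the results in <paramref name="destination"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
- /// <param name="dest">The destination span of bytes.</param>
+ /// <param name="destination">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>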
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle3Reduce(
ref ReadOnlySpan source,
- ref Span dest,
+ ref Span destination,
byte control)
{
- if (Ssse3.IsSupported)
+ if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsRightShift)
{
int remainder = source.Length % (Vector128.Count * 3);
@@ -160,11 +161,11 @@ internal static partial class SimdUtils
{
Shuffle3(
source[..adjustedCount],
- dest[..adjustedCount],
+ destination[..adjustedCount],
control);
source = source[adjustedCount..];
- dest = dest[adjustedCount..];
+ destination = destination[adjustedCount..];
}
}
}
@@ -446,24 +447,21 @@ internal static partial class SimdUtils
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle3(
ReadOnlySpan source,
- Span dest,
+ Span destination,
byte control)
{
- if (Ssse3.IsSupported)
+ if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsRightShift)
{
- Vector128 vmask = ShuffleMaskPad4Nx16();
- Vector128 vmasko = ShuffleMaskSlice4Nx16();
- Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
+ Vector128 maskPad4Nx16 = ShuffleMaskPad4Nx16();
+ Vector128 maskSlice4Nx16 = ShuffleMaskSlice4Nx16();
+ Vector128 maskE = Vector128Utilities.AlignRight(maskSlice4Nx16, maskSlice4Nx16, 12);
Span bytes = stackalloc byte[Vector128.Count];
Shuffle.MMShuffleSpan(ref bytes, control);
- Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes));
+ Vector128 mask = Unsafe.As>(ref MemoryMarshal.GetReference(bytes));
- ref Vector128 sourceBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
-
- ref Vector128 destBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+ ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ ref Vector128 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
nuint n = source.Vector128Count();
@@ -472,36 +470,36 @@ internal static partial class SimdUtils
ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i);
Vector128 v0 = vs;
- Vector128 v1 = Unsafe.Add(ref vs, 1);
- Vector128 v2 = Unsafe.Add(ref vs, 2);
- Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4);
+ Vector128 v1 = Unsafe.Add(ref vs, (nuint)1);
+ Vector128 v2 = Unsafe.Add(ref vs, (nuint)2);
+ Vector128 v3 = Vector128Utilities.ShiftRightBytesInVector(v2, 4);
- v2 = Ssse3.AlignRight(v2, v1, 8);
- v1 = Ssse3.AlignRight(v1, v0, 12);
+ v2 = Vector128Utilities.AlignRight(v2, v1, 8);
+ v1 = Vector128Utilities.AlignRight(v1, v0, 12);
- v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle);
- v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle);
- v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle);
- v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle);
+ v0 = Vector128.Shuffle(Vector128.Shuffle(v0, maskPad4Nx16), mask);
+ v1 = Vector128.Shuffle(Vector128.Shuffle(v1, maskPad4Nx16), mask);
+ v2 = Vector128.Shuffle(Vector128.Shuffle(v2, maskPad4Nx16), mask);
+ v3 = Vector128.Shuffle(Vector128.Shuffle(v3, maskPad4Nx16), mask);
- v0 = Ssse3.Shuffle(v0, vmaske);
- v1 = Ssse3.Shuffle(v1, vmasko);
- v2 = Ssse3.Shuffle(v2, vmaske);
- v3 = Ssse3.Shuffle(v3, vmasko);
+ v0 = Vector128.Shuffle(v0, maskE);
+ v1 = Vector128.Shuffle(v1, maskSlice4Nx16);
+ v2 = Vector128.Shuffle(v2, maskE);
+ v3 = Vector128.Shuffle(v3, maskSlice4Nx16);
- v0 = Ssse3.AlignRight(v1, v0, 4);
- v3 = Ssse3.AlignRight(v3, v2, 12);
+ v0 = Vector128Utilities.AlignRight(v1, v0, 4);
+ v3 = Vector128Utilities.AlignRight(v3, v2, 12);
- v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4);
- v2 = Sse2.ShiftRightLogical128BitLane(v2, 4);
+ v1 = Vector128Utilities.ShiftLeftBytesInVector(v1, 4);
+ v2 = Vector128Utilities.ShiftRightBytesInVector(v2, 4);
- v1 = Ssse3.AlignRight(v2, v1, 8);
+ v1 = Vector128Utilities.AlignRight(v2, v1, 8);
- ref Vector128 vd = ref Unsafe.Add(ref destBase, i);
+ ref Vector128 vd = ref Unsafe.Add(ref destinationBase, i);
vd = v0;
- Unsafe.Add(ref vd, 1) = v1;
- Unsafe.Add(ref vd, 2) = v3;
+ Unsafe.Add(ref vd, (nuint)1) = v1;
+ Unsafe.Add(ref vd, (nuint)2) = v3;
}
}
}
@@ -509,7 +507,7 @@ internal static partial class SimdUtils
[MethodImpl(InliningOptions.ShortMethod)]
private static void Pad3Shuffle4(
ReadOnlySpan source,
- Span dest,
+ Span destination,
byte control)
{
if (Ssse3.IsSupported)
@@ -525,7 +523,7 @@ internal static partial class SimdUtils
ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
ref Vector128 destBase =
- ref Unsafe.As>(ref MemoryMarshal.GetReference(dest));
+ ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
nuint n = source.Vector128Count();
diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
new file mode 100644
index 0000000000..829362da88
--- /dev/null
+++ b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs
@@ -0,0 +1,119 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace SixLabors.ImageSharp.Common.Helpers;
+
+/// <summary>
+/// Defines utility methods for <see cref="Vector128{T}"/> that have not yet been normalized in the runtime.
+/// Should only be used if the intrinsics are available.
+/// </summary>
+internal static class Vector128Utilities
+{
+    /// <summary>
+    /// Gets a value indicating whether right shift operations are supported.
+    /// </summary>
+    /// <remarks>
+    /// This is <see langword="true"/> when either SSSE3 or AdvSimd is available. Callers are expected to
+    /// check it before using the byte shift and align helpers of this class.
+    /// </remarks>
+    public static bool SupportsRightShift
+    {
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        get => Ssse3.IsSupported || AdvSimd.IsSupported;
+    }
+
+    /// <summary>
+    /// Shifts a 128-bit value right by a specified number of bytes while shifting in zeros.
+    /// </summary>
+    /// <param name="value">The value to shift.</param>
+    /// <param name="numBytes">The number of bytes to shift by.</param>
+    /// <returns>The <see cref="Vector128{Byte}"/>.</returns>
+    /// <exception cref="UnreachableException">Neither SSE2 nor AdvSimd is supported.</exception>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static Vector128<byte> ShiftRightBytesInVector(Vector128<byte> value, [ConstantExpected(Max = (byte)15)] byte numBytes)
+    {
+        if (Sse2.IsSupported)
+        {
+            return Sse2.ShiftRightLogical128BitLane(value, numBytes);
+        }
+
+        if (AdvSimd.IsSupported)
+        {
+            // EXT reads 16 consecutive bytes starting at numBytes from value followed by zeros.
+            return AdvSimd.ExtractVector128(value, Vector128<byte>.Zero, numBytes);
+        }
+
+        ThrowUnreachableException();
+        return default;
+    }
+
+    /// <summary>
+    /// Shifts a 128-bit value left by a specified number of bytes while shifting in zeros.
+    /// </summary>
+    /// <param name="value">The value to shift.</param>
+    /// <param name="numBytes">The number of bytes to shift by.</param>
+    /// <returns>The <see cref="Vector128{Byte}"/>.</returns>
+    /// <exception cref="UnreachableException">Neither SSE2 nor AdvSimd is supported.</exception>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static Vector128<byte> ShiftLeftBytesInVector(Vector128<byte> value, [ConstantExpected(Max = (byte)15)] byte numBytes)
+    {
+        if (Sse2.IsSupported)
+        {
+            return Sse2.ShiftLeftLogical128BitLane(value, numBytes);
+        }
+
+        if (AdvSimd.IsSupported)
+        {
+            // A zero shift would need a start index of 16, which EXT cannot encode.
+            if (numBytes == 0)
+            {
+                return value;
+            }
+
+            // EXT reads 16 consecutive bytes from zeros followed by value, so shifting left by
+            // numBytes means starting at 16 - numBytes. Starting at numBytes shifts by the complement.
+#pragma warning disable CA1857 // A constant is expected for the parameter
+            return AdvSimd.ExtractVector128(Vector128<byte>.Zero, value, (byte)(Vector128<byte>.Count - numBytes));
+#pragma warning restore CA1857 // A constant is expected for the parameter
+        }
+
+        ThrowUnreachableException();
+        return default;
+    }
+
+    /// <summary>
+    /// Right aligns elements of two source 128-bit values depending on bits in a mask.
+    /// </summary>
+    /// <param name="left">The left hand source vector.</param>
+    /// <param name="right">The right hand source vector.</param>
+    /// <param name="mask">An 8-bit mask used for the operation.</param>
+    /// <returns>The <see cref="Vector128{Byte}"/>.</returns>
+    /// <exception cref="UnreachableException">Neither SSSE3 nor AdvSimd is supported.</exception>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static Vector128<byte> AlignRight(Vector128<byte> left, Vector128<byte> right, [ConstantExpected(Max = (byte)15)] byte mask)
+    {
+        // Ssse3.AlignRight is an SSSE3 instruction; SSE3 support alone does not provide it.
+        if (Ssse3.IsSupported)
+        {
+            return Ssse3.AlignRight(left, right, mask);
+        }
+
+        if (AdvSimd.IsSupported)
+        {
+            return AdvSimd.ExtractVector128(right, left, mask);
+        }
+
+        ThrowUnreachableException();
+        return default;
+    }
+
+    [DoesNotReturn]
+    private static void ThrowUnreachableException() => throw new UnreachableException();
+}