From 505ecce3fa8d5c7cf8f967c7dd27f1f2f831fab0 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Mon, 12 May 2025 09:46:17 +1000 Subject: [PATCH] Update ShuffleNative (byte) --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 20 +++++++---- .../Common/Helpers/Vector128Utilities.cs | 35 +++++++++++-------- .../Common/Helpers/Vector256Utilities.cs | 2 +- .../Formats/Jpeg/Components/Block8x8F.cs | 1 - 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index e155e45361..dc610a6f9e 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -113,7 +113,7 @@ internal static partial class SimdUtils [ConstantExpected] byte control) { if ((Vector512.IsHardwareAccelerated && Vector512_.SupportsShuffleNativeByte) || - (Vector256.IsHardwareAccelerated && Vector256_.SupportsShuffleByte) || + (Vector256.IsHardwareAccelerated && Vector256_.SupportsShuffleNativeByte) || (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte)) { int remainder = 0; @@ -158,7 +158,7 @@ internal static partial class SimdUtils ref Span destination, [ConstantExpected] byte control) { - if (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte && Vector128_.SupportsRightAlign) + if (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte && Vector128_.SupportsAlignRight) { int remainder = source.Length % (Vector128.Count * 3); @@ -373,7 +373,7 @@ internal static partial class SimdUtils } } } - else if (Vector256.IsHardwareAccelerated && Vector256_.SupportsShuffleByte) + else if (Vector256.IsHardwareAccelerated && Vector256_.SupportsShuffleNativeByte) { Span temp = stackalloc byte[Vector256.Count]; Shuffle.MMShuffleSpan(ref temp, control); @@ -445,7 +445,9 @@ internal static partial class SimdUtils Span destination, [ConstantExpected] byte control) { - if (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte && Vector128_.SupportsRightAlign) + if (Vector128.IsHardwareAccelerated && + Vector128_.SupportsShuffleNativeByte && + Vector128_.SupportsAlignRight) { Vector128 maskPad4Nx16 = ShuffleMaskPad4Nx16(); Vector128 maskSlice4Nx16 = ShuffleMaskSlice4Nx16(); @@ -505,7 +507,10 @@ internal static partial class SimdUtils Span destination, [ConstantExpected] byte control) { - if (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte && Vector128_.SupportsShiftByte) + if (Vector128.IsHardwareAccelerated && + Vector128_.SupportsShuffleNativeByte && + Vector128_.SupportsShiftByte && + Vector128_.SupportsAlignRight) { Vector128 maskPad4Nx16 = ShuffleMaskPad4Nx16(); Vector128 fill = Vector128.Create(0xff000000ff000000ul).AsByte(); @@ -548,7 +553,10 @@ internal static partial class SimdUtils Span destination, [ConstantExpected] byte control) { - if (Vector128.IsHardwareAccelerated && Vector128_.SupportsShuffleNativeByte && Vector128_.SupportsShiftByte) + if (Vector128.IsHardwareAccelerated && + Vector128_.SupportsShuffleNativeByte && + Vector128_.SupportsShiftByte && + Vector128_.SupportsAlignRight) { Vector128 maskSlice4Nx16 = ShuffleMaskSlice4Nx16(); Vector128 maskE = Vector128_.AlignRight(maskSlice4Nx16, maskSlice4Nx16, 12); diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs index 3471acbd36..83b842e130 100644 --- a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs @@ -4,6 +4,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.Wasm; @@ -38,13 +39,26 @@ internal static class Vector128_ public static bool SupportsShuffleNativeByte { [MethodImpl(MethodImplOptions.AggressiveInlining)] - get => Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported; + get + { + if (Vector128.IsHardwareAccelerated) + { + if (RuntimeInformation.ProcessArchitecture is Architecture.X86 or Architecture.X64) + { + return Ssse3.IsSupported; + } + + return true; + } + + return false; + } } /// /// Gets a value indicating whether right align operations are supported. /// - public static bool SupportsRightAlign + public static bool SupportsAlignRight { [MethodImpl(MethodImplOptions.AggressiveInlining)] get => Ssse3.IsSupported || AdvSimd.IsSupported; @@ -91,23 +105,16 @@ internal static class Vector128_ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 ShuffleNative(Vector128 vector, Vector128 indices) { + // For x64 we use the SSSE3 shuffle intrinsic to avoid additional instructions. 3 vs 1. if (Ssse3.IsSupported) { return Ssse3.Shuffle(vector, indices); } - if (AdvSimd.Arm64.IsSupported) - { - return AdvSimd.Arm64.VectorTableLookup(vector, indices); - } - - if (PackedSimd.IsSupported) - { - return PackedSimd.Swizzle(vector, indices); - } - - ThrowUnreachableException(); - return default; + // For ARM and WASM, codegen will be optimal. + // We don't throw for x86/x64 so we should never use this method without + // checking for support. + return Vector128.Shuffle(vector, indices); } /// diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs index c835d267d8..817d6e6070 100644 --- a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs @@ -33,7 +33,7 @@ internal static class Vector256_ /// /// Gets a value indicating whether shuffle byte operations are supported. /// - public static bool SupportsShuffleByte + public static bool SupportsShuffleNativeByte { [MethodImpl(MethodImplOptions.AggressiveInlining)] get => Avx2.IsSupported; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index a4a7d3ed0c..49b519201f 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -5,7 +5,6 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; using System.Text; using SixLabors.ImageSharp.Common.Helpers;