From 4931372733a618bc32f382a05c7231dbafb1b3a1 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Sat, 13 Jan 2024 15:43:26 +1000 Subject: [PATCH] Port first Shuffl3 method --- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 88 ++++++++--------- .../Common/Helpers/Vector128Utilities.cs | 99 +++++++++++++++++++ 2 files changed, 142 insertions(+), 45 deletions(-) create mode 100644 src/ImageSharp/Common/Helpers/Vector128Utilities.cs diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 4732effd4a..4b9a90a952 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -8,6 +8,7 @@ using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; +using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp; @@ -95,15 +96,15 @@ internal static partial class SimdUtils /// /// Shuffle 8-bit integers - /// using the control and store the results in . + /// using the control and store the results in . /// /// The source span of bytes. - /// The destination span of bytes. + /// The destination span of bytes. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle4Reduce( ref ReadOnlySpan source, - ref Span dest, + ref Span destination, byte control) { if (Vector512.IsHardwareAccelerated || Vector256.IsHardwareAccelerated || Vector128.IsHardwareAccelerated) @@ -128,29 +129,29 @@ internal static partial class SimdUtils { Shuffle4( source[..adjustedCount], - dest[..adjustedCount], + destination[..adjustedCount], control); source = source[adjustedCount..]; - dest = dest[adjustedCount..]; + destination = destination[adjustedCount..]; } } } /// /// Shuffles 8-bit integer triplets within 128-bit lanes in - /// using the control and store the results in . + /// using the control and store the results in . /// /// The source span of bytes. - /// The destination span of bytes. + /// The destination span of bytes. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] public static void Shuffle3Reduce( ref ReadOnlySpan source, - ref Span dest, + ref Span destination, byte control) { - if (Ssse3.IsSupported) + if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsRightShift) { int remainder = source.Length % (Vector128.Count * 3); @@ -160,11 +161,11 @@ internal static partial class SimdUtils { Shuffle3( source[..adjustedCount], - dest[..adjustedCount], + destination[..adjustedCount], control); source = source[adjustedCount..]; - dest = dest[adjustedCount..]; + destination = destination[adjustedCount..]; } } } @@ -446,24 +447,21 @@ internal static partial class SimdUtils [MethodImpl(InliningOptions.ShortMethod)] private static void Shuffle3( ReadOnlySpan source, - Span dest, + Span destination, byte control) { - if (Ssse3.IsSupported) + if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsRightShift) { - Vector128 vmask = ShuffleMaskPad4Nx16(); - Vector128 vmasko = ShuffleMaskSlice4Nx16(); - Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); + Vector128 maskPad4Nx16 = ShuffleMaskPad4Nx16(); + Vector128 maskSlice4Nx16 = ShuffleMaskSlice4Nx16(); + Vector128 maskE = Vector128Utilities.AlignRight(maskSlice4Nx16, maskSlice4Nx16, 12); Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MMShuffleSpan(ref bytes, control); - Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector128 mask = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); - ref Vector128 sourceBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - - ref Vector128 destBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref Vector128 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); nuint n = source.Vector128Count(); @@ -472,36 +470,36 @@ internal static partial class SimdUtils ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); Vector128 v0 = vs; - Vector128 v1 = Unsafe.Add(ref vs, 1); - Vector128 v2 = Unsafe.Add(ref vs, 2); - Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); + Vector128 v1 = Unsafe.Add(ref vs, (nuint)1); + Vector128 v2 = Unsafe.Add(ref vs, (nuint)2); + Vector128 v3 = Vector128Utilities.ShiftRightBytesInVector(v2, 4); - v2 = Ssse3.AlignRight(v2, v1, 8); - v1 = Ssse3.AlignRight(v1, v0, 12); + v2 = Vector128Utilities.AlignRight(v2, v1, 8); + v1 = Vector128Utilities.AlignRight(v1, v0, 12); - v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle); - v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle); - v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle); - v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle); + v0 = Vector128.Shuffle(Vector128.Shuffle(v0, maskPad4Nx16), mask); + v1 = Vector128.Shuffle(Vector128.Shuffle(v1, maskPad4Nx16), mask); + v2 = Vector128.Shuffle(Vector128.Shuffle(v2, maskPad4Nx16), mask); + v3 = Vector128.Shuffle(Vector128.Shuffle(v3, maskPad4Nx16), mask); - v0 = Ssse3.Shuffle(v0, vmaske); - v1 = Ssse3.Shuffle(v1, vmasko); - v2 = Ssse3.Shuffle(v2, vmaske); - v3 = Ssse3.Shuffle(v3, vmasko); + v0 = Vector128.Shuffle(v0, maskE); + v1 = Vector128.Shuffle(v1, maskSlice4Nx16); + v2 = Vector128.Shuffle(v2, maskE); + v3 = Vector128.Shuffle(v3, maskSlice4Nx16); - v0 = Ssse3.AlignRight(v1, v0, 4); - v3 = Ssse3.AlignRight(v3, v2, 12); + v0 = Vector128Utilities.AlignRight(v1, v0, 4); + v3 = Vector128Utilities.AlignRight(v3, v2, 12); - v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); - v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + v1 = Vector128Utilities.ShiftLeftBytesInVector(v1, 4); + v2 = Vector128Utilities.ShiftRightBytesInVector(v2, 4); - v1 = Ssse3.AlignRight(v2, v1, 8); + v1 = Vector128Utilities.AlignRight(v2, v1, 8); - ref Vector128 vd = ref Unsafe.Add(ref destBase, i); + ref Vector128 vd = ref Unsafe.Add(ref destinationBase, i); vd = v0; - Unsafe.Add(ref vd, 1) = v1; - Unsafe.Add(ref vd, 2) = v3; + Unsafe.Add(ref vd, (nuint)1) = v1; + Unsafe.Add(ref vd, (nuint)2) = v3; } } } @@ -509,7 +507,7 @@ internal static partial class SimdUtils [MethodImpl(InliningOptions.ShortMethod)] private static void Pad3Shuffle4( ReadOnlySpan source, - Span dest, + Span destination, byte control) { if (Ssse3.IsSupported) @@ -525,7 +523,7 @@ internal static partial class SimdUtils ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); ref Vector128 destBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); nuint n = source.Vector128Count(); diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs new file mode 100644 index 0000000000..829362da88 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs @@ -0,0 +1,99 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; + +namespace SixLabors.ImageSharp.Common.Helpers; + +/// +/// Defines utility methods for that have not yet been normalized in the runtime. +/// Should only be used if the intrinsics are available. +/// +internal static class Vector128Utilities +{ + /// + /// Gets a value indicating whether right shift operations are supported. + /// + public static bool SupportsRightShift + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Ssse3.IsSupported || AdvSimd.IsSupported; + } + + /// + /// Shifts a 128-bit value right by a specified number of bytes while shifting in zeros. + /// + /// The value to shift. + /// The number of bytes to shift by. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 ShiftRightBytesInVector(Vector128 value, [ConstantExpected(Max = (byte)15)] byte numBytes) + { + if (Sse2.IsSupported) + { + return Sse2.ShiftRightLogical128BitLane(value, numBytes); + } + + if (AdvSimd.IsSupported) + { + return AdvSimd.ExtractVector128(value, Vector128.Zero, numBytes); + } + + ThrowUnreachableException(); + return default; + } + + /// + /// Shifts a 128-bit value left by a specified number of bytes while shifting in zeros. + /// + /// The value to shift. + /// The number of bytes to shift by. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 ShiftLeftBytesInVector(Vector128 value, [ConstantExpected(Max = (byte)15)] byte numBytes) + { + if (Sse2.IsSupported) + { + return Sse2.ShiftLeftLogical128BitLane(value, numBytes); + } + + if (AdvSimd.IsSupported) + { + return AdvSimd.ExtractVector128(Vector128.Zero, value, numBytes); + } + + ThrowUnreachableException(); + return default; + } + + /// + /// Right aligns elements of two source 128-bit values depending on bits in a mask. + /// + /// The left hand source vector. + /// The right hand source vector. + /// An 8-bit mask used for the operation. + /// The . + public static Vector128 AlignRight(Vector128 left, Vector128 right, [ConstantExpected(Max = (byte)15)] byte mask) + { + if (Sse3.IsSupported) + { + return Ssse3.AlignRight(left, right, mask); + } + + if (AdvSimd.IsSupported) + { + return AdvSimd.ExtractVector128(right, left, mask); + } + + ThrowUnreachableException(); + return default; + } + + [DoesNotReturn] + private static void ThrowUnreachableException() => throw new UnreachableException(); +}