From 4728b97d85b7ad66d3d7975942ed9b99af3ad2fd Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 15 Aug 2024 16:44:41 +1000 Subject: [PATCH] Use dedicated property --- .../Transforms/Resize/ResizeKernel.cs | 150 ++++++++++-------- .../Transforms/ResizeKernelMapTests.cs | 3 +- 2 files changed, 84 insertions(+), 69 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs index 41afec892c..7a70caa3c6 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs @@ -23,6 +23,9 @@ internal readonly unsafe struct ResizeKernel /// /// Initializes a new instance of the struct. /// + /// The starting index for the destination row. + /// The pointer to the buffer with the convolution factors. + /// The length of the kernel. [MethodImpl(InliningOptions.ShortMethod)] internal ResizeKernel(int startIndex, float* bufferPtr, int length) { @@ -31,6 +34,15 @@ internal readonly unsafe struct ResizeKernel this.Length = length; } + /// + /// Gets a value indicating whether vectorization is supported. + /// + public static bool SupportsVectorization + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Vector256.IsHardwareAccelerated; + } + /// /// Gets the start index for the destination row. /// @@ -80,96 +92,99 @@ internal readonly unsafe struct ResizeKernel [MethodImpl(InliningOptions.ShortMethod)] public Vector4 ConvolveCore(ref Vector4 rowStartRef) { - if (Vector512.IsHardwareAccelerated) + if (SupportsVectorization) { - float* bufferStart = this.bufferPtr; - ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~7); - Vector512 result512_0 = Vector512.Zero; - Vector512 result512_1 = Vector512.Zero; - - while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) + if (Vector512.IsHardwareAccelerated) { - Vector512 pixels512_0 = Unsafe.As>(ref rowStartRef); - Vector512 pixels512_1 = Unsafe.As>(ref Unsafe.Add(ref rowStartRef, (nuint)4)); + float* bufferStart = this.bufferPtr; + ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~7); + Vector512 result512_0 = Vector512.Zero; + Vector512 result512_1 = Vector512.Zero; - result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); - result512_1 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart + 16), pixels512_1, result512_1); + while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) + { + Vector512 pixels512_0 = Unsafe.As>(ref rowStartRef); + Vector512 pixels512_1 = Unsafe.As>(ref Unsafe.Add(ref rowStartRef, (nuint)4)); - bufferStart += 32; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)8); - } + result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); + result512_1 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart + 16), pixels512_1, result512_1); - result512_0 += result512_1; + bufferStart += 32; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)8); + } - if ((this.Length & 7) >= 4) - { - Vector512 pixels512_0 = Unsafe.As>(ref rowStartRef); - result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); + result512_0 += result512_1; - bufferStart += 16; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); - } + if ((this.Length & 7) >= 4) + { + Vector512 pixels512_0 = Unsafe.As>(ref rowStartRef); + result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); - Vector256 result256 = result512_0.GetLower() + result512_0.GetUpper(); + bufferStart += 16; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); + } - if ((this.Length & 3) >= 2) - { - Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); - result256 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256); + Vector256 result256 = result512_0.GetLower() + result512_0.GetUpper(); - bufferStart += 8; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); - } + if ((this.Length & 3) >= 2) + { + Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); + result256 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256); - Vector128 result128 = result256.GetLower() + result256.GetUpper(); + bufferStart += 8; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); + } - if ((this.Length & 1) != 0) - { - Vector128 pixels128 = Unsafe.As>(ref rowStartRef); - result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); - } + Vector128 result128 = result256.GetLower() + result256.GetUpper(); - return *(Vector4*)&result128; - } - else if (Vector256.IsHardwareAccelerated) - { - float* bufferStart = this.bufferPtr; - ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3); - Vector256 result256_0 = Vector256.Zero; - Vector256 result256_1 = Vector256.Zero; + if ((this.Length & 1) != 0) + { + Vector128 pixels128 = Unsafe.As>(ref rowStartRef); + result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); + } - while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) + return *(Vector4*)&result128; + } + else { - Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); - Vector256 pixels256_1 = Unsafe.As>(ref Unsafe.Add(ref rowStartRef, (nuint)2)); + float* bufferStart = this.bufferPtr; + ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3); + Vector256 result256_0 = Vector256.Zero; + Vector256 result256_1 = Vector256.Zero; - result256_0 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); - result256_1 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart + 8), pixels256_1, result256_1); + while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) + { + Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); + Vector256 pixels256_1 = Unsafe.As>(ref Unsafe.Add(ref rowStartRef, (nuint)2)); - bufferStart += 16; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); - } + result256_0 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); + result256_1 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart + 8), pixels256_1, result256_1); - result256_0 += result256_1; + bufferStart += 16; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); + } - if ((this.Length & 3) >= 2) - { - Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); - result256_0 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); + result256_0 += result256_1; - bufferStart += 8; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); - } + if ((this.Length & 3) >= 2) + { + Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); + result256_0 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); - Vector128 result128 = result256_0.GetLower() + result256_0.GetUpper(); + bufferStart += 8; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); + } - if ((this.Length & 1) != 0) - { - Vector128 pixels128 = Unsafe.As>(ref rowStartRef); - result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); - } + Vector128 result128 = result256_0.GetLower() + result256_0.GetUpper(); + + if ((this.Length & 1) != 0) + { + Vector128 pixels128 = Unsafe.As>(ref rowStartRef); + result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); + } - return *(Vector4*)&result128; + return *(Vector4*)&result128; + } } else { @@ -195,6 +210,7 @@ internal readonly unsafe struct ResizeKernel /// Copy the contents of altering /// to the value . /// + /// The new value for . [MethodImpl(InliningOptions.ShortMethod)] internal ResizeKernel AlterLeftValue(int left) => new(left, this.bufferPtr, this.Length); diff --git a/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs b/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs index 337f8c75dc..6d0de65c42 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Transforms/ResizeKernelMapTests.cs @@ -1,7 +1,6 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. -using System.Runtime.Intrinsics; using System.Text; using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing.Processors.Transforms; @@ -142,7 +141,7 @@ public partial class ResizeKernelMapTests Span actualValues; ApproximateFloatComparer comparer; - if (Vector256.IsHardwareAccelerated) + if (ResizeKernel.SupportsVectorization) { comparer = new ApproximateFloatComparer(1e-4f);