From a96c78d60c6f0eb12684dd6ee050e557b5d9f12d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 3 Feb 2026 20:44:58 +1000 Subject: [PATCH] Don't use Vector512 (SLOW) --- README.md | 2 +- src/ImageSharp/Common/Helpers/Numerics.cs | 34 +----- .../Transforms/Resize/ResizeKernel.cs | 110 +++++------------- 3 files changed, 30 insertions(+), 116 deletions(-) diff --git a/README.md b/README.md index cf58b6b14..dc3073479 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ SixLabors.ImageSharp
[![Build Status](https://img.shields.io/github/actions/workflow/status/SixLabors/ImageSharp/build-and-test.yml?branch=main)](https://github.com/SixLabors/ImageSharp/actions) -[![Code coverage](https://codecov.io/gh/SixLabors/ImageSharp/branch/main/graph/badge.svg)](https://codecov.io/gh/SixLabors/ImageSharp) +[![codecov](https://codecov.io/gh/SixLabors/ImageSharp/graph/badge.svg?token=g2WJwz770q)](https://codecov.io/gh/SixLabors/ImageSharp) [![License: Six Labors Split](https://img.shields.io/badge/license-Six%20Labors%20Split-%23e30183)](https://github.com/SixLabors/ImageSharp/blob/main/LICENSE) [![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=flat&logo=twitter)](https://twitter.com/intent/tweet?hashtags=imagesharp,dotnet,oss&text=ImageSharp.+A+new+cross-platform+2D+graphics+API+in+C%23&url=https%3a%2f%2fgithub.com%2fSixLabors%2fImageSharp&via=sixlabors) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 1914c59d3..efe68977b 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -1089,39 +1089,7 @@ internal static class Numerics [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Normalize(Span span, float sum) { - if (Vector512.IsHardwareAccelerated) - { - ref float startRef = ref MemoryMarshal.GetReference(span); - ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~15); - Vector512 sum512 = Vector512.Create(sum); - - while (Unsafe.IsAddressLessThan(ref startRef, ref endRef)) - { - Unsafe.As>(ref startRef) /= sum512; - startRef = ref Unsafe.Add(ref startRef, (nuint)16); - } - - if ((span.Length & 15) >= 8) - { - Unsafe.As>(ref startRef) /= sum512.GetLower(); - startRef = ref Unsafe.Add(ref startRef, (nuint)8); - } - - if ((span.Length & 7) >= 4) - { - Unsafe.As>(ref startRef) /= sum512.GetLower().GetLower(); - startRef = ref Unsafe.Add(ref startRef, (nuint)4); - } - - endRef = ref Unsafe.Add(ref startRef, span.Length & 3); - - while (Unsafe.IsAddressLessThan(ref startRef, ref endRef)) - { - startRef /= sum; - startRef = ref Unsafe.Add(ref startRef, (nuint)1); - } - } - else if (Vector256.IsHardwareAccelerated) + if (Vector256.IsHardwareAccelerated) { ref float startRef = ref MemoryMarshal.GetReference(span); ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~7); diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs index e94c6cd37..a85487d1c 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs @@ -94,97 +94,43 @@ internal readonly unsafe struct ResizeKernel { if (IsHardwareAccelerated) { - if (Vector512.IsHardwareAccelerated) - { - float* bufferStart = this.bufferPtr; - ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~7); - Vector512 result512_0 = Vector512.Zero; - Vector512 result512_1 = Vector512.Zero; - - while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) - { - Vector512 pixels512_0 = Unsafe.As>(ref rowStartRef); - Vector512 pixels512_1 = Unsafe.As>(ref Unsafe.Add(ref rowStartRef, (nuint)4)); - - result512_0 = Vector512_.MultiplyAdd(result512_0, Vector512.Load(bufferStart), pixels512_0); - result512_1 = Vector512_.MultiplyAdd(result512_1, Vector512.Load(bufferStart + 16), pixels512_1); - - bufferStart += 32; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)8); - } - - result512_0 += result512_1; - - if ((this.Length & 7) >= 4) - { - Vector512 pixels512_0 = Unsafe.As>(ref rowStartRef); - result512_0 = Vector512_.MultiplyAdd(result512_0, Vector512.Load(bufferStart), pixels512_0); - - bufferStart += 16; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); - } - - Vector256 result256 = result512_0.GetLower() + result512_0.GetUpper(); - - if ((this.Length & 3) >= 2) - { - Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); - result256 = Vector256_.MultiplyAdd(result256, Vector256.Load(bufferStart), pixels256_0); - - bufferStart += 8; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); - } - - Vector128 result128 = result256.GetLower() + result256.GetUpper(); - - if ((this.Length & 1) != 0) - { - Vector128 pixels128 = Unsafe.As>(ref rowStartRef); - result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128); - } + float* bufferStart = this.bufferPtr; + ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3); + Vector256 result256_0 = Vector256.Zero; + Vector256 result256_1 = Vector256.Zero; - return result128.AsVector4(); - } - else + while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) { - float* bufferStart = this.bufferPtr; - ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3); - Vector256 result256_0 = Vector256.Zero; - Vector256 result256_1 = Vector256.Zero; + Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); + Vector256 pixels256_1 = Unsafe.As>(ref Unsafe.Add(ref rowStartRef, (nuint)2)); - while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) - { - Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); - Vector256 pixels256_1 = Unsafe.As>(ref Unsafe.Add(ref rowStartRef, (nuint)2)); + result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0); + result256_1 = Vector256_.MultiplyAdd(result256_1, Vector256.Load(bufferStart + 8), pixels256_1); - result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0); - result256_1 = Vector256_.MultiplyAdd(result256_1, Vector256.Load(bufferStart + 8), pixels256_1); - - bufferStart += 16; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); - } - - result256_0 += result256_1; + bufferStart += 16; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); + } - if ((this.Length & 3) >= 2) - { - Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); - result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0); + result256_0 += result256_1; - bufferStart += 8; - rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); - } + if ((this.Length & 3) >= 2) + { + Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); + result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0); - Vector128 result128 = result256_0.GetLower() + result256_0.GetUpper(); + bufferStart += 8; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); + } - if ((this.Length & 1) != 0) - { - Vector128 pixels128 = Unsafe.As>(ref rowStartRef); - result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128); - } + Vector128 result128 = result256_0.GetLower() + result256_0.GetUpper(); - return result128.AsVector4(); + if ((this.Length & 1) != 0) + { + Vector128 pixels128 = Unsafe.As>(ref rowStartRef); + result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128); } + + return result128.AsVector4(); } else { @@ -219,7 +165,7 @@ internal readonly unsafe struct ResizeKernel { DebugGuard.IsTrue(values.Length == this.Length, nameof(values), "ResizeKernel.Fill: values.Length != this.Length!"); - if (Vector256.IsHardwareAccelerated) + if (IsHardwareAccelerated) { Vector4* bufferStart = (Vector4*)this.bufferPtr; ref float valuesStart = ref MemoryMarshal.GetReference(values);