diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index ced2be2e0c..e8f50b3eeb 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -1106,7 +1106,39 @@ internal static class Numerics [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Normalize(Span span, float sum) { - if (Vector256.IsHardwareAccelerated) + if (Vector512.IsHardwareAccelerated) + { + ref float startRef = ref MemoryMarshal.GetReference(span); + ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~15); + Vector512 sum512 = Vector512.Create(sum); + + while (Unsafe.IsAddressLessThan(ref startRef, ref endRef)) + { + Unsafe.As>(ref startRef) /= sum512; + startRef = ref Unsafe.Add(ref startRef, (nuint)16); + } + + if ((span.Length & 15) >= 8) + { + Unsafe.As>(ref startRef) /= sum512.GetLower(); + startRef = ref Unsafe.Add(ref startRef, (nuint)8); + } + + if ((span.Length & 7) >= 4) + { + Unsafe.As>(ref startRef) /= sum512.GetLower().GetLower(); + startRef = ref Unsafe.Add(ref startRef, (nuint)4); + } + + endRef = ref Unsafe.Add(ref startRef, span.Length & 3); + + while (Unsafe.IsAddressLessThan(ref startRef, ref endRef)) + { + startRef /= sum; + startRef = ref Unsafe.Add(ref startRef, (nuint)1); + } + } + else if (Vector256.IsHardwareAccelerated) { ref float startRef = ref MemoryMarshal.GetReference(span); ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~7); diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs index 009c6e9581..07cfe02850 100644 --- a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs @@ -273,7 +273,7 @@ internal static class Vector128Utilities /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 MultiplyAdd(Vector128 a, Vector128 b, Vector128 c) + public static Vector128 
MultiplyAddEstimate(Vector128 a, Vector128 b, Vector128 c) { if (Fma.IsSupported) { diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs index 754d6dcb8b..082e4683b0 100644 --- a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs @@ -138,7 +138,7 @@ internal static class Vector256Utilities /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 MultiplyAdd(Vector256 a, Vector256 b, Vector256 c) + public static Vector256 MultiplyAddEstimate(Vector256 a, Vector256 b, Vector256 c) { if (Fma.IsSupported) { diff --git a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs index 0165af90ef..3325ad1aeb 100644 --- a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs @@ -110,6 +110,38 @@ internal static class Vector512Utilities return Vector512.ConvertToInt32(val_2p23_f32 | sign); } + /// + /// Performs a multiply-add operation on three vectors, where each element of the resulting vector is the + /// product of corresponding elements in and added to the + /// corresponding element in . + /// If the CPU supports FMA (Fused Multiply-Add) instructions, the operation is performed as a single + /// fused operation for better performance and precision. + /// + /// The first vector of single-precision floating-point numbers to be multiplied. + /// The second vector of single-precision floating-point numbers to be multiplied. + /// The vector of single-precision floating-point numbers to be added to the product of + /// and . + /// + /// A where each element is the result of multiplying the corresponding elements + /// of and , and then adding the corresponding element from . 
+ /// + /// + /// If the FMA (Fused Multiply-Add) instruction set is supported by the CPU, the operation is performed using + /// against the upper and lower + /// halves. This approach can result in slightly different results compared to performing the multiplication and + /// addition separately due to differences in how floating-point rounding is handled. + /// + /// If FMA is not supported, the operation is performed as a separate multiplication and addition. This might lead + /// to a minor difference in precision compared to the fused operation, particularly in cases where numerical accuracy + /// is critical. + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyAddEstimate(Vector512 a, Vector512 b, Vector512 c) + => Vector512.Create( + Vector256Utilities.MultiplyAddEstimate(a.GetLower(), b.GetLower(), c.GetLower()), + Vector256Utilities.MultiplyAddEstimate(a.GetUpper(), b.GetUpper(), c.GetUpper())); + [DoesNotReturn] private static void ThrowUnreachableException() => throw new UnreachableException(); } diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs index 3545bae3f7..41afec892c 100644 --- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs +++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs @@ -80,7 +80,58 @@ internal readonly unsafe struct ResizeKernel [MethodImpl(InliningOptions.ShortMethod)] public Vector4 ConvolveCore(ref Vector4 rowStartRef) { - if (Vector256.IsHardwareAccelerated) + if (Vector512.IsHardwareAccelerated) + { + float* bufferStart = this.bufferPtr; + ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~7); + Vector512 result512_0 = Vector512.Zero; + Vector512 result512_1 = Vector512.Zero; + + while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) + { + Vector512 pixels512_0 = Unsafe.As>(ref rowStartRef); + Vector512 
pixels512_1 = Unsafe.As>(ref Unsafe.Add(ref rowStartRef, (nuint)4)); + + result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); + result512_1 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart + 16), pixels512_1, result512_1); + + bufferStart += 32; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)8); + } + + result512_0 += result512_1; + + if ((this.Length & 7) >= 4) + { + Vector512 pixels512_0 = Unsafe.As>(ref rowStartRef); + result512_0 = Vector512Utilities.MultiplyAddEstimate(Vector512.Load(bufferStart), pixels512_0, result512_0); + + bufferStart += 16; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); + } + + Vector256 result256 = result512_0.GetLower() + result512_0.GetUpper(); + + if ((this.Length & 3) >= 2) + { + Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); + result256 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256); + + bufferStart += 8; + rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); + } + + Vector128 result128 = result256.GetLower() + result256.GetUpper(); + + if ((this.Length & 1) != 0) + { + Vector128 pixels128 = Unsafe.As>(ref rowStartRef); + result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); + } + + return *(Vector4*)&result128; + } + else if (Vector256.IsHardwareAccelerated) { float* bufferStart = this.bufferPtr; ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3); @@ -92,8 +143,8 @@ internal readonly unsafe struct ResizeKernel Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); Vector256 pixels256_1 = Unsafe.As>(ref Unsafe.Add(ref rowStartRef, (nuint)2)); - result256_0 = Vector256Utilities.MultiplyAdd(Vector256.Load(bufferStart), pixels256_0, result256_0); - result256_1 = Vector256Utilities.MultiplyAdd(Vector256.Load(bufferStart + 8), pixels256_1, result256_1); + result256_0 = 
Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); + result256_1 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart + 8), pixels256_1, result256_1); bufferStart += 16; rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4); @@ -104,7 +155,7 @@ internal readonly unsafe struct ResizeKernel if ((this.Length & 3) >= 2) { Vector256 pixels256_0 = Unsafe.As>(ref rowStartRef); - result256_0 = Vector256Utilities.MultiplyAdd(Vector256.Load(bufferStart), pixels256_0, result256_0); + result256_0 = Vector256Utilities.MultiplyAddEstimate(Vector256.Load(bufferStart), pixels256_0, result256_0); bufferStart += 8; rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); @@ -115,7 +166,7 @@ internal readonly unsafe struct ResizeKernel if ((this.Length & 1) != 0) { Vector128 pixels128 = Unsafe.As>(ref rowStartRef); - result128 = Vector128Utilities.MultiplyAdd(Vector128.Load(bufferStart), pixels128, result128); + result128 = Vector128Utilities.MultiplyAddEstimate(Vector128.Load(bufferStart), pixels128, result128); } return *(Vector4*)&result128; @@ -170,7 +221,7 @@ internal readonly unsafe struct ResizeKernel { for (int i = 0; i < this.Length; i++) { - this.Values[i] = (float)values[i]; + this.Values[i] = values[i]; } } }