diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
index 848a917912..5ae7ac1b71 100644
--- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
@@ -61,7 +61,7 @@ namespace SixLabors.ImageSharp
/// Bulk variant of <see cref="Premultiply(ref Vector4)"/>
/// </summary>
/// <param name="vectors">The span of vectors</param>
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
public static void Premultiply(Span vectors)
{
#if SUPPORTS_RUNTIME_INTRINSICS
@@ -73,12 +73,15 @@ namespace SixLabors.ImageSharp
Vector256 mask =
Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32));
- int n = (vectors.Length * 4) / Vector256.Count;
- for (int i = 0; i < n; i++)
+ // Divide by 2: a Vector4 holds 4 floats and a Vector256<float> holds 8, so each Vector256 spans two Vector4s.
+ ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
+
+ while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
{
- ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i);
+ Vector256 source = vectorsBase;
Vector256 multiply = Avx2.PermuteVar8x32(source, mask);
- source = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl);
+ vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl);
+ vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
}
if (ImageMaths.Modulo2(vectors.Length) != 0)
@@ -104,7 +107,7 @@ namespace SixLabors.ImageSharp
/// Bulk variant of <see cref="UnPremultiply(ref Vector4)"/>
/// </summary>
/// <param name="vectors">The span of vectors</param>
- [MethodImpl(InliningOptions.ShortMethod)]
+ [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)]
public static void UnPremultiply(Span vectors)
{
#if SUPPORTS_RUNTIME_INTRINSICS
@@ -116,12 +119,15 @@ namespace SixLabors.ImageSharp
Vector256 mask =
Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32));
- int n = (vectors.Length * 4) / Vector256.Count;
- for (int i = 0; i < n; i++)
+ // Divide by 2: a Vector4 holds 4 floats and a Vector256<float> holds 8, so each Vector256 spans two Vector4s.
+ ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
+
+ while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
{
- ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i);
+ Vector256 source = vectorsBase;
Vector256 multiply = Avx2.PermuteVar8x32(source, mask);
- source = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl);
+ vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl);
+ vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
}
if (ImageMaths.Modulo2(vectors.Length) != 0)