diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index 848a917912..5ae7ac1b71 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -61,7 +61,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS @@ -73,12 +73,15 @@ namespace SixLabors.ImageSharp Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - int n = (vectors.Length * 4) / Vector256.Count; - for (int i = 0; i < n; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { - ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 source = vectorsBase; Vector256 multiply = Avx2.PermuteVar8x32(source, mask); - source = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } if (ImageMaths.Modulo2(vectors.Length) != 0) @@ -104,7 +107,7 @@ namespace SixLabors.ImageSharp /// Bulk variant of /// /// The span of vectors - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(InliningOptions.HotPath | InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { #if SUPPORTS_RUNTIME_INTRINSICS @@ -116,12 +119,15 @@ namespace SixLabors.ImageSharp Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - int n = (vectors.Length * 4) / Vector256.Count; - for (int i = 0; i < n; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { - ref Vector256 source = ref Unsafe.Add(ref vectorsBase, i); + Vector256 source = vectorsBase; Vector256 multiply = Avx2.PermuteVar8x32(source, mask); - source = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } if (ImageMaths.Modulo2(vectors.Length) != 0)