diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs
index cf1910121..c2d97efa0 100644
--- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs
+++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs
@@ -3,6 +3,10 @@
//
using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders;
@@ -43,18 +47,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with NormalSrc at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path: blend two elements per iteration, each at its own amount
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>: one Vector256<float> spans two Vector4 values.
+ ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
+ ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ // Lower half takes the current element's clamped amount, upper half the next one's; broadcasting a single amount would misblend every second element.
+ Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));
+
+ destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 2); // two amounts are consumed per Vector256
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element to blend on its own.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1));
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
}
}
}
@@ -81,18 +146,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with MultiplySrc at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path: blend two elements per iteration, each at its own amount
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>: one Vector256<float> spans two Vector4 values.
+ ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
+ ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ // Lower half takes the current element's clamped amount, upper half the next one's; broadcasting a single amount would misblend every second element.
+ Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));
+
+ destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 2); // two amounts are consumed per Vector256
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element to blend on its own.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1));
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
}
}
}
@@ -119,18 +245,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with AddSrc at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path: blend two elements per iteration, each at its own amount
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>: one Vector256<float> spans two Vector4 values.
+ ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
+ ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ // Lower half takes the current element's clamped amount, upper half the next one's; broadcasting a single amount would misblend every second element.
+ Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));
+
+ destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 2); // two amounts are consumed per Vector256
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element to blend on its own.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1));
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
}
}
}
@@ -157,18 +344,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with SubtractSrc at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path: blend two elements per iteration, each at its own amount
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>: one Vector256<float> spans two Vector4 values.
+ ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
+ ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ // Lower half takes the current element's clamped amount, upper half the next one's; broadcasting a single amount would misblend every second element.
+ Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));
+
+ destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 2); // two amounts are consumed per Vector256
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element to blend on its own.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1));
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
}
}
}
@@ -195,18 +443,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with ScreenSrc at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path: blend two elements per iteration, each at its own amount
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>: one Vector256<float> spans two Vector4 values.
+ ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
+ ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ // Lower half takes the current element's clamped amount, upper half the next one's; broadcasting a single amount would misblend every second element.
+ Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));
+
+ destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 2); // two amounts are consumed per Vector256
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element to blend on its own.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1));
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
}
}
}
@@ -233,18 +542,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with DarkenSrc at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path: blend two elements per iteration, each at its own amount
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>: one Vector256<float> spans two Vector4 values.
+ ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
+ ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ // Lower half takes the current element's clamped amount, upper half the next one's; broadcasting a single amount would misblend every second element.
+ Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));
+
+ destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 2); // two amounts are consumed per Vector256
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element to blend on its own.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1));
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
}
}
}
@@ -271,18 +641,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with LightenSrc at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path: blend two elements per iteration, each at its own amount
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>: one Vector256<float> spans two Vector4 values.
+ ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
+ ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ // Lower half takes the current element's clamped amount, upper half the next one's; broadcasting a single amount would misblend every second element.
+ Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));
+
+ destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 2); // two amounts are consumed per Vector256
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element to blend on its own.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1));
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
}
}
}
@@ -309,18 +740,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with OverlaySrc at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path: blend two elements per iteration, each at its own amount
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>: one Vector256<float> spans two Vector4 values.
+ ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
+ ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ // Lower half takes the current element's clamped amount, upper half the next one's; broadcasting a single amount would misblend every second element.
+ Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));
+
+ destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 2); // two amounts are consumed per Vector256
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element to blend on its own.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1));
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
}
}
}
@@ -347,18 +839,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with HardLightSrc at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path: blend two elements per iteration, each at its own amount
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>: one Vector256<float> spans two Vector4 values.
+ ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
+ ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
+ ref float amountBase = ref MemoryMarshal.GetReference(amount);
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ // Lower half takes the current element's clamped amount, upper half the next one's; broadcasting a single amount would misblend every second element.
+ Vector256<float> opacity = Vector256.Create(Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)), Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));
+
+ destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1);
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ amountBase = ref Unsafe.Add(ref amountBase, 2); // two amounts are consumed per Vector256
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element to blend on its own.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1));
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
+ }
}
}
}
@@ -385,18 +938,79 @@ internal static class DefaultPixelBlenders
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, float amount)
{
amount = Numerics.Clamp(amount, 0, 1);
- for (int i = 0; i < destination.Length; i++)
+ // Blend every background/source pair into destination with NormalSrcAtop at the single clamped amount.
+ if (Avx2.IsSupported && destination.Length >= 2) // AVX2 path needs at least one full pair of elements
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256: one Vector256 spans two Vector4 values.
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u)); // one past the last full pair
+
+ ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background));
+ ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source));
+ Vector256 opacity = Vector256.Create(amount); // same amount in every lane
+
+ while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
+ {
+ destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity);
+ destinationBase = ref Unsafe.Add(ref destinationBase, 1); // advance all three cursors by one Vector256
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
+ sourceBase = ref Unsafe.Add(ref sourceBase, 1);
+ }
+
+ if (Numerics.Modulo2(destination.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs, so an odd length leaves exactly one trailing element for the scalar overload.
+ int i = destination.Length - 1;
+ destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount);
+ }
+ }
+ else // scalar fallback: AVX2 unavailable or fewer than 2 elements
{
- destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount);
+ for (int i = 0; i < destination.Length; i++)
+ {
+ destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount);
+ }
}
}
/// <inheritdoc />
protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount)
{
- for (int i = 0; i < destination.Length; i++)
+ if (Avx2.IsSupported && destination.Length >= 2)
+ {
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256
+ ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination));
+ ref Vector256