|
|
|
@ -13,6 +13,10 @@ |
|
|
|
|
|
|
|
// <auto-generated /> |
|
|
|
using System.Numerics; |
|
|
|
using System.Runtime.CompilerServices; |
|
|
|
using System.Runtime.InteropServices; |
|
|
|
using System.Runtime.Intrinsics; |
|
|
|
using System.Runtime.Intrinsics.X86; |
|
|
|
|
|
|
|
namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; |
|
|
|
|
|
|
|
@ -86,18 +90,79 @@ var blenders = new []{ |
|
|
|
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, float amount)
{
    // Blends every background/source pair into destination using one opacity
    // shared by all elements. The opacity is clamped to [0, 1] once, up front.
    amount = Numerics.Clamp(amount, 0, 1);

    if (Avx2.IsSupported && destination.Length >= 2)
    {
        // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
        ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));

        // One past the last complete pair of Vector4 values; used as the loop sentinel.
        ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u));

        ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
        ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));

        // The same opacity applies to every lane, so it is broadcast once outside the loop.
        Vector256<float> opacity = Vector256.Create(amount);

        while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
        {
            destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity);
            destinationBase = ref Unsafe.Add(ref destinationBase, 1);
            backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
            sourceBase = ref Unsafe.Add(ref sourceBase, 1);
        }

        if (Numerics.Modulo2(destination.Length) != 0)
        {
            // Vector4 values are consumed in pairs, so an odd length leaves exactly
            // one trailing element, which is blended with the scalar overload.
            int i = destination.Length - 1;
            destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount);
        }
    }
    else
    {
        // Scalar fallback: no AVX2, or fewer than two elements to process.
        for (int i = 0; i < destination.Length; i++)
        {
            destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount);
        }
    }
}
|
|
|
|
|
|
|
/// <inheritdoc /> |
|
|
|
protected override void BlendFunction(Span<Vector4> destination, ReadOnlySpan<Vector4> background, ReadOnlySpan<Vector4> source, ReadOnlySpan<float> amount)
{
    // Blends every background/source pair into destination, using the opacity at
    // the same index in amount. Each opacity is clamped to [0, 1] before use.
    if (Avx2.IsSupported && destination.Length >= 2)
    {
        // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
        ref Vector256<float> destinationBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(destination));

        // One past the last complete pair of Vector4 values; used as the loop sentinel.
        ref Vector256<float> destinationLast = ref Unsafe.Add(ref destinationBase, (IntPtr)((uint)destination.Length / 2u));

        ref Vector256<float> backgroundBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(background));
        ref Vector256<float> sourceBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(source));
        ref float amountBase = ref MemoryMarshal.GetReference(amount);

        while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast))
        {
            // Each Vector256<float> carries two Vector4 values, so two opacities are
            // needed per step: the current one fills the lower 128 bits and the next
            // one fills the upper 128 bits.
            // TODO: It would be better if we can clamp this outside of the loop using our SIMD methods.
            Vector256<float> opacity = Vector256.Create(
                Vector128.Create(Numerics.Clamp(amountBase, 0, 1F)),
                Vector128.Create(Numerics.Clamp(Unsafe.Add(ref amountBase, 1), 0, 1F)));

            destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity);
            destinationBase = ref Unsafe.Add(ref destinationBase, 1);
            backgroundBase = ref Unsafe.Add(ref backgroundBase, 1);
            sourceBase = ref Unsafe.Add(ref sourceBase, 1);

            // Two opacities were consumed, so step by two to stay aligned with the Vector4 index.
            amountBase = ref Unsafe.Add(ref amountBase, 2);
        }

        if (Numerics.Modulo2(destination.Length) != 0)
        {
            // Vector4 values are consumed in pairs, so an odd length leaves exactly
            // one trailing element, which is blended with the scalar overload.
            int i = destination.Length - 1;
            destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
        }
    }
    else
    {
        // Scalar fallback: no AVX2, or fewer than two elements to process.
        for (int i = 0; i < destination.Length; i++)
        {
            destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F));
        }
    }
}
|
|
|
} |
|
|
|
|