diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs
index 977432f8b..d24230fe1 100644
--- a/src/ImageSharp/Common/Helpers/ImageMaths.cs
+++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs
@@ -132,6 +132,12 @@ namespace SixLabors.ImageSharp
return (a / GreatestCommonDivisor(a, b)) * b;
}
+ /// <summary>
+ /// Calculates <paramref name="x"/> % 2 using a bit mask; the result is always 0 or 1 (a negative odd input yields 1, where the % operator would yield -1).
+ /// </summary>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static int Modulo2(int x) => x & 1;
+
///
/// Calculates % 4
///
diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
index fccc50755..848a91791 100644
--- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
@@ -5,6 +5,10 @@ using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
namespace SixLabors.ImageSharp
{
@@ -13,6 +17,10 @@ namespace SixLabors.ImageSharp
///
internal static class Vector4Utilities
{
+ private const int BlendAlphaControl = 0b10001000; // bits 3 and 7 set: lanes 3 and 7 (the W of each packed Vector4) are taken from the second Blend operand
+
+ private static ReadOnlySpan<byte> PermuteAlphaMask8x32 => new byte[] { 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0 }; // eight little-endian Int32 lane indices: 3, 3, 3, 3, 7, 7, 7, 7
+
///
/// Restricts a vector between a minimum and a maximum value.
/// 5x Faster then .
@@ -56,13 +64,39 @@ namespace SixLabors.ImageSharp
[MethodImpl(InliningOptions.ShortMethod)]
public static void Premultiply(Span<Vector4> vectors)
{
- // TODO: This method can be AVX2 optimized using Vector
- ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported && vectors.Length >= 2)
+ {
+ ref Vector256<float> vectorsBase =
+ ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
- for (int i = 0; i < vectors.Length; i++)
+ Vector256<int> mask =
+ Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32));
+
+ int n = (vectors.Length * 4) / Vector256<float>.Count; // number of whole Vector256<float> blocks, two Vector4 values each
+ for (int i = 0; i < n; i++)
+ {
+ ref Vector256<float> source = ref Unsafe.Add(ref vectorsBase, i);
+ Vector256<float> multiply = Avx2.PermuteVar8x32(source, mask); // each lane receives the W of its own Vector4
+ source = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); // scale by W, then keep the original W lanes
+ }
+
+ if (ImageMaths.Modulo2(vectors.Length) != 0)
+ {
+ // An odd length leaves exactly one trailing Vector4; handle it with the single-vector overload.
+ Premultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
+ }
+ }
+ else
+#endif
{
- ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
- Premultiply(ref v);
+ ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+
+ for (int i = 0; i < vectors.Length; i++)
+ {
+ ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
+ Premultiply(ref v);
+ }
}
}
@@ -73,13 +107,39 @@ namespace SixLabors.ImageSharp
[MethodImpl(InliningOptions.ShortMethod)]
public static void UnPremultiply(Span<Vector4> vectors)
{
- // TODO: This method can be AVX2 optimized using Vector
- ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Avx2.IsSupported && vectors.Length >= 2)
+ {
+ ref Vector256<float> vectorsBase =
+ ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
- for (int i = 0; i < vectors.Length; i++)
+ Vector256<int> mask =
+ Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32));
+
+ int n = (vectors.Length * 4) / Vector256<float>.Count; // number of whole Vector256<float> blocks, two Vector4 values each
+ for (int i = 0; i < n; i++)
+ {
+ ref Vector256<float> source = ref Unsafe.Add(ref vectorsBase, i);
+ Vector256<float> multiply = Avx2.PermuteVar8x32(source, mask); // each lane receives the W of its own Vector4
+ source = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); // divide by W, then keep the original W lanes
+ }
+
+ if (ImageMaths.Modulo2(vectors.Length) != 0)
+ {
+ // An odd length leaves exactly one trailing Vector4; handle it with the single-vector overload.
+ UnPremultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
+ }
+ }
+ else
+#endif
{
- ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
- UnPremultiply(ref v);
+ ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+
+ for (int i = 0; i < vectors.Length; i++)
+ {
+ ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
+ UnPremultiply(ref v);
+ }
}
}
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs
new file mode 100644
index 000000000..2a886c687
--- /dev/null
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs
@@ -0,0 +1,68 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using BenchmarkDotNet.Attributes;
+
+namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
+{
+    /// <summary>
+    /// Benchmarks the bulk <c>Vector4Utilities.Premultiply</c> against a scalar loop; both mutate the same shared input array in place.
+    /// </summary>
+    [Config(typeof(Config.ShortCore31))]
+    public class PremultiplyVector4
+    {
+        private static readonly Vector4[] Vectors = CreateVectors();
+
+        /// <summary>
+        /// Baseline: premultiplies one <see cref="Vector4"/> at a time.
+        /// </summary>
+        [Benchmark(Baseline = true)]
+        public void PremultiplyBaseline()
+        {
+            // The type argument must be explicit: T cannot be inferred from an array argument.
+            ref Vector4 baseRef = ref MemoryMarshal.GetReference<Vector4>(Vectors);
+
+            for (int i = 0; i < Vectors.Length; i++)
+            {
+                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
+                Premultiply(ref v);
+            }
+        }
+
+        [Benchmark]
+        public void Premultiply() => Vector4Utilities.Premultiply(Vectors);
+
+        [MethodImpl(InliningOptions.ShortMethod)]
+        private static void Premultiply(ref Vector4 source)
+        {
+            float w = source.W;
+            source *= w; // scales all four components, W included
+            source.W = w; // restore the original alpha
+        }
+
+        private static Vector4[] CreateVectors() => GenerateRandomVectorArray(new Random(42), 2048, 0, 1); // fixed seed
+
+        private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal)
+        {
+            var values = new Vector4[length];
+
+            for (int i = 0; i < length; i++)
+            {
+                ref Vector4 v = ref values[i];
+                v.X = GetRandomFloat(rnd, minVal, maxVal);
+                v.Y = GetRandomFloat(rnd, minVal, maxVal);
+                v.Z = GetRandomFloat(rnd, minVal, maxVal);
+                v.W = GetRandomFloat(rnd, minVal, maxVal);
+            }
+
+            return values;
+        }
+
+        private static float GetRandomFloat(Random rnd, float minVal, float maxVal)
+            => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
+    }
+}
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs
new file mode 100644
index 000000000..89e055da4
--- /dev/null
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs
@@ -0,0 +1,68 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using BenchmarkDotNet.Attributes;
+
+namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
+{
+    /// <summary>
+    /// Benchmarks the bulk <c>Vector4Utilities.UnPremultiply</c> against a scalar loop; both mutate the same shared input array in place.
+    /// </summary>
+    [Config(typeof(Config.ShortCore31))]
+    public class UnPremultiplyVector4
+    {
+        private static readonly Vector4[] Vectors = CreateVectors();
+
+        /// <summary>
+        /// Baseline: reverses premultiplication one <see cref="Vector4"/> at a time.
+        /// </summary>
+        [Benchmark(Baseline = true)]
+        public void UnPremultiplyBaseline()
+        {
+            // The type argument must be explicit: T cannot be inferred from an array argument.
+            ref Vector4 baseRef = ref MemoryMarshal.GetReference<Vector4>(Vectors);
+
+            for (int i = 0; i < Vectors.Length; i++)
+            {
+                ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
+                UnPremultiply(ref v);
+            }
+        }
+
+        [Benchmark]
+        public void UnPremultiply() => Vector4Utilities.UnPremultiply(Vectors);
+
+        [MethodImpl(InliningOptions.ShortMethod)]
+        private static void UnPremultiply(ref Vector4 source)
+        {
+            float w = source.W;
+            source /= w; // inverse of premultiplication: divide every component by alpha
+            source.W = w; // restore the original alpha
+        }
+
+        private static Vector4[] CreateVectors() => GenerateRandomVectorArray(new Random(42), 2048, 0, 1); // fixed seed
+
+        private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal)
+        {
+            var values = new Vector4[length];
+
+            for (int i = 0; i < length; i++)
+            {
+                ref Vector4 v = ref values[i];
+                v.X = GetRandomFloat(rnd, minVal, maxVal);
+                v.Y = GetRandomFloat(rnd, minVal, maxVal);
+                v.Z = GetRandomFloat(rnd, minVal, maxVal);
+                v.W = GetRandomFloat(rnd, minVal, maxVal);
+            }
+
+            return values;
+        }
+
+        private static float GetRandomFloat(Random rnd, float minVal, float maxVal)
+            => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
+    }
+}
diff --git a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs
index 27689f681..7d1662387 100644
--- a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs
+++ b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs
@@ -10,6 +10,21 @@ namespace SixLabors.ImageSharp.Tests.Helpers
{
public class ImageMathsTests
{
+ // Checks the bit-mask helper against the % operator. Every case is non-negative:
+ // ImageMaths.Modulo2 is implemented as x & 1, which for a negative odd x gives 1
+ // whereas x % 2 gives -1, so negative inputs would not compare equal here.
+ [Theory]
+ [InlineData(0)]
+ [InlineData(1)]
+ [InlineData(2)]
+ [InlineData(3)]
+ [InlineData(4)]
+ [InlineData(100)]
+ [InlineData(123)]
+ [InlineData(53436353)]
+ public void Modulo2(int x)
+ => Assert.Equal(x % 2, ImageMaths.Modulo2(x));
+
[Theory]
[InlineData(0)]
[InlineData(1)]
diff --git a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs
index c3b8e79ee..2bb43c440 100644
--- a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs
+++ b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs
@@ -17,6 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers
[InlineData(0)]
[InlineData(1)]
[InlineData(30)]
+ [InlineData(63)]
public void Premultiply_VectorSpan(int length)
{
var rnd = new Random(42);
@@ -36,6 +37,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers
[InlineData(0)]
[InlineData(1)]
[InlineData(30)]
+ [InlineData(63)]
public void UnPremultiply_VectorSpan(int length)
{
var rnd = new Random(42);