diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs
index 977432f8b..d24230fe1 100644
--- a/src/ImageSharp/Common/Helpers/ImageMaths.cs
+++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs
@@ -132,6 +132,12 @@ namespace SixLabors.ImageSharp
return (a / GreatestCommonDivisor(a, b)) * b;
}
+ /// <summary>
+ /// Calculates <paramref name="x"/> % 2 by masking the lowest bit.
+ /// </summary>
+ /// <param name="x">The value to reduce.</param>
+ /// <returns>
+ /// 0 when the lowest bit of <paramref name="x"/> is clear (even), otherwise 1.
+ /// Because this is a bit mask, a negative odd value yields 1, whereas the C# % operator would yield -1.
+ /// </returns>
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public static int Modulo2(int x) => x & 1;
+
///
/// Calculates % 4
///
diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
index fccc50755..f617e9a3e 100644
--- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
+++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs
@@ -5,6 +5,10 @@ using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
namespace SixLabors.ImageSharp
{
@@ -13,6 +17,9 @@ namespace SixLabors.ImageSharp
///
internal static class Vector4Utilities
{
+ // Immediate for Avx.Blend: bits 3 and 7 are set, so elements 3 and 7 of the 256-bit vector
+ // (the W component of each of the two packed Vector4 values) are taken from the second operand.
+ private const int BlendAlphaControl = 0b_10_00_10_00;
+ // Immediate for Avx.Shuffle: every 2-bit selector equals 3, so when both operands are the same vector
+ // each 128-bit lane is filled with its own element 3, i.e. W is broadcast across that Vector4.
+ private const int ShuffleAlphaControl = 0b_11_11_11_11;
+
///
/// Restricts a vector between a minimum and a maximum value.
/// 5x Faster then .
@@ -56,13 +63,39 @@ namespace SixLabors.ImageSharp
[MethodImpl(InliningOptions.ShortMethod)]
public static void Premultiply(Span<Vector4> vectors)
{
- // TODO: This method can be AVX2 optimized using Vector
- ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+#if SUPPORTS_RUNTIME_INTRINSICS
+ // Only Avx.* intrinsics are used below; Avx2 support implies Avx support, so this guard is sufficient.
+ // At least two Vector4 values are required to fill a single Vector256<float>.
+ if (Avx2.IsSupported && vectors.Length >= 2)
+ {
+ // Reinterpret the span as packed pairs of Vector4 (8 floats per Vector256<float>).
+ ref Vector256<float> vectorsBase =
+ ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
- for (int i = 0; i < vectors.Length; i++)
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
+ ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
+
+ while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
+ {
+ Vector256<float> source = vectorsBase;
+
+ // Broadcast each pixel's W across its own 128-bit lane.
+ Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
+
+ // Scale every component by W, then take W itself back from the unscaled source
+ // so that alpha is left unchanged rather than squared.
+ vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl);
+ vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
+ }
+
+ if (ImageMaths.Modulo2(vectors.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ Premultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
+ }
+ }
+ else
+#endif
{
- ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
- Premultiply(ref v);
+ // Scalar fallback: process one Vector4 at a time (also handles empty and single-element spans).
+ ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+
+ for (int i = 0; i < vectors.Length; i++)
+ {
+ ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
+ Premultiply(ref v);
+ }
}
}
@@ -73,13 +106,39 @@ namespace SixLabors.ImageSharp
[MethodImpl(InliningOptions.ShortMethod)]
public static void UnPremultiply(Span<Vector4> vectors)
{
- // TODO: This method can be AVX2 optimized using Vector
- ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+#if SUPPORTS_RUNTIME_INTRINSICS
+ // Only Avx.* intrinsics are used below; Avx2 support implies Avx support, so this guard is sufficient.
+ // At least two Vector4 values are required to fill a single Vector256<float>.
+ if (Avx2.IsSupported && vectors.Length >= 2)
+ {
+ // Reinterpret the span as packed pairs of Vector4 (8 floats per Vector256<float>).
+ ref Vector256<float> vectorsBase =
+ ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
- for (int i = 0; i < vectors.Length; i++)
+ // Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
+ ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u));
+
+ while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
+ {
+ Vector256<float> source = vectorsBase;
+
+ // Broadcast each pixel's W across its own 128-bit lane.
+ Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl);
+
+ // Divide every component by W, then take W itself back from the undivided source
+ // so that alpha is left unchanged rather than becoming W / W.
+ vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl);
+ vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
+ }
+
+ if (ImageMaths.Modulo2(vectors.Length) != 0)
+ {
+ // Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
+ UnPremultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
+ }
+ }
+ else
+#endif
{
- ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
- UnPremultiply(ref v);
+ // Scalar fallback: process one Vector4 at a time (also handles empty and single-element spans).
+ ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
+
+ for (int i = 0; i < vectors.Length; i++)
+ {
+ ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
+ UnPremultiply(ref v);
+ }
}
}
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs
new file mode 100644
index 000000000..2a886c687
--- /dev/null
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs
@@ -0,0 +1,68 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using BenchmarkDotNet.Attributes;
+
+namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
+{
+ /// <summary>
+ /// Benchmarks <c>Vector4Utilities.Premultiply</c> over a span against a plain
+ /// per-element loop that applies the local scalar routine to the same array.
+ /// </summary>
+ // NOTE(review): both benchmarks write their results back into the shared static array, so every
+ // invocation operates on the output of the previous one rather than on the original random data.
+ // Confirm this is acceptable for the comparison being made.
+ [Config(typeof(Config.ShortCore31))]
+ public class PremultiplyVector4
+ {
+ // Input data, generated once per process with a fixed seed and shared by both benchmarks.
+ private static readonly Vector4[] Vectors = CreateVectors();
+
+ /// <summary>
+ /// Baseline: visits every element by reference and premultiplies it with the scalar routine below.
+ /// </summary>
+ [Benchmark(Baseline = true)]
+ public void PremultiplyBaseline()
+ {
+ ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors);
+
+ for (int i = 0; i < Vectors.Length; i++)
+ {
+ ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
+ Premultiply(ref v);
+ }
+ }
+
+ /// <summary>
+ /// Candidate: hands the whole array to the bulk library implementation.
+ /// </summary>
+ [Benchmark]
+ public void Premultiply()
+ {
+ Vector4Utilities.Premultiply(Vectors);
+ }
+
+ /// <summary>
+ /// Multiplies all components of <paramref name="source"/> by its W component, then restores W.
+ /// </summary>
+ /// <param name="source">The vector to modify in place.</param>
+ // Presumably mirrors the library's scalar Premultiply(ref Vector4) - confirm against Vector4Utilities.
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static void Premultiply(ref Vector4 source)
+ {
+ float w = source.W;
+ source *= w;
+ source.W = w;
+ }
+
+ /// <summary>
+ /// Builds the 2048-element input array from a <see cref="Random"/> seeded with 42.
+ /// </summary>
+ private static Vector4[] CreateVectors()
+ {
+ var rnd = new Random(42);
+ return GenerateRandomVectorArray(rnd, 2048, 0, 1);
+ }
+
+ /// <summary>
+ /// Creates an array of <paramref name="length"/> vectors whose X, Y, Z and W components are
+ /// drawn, in that order, from <paramref name="rnd"/> via <see cref="GetRandomFloat"/>.
+ /// </summary>
+ private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal)
+ {
+ var values = new Vector4[length];
+
+ for (int i = 0; i < length; i++)
+ {
+ ref Vector4 v = ref values[i];
+ v.X = GetRandomFloat(rnd, minVal, maxVal);
+ v.Y = GetRandomFloat(rnd, minVal, maxVal);
+ v.Z = GetRandomFloat(rnd, minVal, maxVal);
+ v.W = GetRandomFloat(rnd, minVal, maxVal);
+ }
+
+ return values;
+ }
+
+ /// <summary>
+ /// Maps one <see cref="Random.NextDouble"/> sample linearly onto the range that starts at
+ /// <paramref name="minVal"/> and has width (<paramref name="maxVal"/> - <paramref name="minVal"/>).
+ /// </summary>
+ private static float GetRandomFloat(Random rnd, float minVal, float maxVal)
+ => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
+ }
+}
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs
new file mode 100644
index 000000000..1312c767b
--- /dev/null
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs
@@ -0,0 +1,68 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using BenchmarkDotNet.Attributes;
+
+namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
+{
+ /// <summary>
+ /// Benchmarks <c>Vector4Utilities.UnPremultiply</c> over a span against a plain
+ /// per-element loop that applies the local scalar routine to the same array.
+ /// </summary>
+ // NOTE(review): both benchmarks write their results back into the shared static array, so every
+ // invocation divides the output of the previous one again rather than the original random data.
+ // Confirm this is acceptable for the comparison being made.
+ [Config(typeof(Config.ShortCore31))]
+ public class UnPremultiplyVector4
+ {
+ // Input data, generated once per process with a fixed seed and shared by both benchmarks.
+ private static readonly Vector4[] Vectors = CreateVectors();
+
+ /// <summary>
+ /// Baseline: visits every element by reference and un-premultiplies it with the scalar routine below.
+ /// </summary>
+ [Benchmark(Baseline = true)]
+ public void UnPremultiplyBaseline()
+ {
+ ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors);
+
+ for (int i = 0; i < Vectors.Length; i++)
+ {
+ ref Vector4 v = ref Unsafe.Add(ref baseRef, i);
+ UnPremultiply(ref v);
+ }
+ }
+
+ /// <summary>
+ /// Candidate: hands the whole array to the bulk library implementation.
+ /// </summary>
+ [Benchmark]
+ public void UnPremultiply()
+ {
+ Vector4Utilities.UnPremultiply(Vectors);
+ }
+
+ /// <summary>
+ /// Divides all components of <paramref name="source"/> by its W component, then restores W.
+ /// </summary>
+ /// <param name="source">The vector to modify in place.</param>
+ // Presumably mirrors the library's scalar UnPremultiply(ref Vector4) - confirm against Vector4Utilities.
+ // No guard is applied for W == 0; the division is performed as-is.
+ [MethodImpl(InliningOptions.ShortMethod)]
+ private static void UnPremultiply(ref Vector4 source)
+ {
+ float w = source.W;
+ source /= w;
+ source.W = w;
+ }
+
+ /// <summary>
+ /// Builds the 2048-element input array from a <see cref="Random"/> seeded with 42.
+ /// </summary>
+ private static Vector4[] CreateVectors()
+ {
+ var rnd = new Random(42);
+ return GenerateRandomVectorArray(rnd, 2048, 0, 1);
+ }
+
+ /// <summary>
+ /// Creates an array of <paramref name="length"/> vectors whose X, Y, Z and W components are
+ /// drawn, in that order, from <paramref name="rnd"/> via <see cref="GetRandomFloat"/>.
+ /// </summary>
+ private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal)
+ {
+ var values = new Vector4[length];
+
+ for (int i = 0; i < length; i++)
+ {
+ ref Vector4 v = ref values[i];
+ v.X = GetRandomFloat(rnd, minVal, maxVal);
+ v.Y = GetRandomFloat(rnd, minVal, maxVal);
+ v.Z = GetRandomFloat(rnd, minVal, maxVal);
+ v.W = GetRandomFloat(rnd, minVal, maxVal);
+ }
+
+ return values;
+ }
+
+ /// <summary>
+ /// Maps one <see cref="Random.NextDouble"/> sample linearly onto the range that starts at
+ /// <paramref name="minVal"/> and has width (<paramref name="maxVal"/> - <paramref name="minVal"/>).
+ /// </summary>
+ private static float GetRandomFloat(Random rnd, float minVal, float maxVal)
+ => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
+ }
+}
diff --git a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs
index 27689f681..7d1662387 100644
--- a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs
+++ b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs
@@ -10,6 +10,21 @@ namespace SixLabors.ImageSharp.Tests.Helpers
{
public class ImageMathsTests
{
+ // Verifies the bit-mask helper against the built-in remainder operator for non-negative inputs.
+ [Theory]
+ [InlineData(0)]
+ [InlineData(1)]
+ [InlineData(2)]
+ [InlineData(3)]
+ [InlineData(4)]
+ [InlineData(100)]
+ [InlineData(123)]
+ [InlineData(53436353)]
+ public void Modulo2(int x)
+ {
+ int expected = x % 2;
+
+ Assert.Equal(expected, ImageMaths.Modulo2(x));
+ }
+
[Theory]
[InlineData(0)]
[InlineData(1)]
diff --git a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs
index c3b8e79ee..2bb43c440 100644
--- a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs
+++ b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs
@@ -17,6 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers
[InlineData(0)]
[InlineData(1)]
[InlineData(30)]
+ [InlineData(63)]
public void Premultiply_VectorSpan(int length)
{
var rnd = new Random(42);
@@ -36,6 +37,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers
[InlineData(0)]
[InlineData(1)]
[InlineData(30)]
+ [InlineData(63)]
public void UnPremultiply_VectorSpan(int length)
{
var rnd = new Random(42);