diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs index 977432f8b..d24230fe1 100644 --- a/src/ImageSharp/Common/Helpers/ImageMaths.cs +++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs @@ -132,6 +132,12 @@ namespace SixLabors.ImageSharp return (a / GreatestCommonDivisor(a, b)) * b; } + /// + /// Calculates % 2 + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static int Modulo2(int x) => x & 1; + /// /// Calculates % 4 /// diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index fccc50755..f617e9a3e 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -5,6 +5,10 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp { @@ -13,6 +17,9 @@ namespace SixLabors.ImageSharp /// internal static class Vector4Utilities { + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; + /// /// Restricts a vector between a minimum and a maximum value. /// 5x Faster then . @@ -56,13 +63,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) + { + Vector256 source = vectorsBase; + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); + vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + Premultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Premultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } } } @@ -73,13 +106,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) + { + Vector256 source = vectorsBase; + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); + vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + UnPremultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - UnPremultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs new file mode 100644 index 000000000..2a886c687 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class PremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void PremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } + } + + [Benchmark] + public void Premultiply() + { + Vector4Utilities.Premultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Premultiply(ref Vector4 source) + { + float w = source.W; + source *= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + } + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = GetRandomFloat(rnd, minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs new file mode 100644 index 000000000..1312c767b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class UnPremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void UnPremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } + } + + [Benchmark] + public void UnPremultiply() + { + Vector4Utilities.UnPremultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void UnPremultiply(ref Vector4 source) + { + float w = source.W; + source /= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + } + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = GetRandomFloat(rnd, minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs index 27689f681..7d1662387 100644 --- a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs @@ -10,6 +10,21 @@ namespace SixLabors.ImageSharp.Tests.Helpers { public class ImageMathsTests { + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + [InlineData(4)] + [InlineData(100)] + [InlineData(123)] + [InlineData(53436353)] + public void Modulo2(int x) + { + int actual = ImageMaths.Modulo2(x); + Assert.Equal(x % 2, actual); + } + [Theory] [InlineData(0)] [InlineData(1)] diff --git a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs index c3b8e79ee..2bb43c440 100644 --- a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs @@ -17,6 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void Premultiply_VectorSpan(int length) { var rnd = new Random(42); @@ -36,6 +37,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void UnPremultiply_VectorSpan(int length) { var rnd = new Random(42);