Benchmarks, tests, and cleanup.

6 years ago · a5033e4eff
5 changed files with 86 additions and 114 deletions
--- a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs
@ -1,103 +0,0 @@
 // Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.
 #if SUPPORTS_RUNTIME_INTRINSICS
 using System;
 using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 namespace SixLabors.ImageSharp
 {
    internal static partial class SimdUtils
    {
        public static class Avx2Intrinsics
        {
            /// <summary>
            /// Gets the 32 control bytes that <c>NormalizedFloatToByteSaturate</c> reinterprets as a
            /// <c>Vector256&lt;int&gt;</c> and passes to <c>Avx2.PermuteVar8x32</c>.
            /// Read as little-endian 32-bit integers (one per row group of four bytes) the values are
            /// 0, 4, 1, 5, 2, 6, 3, 7.
            /// </summary>
            private static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[]
            {
                0, 0, 0, 0,
                4, 0, 0, 0,
                1, 0, 0, 0,
                5, 0, 0, 0,
                2, 0, 0, 0,
                6, 0, 0, 0,
                3, 0, 0, 0,
                7, 0, 0, 0
            };
            /// <summary>
            /// Runs <see cref="NormalizedFloatToByteSaturate"/> over the longest leading part of the inputs
            /// whose length is a multiple of <see cref="Vector256{T}.Count"/> bytes, then slices both spans
            /// down so that only the unprocessed remainder is left for the caller.
            /// </summary>
            /// <remarks>
            /// When AVX2 is not supported, or the input is shorter than one full block, nothing is converted
            /// and both spans are left unchanged.
            /// </remarks>
            /// <param name="source">The source floats; replaced by the unprocessed tail on return.</param>
            /// <param name="dest">The destination bytes; replaced by the unprocessed tail on return.</param>
            [MethodImpl(InliningOptions.ShortMethod)]
            internal static void NormalizedFloatToByteSaturateReduce(
                ref ReadOnlySpan<float> source,
                ref Span<byte> dest)
            {
                DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

                if (!Avx2.IsSupported)
                {
                    return;
                }

                // The callee verifies and iterates in units of Vector256<byte>.Count, so the block size
                // must be derived from the same type rather than from the runtime-dependent Vector<byte>.
                int remainder = ImageMaths.ModuloP2(source.Length, Vector256<byte>.Count);
                int adjustedCount = source.Length - remainder;

                if (adjustedCount > 0)
                {
                    NormalizedFloatToByteSaturate(
                        source.Slice(0, adjustedCount),
                        dest.Slice(0, adjustedCount));

                    source = source.Slice(adjustedCount);
                    dest = dest.Slice(adjustedCount);
                }
            }
            /// <summary>
            /// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
            /// Each group of 32 floats is multiplied by 255, converted to 32-bit integers and narrowed
            /// to bytes with saturating packs.
            /// </summary>
            /// <remarks>
            /// Implementation is based on MagicScaler code:
            /// https://github.com/saucecontrol/PhotoSauce/blob/a9bd6e5162d2160419f0cf743fd4f536c079170b/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L453-L477
            /// </remarks>
            /// <param name="source">The source floats.</param>
            /// <param name="dest">The destination bytes.</param>
            internal static void NormalizedFloatToByteSaturate(
                ReadOnlySpan<float> source,
                Span<byte> dest)
            {
                VerifySpanInput(source, dest, Vector256<byte>.Count);

                ref Vector256<float> srcVectors =
                    ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
                ref Vector256<byte> dstVectors =
                    ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));

                Vector256<float> scale = Vector256.Create(255f);
                Vector256<int> deinterleave =
                    Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32));

                // One output vector of bytes consumes four input vectors of floats.
                int blockCount = dest.Length / Vector256<byte>.Count;

                for (int block = 0; block < blockCount; block++)
                {
                    ref Vector256<float> quad = ref Unsafe.Add(ref srcVectors, block * 4);

                    Vector256<short> lower = Avx2.PackSignedSaturate(
                        ConvertToInt32(quad, scale),
                        ConvertToInt32(Unsafe.Add(ref quad, 1), scale));

                    Vector256<short> upper = Avx2.PackSignedSaturate(
                        ConvertToInt32(Unsafe.Add(ref quad, 2), scale),
                        ConvertToInt32(Unsafe.Add(ref quad, 3), scale));

                    // The packs work on each 128-bit lane separately, leaving the 4-byte groups in the
                    // order 0, 4, 1, 5, 2, 6, 3, 7; the permute puts them back in source order.
                    Vector256<int> packed = Avx2.PackUnsignedSaturate(lower, upper).AsInt32();

                    Unsafe.Add(ref dstVectors, block) = Avx2.PermuteVar8x32(packed, deinterleave).AsByte();
                }
            }
            /// <summary>
            /// Multiplies <paramref name="vf"/> by <paramref name="scale"/> element-wise and converts
            /// the products to 32-bit integers.
            /// </summary>
            /// <param name="vf">The float vector to convert.</param>
            /// <param name="scale">The factor applied to every element before conversion.</param>
            /// <returns>The scaled values as a vector of <see cref="int"/>.</returns>
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            private static Vector256<int> ConvertToInt32(Vector256<float> vf, Vector256<float> scale)
                => Avx.ConvertToVector256Int32(Avx.Multiply(vf, scale));
        }
    }
 }
 #endif
--- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs
@ -13,15 +13,13 @@ using System.Runtime.Intrinsics.X86;
 #endif
 using BenchmarkDotNet.Attributes;
 using BenchmarkDotNet.Environments;
 using BenchmarkDotNet.Jobs;
 using SixLabors.ImageSharp.Memory;
 using SixLabors.ImageSharp.PixelFormats;
 // ReSharper disable InconsistentNaming
 namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
 {
-    [Config(typeof(Config.ShortClr))]
+    [Config(typeof(Config.ShortCore31))]
    public abstract class FromVector4<TPixel>
        where TPixel : unmanaged, IPixel<TPixel>
    {
--- a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs
@ -13,7 +13,7 @@ using SixLabors.ImageSharp.PixelFormats;
 namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
 {
-    [Config(typeof(Config.ShortClr))]
+    [Config(typeof(Config.ShortCore31))]
    public class ToVector4_Rgba32 : ToVector4<Rgba32>
    {
        [Benchmark]
@ -52,6 +52,17 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
            SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(sBytes, dFloats);
        }
 #if SUPPORTS_RUNTIME_INTRINSICS
        /// <summary>
        /// Benchmarks <c>SimdUtils.HwIntrinsics.ByteToNormalizedFloat</c> over the source pixels
        /// viewed as raw bytes, writing into the destination vectors viewed as raw floats.
        /// </summary>
        [Benchmark]
        public void HwIntrinsics()
        {
            Span<byte> pixelBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
            Span<float> vectorFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());

            SimdUtils.HwIntrinsics.ByteToNormalizedFloat(pixelBytes, vectorFloats);
        }
 #endif
        // [Benchmark]
        public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops()
        {
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
@ -7,7 +7,7 @@ using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using SixLabors.ImageSharp.Common.Tuples;
-
+using SixLabors.ImageSharp.Tests.TestUtilities;
 using Xunit;
 using Xunit.Abstractions;
@ -209,9 +209,17 @@ namespace SixLabors.ImageSharp.Tests.Common
        [MemberData(nameof(ArraySizesDivisibleBy32))]
        public void HwIntrinsics_BulkConvertByteToNormalizedFloat(int count)
        {
-            TestImpl_BulkConvertByteToNormalizedFloat(
+            static void RunTest(string serialized)
-                count,
+            {
-                (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span));
+                TestImpl_BulkConvertByteToNormalizedFloat(
                    FeatureTestRunner.Deserialize(serialized),
                    (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span));
            }
            FeatureTestRunner.RunWithHwIntrinsicsFeature(
                RunTest,
                HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41,
                count);
        }
 #endif
@ -294,9 +302,17 @@ namespace SixLabors.ImageSharp.Tests.Common
        [MemberData(nameof(ArraySizesDivisibleBy32))]
        public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count)
        {
-            TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
+            static void RunTest(string serialized)
-                count,
+            {
-                (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span));
+                TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
                    FeatureTestRunner.Deserialize(serialized),
                    (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span));
            }
            FeatureTestRunner.RunWithHwIntrinsicsFeature(
                RunTest,
                HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2,
                count);
        }
 #endif
--- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs
+++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs
@ -33,6 +33,14 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities
            where T : IXunitSerializable
            => BasicSerializer.Deserialize<T>(value);
        /// <summary>
        /// Converts the string form of an integer argument passed to a feature test back into an <see cref="int"/>.
        /// </summary>
        /// <param name="value">The string value to deserialize.</param>
        /// <returns>The <see cref="int"/> value.</returns>
        public static int Deserialize(string value)
        {
            int result = Convert.ToInt32(value);
            return result;
        }
        /// <summary>
        /// Runs the given test <paramref name="action"/> within an environment
        /// where the given <paramref name="intrinsics"/> features.
@ -201,6 +209,48 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities
            }
        }
        /// <summary>
        /// Runs the given test <paramref name="action"/> once for every feature entry derived from
        /// <paramref name="intrinsics"/>, passing <paramref name="serializable"/> to it as a string.
        /// </summary>
        /// <remarks>
        /// Every entry other than <see cref="HwIntrinsics.AllowAll"/> is executed in a separate process that is
        /// started with the matching <c>COMPlus_*</c> environment variable set to <c>0</c>.
        /// The <see cref="HwIntrinsics.AllowAll"/> entry is executed directly in the current process.
        /// Nothing is run when <see cref="RemoteExecutor"/> is not supported.
        /// </remarks>
        /// <param name="action">The test action to run.</param>
        /// <param name="intrinsics">The intrinsics features.</param>
        /// <param name="serializable">The value to pass as a parameter to the test action.</param>
        public static void RunWithHwIntrinsicsFeature(
            Action<string> action,
            HwIntrinsics intrinsics,
            int serializable)
        {
            if (!RemoteExecutor.IsSupported)
            {
                return;
            }

            // The argument is the same for every feature, so serialize it once up front.
            string argument = serializable.ToString();

            foreach (KeyValuePair<HwIntrinsics, string> intrinsic in intrinsics.ToFeatureKeyValueCollection())
            {
                if (intrinsic.Key == HwIntrinsics.AllowAll)
                {
                    // Since we are running using the default architecture there is no
                    // point creating the overhead of running the action in a separate process.
                    action(argument);
                    continue;
                }

                // Only the out-of-process path needs start info, so create it here rather than per iteration.
                var processStartInfo = new ProcessStartInfo();
                processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0";

                RemoteExecutor.Invoke(
                    action,
                    argument,
                    new RemoteInvokeOptions
                    {
                        StartInfo = processStartInfo
                    })
                    .Dispose();
            }
        }
        internal static Dictionary<HwIntrinsics, string> ToFeatureKeyValueCollection(this HwIntrinsics intrinsics)
        {
            // Loop through and translate the given values into COMPlus equivalents