From e3faadbf2edac8a51d09bf593088f42a073bd60b Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 22 Oct 2020 10:34:42 +0100 Subject: [PATCH] Use Avx.Shuffle for lower latency --- src/ImageSharp/Common/Helpers/Vector4Utilities.cs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index 0137d0256..f617e9a3e 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -17,9 +17,8 @@ namespace SixLabors.ImageSharp /// </summary> internal static class Vector4Utilities { - private const int BlendAlphaControl = 0b10001000; - - private static ReadOnlySpan<byte> PermuteAlphaMask8x32 => new byte[] { 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0 }; + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; /// <summary> /// Restricts a vector between a minimum and a maximum value. 
@@ -70,16 +69,13 @@ namespace SixLabors.ImageSharp ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors)); - Vector256<int> mask = - Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { Vector256<float> source = vectorsBase; - Vector256<float> multiply = Avx2.PermuteVar8x32(source, mask); + Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); } @@ -116,16 +112,13 @@ namespace SixLabors.ImageSharp ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors)); - Vector256<int> mask = - Unsafe.As<byte, Vector256<int>>(ref MemoryMarshal.GetReference(PermuteAlphaMask8x32)); - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) { Vector256<float> source = vectorsBase; - Vector256<float> multiply = Avx2.PermuteVar8x32(source, mask); + Vector256<float> multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); }