Browse Source

Update benchmarks and add more tests

pull/2365/head
James Jackson-South 3 years ago
parent
commit
eda0869f09
  1. 2
      src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
  2. 2
      src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
  3. 2
      src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
  4. 2
      src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
  5. 10
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  6. 10
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
  7. 58
      tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs
  8. 39
      tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs
  9. 68
      tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs
  10. 39
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs
  11. 35
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs
  12. 275
      tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

2
src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs

@ -64,7 +64,7 @@ internal readonly struct DefaultShuffle4 : IShuffle4
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0);
Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0);
for (int i = 0; i < source.Length; i += 4)
{

2
src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs

@ -32,7 +32,7 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0);
Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0);
Span<byte> temp = stackalloc byte[4];
ref byte t = ref MemoryMarshal.GetReference(temp);

2
src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs

@ -32,7 +32,7 @@ internal readonly struct DefaultShuffle3 : IShuffle3
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0);
Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0);
for (int i = 0; i < source.Length; i += 3)
{

2
src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs

@ -32,7 +32,7 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0);
Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0);
for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4)
{

10
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -297,7 +297,7 @@ internal static partial class SimdUtils
// shuffle controls to add to the library.
// We can add static ROS instances if need be in the future.
Span<byte> bytes = stackalloc byte[Vector256<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Shuffle.MMShuffleSpan(ref bytes, control);
Vector256<byte> vshuffle = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector256<byte> sourceBase =
@ -333,7 +333,7 @@ internal static partial class SimdUtils
{
// Ssse3
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Shuffle.MMShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
@ -382,7 +382,7 @@ internal static partial class SimdUtils
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Shuffle.MMShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
@ -445,7 +445,7 @@ internal static partial class SimdUtils
Vector128<byte> vfill = Vector128.Create(0xff000000ff000000ul).AsByte();
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Shuffle.MMShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
@ -489,7 +489,7 @@ internal static partial class SimdUtils
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Shuffle.MMShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =

10
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -145,7 +145,7 @@ internal static partial class SimdUtils
{
ref float sBase = ref MemoryMarshal.GetReference(source);
ref float dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);
Shuffle.InverseMMShuffle(control, out int p3, out int p2, out int p1, out int p0);
for (int i = 0; i < source.Length; i += 4)
{
@ -484,13 +484,13 @@ internal static partial class SimdUtils
public const byte MMShuffle3333 = 0b11111111;
[MethodImpl(InliningOptions.ShortMethod)]
public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0)
public static byte MMShuffle(byte p3, byte p2, byte p1, byte p0)
=> (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0);
[MethodImpl(InliningOptions.ShortMethod)]
public static void MmShuffleSpan(ref Span<byte> span, byte control)
public static void MMShuffleSpan(ref Span<byte> span, byte control)
{
InverseMmShuffle(
InverseMMShuffle(
control,
out int p3,
out int p2,
@ -509,7 +509,7 @@ internal static partial class SimdUtils
}
[MethodImpl(InliningOptions.ShortMethod)]
public static void InverseMmShuffle(
public static void InverseMMShuffle(
byte control,
out int p3,
out int p2,

58
tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs

@ -8,8 +8,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk;
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Pad3Shuffle4Channel
{
private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2);
private static readonly XYZWPad3Shuffle4 ControlFast = default;
private static readonly DefaultPad3Shuffle4 Control = new(SimdUtils.Shuffle.MMShuffle1032);
private byte[] source;
private byte[] destination;
@ -26,15 +25,11 @@ public class Pad3Shuffle4Channel
[Benchmark]
public void Pad3Shuffle4()
{
SimdUtils.Pad3Shuffle4(this.source, this.destination, Control);
}
=> SimdUtils.Pad3Shuffle4(this.source, this.destination, Control);
[Benchmark]
public void Pad3Shuffle4FastFallback()
{
SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast);
}
=> SimdUtils.Pad3Shuffle4(this.source, this.destination, default(XYZWPad3Shuffle4));
}
// 2020-10-30
@ -83,3 +78,50 @@ public class Pad3Shuffle4Channel
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - |
// 2023-02-21
// ##########
//
// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621
// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
// .NET SDK= 7.0.103
// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// Runtime=.NET 6.0
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:|
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 57.45 ns | 0.126 ns | 0.118 ns | 1.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.70 ns | 0.105 ns | 0.098 ns | 0.26 | - | - | - | - |
// | Pad3Shuffle4 | 3. AVX | Empty | 96 | 14.63 ns | 0.070 ns | 0.062 ns | 0.25 | - | - | - | - |
// | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 12.08 ns | 0.028 ns | 0.025 ns | 1.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.04 ns | 0.050 ns | 0.044 ns | 1.16 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 96 | 13.90 ns | 0.086 ns | 0.080 ns | 1.15 | - | - | - | - |
// | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 202.67 ns | 2.010 ns | 1.678 ns | 1.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.54 ns | 0.060 ns | 0.053 ns | 0.13 | - | - | - | - |
// | Pad3Shuffle4 | 3. AVX | Empty | 384 | 25.72 ns | 0.139 ns | 0.130 ns | 0.13 | - | - | - | - |
// | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 60.35 ns | 0.080 ns | 0.071 ns | 1.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.18 ns | 0.388 ns | 0.324 ns | 0.42 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 384 | 26.21 ns | 0.067 ns | 0.059 ns | 0.43 | - | - | - | - |
// | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 393.88 ns | 1.353 ns | 1.199 ns | 1.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 39.44 ns | 0.230 ns | 0.204 ns | 0.10 | - | - | - | - |
// | Pad3Shuffle4 | 3. AVX | Empty | 768 | 39.51 ns | 0.108 ns | 0.101 ns | 0.10 | - | - | - | - |
// | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 112.02 ns | 0.140 ns | 0.131 ns | 1.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 768 | 38.60 ns | 0.091 ns | 0.080 ns | 0.34 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 768 | 38.18 ns | 0.100 ns | 0.084 ns | 0.34 | - | - | - | - |
// | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 777.95 ns | 1.719 ns | 1.342 ns | 1.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 73.11 ns | 0.090 ns | 0.075 ns | 0.09 | - | - | - | - |
// | Pad3Shuffle4 | 3. AVX | Empty | 1536 | 73.41 ns | 0.125 ns | 0.117 ns | 0.09 | - | - | - | - |
// | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 218.14 ns | 0.377 ns | 0.334 ns | 1.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 72.55 ns | 1.418 ns | 1.184 ns | 0.33 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 1536 | 73.15 ns | 0.330 ns | 0.292 ns | 0.34 | - | - | - | - |

39
tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs

@ -1,14 +1,16 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes;
using Iced.Intel;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk;
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Shuffle3Channel
{
private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2);
private static readonly DefaultShuffle3 Control = new(SimdUtils.Shuffle.MMShuffle3102);
private byte[] source;
private byte[] destination;
@ -25,9 +27,7 @@ public class Shuffle3Channel
[Benchmark]
public void Shuffle3()
{
SimdUtils.Shuffle3(this.source, this.destination, Control);
}
=> SimdUtils.Shuffle3(this.source, this.destination, Control);
}
// 2020-11-02
@ -60,3 +60,34 @@ public class Shuffle3Channel
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - |
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - |
// 2023-02-21
// ##########
//
// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621
// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
// .NET SDK= 7.0.103
// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// Runtime=.NET 6.0
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |--------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:|
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 44.55 ns | 0.564 ns | 0.528 ns | 1.00 | - | - | - | - |
// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 15.46 ns | 0.064 ns | 0.060 ns | 0.35 | - | - | - | - |
// | Shuffle3 | 3. AVX | Empty | 96 | 15.18 ns | 0.056 ns | 0.053 ns | 0.34 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 155.68 ns | 0.539 ns | 0.504 ns | 1.00 | - | - | - | - |
// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 30.04 ns | 0.100 ns | 0.089 ns | 0.19 | - | - | - | - |
// | Shuffle3 | 3. AVX | Empty | 384 | 29.70 ns | 0.061 ns | 0.054 ns | 0.19 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 302.76 ns | 1.023 ns | 0.957 ns | 1.00 | - | - | - | - |
// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 50.24 ns | 0.098 ns | 0.092 ns | 0.17 | - | - | - | - |
// | Shuffle3 | 3. AVX | Empty | 768 | 49.28 ns | 0.156 ns | 0.131 ns | 0.16 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 596.53 ns | 2.675 ns | 2.503 ns | 1.00 | - | - | - | - |
// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 94.09 ns | 0.312 ns | 0.260 ns | 0.16 | - | - | - | - |
// | Shuffle3 | 3. AVX | Empty | 1536 | 93.57 ns | 0.196 ns | 0.183 ns | 0.16 | - | - | - | - |

68
tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs

@ -1,15 +1,16 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes;
using Iced.Intel;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk;
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Shuffle4Slice3Channel
{
private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2);
private static readonly XYZWShuffle4Slice3 ControlFast = default;
private static readonly DefaultShuffle4Slice3 Control = new(SimdUtils.Shuffle.MMShuffle1032);
private byte[] source;
private byte[] destination;
@ -26,15 +27,11 @@ public class Shuffle4Slice3Channel
[Benchmark]
public void Shuffle4Slice3()
{
SimdUtils.Shuffle4Slice3(this.source, this.destination, Control);
}
=> SimdUtils.Shuffle4Slice3(this.source, this.destination, Control);
[Benchmark]
public void Shuffle4Slice3FastFallback()
{
SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast);
}
=> SimdUtils.Shuffle4Slice3(this.source, this.destination, default(XYZWShuffle4Slice3));
}
// 2020-10-29
@ -91,3 +88,58 @@ public class Shuffle4Slice3Channel
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - |
// 2023-02-21
// ##########
//
// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621
// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
// .NET SDK= 7.0.103
// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
//
// Runtime=.NET 6.0
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:|
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 45.59 ns | 0.166 ns | 0.147 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 128 | 15.62 ns | 0.056 ns | 0.052 ns | 0.34 | - | - | - | - |
// | Shuffle4Slice3 | 3. AVX | Empty | 128 | 16.37 ns | 0.047 ns | 0.040 ns | 0.36 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 13.23 ns | 0.028 ns | 0.026 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 128 | 14.41 ns | 0.013 ns | 0.012 ns | 1.09 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 128 | 14.70 ns | 0.050 ns | 0.047 ns | 1.11 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 85.48 ns | 0.192 ns | 0.179 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.18 ns | 0.230 ns | 0.204 ns | 0.22 | - | - | - | - |
// | Shuffle4Slice3 | 3. AVX | Empty | 256 | 18.66 ns | 0.017 ns | 0.015 ns | 0.22 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 24.34 ns | 0.078 ns | 0.073 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 256 | 18.58 ns | 0.061 ns | 0.057 ns | 0.76 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 256 | 19.23 ns | 0.018 ns | 0.016 ns | 0.79 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 165.31 ns | 0.742 ns | 0.694 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 512 | 28.10 ns | 0.077 ns | 0.068 ns | 0.17 | - | - | - | - |
// | Shuffle4Slice3 | 3. AVX | Empty | 512 | 28.99 ns | 0.018 ns | 0.014 ns | 0.18 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 53.45 ns | 0.270 ns | 0.226 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 512 | 27.50 ns | 0.034 ns | 0.028 ns | 0.51 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 512 | 28.76 ns | 0.017 ns | 0.015 ns | 0.54 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 323.87 ns | 0.549 ns | 0.487 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.81 ns | 0.056 ns | 0.050 ns | 0.13 | - | - | - | - |
// | Shuffle4Slice3 | 3. AVX | Empty | 1024 | 39.95 ns | 0.075 ns | 0.067 ns | 0.12 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 101.37 ns | 0.080 ns | 0.067 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.72 ns | 0.049 ns | 0.041 ns | 0.40 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 1024 | 39.78 ns | 0.029 ns | 0.027 ns | 0.39 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 642.95 ns | 2.067 ns | 1.933 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 73.19 ns | 0.082 ns | 0.077 ns | 0.11 | - | - | - | - |
// | Shuffle4Slice3 | 3. AVX | Empty | 2048 | 69.83 ns | 0.319 ns | 0.267 ns | 0.11 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 196.85 ns | 0.238 ns | 0.211 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 72.89 ns | 0.117 ns | 0.098 ns | 0.37 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 2048 | 69.59 ns | 0.073 ns | 0.061 ns | 0.35 | - | - | - | - |

39
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs

@ -24,9 +24,7 @@ public class ShuffleByte4Channel
[Benchmark]
public void Shuffle4Channel()
{
SimdUtils.Shuffle4<WXYZShuffle4>(this.source, this.destination, default);
}
=> SimdUtils.Shuffle4<WXYZShuffle4>(this.source, this.destination, default);
}
// 2020-10-29
@ -63,3 +61,38 @@ public class ShuffleByte4Channel
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - |
// 2023-02-21
// ##########
//
// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621
// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
// .NET SDK= 7.0.103
// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
//
// Runtime=.NET 6.0
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 10.76 ns | 0.033 ns | 0.029 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.39 ns | 0.045 ns | 0.040 ns | 1.06 | 0.01 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 128 | 14.05 ns | 0.029 ns | 0.024 ns | 1.31 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 32.09 ns | 0.655 ns | 1.000 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 14.03 ns | 0.047 ns | 0.041 ns | 0.44 | 0.02 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 256 | 15.18 ns | 0.052 ns | 0.043 ns | 0.48 | 0.03 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 59.26 ns | 0.084 ns | 0.070 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 18.80 ns | 0.036 ns | 0.034 ns | 0.32 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 512 | 17.69 ns | 0.038 ns | 0.034 ns | 0.30 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 112.48 ns | 0.285 ns | 0.253 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 31.57 ns | 0.041 ns | 0.036 ns | 0.28 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 1024 | 28.41 ns | 0.068 ns | 0.064 ns | 0.25 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 218.59 ns | 0.303 ns | 0.283 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 53.04 ns | 0.106 ns | 0.099 ns | 0.24 | 0.00 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 2048 | 34.74 ns | 0.061 ns | 0.054 ns | 0.16 | 0.00 | - | - | - | - |

35
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs

@ -61,3 +61,38 @@ public class ShuffleFloat4Channel
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - |
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - |
// 2023-02-21
// ##########
//
// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621
// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
// .NET SDK= 7.0.103
// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT
//
// Runtime=.NET 6.0
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 57.819 ns | 0.2360 ns | 0.1970 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.564 ns | 0.0234 ns | 0.0195 ns | 0.20 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 128 | 7.770 ns | 0.0696 ns | 0.0617 ns | 0.13 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 105.282 ns | 0.2713 ns | 0.2405 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.867 ns | 0.0393 ns | 0.0348 ns | 0.19 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 256 | 17.586 ns | 0.0582 ns | 0.0544 ns | 0.17 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 200.799 ns | 0.5678 ns | 0.5033 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 41.137 ns | 0.1524 ns | 0.1351 ns | 0.20 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 512 | 24.040 ns | 0.0445 ns | 0.0395 ns | 0.12 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 401.046 ns | 0.5865 ns | 0.5199 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 94.904 ns | 0.4633 ns | 0.4334 ns | 0.24 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 1024 | 68.456 ns | 0.1192 ns | 0.0996 ns | 0.17 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 772.297 ns | 0.6270 ns | 0.5558 ns | 1.00 | - | - | - | - |
// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 184.561 ns | 0.4319 ns | 0.4040 ns | 0.24 | - | - | - | - |
// | Shuffle4Channel | 3. AVX | Empty | 2048 | 133.634 ns | 1.7864 ns | 1.8345 ns | 0.17 | - | - | - | - |

275
tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

@ -7,6 +7,271 @@ namespace SixLabors.ImageSharp.Tests.Common;
public partial class SimdUtilsTests
{
public static readonly TheoryData<byte, byte> MMShuffleData = new()
{
{ SimdUtils.Shuffle.MMShuffle(0, 0, 0, 0), SimdUtils.Shuffle.MMShuffle0000 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 0, 1), SimdUtils.Shuffle.MMShuffle0001 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 0, 2), SimdUtils.Shuffle.MMShuffle0002 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 0, 3), SimdUtils.Shuffle.MMShuffle0003 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 1, 0), SimdUtils.Shuffle.MMShuffle0010 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 1, 1), SimdUtils.Shuffle.MMShuffle0011 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 1, 2), SimdUtils.Shuffle.MMShuffle0012 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 1, 3), SimdUtils.Shuffle.MMShuffle0013 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 2, 0), SimdUtils.Shuffle.MMShuffle0020 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 2, 1), SimdUtils.Shuffle.MMShuffle0021 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 2, 2), SimdUtils.Shuffle.MMShuffle0022 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 2, 3), SimdUtils.Shuffle.MMShuffle0023 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 3, 0), SimdUtils.Shuffle.MMShuffle0030 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 3, 1), SimdUtils.Shuffle.MMShuffle0031 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 3, 2), SimdUtils.Shuffle.MMShuffle0032 },
{ SimdUtils.Shuffle.MMShuffle(0, 0, 3, 3), SimdUtils.Shuffle.MMShuffle0033 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 0, 0), SimdUtils.Shuffle.MMShuffle0100 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 0, 1), SimdUtils.Shuffle.MMShuffle0101 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 0, 2), SimdUtils.Shuffle.MMShuffle0102 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 0, 3), SimdUtils.Shuffle.MMShuffle0103 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 1, 0), SimdUtils.Shuffle.MMShuffle0110 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 1, 1), SimdUtils.Shuffle.MMShuffle0111 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 1, 2), SimdUtils.Shuffle.MMShuffle0112 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 1, 3), SimdUtils.Shuffle.MMShuffle0113 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 2, 0), SimdUtils.Shuffle.MMShuffle0120 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 2, 1), SimdUtils.Shuffle.MMShuffle0121 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 2, 2), SimdUtils.Shuffle.MMShuffle0122 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 2, 3), SimdUtils.Shuffle.MMShuffle0123 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 3, 0), SimdUtils.Shuffle.MMShuffle0130 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 3, 1), SimdUtils.Shuffle.MMShuffle0131 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 3, 2), SimdUtils.Shuffle.MMShuffle0132 },
{ SimdUtils.Shuffle.MMShuffle(0, 1, 3, 3), SimdUtils.Shuffle.MMShuffle0133 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 0, 0), SimdUtils.Shuffle.MMShuffle0200 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 0, 1), SimdUtils.Shuffle.MMShuffle0201 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 0, 2), SimdUtils.Shuffle.MMShuffle0202 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 0, 3), SimdUtils.Shuffle.MMShuffle0203 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 1, 0), SimdUtils.Shuffle.MMShuffle0210 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 1, 1), SimdUtils.Shuffle.MMShuffle0211 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 1, 2), SimdUtils.Shuffle.MMShuffle0212 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 1, 3), SimdUtils.Shuffle.MMShuffle0213 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 2, 0), SimdUtils.Shuffle.MMShuffle0220 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 2, 1), SimdUtils.Shuffle.MMShuffle0221 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 2, 2), SimdUtils.Shuffle.MMShuffle0222 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 2, 3), SimdUtils.Shuffle.MMShuffle0223 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 3, 0), SimdUtils.Shuffle.MMShuffle0230 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 3, 1), SimdUtils.Shuffle.MMShuffle0231 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 3, 2), SimdUtils.Shuffle.MMShuffle0232 },
{ SimdUtils.Shuffle.MMShuffle(0, 2, 3, 3), SimdUtils.Shuffle.MMShuffle0233 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 0, 0), SimdUtils.Shuffle.MMShuffle0300 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 0, 1), SimdUtils.Shuffle.MMShuffle0301 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 0, 2), SimdUtils.Shuffle.MMShuffle0302 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 0, 3), SimdUtils.Shuffle.MMShuffle0303 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 1, 0), SimdUtils.Shuffle.MMShuffle0310 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 1, 1), SimdUtils.Shuffle.MMShuffle0311 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 1, 2), SimdUtils.Shuffle.MMShuffle0312 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 1, 3), SimdUtils.Shuffle.MMShuffle0313 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 2, 0), SimdUtils.Shuffle.MMShuffle0320 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 2, 1), SimdUtils.Shuffle.MMShuffle0321 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 2, 2), SimdUtils.Shuffle.MMShuffle0322 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 2, 3), SimdUtils.Shuffle.MMShuffle0323 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 3, 0), SimdUtils.Shuffle.MMShuffle0330 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 3, 1), SimdUtils.Shuffle.MMShuffle0331 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 3, 2), SimdUtils.Shuffle.MMShuffle0332 },
{ SimdUtils.Shuffle.MMShuffle(0, 3, 3, 3), SimdUtils.Shuffle.MMShuffle0333 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 0, 0), SimdUtils.Shuffle.MMShuffle1000 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 0, 1), SimdUtils.Shuffle.MMShuffle1001 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 0, 2), SimdUtils.Shuffle.MMShuffle1002 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 0, 3), SimdUtils.Shuffle.MMShuffle1003 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 1, 0), SimdUtils.Shuffle.MMShuffle1010 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 1, 1), SimdUtils.Shuffle.MMShuffle1011 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 1, 2), SimdUtils.Shuffle.MMShuffle1012 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 1, 3), SimdUtils.Shuffle.MMShuffle1013 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 2, 0), SimdUtils.Shuffle.MMShuffle1020 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 2, 1), SimdUtils.Shuffle.MMShuffle1021 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 2, 2), SimdUtils.Shuffle.MMShuffle1022 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 2, 3), SimdUtils.Shuffle.MMShuffle1023 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 3, 0), SimdUtils.Shuffle.MMShuffle1030 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 3, 1), SimdUtils.Shuffle.MMShuffle1031 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 3, 2), SimdUtils.Shuffle.MMShuffle1032 },
{ SimdUtils.Shuffle.MMShuffle(1, 0, 3, 3), SimdUtils.Shuffle.MMShuffle1033 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 0, 0), SimdUtils.Shuffle.MMShuffle1100 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 0, 1), SimdUtils.Shuffle.MMShuffle1101 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 0, 2), SimdUtils.Shuffle.MMShuffle1102 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 0, 3), SimdUtils.Shuffle.MMShuffle1103 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 1, 0), SimdUtils.Shuffle.MMShuffle1110 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 1, 1), SimdUtils.Shuffle.MMShuffle1111 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 1, 2), SimdUtils.Shuffle.MMShuffle1112 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 1, 3), SimdUtils.Shuffle.MMShuffle1113 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 2, 0), SimdUtils.Shuffle.MMShuffle1120 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 2, 1), SimdUtils.Shuffle.MMShuffle1121 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 2, 2), SimdUtils.Shuffle.MMShuffle1122 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 2, 3), SimdUtils.Shuffle.MMShuffle1123 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 3, 0), SimdUtils.Shuffle.MMShuffle1130 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 3, 1), SimdUtils.Shuffle.MMShuffle1131 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 3, 2), SimdUtils.Shuffle.MMShuffle1132 },
{ SimdUtils.Shuffle.MMShuffle(1, 1, 3, 3), SimdUtils.Shuffle.MMShuffle1133 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 0, 0), SimdUtils.Shuffle.MMShuffle1200 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 0, 1), SimdUtils.Shuffle.MMShuffle1201 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 0, 2), SimdUtils.Shuffle.MMShuffle1202 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 0, 3), SimdUtils.Shuffle.MMShuffle1203 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 1, 0), SimdUtils.Shuffle.MMShuffle1210 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 1, 1), SimdUtils.Shuffle.MMShuffle1211 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 1, 2), SimdUtils.Shuffle.MMShuffle1212 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 1, 3), SimdUtils.Shuffle.MMShuffle1213 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 2, 0), SimdUtils.Shuffle.MMShuffle1220 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 2, 1), SimdUtils.Shuffle.MMShuffle1221 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 2, 2), SimdUtils.Shuffle.MMShuffle1222 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 2, 3), SimdUtils.Shuffle.MMShuffle1223 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 3, 0), SimdUtils.Shuffle.MMShuffle1230 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 3, 1), SimdUtils.Shuffle.MMShuffle1231 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 3, 2), SimdUtils.Shuffle.MMShuffle1232 },
{ SimdUtils.Shuffle.MMShuffle(1, 2, 3, 3), SimdUtils.Shuffle.MMShuffle1233 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 0, 0), SimdUtils.Shuffle.MMShuffle1300 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 0, 1), SimdUtils.Shuffle.MMShuffle1301 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 0, 2), SimdUtils.Shuffle.MMShuffle1302 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 0, 3), SimdUtils.Shuffle.MMShuffle1303 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 1, 0), SimdUtils.Shuffle.MMShuffle1310 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 1, 1), SimdUtils.Shuffle.MMShuffle1311 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 1, 2), SimdUtils.Shuffle.MMShuffle1312 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 1, 3), SimdUtils.Shuffle.MMShuffle1313 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 2, 0), SimdUtils.Shuffle.MMShuffle1320 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 2, 1), SimdUtils.Shuffle.MMShuffle1321 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 2, 2), SimdUtils.Shuffle.MMShuffle1322 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 2, 3), SimdUtils.Shuffle.MMShuffle1323 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 3, 0), SimdUtils.Shuffle.MMShuffle1330 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 3, 1), SimdUtils.Shuffle.MMShuffle1331 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 3, 2), SimdUtils.Shuffle.MMShuffle1332 },
{ SimdUtils.Shuffle.MMShuffle(1, 3, 3, 3), SimdUtils.Shuffle.MMShuffle1333 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 0, 0), SimdUtils.Shuffle.MMShuffle2000 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 0, 1), SimdUtils.Shuffle.MMShuffle2001 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 0, 2), SimdUtils.Shuffle.MMShuffle2002 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 0, 3), SimdUtils.Shuffle.MMShuffle2003 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 1, 0), SimdUtils.Shuffle.MMShuffle2010 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 1, 1), SimdUtils.Shuffle.MMShuffle2011 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 1, 2), SimdUtils.Shuffle.MMShuffle2012 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 1, 3), SimdUtils.Shuffle.MMShuffle2013 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 2, 0), SimdUtils.Shuffle.MMShuffle2020 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 2, 1), SimdUtils.Shuffle.MMShuffle2021 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 2, 2), SimdUtils.Shuffle.MMShuffle2022 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 2, 3), SimdUtils.Shuffle.MMShuffle2023 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 3, 0), SimdUtils.Shuffle.MMShuffle2030 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 3, 1), SimdUtils.Shuffle.MMShuffle2031 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 3, 2), SimdUtils.Shuffle.MMShuffle2032 },
{ SimdUtils.Shuffle.MMShuffle(2, 0, 3, 3), SimdUtils.Shuffle.MMShuffle2033 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 0, 0), SimdUtils.Shuffle.MMShuffle2100 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 0, 1), SimdUtils.Shuffle.MMShuffle2101 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 0, 2), SimdUtils.Shuffle.MMShuffle2102 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 0, 3), SimdUtils.Shuffle.MMShuffle2103 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 1, 0), SimdUtils.Shuffle.MMShuffle2110 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 1, 1), SimdUtils.Shuffle.MMShuffle2111 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 1, 2), SimdUtils.Shuffle.MMShuffle2112 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 1, 3), SimdUtils.Shuffle.MMShuffle2113 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 2, 0), SimdUtils.Shuffle.MMShuffle2120 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 2, 1), SimdUtils.Shuffle.MMShuffle2121 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 2, 2), SimdUtils.Shuffle.MMShuffle2122 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 2, 3), SimdUtils.Shuffle.MMShuffle2123 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 3, 0), SimdUtils.Shuffle.MMShuffle2130 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 3, 1), SimdUtils.Shuffle.MMShuffle2131 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 3, 2), SimdUtils.Shuffle.MMShuffle2132 },
{ SimdUtils.Shuffle.MMShuffle(2, 1, 3, 3), SimdUtils.Shuffle.MMShuffle2133 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 0, 0), SimdUtils.Shuffle.MMShuffle2200 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 0, 1), SimdUtils.Shuffle.MMShuffle2201 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 0, 2), SimdUtils.Shuffle.MMShuffle2202 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 0, 3), SimdUtils.Shuffle.MMShuffle2203 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 1, 0), SimdUtils.Shuffle.MMShuffle2210 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 1, 1), SimdUtils.Shuffle.MMShuffle2211 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 1, 2), SimdUtils.Shuffle.MMShuffle2212 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 1, 3), SimdUtils.Shuffle.MMShuffle2213 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 2, 0), SimdUtils.Shuffle.MMShuffle2220 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 2, 1), SimdUtils.Shuffle.MMShuffle2221 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 2, 2), SimdUtils.Shuffle.MMShuffle2222 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 2, 3), SimdUtils.Shuffle.MMShuffle2223 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 3, 0), SimdUtils.Shuffle.MMShuffle2230 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 3, 1), SimdUtils.Shuffle.MMShuffle2231 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 3, 2), SimdUtils.Shuffle.MMShuffle2232 },
{ SimdUtils.Shuffle.MMShuffle(2, 2, 3, 3), SimdUtils.Shuffle.MMShuffle2233 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 0, 0), SimdUtils.Shuffle.MMShuffle2300 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 0, 1), SimdUtils.Shuffle.MMShuffle2301 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 0, 2), SimdUtils.Shuffle.MMShuffle2302 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 0, 3), SimdUtils.Shuffle.MMShuffle2303 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 1, 0), SimdUtils.Shuffle.MMShuffle2310 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 1, 1), SimdUtils.Shuffle.MMShuffle2311 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 1, 2), SimdUtils.Shuffle.MMShuffle2312 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 1, 3), SimdUtils.Shuffle.MMShuffle2313 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 2, 0), SimdUtils.Shuffle.MMShuffle2320 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 2, 1), SimdUtils.Shuffle.MMShuffle2321 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 2, 2), SimdUtils.Shuffle.MMShuffle2322 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 2, 3), SimdUtils.Shuffle.MMShuffle2323 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 3, 0), SimdUtils.Shuffle.MMShuffle2330 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 3, 1), SimdUtils.Shuffle.MMShuffle2331 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 3, 2), SimdUtils.Shuffle.MMShuffle2332 },
{ SimdUtils.Shuffle.MMShuffle(2, 3, 3, 3), SimdUtils.Shuffle.MMShuffle2333 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 0, 0), SimdUtils.Shuffle.MMShuffle3000 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 0, 1), SimdUtils.Shuffle.MMShuffle3001 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 0, 2), SimdUtils.Shuffle.MMShuffle3002 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 0, 3), SimdUtils.Shuffle.MMShuffle3003 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 1, 0), SimdUtils.Shuffle.MMShuffle3010 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 1, 1), SimdUtils.Shuffle.MMShuffle3011 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 1, 2), SimdUtils.Shuffle.MMShuffle3012 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 1, 3), SimdUtils.Shuffle.MMShuffle3013 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 2, 0), SimdUtils.Shuffle.MMShuffle3020 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 2, 1), SimdUtils.Shuffle.MMShuffle3021 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 2, 2), SimdUtils.Shuffle.MMShuffle3022 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 2, 3), SimdUtils.Shuffle.MMShuffle3023 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 3, 0), SimdUtils.Shuffle.MMShuffle3030 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 3, 1), SimdUtils.Shuffle.MMShuffle3031 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 3, 2), SimdUtils.Shuffle.MMShuffle3032 },
{ SimdUtils.Shuffle.MMShuffle(3, 0, 3, 3), SimdUtils.Shuffle.MMShuffle3033 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 0, 0), SimdUtils.Shuffle.MMShuffle3100 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 0, 1), SimdUtils.Shuffle.MMShuffle3101 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 0, 2), SimdUtils.Shuffle.MMShuffle3102 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 0, 3), SimdUtils.Shuffle.MMShuffle3103 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 1, 0), SimdUtils.Shuffle.MMShuffle3110 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 1, 1), SimdUtils.Shuffle.MMShuffle3111 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 1, 2), SimdUtils.Shuffle.MMShuffle3112 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 1, 3), SimdUtils.Shuffle.MMShuffle3113 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 2, 0), SimdUtils.Shuffle.MMShuffle3120 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 2, 1), SimdUtils.Shuffle.MMShuffle3121 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 2, 2), SimdUtils.Shuffle.MMShuffle3122 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 2, 3), SimdUtils.Shuffle.MMShuffle3123 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 3, 0), SimdUtils.Shuffle.MMShuffle3130 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 3, 1), SimdUtils.Shuffle.MMShuffle3131 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 3, 2), SimdUtils.Shuffle.MMShuffle3132 },
{ SimdUtils.Shuffle.MMShuffle(3, 1, 3, 3), SimdUtils.Shuffle.MMShuffle3133 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 0, 0), SimdUtils.Shuffle.MMShuffle3200 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 0, 1), SimdUtils.Shuffle.MMShuffle3201 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 0, 2), SimdUtils.Shuffle.MMShuffle3202 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 0, 3), SimdUtils.Shuffle.MMShuffle3203 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 1, 0), SimdUtils.Shuffle.MMShuffle3210 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 1, 1), SimdUtils.Shuffle.MMShuffle3211 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 1, 2), SimdUtils.Shuffle.MMShuffle3212 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 1, 3), SimdUtils.Shuffle.MMShuffle3213 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 2, 0), SimdUtils.Shuffle.MMShuffle3220 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 2, 1), SimdUtils.Shuffle.MMShuffle3221 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 2, 2), SimdUtils.Shuffle.MMShuffle3222 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 2, 3), SimdUtils.Shuffle.MMShuffle3223 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 3, 0), SimdUtils.Shuffle.MMShuffle3230 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 3, 1), SimdUtils.Shuffle.MMShuffle3231 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 3, 2), SimdUtils.Shuffle.MMShuffle3232 },
{ SimdUtils.Shuffle.MMShuffle(3, 2, 3, 3), SimdUtils.Shuffle.MMShuffle3233 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 0, 0), SimdUtils.Shuffle.MMShuffle3300 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 0, 1), SimdUtils.Shuffle.MMShuffle3301 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 0, 2), SimdUtils.Shuffle.MMShuffle3302 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 0, 3), SimdUtils.Shuffle.MMShuffle3303 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 1, 0), SimdUtils.Shuffle.MMShuffle3310 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 1, 1), SimdUtils.Shuffle.MMShuffle3311 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 1, 2), SimdUtils.Shuffle.MMShuffle3312 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 1, 3), SimdUtils.Shuffle.MMShuffle3313 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 2, 0), SimdUtils.Shuffle.MMShuffle3320 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 2, 1), SimdUtils.Shuffle.MMShuffle3321 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 2, 2), SimdUtils.Shuffle.MMShuffle3322 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 2, 3), SimdUtils.Shuffle.MMShuffle3323 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 3, 0), SimdUtils.Shuffle.MMShuffle3330 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 3, 1), SimdUtils.Shuffle.MMShuffle3331 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 3, 2), SimdUtils.Shuffle.MMShuffle3332 },
{ SimdUtils.Shuffle.MMShuffle(3, 3, 3, 3), SimdUtils.Shuffle.MMShuffle3333 }
};
[Theory]
[MemberData(nameof(MMShuffleData))]
public void MMShuffleConstantsAreCorrect(byte expected, byte actual)
=> Assert.Equal(expected, actual);
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void BulkShuffleFloat4Channel(int count)
@ -226,7 +491,7 @@ public partial class SimdUtilsTests
float[] expected = new float[count];
SimdUtils.Shuffle.InverseMmShuffle(
SimdUtils.Shuffle.InverseMMShuffle(
control,
out int p3,
out int p2,
@ -257,7 +522,7 @@ public partial class SimdUtilsTests
byte[] expected = new byte[count];
SimdUtils.Shuffle.InverseMmShuffle(
SimdUtils.Shuffle.InverseMMShuffle(
control,
out int p3,
out int p2,
@ -288,7 +553,7 @@ public partial class SimdUtilsTests
byte[] expected = new byte[count];
SimdUtils.Shuffle.InverseMmShuffle(
SimdUtils.Shuffle.InverseMMShuffle(
control,
out int _,
out int p2,
@ -319,7 +584,7 @@ public partial class SimdUtilsTests
byte[] expected = new byte[result.Length];
SimdUtils.Shuffle.InverseMmShuffle(
SimdUtils.Shuffle.InverseMMShuffle(
control,
out int p3,
out int p2,
@ -370,7 +635,7 @@ public partial class SimdUtilsTests
byte[] expected = new byte[result.Length];
SimdUtils.Shuffle.InverseMmShuffle(
SimdUtils.Shuffle.InverseMMShuffle(
control,
out int _,
out int p2,

Loading…
Cancel
Save