From 9e7cf1611b8d5f50080715cf16f16b4773b42462 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 21 Feb 2023 21:41:20 +1000 Subject: [PATCH 1/3] Use MMShuffle consts everywhere and refactor explicit shuffle components to use them --- .../Helpers/Shuffle/IComponentShuffle.cs | 54 ++-- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 17 +- .../Common/Helpers/Shuffle/IShuffle3.cs | 7 +- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 15 +- .../Common/Helpers/SimdUtils.Shuffle.cs | 271 +++++++++++++++++- .../Formats/Webp/Lossless/LosslessUtils.cs | 58 ++-- .../Formats/Webp/Lossy/Vp8Encoding.cs | 9 +- .../Color/Bulk/ShuffleFloat4Channel.cs | 5 +- .../Common/SimdUtilsTests.Shuffle.cs | 50 ++-- 9 files changed, 369 insertions(+), 117 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 45d6e6d13..b93202814 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -5,6 +5,7 @@ using System.Buffers.Binary; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) // and ROTR (Rotate Right) emitting efficient CPU instructions: @@ -18,9 +19,12 @@ namespace SixLabors.ImageSharp; internal interface IComponentShuffle { /// - /// Gets the shuffle control. + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . /// - byte Control { get; } + /// The source span of bytes. + /// The destination span of bytes. + void ShuffleReduce(ref ReadOnlySpan source, ref Span dest); /// /// Shuffle 8-bit integers within 128-bit lanes in @@ -58,11 +62,15 @@ internal readonly struct DefaultShuffle4 : IShuffle4 this.p2 = p2; this.p1 = p1; this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { @@ -86,11 +94,9 @@ internal readonly struct DefaultShuffle4 : IShuffle4 internal readonly struct WXYZShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle2103); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -112,11 +118,9 @@ internal readonly struct WXYZShuffle4 : IShuffle4 internal readonly struct WZYXShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0123); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -138,11 +142,9 @@ internal readonly struct WZYXShuffle4 : IShuffle4 internal readonly struct YZWXShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle0321); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -164,11 +166,9 @@ internal readonly struct YZWXShuffle4 : IShuffle4 internal readonly struct ZYXWShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3012); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) @@ -197,11 +197,9 @@ internal readonly struct ZYXWShuffle4 : IShuffle4 internal readonly struct XWZYShuffle4 : IShuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(1, 2, 3, 0); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle1230); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 76cffd82b..7f99b3a08 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; namespace SixLabors.ImageSharp; @@ -29,11 +30,15 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 this.p2 = p2; this.p1 = p1; this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { @@ -51,7 +56,7 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 for (int i = 0, j = 0; i < source.Length; i += 3, j += 4) { - ref var s = ref Unsafe.Add(ref sBase, i); + ref byte s = ref Unsafe.Add(ref sBase, i); tu = Unsafe.As(ref s) | 0xFF000000; Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0); @@ -64,11 +69,9 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, Shuffle.MMShuffle3210); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index 9bee9d15e..edbd2cd5e 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; namespace SixLabors.ImageSharp; @@ -26,11 +27,15 @@ internal readonly struct DefaultShuffle3 : IShuffle3 this.p2 = p2; this.p1 = p1; this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0); + this.Control = Shuffle.MmShuffle(3, p2, p1, p0); } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle3Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 90b77b568..2179a085b 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using static SixLabors.ImageSharp.SimdUtils; namespace SixLabors.ImageSharp; @@ -27,11 +28,15 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 this.p2 = p2; this.p1 = p1; this.p0 = p0; - this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); } public byte Control { get; } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, this.Control); + [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { @@ -53,11 +58,9 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 { - public byte Control - { - [MethodImpl(InliningOptions.ShortMethod)] - get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); - } + [MethodImpl(InliningOptions.ShortMethod)] + public void ShuffleReduce(ref ReadOnlySpan source, ref Span dest) + => HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, Shuffle.MMShuffle3210); [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index b91dc2fad..4ff5f3d5b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -37,6 +37,7 @@ internal static partial class SimdUtils /// Shuffle 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -49,7 +50,7 @@ internal static partial class SimdUtils { VerifyShuffle4SpanInput(source, dest); - HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -62,6 +63,7 @@ internal static partial class SimdUtils /// Shuffle 8-bit integer triplets within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -75,7 +77,7 @@ internal static partial class SimdUtils // Source length should be smaller than dest length, and divisible by 3. VerifyShuffle3SpanInput(source, dest); - HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -88,6 +90,7 @@ internal static partial class SimdUtils /// Pads then shuffles 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -100,7 +103,7 @@ internal static partial class SimdUtils { VerifyPad3Shuffle4SpanInput(source, dest); - HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -113,6 +116,7 @@ internal static partial class SimdUtils /// Shuffles then slices 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// + /// The type of shuffle struct. /// The source span of bytes. /// The destination span of bytes. /// The type of shuffle to perform. @@ -125,7 +129,7 @@ internal static partial class SimdUtils { VerifyShuffle4Slice3SpanInput(source, dest); - HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control); + shuffle.ShuffleReduce(ref source, ref dest); // Deal with the remainder: if (source.Length > 0) @@ -153,7 +157,7 @@ internal static partial class SimdUtils } [Conditional("DEBUG")] - private static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) + internal static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) where T : struct { DebugGuard.IsTrue( @@ -222,6 +226,263 @@ internal static partial class SimdUtils public static class Shuffle { + public const byte MMShuffle0000 = 0b00000000; + public const byte MMShuffle0001 = 0b00000001; + public const byte MMShuffle0002 = 0b00000010; + public const byte MMShuffle0003 = 0b00000011; + public const byte MMShuffle0010 = 0b00000100; + public const byte MMShuffle0011 = 0b00000101; + public const byte MMShuffle0012 = 0b00000110; + public const byte MMShuffle0013 = 0b00000111; + public const byte MMShuffle0020 = 0b00001000; + public const byte MMShuffle0021 = 0b00001001; + public const byte MMShuffle0022 = 0b00001010; + public const byte MMShuffle0023 = 0b00001011; + public const byte MMShuffle0030 = 0b00001100; + public const byte MMShuffle0031 = 0b00001101; + public const byte MMShuffle0032 = 0b00001110; + public const byte MMShuffle0033 = 0b00001111; + public const byte MMShuffle0100 = 0b00010000; + public const byte MMShuffle0101 = 0b00010001; + public const byte MMShuffle0102 = 0b00010010; + public const byte MMShuffle0103 = 0b00010011; + public const byte MMShuffle0110 = 0b00010100; + public const byte MMShuffle0111 = 0b00010101; + public const byte MMShuffle0112 = 0b00010110; + public const byte MMShuffle0113 = 0b00010111; + public const byte MMShuffle0120 = 0b00011000; + public const byte MMShuffle0121 = 0b00011001; + public const byte MMShuffle0122 = 0b00011010; + public const byte MMShuffle0123 = 0b00011011; + public const byte MMShuffle0130 = 0b00011100; + public const byte MMShuffle0131 = 0b00011101; + public const byte MMShuffle0132 = 0b00011110; + public const byte MMShuffle0133 = 0b00011111; + public const byte MMShuffle0200 = 0b00100000; + public const byte MMShuffle0201 = 0b00100001; + public const byte MMShuffle0202 = 0b00100010; + public const byte MMShuffle0203 = 0b00100011; + public const byte MMShuffle0210 = 0b00100100; + public const byte MMShuffle0211 = 0b00100101; + public const byte MMShuffle0212 = 0b00100110; + public const byte MMShuffle0213 = 0b00100111; + public const byte MMShuffle0220 = 0b00101000; + public const byte MMShuffle0221 = 0b00101001; + public const byte MMShuffle0222 = 0b00101010; + public const byte MMShuffle0223 = 0b00101011; + public const byte MMShuffle0230 = 0b00101100; + public const byte MMShuffle0231 = 0b00101101; + public const byte MMShuffle0232 = 0b00101110; + public const byte MMShuffle0233 = 0b00101111; + public const byte MMShuffle0300 = 0b00110000; + public const byte MMShuffle0301 = 0b00110001; + public const byte MMShuffle0302 = 0b00110010; + public const byte MMShuffle0303 = 0b00110011; + public const byte MMShuffle0310 = 0b00110100; + public const byte MMShuffle0311 = 0b00110101; + public const byte MMShuffle0312 = 0b00110110; + public const byte MMShuffle0313 = 0b00110111; + public const byte MMShuffle0320 = 0b00111000; + public const byte MMShuffle0321 = 0b00111001; + public const byte MMShuffle0322 = 0b00111010; + public const byte MMShuffle0323 = 0b00111011; + public const byte MMShuffle0330 = 0b00111100; + public const byte MMShuffle0331 = 0b00111101; + public const byte MMShuffle0332 = 0b00111110; + public const byte MMShuffle0333 = 0b00111111; + public const byte MMShuffle1000 = 0b01000000; + public const byte MMShuffle1001 = 0b01000001; + public const byte MMShuffle1002 = 0b01000010; + public const byte MMShuffle1003 = 0b01000011; + public const byte MMShuffle1010 = 0b01000100; + public const byte MMShuffle1011 = 0b01000101; + public const byte MMShuffle1012 = 0b01000110; + public const byte MMShuffle1013 = 0b01000111; + public const byte MMShuffle1020 = 0b01001000; + public const byte MMShuffle1021 = 0b01001001; + public const byte MMShuffle1022 = 0b01001010; + public const byte MMShuffle1023 = 0b01001011; + public const byte MMShuffle1030 = 0b01001100; + public const byte MMShuffle1031 = 0b01001101; + public const byte MMShuffle1032 = 0b01001110; + public const byte MMShuffle1033 = 0b01001111; + public const byte MMShuffle1100 = 0b01010000; + public const byte MMShuffle1101 = 0b01010001; + public const byte MMShuffle1102 = 0b01010010; + public const byte MMShuffle1103 = 0b01010011; + public const byte MMShuffle1110 = 0b01010100; + public const byte MMShuffle1111 = 0b01010101; + public const byte MMShuffle1112 = 0b01010110; + public const byte MMShuffle1113 = 0b01010111; + public const byte MMShuffle1120 = 0b01011000; + public const byte MMShuffle1121 = 0b01011001; + public const byte MMShuffle1122 = 0b01011010; + public const byte MMShuffle1123 = 0b01011011; + public const byte MMShuffle1130 = 0b01011100; + public const byte MMShuffle1131 = 0b01011101; + public const byte MMShuffle1132 = 0b01011110; + public const byte MMShuffle1133 = 0b01011111; + public const byte MMShuffle1200 = 0b01100000; + public const byte MMShuffle1201 = 0b01100001; + public const byte MMShuffle1202 = 0b01100010; + public const byte MMShuffle1203 = 0b01100011; + public const byte MMShuffle1210 = 0b01100100; + public const byte MMShuffle1211 = 0b01100101; + public const byte MMShuffle1212 = 0b01100110; + public const byte MMShuffle1213 = 0b01100111; + public const byte MMShuffle1220 = 0b01101000; + public const byte MMShuffle1221 = 0b01101001; + public const byte MMShuffle1222 = 0b01101010; + public const byte MMShuffle1223 = 0b01101011; + public const byte MMShuffle1230 = 0b01101100; + public const byte MMShuffle1231 = 0b01101101; + public const byte MMShuffle1232 = 0b01101110; + public const byte MMShuffle1233 = 0b01101111; + public const byte MMShuffle1300 = 0b01110000; + public const byte MMShuffle1301 = 0b01110001; + public const byte MMShuffle1302 = 0b01110010; + public const byte MMShuffle1303 = 0b01110011; + public const byte MMShuffle1310 = 0b01110100; + public const byte MMShuffle1311 = 0b01110101; + public const byte MMShuffle1312 = 0b01110110; + public const byte MMShuffle1313 = 0b01110111; + public const byte MMShuffle1320 = 0b01111000; + public const byte MMShuffle1321 = 0b01111001; + public const byte MMShuffle1322 = 0b01111010; + public const byte MMShuffle1323 = 0b01111011; + public const byte MMShuffle1330 = 0b01111100; + public const byte MMShuffle1331 = 0b01111101; + public const byte MMShuffle1332 = 0b01111110; + public const byte MMShuffle1333 = 0b01111111; + public const byte MMShuffle2000 = 0b10000000; + public const byte MMShuffle2001 = 0b10000001; + public const byte MMShuffle2002 = 0b10000010; + public const byte MMShuffle2003 = 0b10000011; + public const byte MMShuffle2010 = 0b10000100; + public const byte MMShuffle2011 = 0b10000101; + public const byte MMShuffle2012 = 0b10000110; + public const byte MMShuffle2013 = 0b10000111; + public const byte MMShuffle2020 = 0b10001000; + public const byte MMShuffle2021 = 0b10001001; + public const byte MMShuffle2022 = 0b10001010; + public const byte MMShuffle2023 = 0b10001011; + public const byte MMShuffle2030 = 0b10001100; + public const byte MMShuffle2031 = 0b10001101; + public const byte MMShuffle2032 = 0b10001110; + public const byte MMShuffle2033 = 0b10001111; + public const byte MMShuffle2100 = 0b10010000; + public const byte MMShuffle2101 = 0b10010001; + public const byte MMShuffle2102 = 0b10010010; + public const byte MMShuffle2103 = 0b10010011; + public const byte MMShuffle2110 = 0b10010100; + public const byte MMShuffle2111 = 0b10010101; + public const byte MMShuffle2112 = 0b10010110; + public const byte MMShuffle2113 = 0b10010111; + public const byte MMShuffle2120 = 0b10011000; + public const byte MMShuffle2121 = 0b10011001; + public const byte MMShuffle2122 = 0b10011010; + public const byte MMShuffle2123 = 0b10011011; + public const byte MMShuffle2130 = 0b10011100; + public const byte MMShuffle2131 = 0b10011101; + public const byte MMShuffle2132 = 0b10011110; + public const byte MMShuffle2133 = 0b10011111; + public const byte MMShuffle2200 = 0b10100000; + public const byte MMShuffle2201 = 0b10100001; + public const byte MMShuffle2202 = 0b10100010; + public const byte MMShuffle2203 = 0b10100011; + public const byte MMShuffle2210 = 0b10100100; + public const byte MMShuffle2211 = 0b10100101; + public const byte MMShuffle2212 = 0b10100110; + public const byte MMShuffle2213 = 0b10100111; + public const byte MMShuffle2220 = 0b10101000; + public const byte MMShuffle2221 = 0b10101001; + public const byte MMShuffle2222 = 0b10101010; + public const byte MMShuffle2223 = 0b10101011; + public const byte MMShuffle2230 = 0b10101100; + public const byte MMShuffle2231 = 0b10101101; + public const byte MMShuffle2232 = 0b10101110; + public const byte MMShuffle2233 = 0b10101111; + public const byte MMShuffle2300 = 0b10110000; + public const byte MMShuffle2301 = 0b10110001; + public const byte MMShuffle2302 = 0b10110010; + public const byte MMShuffle2303 = 0b10110011; + public const byte MMShuffle2310 = 0b10110100; + public const byte MMShuffle2311 = 0b10110101; + public const byte MMShuffle2312 = 0b10110110; + public const byte MMShuffle2313 = 0b10110111; + public const byte MMShuffle2320 = 0b10111000; + public const byte MMShuffle2321 = 0b10111001; + public const byte MMShuffle2322 = 0b10111010; + public const byte MMShuffle2323 = 0b10111011; + public const byte MMShuffle2330 = 0b10111100; + public const byte MMShuffle2331 = 0b10111101; + public const byte MMShuffle2332 = 0b10111110; + public const byte MMShuffle2333 = 0b10111111; + public const byte MMShuffle3000 = 0b11000000; + public const byte MMShuffle3001 = 0b11000001; + public const byte MMShuffle3002 = 0b11000010; + public const byte MMShuffle3003 = 0b11000011; + public const byte MMShuffle3010 = 0b11000100; + public const byte MMShuffle3011 = 0b11000101; + public const byte MMShuffle3012 = 0b11000110; + public const byte MMShuffle3013 = 0b11000111; + public const byte MMShuffle3020 = 0b11001000; + public const byte MMShuffle3021 = 0b11001001; + public const byte MMShuffle3022 = 0b11001010; + public const byte MMShuffle3023 = 0b11001011; + public const byte MMShuffle3030 = 0b11001100; + public const byte MMShuffle3031 = 0b11001101; + public const byte MMShuffle3032 = 0b11001110; + public const byte MMShuffle3033 = 0b11001111; + public const byte MMShuffle3100 = 0b11010000; + public const byte MMShuffle3101 = 0b11010001; + public const byte MMShuffle3102 = 0b11010010; + public const byte MMShuffle3103 = 0b11010011; + public const byte MMShuffle3110 = 0b11010100; + public const byte MMShuffle3111 = 0b11010101; + public const byte MMShuffle3112 = 0b11010110; + public const byte MMShuffle3113 = 0b11010111; + public const byte MMShuffle3120 = 0b11011000; + public const byte MMShuffle3121 = 0b11011001; + public const byte MMShuffle3122 = 0b11011010; + public const byte MMShuffle3123 = 0b11011011; + public const byte MMShuffle3130 = 0b11011100; + public const byte MMShuffle3131 = 0b11011101; + public const byte MMShuffle3132 = 0b11011110; + public const byte MMShuffle3133 = 0b11011111; + public const byte MMShuffle3200 = 0b11100000; + public const byte MMShuffle3201 = 0b11100001; + public const byte MMShuffle3202 = 0b11100010; + public const byte MMShuffle3203 = 0b11100011; + public const byte MMShuffle3210 = 0b11100100; + public const byte MMShuffle3211 = 0b11100101; + public const byte MMShuffle3212 = 0b11100110; + public const byte MMShuffle3213 = 0b11100111; + public const byte MMShuffle3220 = 0b11101000; + public const byte MMShuffle3221 = 0b11101001; + public const byte MMShuffle3222 = 0b11101010; + public const byte MMShuffle3223 = 0b11101011; + public const byte MMShuffle3230 = 0b11101100; + public const byte MMShuffle3231 = 0b11101101; + public const byte MMShuffle3232 = 0b11101110; + public const byte MMShuffle3233 = 0b11101111; + public const byte MMShuffle3300 = 0b11110000; + public const byte MMShuffle3301 = 0b11110001; + public const byte MMShuffle3302 = 0b11110010; + public const byte MMShuffle3303 = 0b11110011; + public const byte MMShuffle3310 = 0b11110100; + public const byte MMShuffle3311 = 0b11110101; + public const byte MMShuffle3312 = 0b11110110; + public const byte MMShuffle3313 = 0b11110111; + public const byte MMShuffle3320 = 0b11111000; + public const byte MMShuffle3321 = 0b11111001; + public const byte MMShuffle3322 = 0b11111010; + public const byte MMShuffle3323 = 0b11111011; + public const byte MMShuffle3330 = 0b11111100; + public const byte MMShuffle3331 = 0b11111101; + public const byte MMShuffle3332 = 0b11111110; + public const byte MMShuffle3333 = 0b11111111; + [MethodImpl(InliningOptions.ShortMethod)] public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 5d9c20709..ac92ce6a6 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -96,7 +96,7 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported) { - var addGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + Vector256 addGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 8; i += 8) @@ -115,7 +115,7 @@ internal static unsafe class LosslessUtils } else if (Ssse3.IsSupported) { - var addGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + Vector128 addGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 4; i += 4) @@ -138,13 +138,11 @@ internal static unsafe class LosslessUtils nint i; for (i = 0; i <= numPixels - 4; i += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector128 input = Unsafe.As>(ref pos).AsByte(); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b, SimdUtils.Shuffle.MMShuffle2200); // 0g0g Vector128 output = Sse2.Add(input.AsByte(), c.AsByte()); Unsafe.As>(ref pos) = output.AsUInt32(); } @@ -178,7 +176,7 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported) { - var subtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + Vector256 subtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 8; i += 8) @@ -197,7 +195,7 @@ internal static unsafe class LosslessUtils } else if (Ssse3.IsSupported) { - var subtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + Vector128 subtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; nint i; for (i = 0; i <= numPixels - 4; i += 4) @@ -220,13 +218,11 @@ internal static unsafe class LosslessUtils nint i; for (i = 0; i <= numPixels - 4; i += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector128 input = Unsafe.As>(ref pos).AsByte(); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b, SimdUtils.Shuffle.MMShuffle2200); // 0g0g Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte()); Unsafe.As>(ref pos) = output.AsUInt32(); } @@ -334,7 +330,7 @@ internal static unsafe class LosslessUtils while (y < yEnd) { int predRowIdx = predRowIdxStart; - var m = default(Vp8LMultipliers); + Vp8LMultipliers m = default; int srcSafeEnd = pixelPos + safeWidth; int srcEnd = pixelPos + width; while (pixelPos < srcSafeEnd) @@ -371,21 +367,19 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported && numPixels >= 8) { - var transformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var transformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + Vector256 transformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector256 transformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= numPixels - 8; idx += 8) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector256 input = Unsafe.As>(ref pos); Vector256 a = Avx2.And(input.AsByte(), transformColorAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector256 e = Avx2.ShiftLeftLogical(input.AsInt16(), 8); Vector256 f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); @@ -403,20 +397,18 @@ internal static unsafe class LosslessUtils } else if (Sse2.IsSupported) { - var transformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var transformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + Vector128 transformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector128 transformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= numPixels - 4; idx += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector128 input = Unsafe.As>(ref pos); Vector128 a = Sse2.And(input.AsByte(), transformColorAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); @@ -465,19 +457,17 @@ internal static unsafe class LosslessUtils { if (Avx2.IsSupported && pixelData.Length >= 8) { - var transformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector256 transformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= pixelData.Length - 8; idx += 8) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector256 input = Unsafe.As>(ref pos); Vector256 a = Avx2.And(input.AsByte(), transformColorInverseAlphaGreenMask256); - Vector256 b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector256 e = Avx2.Add(input.AsByte(), d.AsByte()); Vector256 f = Avx2.ShiftLeftLogical(e.AsInt16(), 8); @@ -496,20 +486,18 @@ internal static unsafe class LosslessUtils } else if (Sse2.IsSupported) { - var transformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + Vector128 transformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); nint idx; for (idx = 0; idx <= pixelData.Length - 4; idx += 4) { - const byte mmShuffle_2200 = 0b_10_10_00_00; - ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); Vector128 input = Unsafe.As>(ref pos); Vector128 a = Sse2.And(input.AsByte(), transformColorInverseAlphaGreenMask); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.Add(input.AsByte(), d.AsByte()); Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs index cc263657f..82f00e876 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs @@ -521,9 +521,8 @@ internal static unsafe class Vp8Encoding { // *in01 = 00 01 10 11 02 03 12 13 // *in23 = 20 21 30 31 22 23 32 33 - const byte mmShuffle_2301 = 0b_10_11_00_01; - Vector128 shuf01_p = Sse2.ShuffleHigh(row01, mmShuffle_2301); - Vector128 shuf32_p = Sse2.ShuffleHigh(row23, mmShuffle_2301); + Vector128 shuf01_p = Sse2.ShuffleHigh(row01, SimdUtils.Shuffle.MMShuffle2301); + Vector128 shuf32_p = Sse2.ShuffleHigh(row23, SimdUtils.Shuffle.MMShuffle2301); // 00 01 10 11 03 02 13 12 // 20 21 30 31 23 22 33 32 @@ -555,9 +554,7 @@ internal static unsafe class Vp8Encoding Vector128 shi = Sse2.UnpackHigh(s03, s12); // 2 3 2 3 2 3 Vector128 v23 = Sse2.UnpackHigh(slo.AsInt32(), shi.AsInt32()); out01 = Sse2.UnpackLow(slo.AsInt32(), shi.AsInt32()); - - const byte mmShuffle_1032 = 0b_01_00_11_10; - out32 = Sse2.Shuffle(v23, mmShuffle_1032); + out32 = Sse2.Shuffle(v23, SimdUtils.Shuffle.MMShuffle1032); } public static void FTransformPass2SSE2(Vector128 v01, Vector128 v32, Span output) diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index bdeb88082..ec23364ca 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -9,7 +9,6 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class ShuffleFloat4Channel { - private static readonly byte Control = default(WXYZShuffle4).Control; private float[] source; private float[] destination; @@ -25,9 +24,7 @@ public class ShuffleFloat4Channel [Benchmark] public void Shuffle4Channel() - { - SimdUtils.Shuffle4(this.source, this.destination, Control); - } + => SimdUtils.Shuffle4(this.source, this.destination, SimdUtils.Shuffle.MMShuffle2103); } // 2020-10-29 diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 9727731b6..2c4989f38 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -16,7 +16,7 @@ public partial class SimdUtilsTests // No need to test multiple shuffle controls as the // pipeline is always the same. int size = FeatureTestRunner.Deserialize(serialized); - byte control = default(WZYXShuffle4).Control; + const byte control = SimdUtils.Shuffle.MMShuffle0123; TestShuffleFloat4Channel( size, @@ -45,39 +45,39 @@ public partial class SimdUtilsTests TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), - wxyz.Control); + SimdUtils.Shuffle.MMShuffle2103); WZYXShuffle4 wzyx = default; TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), - wzyx.Control); + SimdUtils.Shuffle.MMShuffle0123); YZWXShuffle4 yzwx = default; TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), - yzwx.Control); + SimdUtils.Shuffle.MMShuffle0321); ZYXWShuffle4 zyxw = default; TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), - zyxw.Control); + SimdUtils.Shuffle.MMShuffle3012); - var xwyz = new DefaultShuffle4(2, 1, 3, 0); + DefaultShuffle4 xwyz = new(2, 1, 3, 0); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - var yyyy = new DefaultShuffle4(1, 1, 1, 1); + DefaultShuffle4 yyyy = new(1, 1, 1, 1); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - var wwww = new DefaultShuffle4(3, 3, 3, 3); + DefaultShuffle4 wwww = new(3, 3, 3, 3); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), @@ -101,25 +101,25 @@ public partial class SimdUtilsTests // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass // IShuffle3 to the generic utils method. - var zyx = new DefaultShuffle3(0, 1, 2); + DefaultShuffle3 zyx = new(0, 1, 2); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), zyx.Control); - var xyz = new DefaultShuffle3(2, 1, 0); + DefaultShuffle3 xyz = new(2, 1, 0); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), xyz.Control); - var yyy = new DefaultShuffle3(1, 1, 1); + DefaultShuffle3 yyy = new(1, 1, 1); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), yyy.Control); - var zzz = new DefaultShuffle3(2, 2, 2); + DefaultShuffle3 zzz = new(2, 2, 2); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), @@ -147,21 +147,21 @@ public partial class SimdUtilsTests TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), - xyzw.Control); + SimdUtils.Shuffle.MMShuffle3210); - var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0); + DefaultPad3Shuffle4 xwyz = new(2, 1, 3, 0); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1); + DefaultPad3Shuffle4 yyyy = new(1, 1, 1, 1); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3); + DefaultPad3Shuffle4 wwww = new(3, 3, 3, 3); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), @@ -189,21 +189,21 @@ public partial class SimdUtilsTests TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), - xyzw.Control); + SimdUtils.Shuffle.MMShuffle3210); - var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0); + DefaultShuffle4Slice3 xwyz = new(2, 1, 3, 0); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), xwyz.Control); - var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1); + DefaultShuffle4Slice3 yyyy = new(1, 1, 1, 1); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), yyyy.Control); - var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3); + DefaultShuffle4Slice3 wwww = new(3, 3, 3, 3); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), @@ -222,7 +222,7 @@ public partial class SimdUtilsTests byte control) { float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256); - var result = new float[count]; + float[] result = new float[count]; float[] expected = new float[count]; @@ -253,7 +253,7 @@ public partial class SimdUtilsTests { byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count]; + byte[] result = new byte[count]; byte[] expected = new byte[count]; @@ -284,7 +284,7 @@ public partial class SimdUtilsTests { byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count]; + byte[] result = new byte[count]; byte[] expected = new byte[count]; @@ -315,7 +315,7 @@ public partial class SimdUtilsTests byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count * 4 / 3]; + byte[] result = new byte[count * 4 / 3]; byte[] expected = new byte[result.Length]; @@ -366,7 +366,7 @@ public partial class SimdUtilsTests byte[] source = new byte[count]; new Random(count).NextBytes(source); - var result = new byte[count * 3 / 4]; + byte[] result = new byte[count * 3 / 4]; byte[] expected = new byte[result.Length]; From b9e4810418c47012aaed2dfa4c740b252c4c2585 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Tue, 21 Feb 2023 22:00:10 +1000 Subject: [PATCH 2/3] Refactor DefaultShuffleX to use constants. --- .../Helpers/Shuffle/IComponentShuffle.cs | 24 +---- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 24 +---- .../Common/Helpers/Shuffle/IShuffle3.cs | 20 +---- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 21 +---- .../PixelFormats/Utils/PixelConverter.cs | 88 ++++++++++++++++--- .../Common/SimdUtilsTests.Shuffle.cs | 26 +++--- 6 files changed, 103 insertions(+), 100 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index b93202814..6ce04ef21 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -46,23 +46,10 @@ internal interface IShuffle4 : IComponentShuffle internal readonly struct DefaultShuffle4 : IShuffle4 { - private readonly byte p3; - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) + public DefaultShuffle4(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); - - this.p3 = p3; - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } @@ -77,10 +64,7 @@ internal readonly struct DefaultShuffle4 : IShuffle4 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p3 = this.p3; - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 7f99b3a08..69a688e59 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -14,23 +14,10 @@ internal interface IPad3Shuffle4 : IComponentShuffle internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 { - private readonly byte p3; - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) + public DefaultPad3Shuffle4(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); - - this.p3 = p3; - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } @@ -45,10 +32,7 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p3 = this.p3; - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); Span temp = stackalloc byte[4]; ref byte t = ref MemoryMarshal.GetReference(temp); diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index edbd2cd5e..4e1c2a4e4 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -14,20 +14,10 @@ internal interface IShuffle3 : IComponentShuffle internal readonly struct DefaultShuffle3 : IShuffle3 { - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultShuffle3(byte p2, byte p1, byte p0) + public DefaultShuffle3(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); - - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = Shuffle.MmShuffle(3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } @@ -42,9 +32,7 @@ internal readonly struct DefaultShuffle3 : IShuffle3 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 3) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index 2179a085b..ec0c4fcf2 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -14,21 +14,10 @@ internal interface IShuffle4Slice3 : IComponentShuffle internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 { - private readonly byte p2; - private readonly byte p1; - private readonly byte p0; - - public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) + public DefaultShuffle4Slice3(byte control) { - DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); - DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); - DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); - DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); - - this.p2 = p2; - this.p1 = p1; - this.p0 = p0; - this.Control = Shuffle.MmShuffle(p3, p2, p1, p0); + DebugGuard.MustBeBetweenOrEqualTo(control, 0, 3, nameof(control)); + this.Control = control; } public byte Control { get; } @@ -43,9 +32,7 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - int p2 = this.p2; - int p1 = this.p1; - int p0 = this.p0; + Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) { diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index 3ae5a3b5e..50a68906e 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -29,6 +29,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -38,6 +40,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -47,6 +51,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -56,6 +62,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4Slice3(source, dest, default); @@ -65,9 +73,11 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle3012)); } /// @@ -82,6 +92,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -91,6 +103,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -100,6 +114,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -109,18 +125,22 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0321)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0123)); } /// @@ -135,6 +155,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -144,6 +166,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -153,6 +177,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -162,15 +188,19 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle3012)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4Slice3(source, dest, default); @@ -188,6 +218,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -197,6 +229,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -206,6 +240,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Shuffle4(source, dest, default); @@ -215,18 +251,22 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0123)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(SimdUtils.Shuffle.MMShuffle0321)); } /// @@ -241,6 +281,8 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, default); @@ -250,36 +292,44 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle2103)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle3012)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle0123)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgr24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(SimdUtils.Shuffle.MMShuffle3012)); } /// @@ -294,24 +344,30 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle0123)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle3012)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) => SimdUtils.Pad3Shuffle4(source, dest, default); @@ -321,17 +377,21 @@ internal static class PixelConverter /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToAbgr32(ReadOnlySpan source, Span dest) - => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(SimdUtils.Shuffle.MMShuffle2103)); /// /// Converts a representing a collection of /// pixels to a representing /// a collection of pixels. /// + /// The source span of bytes. + /// The destination span of bytes. [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgb24(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(SimdUtils.Shuffle.MMShuffle3012)); } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 2c4989f38..5684c20e8 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -65,19 +65,19 @@ public partial class SimdUtilsTests (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), SimdUtils.Shuffle.MMShuffle3012); - DefaultShuffle4 xwyz = new(2, 1, 3, 0); + DefaultShuffle4 xwyz = new(SimdUtils.Shuffle.MMShuffle2130); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - DefaultShuffle4 yyyy = new(1, 1, 1, 1); + DefaultShuffle4 yyyy = new(SimdUtils.Shuffle.MMShuffle1111); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - DefaultShuffle4 wwww = new(3, 3, 3, 3); + DefaultShuffle4 wwww = new(SimdUtils.Shuffle.MMShuffle3333); TestShuffleByte4Channel( size, (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), @@ -101,25 +101,25 @@ public partial class SimdUtilsTests // These cannot be expressed as a theory as you cannot // use RemoteExecutor within generic methods nor pass // IShuffle3 to the generic utils method. - DefaultShuffle3 zyx = new(0, 1, 2); + DefaultShuffle3 zyx = new(SimdUtils.Shuffle.MMShuffle3012); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), zyx.Control); - DefaultShuffle3 xyz = new(2, 1, 0); + DefaultShuffle3 xyz = new(SimdUtils.Shuffle.MMShuffle3210); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), xyz.Control); - DefaultShuffle3 yyy = new(1, 1, 1); + DefaultShuffle3 yyy = new(SimdUtils.Shuffle.MMShuffle3111); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), yyy.Control); - DefaultShuffle3 zzz = new(2, 2, 2); + DefaultShuffle3 zzz = new(SimdUtils.Shuffle.MMShuffle3222); TestShuffleByte3Channel( size, (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), @@ -149,19 +149,19 @@ public partial class SimdUtilsTests (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), SimdUtils.Shuffle.MMShuffle3210); - DefaultPad3Shuffle4 xwyz = new(2, 1, 3, 0); + DefaultPad3Shuffle4 xwyz = new(SimdUtils.Shuffle.MMShuffle2130); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), xwyz.Control); - DefaultPad3Shuffle4 yyyy = new(1, 1, 1, 1); + DefaultPad3Shuffle4 yyyy = new(SimdUtils.Shuffle.MMShuffle1111); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), yyyy.Control); - DefaultPad3Shuffle4 wwww = new(3, 3, 3, 3); + DefaultPad3Shuffle4 wwww = new(SimdUtils.Shuffle.MMShuffle3333); TestPad3Shuffle4Channel( size, (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), @@ -191,19 +191,19 @@ public partial class SimdUtilsTests (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), SimdUtils.Shuffle.MMShuffle3210); - DefaultShuffle4Slice3 xwyz = new(2, 1, 3, 0); + DefaultShuffle4Slice3 xwyz = new(SimdUtils.Shuffle.MMShuffle2130); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), xwyz.Control); - DefaultShuffle4Slice3 yyyy = new(1, 1, 1, 1); + DefaultShuffle4Slice3 yyyy = new(SimdUtils.Shuffle.MMShuffle1111); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), yyyy.Control); - DefaultShuffle4Slice3 wwww = new(3, 3, 3, 3); + DefaultShuffle4Slice3 wwww = new(SimdUtils.Shuffle.MMShuffle3333); TestShuffle4Slice3Channel( size, (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), From eda0869f094c77cb61a8e4283c35f4ea3289bb47 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 22 Feb 2023 00:00:21 +1000 Subject: [PATCH 3/3] Update benchmarks and add more tests --- .../Helpers/Shuffle/IComponentShuffle.cs | 2 +- .../Common/Helpers/Shuffle/IPad3Shuffle4.cs | 2 +- .../Common/Helpers/Shuffle/IShuffle3.cs | 2 +- .../Common/Helpers/Shuffle/IShuffle4Slice3.cs | 2 +- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 10 +- .../Common/Helpers/SimdUtils.Shuffle.cs | 10 +- .../Color/Bulk/Pad3Shuffle4Channel.cs | 58 +++- .../Color/Bulk/Shuffle3Channel.cs | 39 ++- .../Color/Bulk/Shuffle4Slice3Channel.cs | 68 ++++- .../Color/Bulk/ShuffleByte4Channel.cs | 39 ++- .../Color/Bulk/ShuffleFloat4Channel.cs | 35 +++ .../Common/SimdUtilsTests.Shuffle.cs | 275 +++++++++++++++++- 12 files changed, 500 insertions(+), 42 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index 6ce04ef21..345f4b5c2 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -64,7 +64,7 @@ internal readonly struct DefaultShuffle4 : IShuffle4 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs index 69a688e59..348e8ec01 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -32,7 +32,7 @@ internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(this.Control, out int p3, out int p2, out int p1, out int p0); Span temp = stackalloc byte[4]; ref byte t = ref MemoryMarshal.GetReference(temp); diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs index 4e1c2a4e4..6ac8a2428 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -32,7 +32,7 @@ internal readonly struct DefaultShuffle3 : IShuffle3 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 3) { diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs index ec0c4fcf2..d4a5e8130 100644 --- a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -32,7 +32,7 @@ internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(this.Control, out _, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(this.Control, out _, out int p2, out int p1, out int p0); for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) { diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 7d2bab259..3841b64b4 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -297,7 +297,7 @@ internal static partial class SimdUtils // shuffle controls to add to the library. // We can add static ROS instances if need be in the future. Span bytes = stackalloc byte[Vector256.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector256 sourceBase = @@ -333,7 +333,7 @@ internal static partial class SimdUtils { // Ssse3 Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -382,7 +382,7 @@ internal static partial class SimdUtils Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -445,7 +445,7 @@ internal static partial class SimdUtils Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = @@ -489,7 +489,7 @@ internal static partial class SimdUtils Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); Span bytes = stackalloc byte[Vector128.Count]; - Shuffle.MmShuffleSpan(ref bytes, control); + Shuffle.MMShuffleSpan(ref bytes, control); Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index 4ff5f3d5b..aecdaa639 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -145,7 +145,7 @@ internal static partial class SimdUtils { ref float sBase = ref MemoryMarshal.GetReference(source); ref float dBase = ref MemoryMarshal.GetReference(dest); - Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + Shuffle.InverseMMShuffle(control, out int p3, out int p2, out int p1, out int p0); for (int i = 0; i < source.Length; i += 4) { @@ -484,13 +484,13 @@ internal static partial class SimdUtils public const byte MMShuffle3333 = 0b11111111; [MethodImpl(InliningOptions.ShortMethod)] - public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) + public static byte MMShuffle(byte p3, byte p2, byte p1, byte p0) => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); [MethodImpl(InliningOptions.ShortMethod)] - public static void MmShuffleSpan(ref Span span, byte control) + public static void MMShuffleSpan(ref Span span, byte control) { - InverseMmShuffle( + InverseMMShuffle( control, out int p3, out int p2, @@ -509,7 +509,7 @@ internal static partial class SimdUtils } [MethodImpl(InliningOptions.ShortMethod)] - public static void InverseMmShuffle( + public static void InverseMMShuffle( byte control, out int p3, out int p2, diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs index df308cdd4..73dcf55a1 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -8,8 +8,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Pad3Shuffle4Channel { - private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2); - private static readonly XYZWPad3Shuffle4 ControlFast = default; + private static readonly DefaultPad3Shuffle4 Control = new(SimdUtils.Shuffle.MMShuffle1032); private byte[] source; private byte[] destination; @@ -26,15 +25,11 @@ public class Pad3Shuffle4Channel [Benchmark] public void Pad3Shuffle4() - { - SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); - } + => SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); [Benchmark] public void Pad3Shuffle4FastFallback() - { - SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast); - } + => SimdUtils.Pad3Shuffle4(this.source, this.destination, default(XYZWPad3Shuffle4)); } // 2020-10-30 @@ -83,3 +78,50 @@ public class Pad3Shuffle4Channel // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - | // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT + +// Runtime=.NET 6.0 + +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 57.45 ns | 0.126 ns | 0.118 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.70 ns | 0.105 ns | 0.098 ns | 0.26 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 96 | 14.63 ns | 0.070 ns | 0.062 ns | 0.25 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 12.08 ns | 0.028 ns | 0.025 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 96 | 14.04 ns | 0.050 ns | 0.044 ns | 1.16 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 96 | 13.90 ns | 0.086 ns | 0.080 ns | 1.15 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 202.67 ns | 2.010 ns | 1.678 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.54 ns | 0.060 ns | 0.053 ns | 0.13 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 384 | 25.72 ns | 0.139 ns | 0.130 ns | 0.13 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 60.35 ns | 0.080 ns | 0.071 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 384 | 25.18 ns | 0.388 ns | 0.324 ns | 0.42 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 384 | 26.21 ns | 0.067 ns | 0.059 ns | 0.43 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 393.88 ns | 1.353 ns | 1.199 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 39.44 ns | 0.230 ns | 0.204 ns | 0.10 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 768 | 39.51 ns | 0.108 ns | 0.101 ns | 0.10 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 112.02 ns | 0.140 ns | 0.131 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 768 | 38.60 ns | 0.091 ns | 0.080 ns | 0.34 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 768 | 38.18 ns | 0.100 ns | 0.084 ns | 0.34 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 777.95 ns | 1.719 ns | 1.342 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 73.11 ns | 0.090 ns | 0.075 ns | 0.09 | - | - | - | - | +// | Pad3Shuffle4 | 3. AVX | Empty | 1536 | 73.41 ns | 0.125 ns | 0.117 ns | 0.09 | - | - | - | - | +// | | | | | | | | | | | | | +// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 218.14 ns | 0.377 ns | 0.334 ns | 1.00 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 72.55 ns | 1.418 ns | 1.184 ns | 0.33 | - | - | - | - | +// | Pad3Shuffle4FastFallback | 3. AVX | Empty | 1536 | 73.15 ns | 0.330 ns | 0.292 ns | 0.34 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs index 7f6f5901f..9bd85f474 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs @@ -1,14 +1,16 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; +using Iced.Intel; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Shuffle3Channel { - private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2); + private static readonly DefaultShuffle3 Control = new(SimdUtils.Shuffle.MMShuffle3102); private byte[] source; private byte[] destination; @@ -25,9 +27,7 @@ public class Shuffle3Channel [Benchmark] public void Shuffle3() - { - SimdUtils.Shuffle3(this.source, this.destination, Control); - } + => SimdUtils.Shuffle3(this.source, this.destination, Control); } // 2020-11-02 @@ -60,3 +60,34 @@ public class Shuffle3Channel // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - | // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT + +// Runtime=.NET 6.0 + +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |--------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 44.55 ns | 0.564 ns | 0.528 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 96 | 15.46 ns | 0.064 ns | 0.060 ns | 0.35 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 96 | 15.18 ns | 0.056 ns | 0.053 ns | 0.34 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 155.68 ns | 0.539 ns | 0.504 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 384 | 30.04 ns | 0.100 ns | 0.089 ns | 0.19 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 384 | 29.70 ns | 0.061 ns | 0.054 ns | 0.19 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 302.76 ns | 1.023 ns | 0.957 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 768 | 50.24 ns | 0.098 ns | 0.092 ns | 0.17 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 768 | 49.28 ns | 0.156 ns | 0.131 ns | 0.16 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 596.53 ns | 2.675 ns | 2.503 ns | 1.00 | - | - | - | - | +// | Shuffle3 | 2. SSE | COMPlus_EnableAVX=0 | 1536 | 94.09 ns | 0.312 ns | 0.260 ns | 0.16 | - | - | - | - | +// | Shuffle3 | 3. AVX | Empty | 1536 | 93.57 ns | 0.196 ns | 0.183 ns | 0.16 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs index 0a0afb705..f0890221c 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -1,15 +1,16 @@ // Copyright (c) Six Labors. // Licensed under the Six Labors Split License. +using System.Runtime.InteropServices; using BenchmarkDotNet.Attributes; +using Iced.Intel; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk; [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class Shuffle4Slice3Channel { - private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2); - private static readonly XYZWShuffle4Slice3 ControlFast = default; + private static readonly DefaultShuffle4Slice3 Control = new(SimdUtils.Shuffle.MMShuffle1032); private byte[] source; private byte[] destination; @@ -26,15 +27,11 @@ public class Shuffle4Slice3Channel [Benchmark] public void Shuffle4Slice3() - { - SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); - } + => SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); [Benchmark] public void Shuffle4Slice3FastFallback() - { - SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast); - } + => SimdUtils.Shuffle4Slice3(this.source, this.destination, default(XYZWShuffle4Slice3)); } // 2020-10-29 @@ -91,3 +88,58 @@ public class Shuffle4Slice3Channel // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - | // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// +// Runtime=.NET 6.0 +// +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:| +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 45.59 ns | 0.166 ns | 0.147 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 128 | 15.62 ns | 0.056 ns | 0.052 ns | 0.34 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 128 | 16.37 ns | 0.047 ns | 0.040 ns | 0.36 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 13.23 ns | 0.028 ns | 0.026 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 128 | 14.41 ns | 0.013 ns | 0.012 ns | 1.09 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 128 | 14.70 ns | 0.050 ns | 0.047 ns | 1.11 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 85.48 ns | 0.192 ns | 0.179 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.18 ns | 0.230 ns | 0.204 ns | 0.22 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 256 | 18.66 ns | 0.017 ns | 0.015 ns | 0.22 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 24.34 ns | 0.078 ns | 0.073 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 256 | 18.58 ns | 0.061 ns | 0.057 ns | 0.76 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 256 | 19.23 ns | 0.018 ns | 0.016 ns | 0.79 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 165.31 ns | 0.742 ns | 0.694 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 512 | 28.10 ns | 0.077 ns | 0.068 ns | 0.17 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 512 | 28.99 ns | 0.018 ns | 0.014 ns | 0.18 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 53.45 ns | 0.270 ns | 0.226 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 512 | 27.50 ns | 0.034 ns | 0.028 ns | 0.51 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 512 | 28.76 ns | 0.017 ns | 0.015 ns | 0.54 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 323.87 ns | 0.549 ns | 0.487 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.81 ns | 0.056 ns | 0.050 ns | 0.13 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 1024 | 39.95 ns | 0.075 ns | 0.067 ns | 0.12 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 101.37 ns | 0.080 ns | 0.067 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 40.72 ns | 0.049 ns | 0.041 ns | 0.40 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 1024 | 39.78 ns | 0.029 ns | 0.027 ns | 0.39 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 642.95 ns | 2.067 ns | 1.933 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3 | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 73.19 ns | 0.082 ns | 0.077 ns | 0.11 | - | - | - | - | +// | Shuffle4Slice3 | 3. AVX | Empty | 2048 | 69.83 ns | 0.319 ns | 0.267 ns | 0.11 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 196.85 ns | 0.238 ns | 0.211 ns | 1.00 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 72.89 ns | 0.117 ns | 0.098 ns | 0.37 | - | - | - | - | +// | Shuffle4Slice3FastFallback | 3. AVX | Empty | 2048 | 69.59 ns | 0.073 ns | 0.061 ns | 0.35 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index 6323e1c55..8e740743b 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -24,9 +24,7 @@ public class ShuffleByte4Channel [Benchmark] public void Shuffle4Channel() - { - SimdUtils.Shuffle4(this.source, this.destination, default); - } + => SimdUtils.Shuffle4(this.source, this.destination, default); } // 2020-10-29 @@ -63,3 +61,38 @@ public class ShuffleByte4Channel // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// +// Runtime=.NET 6.0 +// +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 10.76 ns | 0.033 ns | 0.029 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.39 ns | 0.045 ns | 0.040 ns | 1.06 | 0.01 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 128 | 14.05 ns | 0.029 ns | 0.024 ns | 1.31 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 32.09 ns | 0.655 ns | 1.000 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 14.03 ns | 0.047 ns | 0.041 ns | 0.44 | 0.02 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 256 | 15.18 ns | 0.052 ns | 0.043 ns | 0.48 | 0.03 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 59.26 ns | 0.084 ns | 0.070 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 18.80 ns | 0.036 ns | 0.034 ns | 0.32 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 512 | 17.69 ns | 0.038 ns | 0.034 ns | 0.30 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 112.48 ns | 0.285 ns | 0.253 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 31.57 ns | 0.041 ns | 0.036 ns | 0.28 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 1024 | 28.41 ns | 0.068 ns | 0.064 ns | 0.25 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 218.59 ns | 0.303 ns | 0.283 ns | 1.00 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 53.04 ns | 0.106 ns | 0.099 ns | 0.24 | 0.00 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 2048 | 34.74 ns | 0.061 ns | 0.054 ns | 0.16 | 0.00 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index ec23364ca..1c338e1a6 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -61,3 +61,38 @@ public class ShuffleFloat4Channel // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - | // | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - | // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - | + +// 2023-02-21 +// ########## +// +// BenchmarkDotNet=v0.13.0, OS=Windows 10.0.22621 +// 11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores +// .NET SDK= 7.0.103 +// [Host] : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 1. No HwIntrinsics : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 2. SSE : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// 3. AVX : .NET 6.0.14 (6.0.1423.7309), X64 RyuJIT +// +// Runtime=.NET 6.0 +// +// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:| +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 57.819 ns | 0.2360 ns | 0.1970 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 128 | 11.564 ns | 0.0234 ns | 0.0195 ns | 0.20 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 128 | 7.770 ns | 0.0696 ns | 0.0617 ns | 0.13 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 105.282 ns | 0.2713 ns | 0.2405 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 256 | 19.867 ns | 0.0393 ns | 0.0348 ns | 0.19 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 256 | 17.586 ns | 0.0582 ns | 0.0544 ns | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 200.799 ns | 0.5678 ns | 0.5033 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 512 | 41.137 ns | 0.1524 ns | 0.1351 ns | 0.20 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 512 | 24.040 ns | 0.0445 ns | 0.0395 ns | 0.12 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 401.046 ns | 0.5865 ns | 0.5199 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 1024 | 94.904 ns | 0.4633 ns | 0.4334 ns | 0.24 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 1024 | 68.456 ns | 0.1192 ns | 0.0996 ns | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | +// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 772.297 ns | 0.6270 ns | 0.5558 ns | 1.00 | - | - | - | - | +// | Shuffle4Channel | 2. SSE | COMPlus_EnableAVX=0 | 2048 | 184.561 ns | 0.4319 ns | 0.4040 ns | 0.24 | - | - | - | - | +// | Shuffle4Channel | 3. AVX | Empty | 2048 | 133.634 ns | 1.7864 ns | 1.8345 ns | 0.17 | - | - | - | - | diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 5684c20e8..960b0613e 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -7,6 +7,271 @@ namespace SixLabors.ImageSharp.Tests.Common; public partial class SimdUtilsTests { + public static readonly TheoryData MMShuffleData = new() + { + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 0), SimdUtils.Shuffle.MMShuffle0000 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 1), SimdUtils.Shuffle.MMShuffle0001 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 2), SimdUtils.Shuffle.MMShuffle0002 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 0, 3), SimdUtils.Shuffle.MMShuffle0003 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 0), SimdUtils.Shuffle.MMShuffle0010 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 1), SimdUtils.Shuffle.MMShuffle0011 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 2), SimdUtils.Shuffle.MMShuffle0012 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 1, 3), SimdUtils.Shuffle.MMShuffle0013 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 0), SimdUtils.Shuffle.MMShuffle0020 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 1), SimdUtils.Shuffle.MMShuffle0021 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 2), SimdUtils.Shuffle.MMShuffle0022 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 2, 3), SimdUtils.Shuffle.MMShuffle0023 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 0), SimdUtils.Shuffle.MMShuffle0030 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 1), SimdUtils.Shuffle.MMShuffle0031 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 2), SimdUtils.Shuffle.MMShuffle0032 }, + { SimdUtils.Shuffle.MMShuffle(0, 0, 3, 3), SimdUtils.Shuffle.MMShuffle0033 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 0), SimdUtils.Shuffle.MMShuffle0100 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 1), SimdUtils.Shuffle.MMShuffle0101 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 2), SimdUtils.Shuffle.MMShuffle0102 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 0, 3), SimdUtils.Shuffle.MMShuffle0103 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 0), SimdUtils.Shuffle.MMShuffle0110 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 1), SimdUtils.Shuffle.MMShuffle0111 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 2), SimdUtils.Shuffle.MMShuffle0112 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 1, 3), SimdUtils.Shuffle.MMShuffle0113 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 0), SimdUtils.Shuffle.MMShuffle0120 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 1), SimdUtils.Shuffle.MMShuffle0121 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 2), SimdUtils.Shuffle.MMShuffle0122 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 2, 3), SimdUtils.Shuffle.MMShuffle0123 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 0), SimdUtils.Shuffle.MMShuffle0130 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 1), SimdUtils.Shuffle.MMShuffle0131 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 2), SimdUtils.Shuffle.MMShuffle0132 }, + { SimdUtils.Shuffle.MMShuffle(0, 1, 3, 3), SimdUtils.Shuffle.MMShuffle0133 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 0), SimdUtils.Shuffle.MMShuffle0200 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 1), SimdUtils.Shuffle.MMShuffle0201 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 2), SimdUtils.Shuffle.MMShuffle0202 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 0, 3), SimdUtils.Shuffle.MMShuffle0203 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 0), SimdUtils.Shuffle.MMShuffle0210 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 1), SimdUtils.Shuffle.MMShuffle0211 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 2), SimdUtils.Shuffle.MMShuffle0212 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 1, 3), SimdUtils.Shuffle.MMShuffle0213 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 0), SimdUtils.Shuffle.MMShuffle0220 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 1), SimdUtils.Shuffle.MMShuffle0221 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 2), SimdUtils.Shuffle.MMShuffle0222 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 2, 3), SimdUtils.Shuffle.MMShuffle0223 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 0), SimdUtils.Shuffle.MMShuffle0230 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 1), SimdUtils.Shuffle.MMShuffle0231 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 2), SimdUtils.Shuffle.MMShuffle0232 }, + { SimdUtils.Shuffle.MMShuffle(0, 2, 3, 3), SimdUtils.Shuffle.MMShuffle0233 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 0), SimdUtils.Shuffle.MMShuffle0300 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 1), SimdUtils.Shuffle.MMShuffle0301 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 2), SimdUtils.Shuffle.MMShuffle0302 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 0, 3), SimdUtils.Shuffle.MMShuffle0303 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 0), SimdUtils.Shuffle.MMShuffle0310 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 1), SimdUtils.Shuffle.MMShuffle0311 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 2), SimdUtils.Shuffle.MMShuffle0312 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 1, 3), SimdUtils.Shuffle.MMShuffle0313 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 0), SimdUtils.Shuffle.MMShuffle0320 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 1), SimdUtils.Shuffle.MMShuffle0321 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 2), SimdUtils.Shuffle.MMShuffle0322 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 2, 3), SimdUtils.Shuffle.MMShuffle0323 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 0), SimdUtils.Shuffle.MMShuffle0330 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 1), SimdUtils.Shuffle.MMShuffle0331 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 2), SimdUtils.Shuffle.MMShuffle0332 }, + { SimdUtils.Shuffle.MMShuffle(0, 3, 3, 3), SimdUtils.Shuffle.MMShuffle0333 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 0), SimdUtils.Shuffle.MMShuffle1000 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 1), SimdUtils.Shuffle.MMShuffle1001 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 2), SimdUtils.Shuffle.MMShuffle1002 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 0, 3), SimdUtils.Shuffle.MMShuffle1003 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 0), SimdUtils.Shuffle.MMShuffle1010 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 1), SimdUtils.Shuffle.MMShuffle1011 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 2), SimdUtils.Shuffle.MMShuffle1012 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 1, 3), SimdUtils.Shuffle.MMShuffle1013 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 0), SimdUtils.Shuffle.MMShuffle1020 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 1), SimdUtils.Shuffle.MMShuffle1021 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 2), SimdUtils.Shuffle.MMShuffle1022 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 2, 3), SimdUtils.Shuffle.MMShuffle1023 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 0), SimdUtils.Shuffle.MMShuffle1030 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 1), SimdUtils.Shuffle.MMShuffle1031 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 2), SimdUtils.Shuffle.MMShuffle1032 }, + { SimdUtils.Shuffle.MMShuffle(1, 0, 3, 3), SimdUtils.Shuffle.MMShuffle1033 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 0), SimdUtils.Shuffle.MMShuffle1100 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 1), SimdUtils.Shuffle.MMShuffle1101 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 2), SimdUtils.Shuffle.MMShuffle1102 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 0, 3), SimdUtils.Shuffle.MMShuffle1103 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 0), SimdUtils.Shuffle.MMShuffle1110 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 1), SimdUtils.Shuffle.MMShuffle1111 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 2), SimdUtils.Shuffle.MMShuffle1112 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 1, 3), SimdUtils.Shuffle.MMShuffle1113 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 0), SimdUtils.Shuffle.MMShuffle1120 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 1), SimdUtils.Shuffle.MMShuffle1121 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 2), SimdUtils.Shuffle.MMShuffle1122 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 2, 3), SimdUtils.Shuffle.MMShuffle1123 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 0), SimdUtils.Shuffle.MMShuffle1130 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 1), SimdUtils.Shuffle.MMShuffle1131 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 2), SimdUtils.Shuffle.MMShuffle1132 }, + { SimdUtils.Shuffle.MMShuffle(1, 1, 3, 3), SimdUtils.Shuffle.MMShuffle1133 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 0), SimdUtils.Shuffle.MMShuffle1200 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 1), SimdUtils.Shuffle.MMShuffle1201 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 2), SimdUtils.Shuffle.MMShuffle1202 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 0, 3), SimdUtils.Shuffle.MMShuffle1203 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 0), SimdUtils.Shuffle.MMShuffle1210 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 1), SimdUtils.Shuffle.MMShuffle1211 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 2), SimdUtils.Shuffle.MMShuffle1212 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 1, 3), SimdUtils.Shuffle.MMShuffle1213 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 0), SimdUtils.Shuffle.MMShuffle1220 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 1), SimdUtils.Shuffle.MMShuffle1221 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 2), SimdUtils.Shuffle.MMShuffle1222 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 2, 3), SimdUtils.Shuffle.MMShuffle1223 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 0), SimdUtils.Shuffle.MMShuffle1230 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 1), SimdUtils.Shuffle.MMShuffle1231 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 2), SimdUtils.Shuffle.MMShuffle1232 }, + { SimdUtils.Shuffle.MMShuffle(1, 2, 3, 3), SimdUtils.Shuffle.MMShuffle1233 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 0), SimdUtils.Shuffle.MMShuffle1300 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 1), SimdUtils.Shuffle.MMShuffle1301 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 2), SimdUtils.Shuffle.MMShuffle1302 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 0, 3), SimdUtils.Shuffle.MMShuffle1303 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 0), SimdUtils.Shuffle.MMShuffle1310 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 1), SimdUtils.Shuffle.MMShuffle1311 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 2), SimdUtils.Shuffle.MMShuffle1312 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 1, 3), SimdUtils.Shuffle.MMShuffle1313 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 0), SimdUtils.Shuffle.MMShuffle1320 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 1), SimdUtils.Shuffle.MMShuffle1321 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 2), SimdUtils.Shuffle.MMShuffle1322 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 2, 3), SimdUtils.Shuffle.MMShuffle1323 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 0), SimdUtils.Shuffle.MMShuffle1330 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 1), SimdUtils.Shuffle.MMShuffle1331 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 2), SimdUtils.Shuffle.MMShuffle1332 }, + { SimdUtils.Shuffle.MMShuffle(1, 3, 3, 3), SimdUtils.Shuffle.MMShuffle1333 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 0), SimdUtils.Shuffle.MMShuffle2000 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 1), SimdUtils.Shuffle.MMShuffle2001 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 2), SimdUtils.Shuffle.MMShuffle2002 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 0, 3), SimdUtils.Shuffle.MMShuffle2003 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 0), SimdUtils.Shuffle.MMShuffle2010 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 1), SimdUtils.Shuffle.MMShuffle2011 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 2), SimdUtils.Shuffle.MMShuffle2012 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 1, 3), SimdUtils.Shuffle.MMShuffle2013 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 0), SimdUtils.Shuffle.MMShuffle2020 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 1), SimdUtils.Shuffle.MMShuffle2021 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 2), SimdUtils.Shuffle.MMShuffle2022 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 2, 3), SimdUtils.Shuffle.MMShuffle2023 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 0), SimdUtils.Shuffle.MMShuffle2030 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 1), SimdUtils.Shuffle.MMShuffle2031 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 2), SimdUtils.Shuffle.MMShuffle2032 }, + { SimdUtils.Shuffle.MMShuffle(2, 0, 3, 3), SimdUtils.Shuffle.MMShuffle2033 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 0), SimdUtils.Shuffle.MMShuffle2100 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 1), SimdUtils.Shuffle.MMShuffle2101 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 2), SimdUtils.Shuffle.MMShuffle2102 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 0, 3), SimdUtils.Shuffle.MMShuffle2103 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 0), SimdUtils.Shuffle.MMShuffle2110 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 1), SimdUtils.Shuffle.MMShuffle2111 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 2), SimdUtils.Shuffle.MMShuffle2112 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 1, 3), SimdUtils.Shuffle.MMShuffle2113 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 0), SimdUtils.Shuffle.MMShuffle2120 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 1), SimdUtils.Shuffle.MMShuffle2121 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 2), SimdUtils.Shuffle.MMShuffle2122 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 2, 3), SimdUtils.Shuffle.MMShuffle2123 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 0), SimdUtils.Shuffle.MMShuffle2130 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 1), SimdUtils.Shuffle.MMShuffle2131 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 2), SimdUtils.Shuffle.MMShuffle2132 }, + { SimdUtils.Shuffle.MMShuffle(2, 1, 3, 3), SimdUtils.Shuffle.MMShuffle2133 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 0), SimdUtils.Shuffle.MMShuffle2200 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 1), SimdUtils.Shuffle.MMShuffle2201 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 2), SimdUtils.Shuffle.MMShuffle2202 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 0, 3), SimdUtils.Shuffle.MMShuffle2203 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 0), SimdUtils.Shuffle.MMShuffle2210 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 1), SimdUtils.Shuffle.MMShuffle2211 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 2), SimdUtils.Shuffle.MMShuffle2212 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 1, 3), SimdUtils.Shuffle.MMShuffle2213 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 0), SimdUtils.Shuffle.MMShuffle2220 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 1), SimdUtils.Shuffle.MMShuffle2221 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 2), SimdUtils.Shuffle.MMShuffle2222 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 2, 3), SimdUtils.Shuffle.MMShuffle2223 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 0), SimdUtils.Shuffle.MMShuffle2230 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 1), SimdUtils.Shuffle.MMShuffle2231 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 2), SimdUtils.Shuffle.MMShuffle2232 }, + { SimdUtils.Shuffle.MMShuffle(2, 2, 3, 3), SimdUtils.Shuffle.MMShuffle2233 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 0), SimdUtils.Shuffle.MMShuffle2300 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 1), SimdUtils.Shuffle.MMShuffle2301 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 2), SimdUtils.Shuffle.MMShuffle2302 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 0, 3), SimdUtils.Shuffle.MMShuffle2303 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 0), SimdUtils.Shuffle.MMShuffle2310 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 1), SimdUtils.Shuffle.MMShuffle2311 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 2), SimdUtils.Shuffle.MMShuffle2312 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 1, 3), SimdUtils.Shuffle.MMShuffle2313 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 0), SimdUtils.Shuffle.MMShuffle2320 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 1), SimdUtils.Shuffle.MMShuffle2321 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 2), SimdUtils.Shuffle.MMShuffle2322 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 2, 3), SimdUtils.Shuffle.MMShuffle2323 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 0), SimdUtils.Shuffle.MMShuffle2330 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 1), SimdUtils.Shuffle.MMShuffle2331 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 2), SimdUtils.Shuffle.MMShuffle2332 }, + { SimdUtils.Shuffle.MMShuffle(2, 3, 3, 3), SimdUtils.Shuffle.MMShuffle2333 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 0), SimdUtils.Shuffle.MMShuffle3000 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 1), SimdUtils.Shuffle.MMShuffle3001 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 2), SimdUtils.Shuffle.MMShuffle3002 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 0, 3), SimdUtils.Shuffle.MMShuffle3003 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 0), SimdUtils.Shuffle.MMShuffle3010 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 1), SimdUtils.Shuffle.MMShuffle3011 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 2), SimdUtils.Shuffle.MMShuffle3012 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 1, 3), SimdUtils.Shuffle.MMShuffle3013 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 0), SimdUtils.Shuffle.MMShuffle3020 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 1), SimdUtils.Shuffle.MMShuffle3021 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 2), SimdUtils.Shuffle.MMShuffle3022 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 2, 3), SimdUtils.Shuffle.MMShuffle3023 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 0), SimdUtils.Shuffle.MMShuffle3030 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 1), SimdUtils.Shuffle.MMShuffle3031 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 2), SimdUtils.Shuffle.MMShuffle3032 }, + { SimdUtils.Shuffle.MMShuffle(3, 0, 3, 3), SimdUtils.Shuffle.MMShuffle3033 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 0), SimdUtils.Shuffle.MMShuffle3100 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 1), SimdUtils.Shuffle.MMShuffle3101 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 2), SimdUtils.Shuffle.MMShuffle3102 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 0, 3), SimdUtils.Shuffle.MMShuffle3103 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 0), SimdUtils.Shuffle.MMShuffle3110 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 1), SimdUtils.Shuffle.MMShuffle3111 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 2), SimdUtils.Shuffle.MMShuffle3112 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 1, 3), SimdUtils.Shuffle.MMShuffle3113 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 0), SimdUtils.Shuffle.MMShuffle3120 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 1), SimdUtils.Shuffle.MMShuffle3121 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 2), SimdUtils.Shuffle.MMShuffle3122 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 2, 3), SimdUtils.Shuffle.MMShuffle3123 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 0), SimdUtils.Shuffle.MMShuffle3130 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 1), SimdUtils.Shuffle.MMShuffle3131 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 2), SimdUtils.Shuffle.MMShuffle3132 }, + { SimdUtils.Shuffle.MMShuffle(3, 1, 3, 3), SimdUtils.Shuffle.MMShuffle3133 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 0), SimdUtils.Shuffle.MMShuffle3200 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 1), SimdUtils.Shuffle.MMShuffle3201 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 2), SimdUtils.Shuffle.MMShuffle3202 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 0, 3), SimdUtils.Shuffle.MMShuffle3203 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 0), SimdUtils.Shuffle.MMShuffle3210 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 1), SimdUtils.Shuffle.MMShuffle3211 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 2), SimdUtils.Shuffle.MMShuffle3212 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 1, 3), SimdUtils.Shuffle.MMShuffle3213 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 0), SimdUtils.Shuffle.MMShuffle3220 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 1), SimdUtils.Shuffle.MMShuffle3221 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 2), SimdUtils.Shuffle.MMShuffle3222 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 2, 3), SimdUtils.Shuffle.MMShuffle3223 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 0), SimdUtils.Shuffle.MMShuffle3230 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 1), SimdUtils.Shuffle.MMShuffle3231 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 2), SimdUtils.Shuffle.MMShuffle3232 }, + { SimdUtils.Shuffle.MMShuffle(3, 2, 3, 3), SimdUtils.Shuffle.MMShuffle3233 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 0), SimdUtils.Shuffle.MMShuffle3300 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 1), SimdUtils.Shuffle.MMShuffle3301 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 2), SimdUtils.Shuffle.MMShuffle3302 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 0, 3), SimdUtils.Shuffle.MMShuffle3303 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 0), SimdUtils.Shuffle.MMShuffle3310 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 1), SimdUtils.Shuffle.MMShuffle3311 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 2), SimdUtils.Shuffle.MMShuffle3312 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 1, 3), SimdUtils.Shuffle.MMShuffle3313 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 0), SimdUtils.Shuffle.MMShuffle3320 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 1), SimdUtils.Shuffle.MMShuffle3321 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 2), SimdUtils.Shuffle.MMShuffle3322 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 2, 3), SimdUtils.Shuffle.MMShuffle3323 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 0), SimdUtils.Shuffle.MMShuffle3330 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 1), SimdUtils.Shuffle.MMShuffle3331 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 2), SimdUtils.Shuffle.MMShuffle3332 }, + { SimdUtils.Shuffle.MMShuffle(3, 3, 3, 3), SimdUtils.Shuffle.MMShuffle3333 } + }; + + [Theory] + [MemberData(nameof(MMShuffleData))] + public void MMShuffleConstantsAreCorrect(byte expected, byte actual) + => Assert.Equal(expected, actual); + [Theory] [MemberData(nameof(ArraySizesDivisibleBy4))] public void BulkShuffleFloat4Channel(int count) @@ -226,7 +491,7 @@ public partial class SimdUtilsTests float[] expected = new float[count]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int p3, out int p2, @@ -257,7 +522,7 @@ public partial class SimdUtilsTests byte[] expected = new byte[count]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int p3, out int p2, @@ -288,7 +553,7 @@ public partial class SimdUtilsTests byte[] expected = new byte[count]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int _, out int p2, @@ -319,7 +584,7 @@ public partial class SimdUtilsTests byte[] expected = new byte[result.Length]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int p3, out int p2, @@ -370,7 +635,7 @@ public partial class SimdUtilsTests byte[] expected = new byte[result.Length]; - SimdUtils.Shuffle.InverseMmShuffle( + SimdUtils.Shuffle.InverseMMShuffle( control, out int _, out int p2,