diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
index 782328edd..8a0b5460c 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
@@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp
/// The destination span of floats.
/// The byte control.
[MethodImpl(InliningOptions.ShortMethod)]
- public static void Shuffle4ChannelReduce(
+ public static void Shuffle4Reduce(
ref ReadOnlySpan source,
ref Span dest,
byte control)
@@ -41,7 +41,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0)
{
- Shuffle4Channel(
+ Shuffle4(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount),
control);
@@ -53,14 +53,14 @@ namespace SixLabors.ImageSharp
}
///
- /// Shuffle 8-bit integers in a within 128-bit lanes in
+ /// Shuffle 8-bit integers within 128-bit lanes in
/// using the control and store the results in .
///
/// The source span of bytes.
/// The destination span of bytes.
/// The byte control.
[MethodImpl(InliningOptions.ShortMethod)]
- public static void Shuffle4ChannelReduce(
+ public static void Shuffle4Reduce(
ref ReadOnlySpan source,
ref Span dest,
byte control)
@@ -75,7 +75,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0)
{
- Shuffle4Channel(
+ Shuffle4(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount),
control);
@@ -86,8 +86,41 @@ namespace SixLabors.ImageSharp
}
}
+            /// <summary>
+            /// Pads then shuffles 8-bit integers within 128-bit lanes in <paramref name="source"/>
+            /// using the control and store the results in <paramref name="dest"/>.
+            /// </summary>
+            /// <remarks>
+            /// Only the vectorizable head of the spans is processed. On return both spans
+            /// have been advanced past the processed data so that the caller can handle
+            /// whatever is left.
+            /// </remarks>
+            /// <param name="source">The source span of bytes.</param>
+            /// <param name="dest">The destination span of bytes.</param>
+            /// <param name="control">The byte control.</param>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            public static unsafe void Pad3Shuffle4Reduce(
+                ref ReadOnlySpan<byte> source,
+                ref Span<byte> dest,
+                byte control)
+            {
+                if (Ssse3.IsSupported)
+                {
+                    int remainder = ImageMaths.ModuloP2(source.Length, Vector128<byte>.Count);
+
+                    // Number of destination bytes produced by the vectorized path.
+                    int adjustedCount = source.Length - remainder;
+
+                    // The vectorized path consumes 3 source bytes for every 4 bytes it writes.
+                    // adjustedCount is a whole number of vectors, so integer math is exact;
+                    // the previous float expression could round for very large spans.
+                    int sourceSlice = adjustedCount / 4 * 3;
+
+                    if (adjustedCount > 0)
+                    {
+                        // Both slices are given the same length: the callee iterates in
+                        // 16-byte steps over that length while reading 12 source bytes per step.
+                        Pad3Shuffle4(
+                            source.Slice(0, adjustedCount),
+                            dest.Slice(0, adjustedCount),
+                            control);
+
+                        source = source.Slice(sourceSlice);
+                        dest = dest.Slice(adjustedCount);
+                    }
+                }
+            }
+
[MethodImpl(InliningOptions.ShortMethod)]
- private static void Shuffle4Channel(
+ private static void Shuffle4(
ReadOnlySpan source,
Span dest,
byte control)
@@ -165,7 +198,7 @@ namespace SixLabors.ImageSharp
}
[MethodImpl(InliningOptions.ShortMethod)]
- private static void Shuffle4Channel(
+ private static void Shuffle4(
ReadOnlySpan source,
Span dest,
byte control)
@@ -246,6 +279,43 @@ namespace SixLabors.ImageSharp
}
}
+            /// <summary>
+            /// Expands packed 3-byte groups from <paramref name="source"/> into 4-byte groups,
+            /// sets the added byte of each group to 0xFF, shuffles every group using the
+            /// control and stores the results in <paramref name="dest"/>.
+            /// </summary>
+            /// <param name="source">The source span of bytes.</param>
+            /// <param name="dest">The destination span of bytes.</param>
+            /// <param name="control">The byte control.</param>
+            [MethodImpl(InliningOptions.ShortMethod)]
+            private static unsafe void Pad3Shuffle4(
+                ReadOnlySpan<byte> source,
+                Span<byte> dest,
+                byte control)
+            {
+                if (Ssse3.IsSupported)
+                {
+                    // 0xFF in the highest byte of every 32-bit element; OR-ed in after padding.
+                    Vector128<byte> wMask = Vector128.Create(0xff000000u).AsByte();
+
+                    // Spreads 12 packed bytes over four 4-byte groups. The -1 entries have the
+                    // high bit set, which makes the byte shuffle write zero at that position.
+                    Vector128<byte> padMask = Vector128.Create(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1).AsByte();
+
+                    // Presumably fills the buffer with per-byte shuffle indices derived
+                    // from the control - confirm against Shuffle.MmShuffleSpan.
+                    Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
+                    Shuffle.MmShuffleSpan(ref bytes, control);
+                    Vector128<byte> vcm = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
+
+                    fixed (byte* sBase = &source.GetPinnableReference())
+                    fixed (byte* dBase = &dest.GetPinnableReference())
+                    {
+                        byte* s = sBase;
+                        byte* d = dBase;
+
+                        // TODO: Consider unrolling and shuffling 4 at a time using Ssse3.AlignRight
+                        // See https://stackoverflow.com/questions/2973708/fast-24-bit-array-32-bit-array-conversion
+                        //
+                        // Each iteration loads 16 bytes but only consumes the first 12, then
+                        // stores 16. The bound therefore counts 16-byte output vectors, so the
+                        // caller is expected to pass source and dest slices of equal length.
+                        for (int i = 0; i < source.Length; i += 16)
+                        {
+                            Vector128<byte> vs0 = Sse2.LoadVector128(s);
+                            Vector128<byte> val = Sse2.Or(wMask, Ssse3.Shuffle(vs0, padMask));
+                            val = Ssse3.Shuffle(val, vcm);
+                            Sse2.Store(d, val);
+
+                            s += 12;
+                            d += 16;
+                        }
+                    }
+                }
+            }
+
///
/// Performs a multiplication and an addition of the .
///
diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
index a4a40fb4f..81d77d655 100644
--- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
+++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
@@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp
/// The destination span of floats.
/// The byte control.
[MethodImpl(InliningOptions.ShortMethod)]
- public static void Shuffle4Channel(
+ public static void Shuffle4(
ReadOnlySpan source,
Span dest,
byte control)
@@ -26,13 +26,13 @@ namespace SixLabors.ImageSharp
VerifyShuffleSpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
- HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control);
+ HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
- ShuffleRemainder4Channel(source, dest, control);
+ Shuffle4Remainder(source, dest, control);
}
}
@@ -44,7 +44,7 @@ namespace SixLabors.ImageSharp
/// The destination span of bytes.
/// The type of shuffle to perform.
[MethodImpl(InliningOptions.ShortMethod)]
- public static void Shuffle4Channel(
+ public static void Shuffle4(
ReadOnlySpan source,
Span dest,
TShuffle shuffle)
@@ -53,7 +53,7 @@ namespace SixLabors.ImageSharp
VerifyShuffleSpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
- HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control);
+ HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
@@ -63,7 +63,26 @@ namespace SixLabors.ImageSharp
}
}
- public static void ShuffleRemainder4Channel(
+        /// <summary>
+        /// Expands every 3 bytes of <paramref name="source"/> into 4 bytes, filling the
+        /// added byte with <see cref="byte.MaxValue"/>, shuffles each 4-byte group using
+        /// the control and stores the results in <paramref name="dest"/>.
+        /// </summary>
+        /// <param name="source">The source span of bytes.</param>
+        /// <param name="dest">The destination span of bytes.</param>
+        /// <param name="control">The byte control.</param>
+        [MethodImpl(InliningOptions.ShortMethod)]
+        public static void Pad3Shuffle4(
+            ReadOnlySpan<byte> source,
+            Span<byte> dest,
+            byte control)
+        {
+            VerifyPadShuffleSpanInput(source, dest);
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Processes the vectorizable head and advances both spans past it.
+            HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, control);
+#endif
+
+            // Deal with the remainder:
+            if (source.Length > 0)
+            {
+                Pad3Shuffle4Remainder(source, dest, control);
+            }
+        }
+
+ public static void Shuffle4Remainder(
ReadOnlySpan source,
Span dest,
byte control)
@@ -81,6 +100,24 @@ namespace SixLabors.ImageSharp
}
}
+        /// <summary>
+        /// Scalar implementation of the pad-and-shuffle operation: for every 3 bytes read
+        /// from <paramref name="source"/>, 4 bytes are written to <paramref name="dest"/>.
+        /// </summary>
+        /// <remarks>
+        /// No bounds checks are performed. <paramref name="dest"/> must have a length that
+        /// is a multiple of 4 and <paramref name="source"/> must hold 3 bytes for every
+        /// 4 bytes of <paramref name="dest"/>.
+        /// </remarks>
+        /// <param name="source">The source span of bytes.</param>
+        /// <param name="dest">The destination span of bytes.</param>
+        /// <param name="control">The byte control.</param>
+        public static void Pad3Shuffle4Remainder(
+            ReadOnlySpan<byte> source,
+            Span<byte> dest,
+            byte control)
+        {
+            ref byte sBase = ref MemoryMarshal.GetReference(source);
+            ref byte dBase = ref MemoryMarshal.GetReference(dest);
+
+            // p0..p3 are used as the offsets inside each 4-byte destination group.
+            Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);
+
+            // i walks the destination in 4-byte groups, j walks the source in 3-byte groups.
+            for (int i = 0, j = 0; i < dest.Length; i += 4, j += 3)
+            {
+                Unsafe.Add(ref dBase, p0 + i) = Unsafe.Add(ref sBase, j);
+                Unsafe.Add(ref dBase, p1 + i) = Unsafe.Add(ref sBase, j + 1);
+                Unsafe.Add(ref dBase, p2 + i) = Unsafe.Add(ref sBase, j + 2);
+
+                // The padded component is always fully set.
+                Unsafe.Add(ref dBase, p3 + i) = byte.MaxValue;
+            }
+        }
+
[Conditional("DEBUG")]
private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest)
where T : struct
@@ -96,6 +133,20 @@ namespace SixLabors.ImageSharp
"Input spans must be divisiable by 4!");
}
+        /// <summary>
+        /// Debug-only validation of the span lengths passed to the pad-and-shuffle methods:
+        /// the destination must consist of whole 4-byte groups and the source must contain
+        /// exactly 3 bytes for each of those groups.
+        /// </summary>
+        /// <param name="source">The source span of bytes.</param>
+        /// <param name="dest">The destination span of bytes.</param>
+        [Conditional("DEBUG")]
+        private static void VerifyPadShuffleSpanInput(ReadOnlySpan<byte> source, Span<byte> dest)
+        {
+            // Exact integer arithmetic. The previous float expression truncated, so a
+            // destination length that is not a multiple of 4 (e.g. dest = 5, source = 3)
+            // passed the check, and 'dest.Length * 3' could overflow for very large spans.
+            DebugGuard.IsTrue(
+                dest.Length % 4 == 0 && source.Length == dest.Length / 4 * 3,
+                nameof(source),
+                "Input spans must be 3/4 the length of the output span!");
+
+            DebugGuard.IsTrue(
+                source.Length % 3 == 0,
+                nameof(source),
+                "Input spans must be divisiable by 3!");
+        }
+
public static class Shuffle
{
[MethodImpl(InliningOptions.ShortMethod)]
diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs
index ab9011a5c..5afd369be 100644
--- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs
+++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs
@@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
///
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan source, Span dest)
- => SimdUtils.Shuffle4Channel(source, dest, default);
+ => SimdUtils.Shuffle4(source, dest, default);
///
/// Converts a representing a collection of
@@ -36,7 +36,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
///
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan source, Span dest)
- => SimdUtils.Shuffle4Channel(source, dest, default);
+ => SimdUtils.Shuffle4(source, dest, default);
}
public static class FromArgb32
@@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
///
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan source, Span dest)
- => SimdUtils.Shuffle4Channel(source, dest, default);
+ => SimdUtils.Shuffle4(source, dest, default);
///
/// Converts a representing a collection of
@@ -57,7 +57,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
///
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan source, Span dest)
- => SimdUtils.Shuffle4Channel(source, dest, default);
+ => SimdUtils.Shuffle4(source, dest, default);
}
public static class FromBgra32
@@ -69,7 +69,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
///
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan source, Span dest)
- => SimdUtils.Shuffle4Channel(source, dest, default);
+ => SimdUtils.Shuffle4(source, dest, default);
///
/// Converts a representing a collection of
@@ -78,7 +78,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
///
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan source, Span dest)
- => SimdUtils.Shuffle4Channel(source, dest, default);
+ => SimdUtils.Shuffle4(source, dest, default);
}
}
}
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs
new file mode 100644
index 000000000..c529b2af1
--- /dev/null
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs
@@ -0,0 +1,67 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using BenchmarkDotNet.Attributes;
+
+namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
+{
+    /// <summary>
+    /// Benchmarks <c>SimdUtils.Pad3Shuffle4</c>, which expands packed 3-byte groups
+    /// into shuffled 4-byte groups.
+    /// </summary>
+    [Config(typeof(Config.HwIntrinsics_SSE_AVX))]
+    public class Pad3Shuffle4Channel
+    {
+        private byte[] source;
+        private byte[] destination;
+
+        [GlobalSetup]
+        public void Setup()
+        {
+            this.source = new byte[this.Count];
+            new Random(this.Count).NextBytes(this.source);
+
+            // Four destination bytes are produced for every three source bytes.
+            // All Count values are multiples of 3, so the integer math is exact.
+            this.destination = new byte[this.Count / 3 * 4];
+        }
+
+        /// <summary>
+        /// Gets or sets the number of source bytes; must be divisible by 3.
+        /// </summary>
+        [Params(96, 384, 768, 1536)]
+        public int Count { get; set; }
+
+        // The method name is kept for compatibility with existing benchmark reports.
+        // The body previously called SimdUtils.Shuffle4, so this class did not
+        // measure the pad-and-shuffle path at all.
+        [Benchmark]
+        public void Shuffle4Channel()
+        {
+            SimdUtils.Pad3Shuffle4(this.source, this.destination, default(WXYZShuffle4).Control);
+        }
+    }
+
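+    // 2020-10-29
+    // ##########
+    //
+    // NOTE: the Count column in the table below (128 - 2048) does not match this
+    // benchmark's [Params(96, 384, 768, 1536)]. The results appear to have been
+    // recorded for a different benchmark and should be regenerated.
+    //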
+ // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
+ // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
+ // .NET Core SDK=3.1.403
+ // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
+ // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
+ // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
+ // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
+ //
+ // Runtime=.NET Core 3.1
+ //
+ // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
+ // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
+ // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - |
+ // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - |
+ // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - |
+ // | | | | | | | | | | | | | |
+ // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - |
+ // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - |
+ // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - |
+ // | | | | | | | | | | | | | |
+ // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - |
+ // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - |
+ // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - |
+ // | | | | | | | | | | | | | |
+ // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - |
+ // | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - |
+ // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - |
+ // | | | | | | | | | | | | | |
+ // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - |
+ // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - |
+ // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - |
+}
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs
index 749859eac..db4947001 100644
--- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs
@@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Benchmark]
public void Shuffle4Channel()
{
- SimdUtils.Shuffle4Channel(this.source, this.destination, default);
+ SimdUtils.Shuffle4(this.source, this.destination, default);
}
}
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs
index 6f5b5001b..4a2512fea 100644
--- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs
@@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Benchmark]
public void Shuffle4Channel()
{
- SimdUtils.Shuffle4Channel(this.source, this.destination, control);
+ SimdUtils.Shuffle4(this.source, this.destination, control);
}
}
diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs
index 06f61e617..1c456e5a2 100644
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs
@@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Common
TestShuffleFloat4Channel(
size,
- (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control),
+ (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, control),
control);
}
@@ -49,43 +49,43 @@ namespace SixLabors.ImageSharp.Tests.Common
WXYZShuffle4 wxyz = default;
TestShuffleByte4Channel(
size,
- (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz),
+ (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz),
wxyz.Control);
WZYXShuffle4 wzyx = default;
TestShuffleByte4Channel(
size,
- (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx),
+ (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx),
wzyx.Control);
YZWXShuffle4 yzwx = default;
TestShuffleByte4Channel(
size,
- (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx),
+ (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx),
yzwx.Control);
ZYXWShuffle4 zyxw = default;
TestShuffleByte4Channel(
size,
- (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw),
+ (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw),
zyxw.Control);
var xwyz = new DefaultShuffle4(2, 1, 3, 0);
TestShuffleByte4Channel(
size,
- (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz),
+ (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz),
xwyz.Control);
var yyyy = new DefaultShuffle4(1, 1, 1, 1);
TestShuffleByte4Channel(
size,
- (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy),
+ (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy),
yyyy.Control);
var wwww = new DefaultShuffle4(3, 3, 3, 3);
TestShuffleByte4Channel(
size,
- (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww),
+ (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww),
wwww.Control);
}
}
@@ -97,6 +97,29 @@ namespace SixLabors.ImageSharp.Tests.Common
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}
+        /// <summary>
+        /// Verifies <c>SimdUtils.Pad3Shuffle4</c> against a scalar reference for source
+        /// lengths divisible by 3, with several hardware intrinsic feature sets.
+        /// </summary>
+        /// <param name="count">The number of source bytes.</param>
+        [Theory]
+        [MemberData(nameof(ArraySizesDivisibleBy3))]
+        public void BulkPad3Shuffle4Channel(int count)
+        {
+            static void RunTest(string serialized)
+            {
+                // No need to test multiple shuffle controls as the
+                // pipeline is always the same.
+                int size = FeatureTestRunner.Deserialize<int>(serialized);
+                byte control = default(WZYXShuffle4).Control;
+
+                TestPad3Shuffle4Channel(
+                    size,
+                    (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, control),
+                    control);
+            }
+
+            // NOTE(review): the sibling byte shuffle test passes DisableAVX2 here while
+            // this one passes DisableAVX - confirm which flag is intended.
+            FeatureTestRunner.RunWithHwIntrinsicsFeature(
+                RunTest,
+                count,
+                HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE);
+        }
+
private static void TestShuffleFloat4Channel(
int count,
Action, Memory> convert,
@@ -157,5 +180,37 @@ namespace SixLabors.ImageSharp.Tests.Common
Assert.Equal(expected, result);
}
+
+        /// <summary>
+        /// Fills a source buffer with seeded random bytes, builds the expected
+        /// pad-and-shuffle output with a scalar loop and asserts that
+        /// <paramref name="convert"/> produces the same bytes.
+        /// </summary>
+        /// <param name="count">The number of source bytes; expected to be divisible by 3.</param>
+        /// <param name="convert">The conversion under test, invoked with (source, destination).</param>
+        /// <param name="control">The byte control.</param>
+        private static void TestPad3Shuffle4Channel(
+            int count,
+            Action<Memory<byte>, Memory<byte>> convert,
+            byte control)
+        {
+            byte[] source = new byte[count];
+            new Random(count).NextBytes(source);
+
+            // Four output bytes for every three input bytes. Integer math keeps the
+            // size exact instead of relying on how 4 / 3F happens to round.
+            byte[] result = new byte[count / 3 * 4];
+
+            byte[] expected = new byte[result.Length];
+
+            SimdUtils.Shuffle.InverseMmShuffle(
+                control,
+                out int p3,
+                out int p2,
+                out int p1,
+                out int p0);
+
+            // Scalar reference: i walks 4-byte output groups, j walks 3-byte input groups.
+            for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3)
+            {
+                expected[p0 + i] = source[j];
+                expected[p1 + i] = source[j + 1];
+                expected[p2 + i] = source[j + 2];
+                expected[p3 + i] = byte.MaxValue;
+            }
+
+            convert(source, result);
+
+            Assert.Equal(expected, result);
+        }
}
}
diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
index bddadff4d..fe432107a 100644
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
@@ -163,6 +163,8 @@ namespace SixLabors.ImageSharp.Tests.Common
public static readonly TheoryData ArraySizesDivisibleBy8 = new TheoryData { 0, 8, 16, 1024 };
public static readonly TheoryData ArraySizesDivisibleBy4 = new TheoryData { 0, 4, 8, 28, 1020 };
+ // Source byte counts that are whole multiples of 3; theory data for the Pad3Shuffle4 test.
+ public static readonly TheoryData ArraySizesDivisibleBy3 = new TheoryData { 0, 3, 9, 36, 957 };
+
public static readonly TheoryData ArraySizesDivisibleBy32 = new TheoryData { 0, 32, 512 };