Browse Source

Fast fallbacks

js/color-alpha-handling
James Jackson-South 6 years ago
parent
commit
50e30c3c42
  1. 40
      src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
  2. 96
      src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
  3. 74
      src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
  4. 69
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
  5. 118
      src/ImageSharp/PixelFormats/Utils/PixelConverter.cs
  6. 61
      tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs
  7. 71
      tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs
  8. 173
      tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

40
src/ImageSharp/Common/Helpers/IComponentShuffle.cs → src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs

@ -30,13 +30,20 @@ namespace SixLabors.ImageSharp
internal readonly struct DefaultShuffle4 : IComponentShuffle
{
private readonly byte p3;
private readonly byte p2;
private readonly byte p1;
private readonly byte p0;
public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
: this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0))
{
this.p3 = p3;
this.p2 = p2;
this.p1 = p1;
this.p0 = p0;
this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
}
public DefaultShuffle4(byte control) => this.Control = control;
public byte Control { get; }
[MethodImpl(InliningOptions.ShortMethod)]
@ -44,12 +51,11 @@ namespace SixLabors.ImageSharp
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
SimdUtils.Shuffle.InverseMmShuffle(
this.Control,
out int p3,
out int p2,
out int p1,
out int p0);
int p3 = this.p3;
int p2 = this.p2;
int p1 = this.p1;
int p0 = this.p0;
for (int i = 0; i < source.Length; i += 4)
{
@ -63,7 +69,9 @@ namespace SixLabors.ImageSharp
internal readonly struct WXYZShuffle4 : IComponentShuffle
{
public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
private static readonly byte WXYZ = SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
public byte Control => WXYZ;
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
@ -89,7 +97,9 @@ namespace SixLabors.ImageSharp
internal readonly struct WZYXShuffle4 : IComponentShuffle
{
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
private static readonly byte WZYX = SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
public byte Control => WZYX;
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
@ -112,7 +122,9 @@ namespace SixLabors.ImageSharp
internal readonly struct YZWXShuffle4 : IComponentShuffle
{
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
private static readonly byte YZWX = SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
public byte Control => YZWX;
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
@ -135,7 +147,9 @@ namespace SixLabors.ImageSharp
internal readonly struct ZYXWShuffle4 : IComponentShuffle
{
public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
private static readonly byte ZYXW = SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
public byte Control => ZYXW;
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)

96
src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs

@ -0,0 +1,96 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <summary>
/// Marks an <see cref="IComponentShuffle"/> whose fallback expands 3-byte source
/// elements into 4-byte destination elements. It declares no members of its own.
/// </summary>
internal interface IPad3Shuffle4 : IComponentShuffle
{
}
/// <summary>
/// General-purpose <see cref="IPad3Shuffle4"/>: pads every 3-byte source element with a
/// fourth byte of <see cref="byte.MaxValue"/> and then writes the four bytes to the
/// destination in the order selected by the constructor arguments.
/// </summary>
internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
{
    private readonly byte p3;
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultPad3Shuffle4"/> struct.
    /// Each argument is the index (0-3) of the padded source byte that is written to the
    /// corresponding destination byte; index 3 selects the padding byte.
    /// The indices are not validated here.
    /// </summary>
    /// <param name="p3">Padded source index written to destination byte 3.</param>
    /// <param name="p2">Padded source index written to destination byte 2.</param>
    /// <param name="p1">Padded source index written to destination byte 1.</param>
    /// <param name="p0">Padded source index written to destination byte 0.</param>
    public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0)
    {
        this.p3 = p3;
        this.p2 = p2;
        this.p1 = p1;
        this.p0 = p0;
        this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
    }

    /// <summary>
    /// Gets the control byte built from the four indices via <c>SimdUtils.Shuffle.MmShuffle</c>.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Scalar fallback: converts every 3 bytes of <paramref name="source"/> into
    /// 4 bytes of <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes, 3 per element.</param>
    /// <param name="dest">The destination bytes, 4 per element. Its length is not checked here.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        int p3 = this.p3;
        int p2 = this.p2;
        int p1 = this.p1;
        int p0 = this.p0;

        // Scratch element: bytes 0..2 are refreshed per iteration, byte 3 is the
        // constant padding and is therefore written only once.
        Span<byte> temp = stackalloc byte[4];
        ref byte t = ref MemoryMarshal.GetReference(temp);
        Unsafe.Add(ref t, 3) = byte.MaxValue;

        for (int i = 0, j = 0; i < source.Length; i += 3, j += 4)
        {
            ref byte s = ref Unsafe.Add(ref sBase, i);

            // Copy exactly three bytes. Reading the element as a 32-bit value would
            // touch one byte beyond the end of the source on the final element and
            // would place the padding in the wrong byte on big-endian hardware.
            Unsafe.Add(ref t, 0) = Unsafe.Add(ref s, 0);
            Unsafe.Add(ref t, 1) = Unsafe.Add(ref s, 1);
            Unsafe.Add(ref t, 2) = Unsafe.Add(ref s, 2);

            Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0);
            Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1);
            Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2);
            Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3);
        }
    }
}
/// <summary>
/// <see cref="IPad3Shuffle4"/> that keeps the three source bytes in their original
/// order and appends a fourth byte of <see cref="byte.MaxValue"/>.
/// </summary>
internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
{
    private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);

    /// <summary>
    /// Gets the control byte for the (3, 2, 1, 0) ordering.
    /// </summary>
    public byte Control => XYZW;

    /// <summary>
    /// Scalar fallback: expands every 3 bytes of <paramref name="source"/> into
    /// 4 bytes of <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes, 3 per element.</param>
    /// <param name="dest">The destination bytes, 4 per element.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte src = ref MemoryMarshal.GetReference(source);
        ref byte dst = ref MemoryMarshal.GetReference(dest);

        int length = source.Length;

        // The wide path reads four bytes at a time, so it stops while more than
        // four source bytes remain; the narrow path below finishes the rest.
        int wideEnd = length - 4;
        int s = 0;
        int d = 0;

        for (; s < wideEnd; s += 3, d += 4)
        {
            uint packed = Unsafe.As<byte, uint>(ref Unsafe.Add(ref src, s));
            Unsafe.As<byte, uint>(ref Unsafe.Add(ref dst, d)) = packed | 0xFF000000;
        }

        for (; s < length; s += 3, d += 4)
        {
            ref byte from = ref Unsafe.Add(ref src, s);
            ref byte to = ref Unsafe.Add(ref dst, d);

            Unsafe.Add(ref to, 0) = Unsafe.Add(ref from, 0);
            Unsafe.Add(ref to, 1) = Unsafe.Add(ref from, 1);
            Unsafe.Add(ref to, 2) = Unsafe.Add(ref from, 2);
            Unsafe.Add(ref to, 3) = byte.MaxValue;
        }
    }
}
}

74
src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs

@ -0,0 +1,74 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <summary>
/// Marks an <see cref="IComponentShuffle"/> whose fallback reduces 4-byte source
/// elements to 3-byte destination elements. It declares no members of its own.
/// </summary>
internal interface IShuffle4Slice3 : IComponentShuffle
{
}
/// <summary>
/// General-purpose <see cref="IShuffle4Slice3"/>: for every 4-byte source element,
/// writes three of its bytes to the destination in the order selected by the
/// constructor arguments.
/// </summary>
internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3
{
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4Slice3"/> struct.
    /// </summary>
    /// <param name="p3">Only contributes to <see cref="Control"/>; the fallback does not use it.</param>
    /// <param name="p2">Source byte index written to destination byte 2.</param>
    /// <param name="p1">Source byte index written to destination byte 1.</param>
    /// <param name="p0">Source byte index written to destination byte 0.</param>
    public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0)
    {
        this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
        this.p0 = p0;
        this.p1 = p1;
        this.p2 = p2;
    }

    /// <summary>
    /// Gets the control byte built from the four indices via <c>SimdUtils.Shuffle.MmShuffle</c>.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Scalar fallback: fills <paramref name="dest"/> three bytes at a time from
    /// successive 4-byte elements of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The source bytes, 4 per element.</param>
    /// <param name="dest">The destination bytes, 3 per element; its length drives the loop.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte src = ref MemoryMarshal.GetReference(source);
        ref byte dst = ref MemoryMarshal.GetReference(dest);

        int first = this.p0;
        int second = this.p1;
        int third = this.p2;

        int d = 0;
        int s = 0;
        while (d < dest.Length)
        {
            ref byte element = ref Unsafe.Add(ref src, s);
            ref byte target = ref Unsafe.Add(ref dst, d);

            Unsafe.Add(ref target, 0) = Unsafe.Add(ref element, first);
            Unsafe.Add(ref target, 1) = Unsafe.Add(ref element, second);
            Unsafe.Add(ref target, 2) = Unsafe.Add(ref element, third);

            d += 3;
            s += 4;
        }
    }
}
/// <summary>
/// <see cref="IShuffle4Slice3"/> that keeps the first three bytes of every 4-byte
/// source element in their original order and drops the fourth.
/// </summary>
internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3
{
    private static readonly byte XYZW = SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);

    /// <summary>
    /// Gets the control byte for the (3, 2, 1, 0) ordering.
    /// </summary>
    public byte Control => XYZW;

    /// <summary>
    /// Scalar fallback: copies three bytes per 4-byte element of
    /// <paramref name="source"/> into <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes, 4 per element; its length drives the loop.</param>
    /// <param name="dest">The destination bytes, 3 per element.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte src = ref MemoryMarshal.GetReference(source);
        ref byte dst = ref MemoryMarshal.GetReference(dest);

        int elements = source.Length / 4;
        for (int e = 0; e < elements; e++)
        {
            ref byte from = ref Unsafe.Add(ref src, e * 4);
            ref byte to = ref Unsafe.Add(ref dst, e * 3);

            // Xyz24 is exactly three bytes wide, so this assignment moves three bytes.
            Unsafe.As<byte, Xyz24>(ref to) = Unsafe.As<byte, Xyz24>(ref from);
        }
    }
}
/// <summary>
/// Field-less struct with an explicit size of three bytes. It is reinterpreted over
/// raw bytes via <c>Unsafe.As</c> so that three bytes can be copied with a single assignment.
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 3)]
internal readonly struct Xyz24
{
}
}

69
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -63,45 +63,61 @@ namespace SixLabors.ImageSharp
}
}
/// <summary>
/// Pads then shuffles 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and stores the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Pad3Shuffle4(
public static void Pad3Shuffle4<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
TShuffle shuffle)
where TShuffle : struct, IPad3Shuffle4
{
VerifyPad3Shuffle4SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, control);
HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
Pad3Shuffle4Remainder(source, dest, control);
shuffle.RunFallbackShuffle(source, dest);
}
}
/// <summary>
/// Shuffles then slices 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and stores the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Slice3(
public static void Shuffle4Slice3<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
TShuffle shuffle)
where TShuffle : struct, IShuffle4Slice3
{
VerifyShuffle4Slice3SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, control);
HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
Shuffle4Slice3Remainder(source, dest, control);
shuffle.RunFallbackShuffle(source, dest);
}
}
public static void Shuffle4Remainder(
private static void Shuffle4Remainder(
ReadOnlySpan<float> source,
Span<float> dest,
byte control)
@ -119,41 +135,6 @@ namespace SixLabors.ImageSharp
}
}
public static void Pad3Shuffle4Remainder(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);
for (int i = 0, j = 0; i < dest.Length; i += 4, j += 3)
{
Unsafe.Add(ref dBase, p0 + i) = Unsafe.Add(ref sBase, j);
Unsafe.Add(ref dBase, p1 + i) = Unsafe.Add(ref sBase, j + 1);
Unsafe.Add(ref dBase, p2 + i) = Unsafe.Add(ref sBase, j + 2);
Unsafe.Add(ref dBase, p3 + i) = byte.MaxValue;
}
}
public static void Shuffle4Slice3Remainder(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
Shuffle.InverseMmShuffle(control, out int _, out int p2, out int p1, out int p0);
for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4)
{
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j);
Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j);
Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j);
}
}
[Conditional("DEBUG")]
private static void VerifyShuffleSpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
where T : struct

118
src/ImageSharp/PixelFormats/Utils/PixelConverter.cs

@ -37,6 +37,24 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4<ZYXWShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgba32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgb24"/> pixels.
/// </summary>
/// <remarks>
/// The fallback of <see cref="XYZWShuffle4Slice3"/> keeps the first three bytes of each
/// 4-byte pixel in order and drops the fourth.
/// </remarks>
/// <param name="source">The source bytes, 4 per pixel.</param>
/// <param name="dest">The destination bytes, 3 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3<XYZWShuffle4Slice3>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgba32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgr24"/> pixels.
/// </summary>
/// <remarks>
/// With the indices (3, 0, 1, 2) the fallback of <see cref="DefaultShuffle4Slice3"/> writes
/// source bytes 2, 1 and 0 of each 4-byte pixel, in that order, and drops byte 3.
/// </remarks>
/// <param name="source">The source bytes, 4 per pixel.</param>
/// <param name="dest">The destination bytes, 3 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2));
}
public static class FromArgb32
@ -58,6 +76,24 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4<WZYXShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Argb32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgb24"/> pixels.
/// </summary>
/// <remarks>
/// With the indices (0, 3, 2, 1) the fallback of <see cref="DefaultShuffle4Slice3"/> writes
/// source bytes 1, 2 and 3 of each 4-byte pixel, in that order, and drops byte 0.
/// </remarks>
/// <param name="source">The source bytes, 4 per pixel.</param>
/// <param name="dest">The destination bytes, 3 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Argb32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgr24"/> pixels.
/// </summary>
/// <remarks>
/// With the indices (0, 1, 2, 3) the fallback of <see cref="DefaultShuffle4Slice3"/> writes
/// source bytes 3, 2 and 1 of each 4-byte pixel, in that order, and drops byte 0.
/// </remarks>
/// <param name="source">The source bytes, 4 per pixel.</param>
/// <param name="dest">The destination bytes, 3 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3));
}
public static class FromBgra32
@ -79,6 +115,88 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4<ZYXWShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgra32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgb24"/> pixels.
/// </summary>
/// <remarks>
/// With the indices (3, 0, 1, 2) the fallback of <see cref="DefaultShuffle4Slice3"/> writes
/// source bytes 2, 1 and 0 of each 4-byte pixel, in that order, and drops byte 3.
/// </remarks>
/// <param name="source">The source bytes, 4 per pixel.</param>
/// <param name="dest">The destination bytes, 3 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgra32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgr24"/> pixels.
/// </summary>
/// <remarks>
/// The fallback of <see cref="XYZWShuffle4Slice3"/> keeps the first three bytes of each
/// 4-byte pixel in order and drops the fourth.
/// </remarks>
/// <param name="source">The source bytes, 4 per pixel.</param>
/// <param name="dest">The destination bytes, 3 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3<XYZWShuffle4Slice3>(source, dest, default);
}
/// <summary>
/// Conversions from 3-byte <see cref="Rgb24"/> pixel data to 4-byte pixel layouts.
/// Every method forwards to <c>SimdUtils.Pad3Shuffle4</c>.
/// </summary>
public static class FromRgb24
{
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgba32"/> pixels.
/// </summary>
/// <param name="source">The source bytes, 3 per pixel.</param>
/// <param name="dest">The destination bytes, 4 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4<XYZWPad3Shuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Argb32"/> pixels.
/// </summary>
/// <param name="source">The source bytes, 3 per pixel.</param>
/// <param name="dest">The destination bytes, 4 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgra32"/> pixels.
/// </summary>
/// <param name="source">The source bytes, 3 per pixel.</param>
/// <param name="dest">The destination bytes, 4 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2));
// TODO: Bgr24
}
/// <summary>
/// Conversions from 3-byte <see cref="Bgr24"/> pixel data to 4-byte pixel layouts.
/// Every method forwards to <c>SimdUtils.Pad3Shuffle4</c>.
/// </summary>
public static class FromBgr24
{
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Argb32"/> pixels.
/// </summary>
/// <param name="source">The source bytes, 3 per pixel.</param>
/// <param name="dest">The destination bytes, 4 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgba32"/> pixels.
/// </summary>
/// <param name="source">The source bytes, 3 per pixel.</param>
/// <param name="dest">The destination bytes, 4 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgra32"/> pixels.
/// </summary>
/// <param name="source">The source bytes, 3 per pixel.</param>
/// <param name="dest">The destination bytes, 4 per pixel.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Pad3Shuffle4<XYZWPad3Shuffle4>(source, dest, default);
// TODO: Rgb24
}
}
}

61
tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs

@ -9,7 +9,8 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Pad3Shuffle4Channel
{
private static readonly byte Control = default(WXYZShuffle4).Control;
private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2);
private static readonly XYZWPad3Shuffle4 ControlFast = default;
private byte[] source;
private byte[] destination;
@ -18,7 +19,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
this.source = new byte[this.Count];
new Random(this.Count).NextBytes(this.source);
this.destination = new byte[(int)(this.Count * (4 / 3F))];
this.destination = new byte[this.Count * 4 / 3];
}
[Params(96, 384, 768, 1536)]
@ -29,6 +30,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
SimdUtils.Pad3Shuffle4(this.source, this.destination, Control);
}
[Benchmark]
public void Pad3Shuffle4FastFallback()
{
SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast);
}
}
// 2020-10-30
@ -44,21 +51,37 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |------------- |------------------- |-------------------------------------------------- |------ |----------:|----------:|---------:|------:|--------:|------:|------:|------:|----------:|
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 71.84 ns | 1.491 ns | 3.146 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.14 ns | 0.186 ns | 0.165 ns | 0.33 | 0.02 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 22.94 ns | 0.127 ns | 0.112 ns | 0.33 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 230.90 ns | 0.877 ns | 0.777 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 384 | 39.54 ns | 0.601 ns | 0.533 ns | 0.17 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 39.88 ns | 0.657 ns | 0.548 ns | 0.17 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 451.67 ns | 2.932 ns | 2.448 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 768 | 60.79 ns | 1.200 ns | 1.561 ns | 0.13 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.30 ns | 0.469 ns | 0.416 ns | 0.14 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 896.11 ns | 10.358 ns | 9.689 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 109.72 ns | 2.149 ns | 2.300 ns | 0.12 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 108.70 ns | 0.994 ns | 0.881 ns | 0.12 | 0.00 | - | - | - | - |
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |------------------------- |------------------- |-------------------------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|------:|------:|------:|----------:|
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.63 ns | 0.175 ns | 0.155 ns | 23.65 ns | 0.15 | 0.01 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 96 | 24.84 ns | 0.376 ns | 0.333 ns | 24.74 ns | 1.57 | 0.06 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 384 | 41.41 ns | 0.859 ns | 1.204 ns | 41.33 ns | 0.16 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 384 | 40.74 ns | 0.624 ns | 0.584 ns | 40.72 ns | 0.55 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 768 | 62.86 ns | 0.332 ns | 0.277 ns | 62.80 ns | 0.12 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 768 | 64.72 ns | 1.306 ns | 1.090 ns | 64.51 ns | 0.59 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 110.05 ns | 0.256 ns | 0.214 ns | 110.04 ns | 0.11 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - |
}

71
tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs

@ -9,7 +9,8 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Shuffle4Slice3Channel
{
private static readonly byte Control = default(WXYZShuffle4).Control;
private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2);
private static readonly XYZWShuffle4Slice3 ControlFast = default;
private byte[] source;
private byte[] destination;
@ -29,6 +30,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
SimdUtils.Shuffle4Slice3(this.source, this.destination, Control);
}
[Benchmark]
public void Shuffle4Slice3FastFallback()
{
SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast);
}
}
// 2020-10-29
@ -44,25 +51,45 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |--------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|------:|------:|------:|----------:|
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 52.24 ns | 1.081 ns | 1.062 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 128 | 25.52 ns | 0.189 ns | 0.158 ns | 0.49 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.11 ns | 0.524 ns | 0.644 ns | 0.50 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 101.09 ns | 0.733 ns | 0.612 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.65 ns | 0.674 ns | 1.198 ns | 0.33 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 32.76 ns | 0.656 ns | 0.853 ns | 0.32 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 209.58 ns | 3.826 ns | 5.957 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 512 | 46.32 ns | 0.729 ns | 1.296 ns | 0.22 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 46.97 ns | 0.196 ns | 0.183 ns | 0.22 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 406.39 ns | 7.493 ns | 6.257 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 74.53 ns | 1.509 ns | 1.678 ns | 0.18 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 74.04 ns | 0.703 ns | 0.657 ns | 0.18 | - | - | - | - |
// | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 796.80 ns | 6.476 ns | 5.741 ns | 1.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 130.70 ns | 2.512 ns | 2.227 ns | 0.16 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 129.42 ns | 2.555 ns | 2.133 ns | 0.16 | - | - | - | - |
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 128 | 27.15 ns | 0.556 ns | 0.762 ns | 27.34 ns | 0.41 | 0.03 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 128 | 26.15 ns | 0.113 ns | 0.106 ns | 26.16 ns | 1.01 | 0.02 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.61 ns | 0.107 ns | 0.095 ns | 32.62 ns | 0.33 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 256 | 32.16 ns | 0.111 ns | 0.104 ns | 32.16 ns | 0.61 | 0.01 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 512 | 51.03 ns | 0.535 ns | 0.501 ns | 51.18 ns | 0.24 | 0.01 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 512 | 50.33 ns | 0.382 ns | 0.339 ns | 50.41 ns | 0.42 | 0.01 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 77.13 ns | 1.355 ns | 2.264 ns | 76.19 ns | 0.19 | 0.01 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 1024 | 80.25 ns | 1.647 ns | 2.082 ns | 80.98 ns | 0.35 | 0.01 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 128.41 ns | 0.417 ns | 0.390 ns | 128.24 ns | 0.16 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - |
}

173
tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

@ -39,56 +39,51 @@ namespace SixLabors.ImageSharp.Tests.Common
static void RunTest(string serialized)
{
int size = FeatureTestRunner.Deserialize<int>(serialized);
foreach (var item in ArraySizesDivisibleBy4)
{
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IComponentShuffle to the generic utils method.
foreach (var count in item)
{
WXYZShuffle4 wxyz = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz),
wxyz.Control);
WZYXShuffle4 wzyx = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx),
wzyx.Control);
YZWXShuffle4 yzwx = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx),
yzwx.Control);
ZYXWShuffle4 zyxw = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw),
zyxw.Control);
var xwyz = new DefaultShuffle4(2, 1, 3, 0);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz),
xwyz.Control);
var yyyy = new DefaultShuffle4(1, 1, 1, 1);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy),
yyyy.Control);
var wwww = new DefaultShuffle4(3, 3, 3, 3);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww),
wwww.Control);
}
}
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IComponentShuffle to the generic utils method.
WXYZShuffle4 wxyz = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz),
wxyz.Control);
WZYXShuffle4 wzyx = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx),
wzyx.Control);
YZWXShuffle4 yzwx = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx),
yzwx.Control);
ZYXWShuffle4 zyxw = default;
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw),
zyxw.Control);
var xwyz = new DefaultShuffle4(2, 1, 3, 0);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz),
xwyz.Control);
var yyyy = new DefaultShuffle4(1, 1, 1, 1);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy),
yyyy.Control);
var wwww = new DefaultShuffle4(3, 3, 3, 3);
TestShuffleByte4Channel(
size,
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww),
wwww.Control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
@ -103,21 +98,40 @@ namespace SixLabors.ImageSharp.Tests.Common
{
static void RunTest(string serialized)
{
// No need to test multiple shuffle controls as the
// pipeline is always the same.
int size = FeatureTestRunner.Deserialize<int>(serialized);
byte control = default(WZYXShuffle4).Control;
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IComponentShuffle to the generic utils method.
XYZWPad3Shuffle4 xyzw = default;
TestPad3Shuffle4Channel(
size,
(s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, control),
control);
(s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw),
xyzw.Control);
var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0);
TestPad3Shuffle4Channel(
size,
(s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz),
xwyz.Control);
var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1);
TestPad3Shuffle4Channel(
size,
(s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy),
yyyy.Control);
var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3);
TestPad3Shuffle4Channel(
size,
(s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww),
wwww.Control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE);
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}
[Theory]
@ -126,15 +140,34 @@ namespace SixLabors.ImageSharp.Tests.Common
{
static void RunTest(string serialized)
{
// No need to test multiple shuffle controls as the
// pipeline is always the same.
int size = FeatureTestRunner.Deserialize<int>(serialized);
byte control = default(WZYXShuffle4).Control;
// These cannot be expressed as a theory as you cannot
// use RemoteExecutor within generic methods nor pass
// IComponentShuffle to the generic utils method.
XYZWShuffle4Slice3 xyzw = default;
TestShuffle4Slice3Channel(
size,
(s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, control),
control);
(s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw),
xyzw.Control);
var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0);
TestShuffle4Slice3Channel(
size,
(s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz),
xwyz.Control);
var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1);
TestShuffle4Slice3Channel(
size,
(s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy),
yyyy.Control);
var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3);
TestShuffle4Slice3Channel(
size,
(s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww),
wwww.Control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
@ -212,7 +245,7 @@ namespace SixLabors.ImageSharp.Tests.Common
byte[] source = new byte[count];
new Random(count).NextBytes(source);
var result = new byte[(int)(count * (4 / 3D))];
var result = new byte[count * 4 / 3];
byte[] expected = new byte[result.Length];
@ -231,6 +264,20 @@ namespace SixLabors.ImageSharp.Tests.Common
expected[p3 + i] = byte.MaxValue;
}
Span<byte> temp = stackalloc byte[4];
for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3)
{
temp[0] = source[j];
temp[1] = source[j + 1];
temp[2] = source[j + 2];
temp[3] = byte.MaxValue;
expected[i] = temp[p0];
expected[i + 1] = temp[p1];
expected[i + 2] = temp[p2];
expected[i + 3] = temp[p3];
}
convert(source, result);
for (int i = 0; i < expected.Length; i++)
@ -249,7 +296,7 @@ namespace SixLabors.ImageSharp.Tests.Common
byte[] source = new byte[count];
new Random(count).NextBytes(source);
var result = new byte[(int)(count * (3 / 4D))];
var result = new byte[count * 3 / 4];
byte[] expected = new byte[result.Length];

Loading…
Cancel
Save