mirror of https://github.com/SixLabors/ImageSharp
committed by
GitHub
20 changed files with 1077 additions and 307 deletions
@ -0,0 +1,165 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Apache License, Version 2.0.
|
||||
|
|
||||
|
using System; |
||||
|
using System.Buffers.Binary; |
||||
|
using System.Runtime.CompilerServices; |
||||
|
using System.Runtime.InteropServices; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp |
||||
|
{ |
||||
|
/// <summary>
/// Contract for types that are able to shuffle (reorder) the components of pixels.
/// The fallback method declared here is what gets used on platforms
/// that do not support Hardware Intrinsics.
/// </summary>
internal interface IComponentShuffle
{
    /// <summary>
    /// Gets the control byte that describes the shuffle.
    /// </summary>
    byte Control { get; }

    /// <summary>
    /// Shuffles the 8-bit integers of <paramref name="source"/> within 128-bit lanes
    /// according to the control, writing the shuffled values to <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
}
||||
|
|
||||
|
/// <summary>
/// General purpose <see cref="IComponentShuffle"/> that can apply any 4-component
/// shuffle described by a control byte, one component at a time.
/// </summary>
internal readonly struct DefaultShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4"/> struct
    /// from the four individual source indices.
    /// </summary>
    /// <param name="p3">Index (within each group of 4) of the source component written to destination component 3.</param>
    /// <param name="p2">Index of the source component written to destination component 2.</param>
    /// <param name="p1">Index of the source component written to destination component 1.</param>
    /// <param name="p0">Index of the source component written to destination component 0.</param>
    public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
        : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0))
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4"/> struct
    /// from an already packed control byte.
    /// </summary>
    /// <param name="control">The packed shuffle control.</param>
    public DefaultShuffle4(byte control) => this.Control = control;

    /// <inheritdoc/>
    public byte Control { get; }

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);
        SimdUtils.Shuffle.InverseMmShuffle(
            this.Control,
            out int p3,
            out int p2,
            out int p1,
            out int p0);

        // Unsafe.Add performs no bounds checking, so only visit complete groups of
        // 4 components that exist in BOTH spans. For valid input (equal lengths,
        // divisible by 4) this is exactly source.Length.
        int count = Math.Min(source.Length, dest.Length) & ~3;

        for (int i = 0; i < count; i += 4)
        {
            Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
            Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
            Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
            Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
        }
    }
}
||||
|
|
||||
|
/// <summary>
/// <see cref="IComponentShuffle"/> that turns each XYZW group into WXYZ
/// by rotating the packed 32-bit value left by 8 bits.
/// </summary>
internal readonly struct WXYZShuffle4 : IComponentShuffle
{
    /// <inheritdoc/>
    public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint sBase = ref MemoryMarshal.GetReference(s);
        ref uint dBase = ref MemoryMarshal.GetReference(d);

        // Unsafe.Add is not bounds checked; never run past the shorter span.
        // For valid input (equal lengths) this is exactly s.Length.
        int count = Math.Min(s.Length, d.Length);

        // The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
        // and ROTR (Rotate Right) emitting efficient CPU instructions:
        // https://github.com/dotnet/coreclr/pull/1830
        for (int i = 0; i < count; i++)
        {
            uint packed = Unsafe.Add(ref sBase, i);

            // Bytes are listed most significant first.
            // packed          = [W Z Y X]
            // ROTL(8, packed) = [Z Y X W]
            Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24);
        }
    }
}
||||
|
|
||||
|
/// <summary>
/// <see cref="IComponentShuffle"/> that turns each XYZW group into WZYX
/// by reversing the byte order of the packed 32-bit value.
/// </summary>
internal readonly struct WZYXShuffle4 : IComponentShuffle
{
    /// <inheritdoc/>
    public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint sBase = ref MemoryMarshal.GetReference(s);
        ref uint dBase = ref MemoryMarshal.GetReference(d);

        // Unsafe.Add is not bounds checked; never run past the shorter span.
        // For valid input (equal lengths) this is exactly s.Length.
        int count = Math.Min(s.Length, d.Length);

        for (int i = 0; i < count; i++)
        {
            uint packed = Unsafe.Add(ref sBase, i);

            // Bytes are listed most significant first.
            // packed          = [W Z Y X]
            // REVERSE(packed) = [X Y Z W]
            Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed);
        }
    }
}
||||
|
|
||||
|
/// <summary>
/// <see cref="IComponentShuffle"/> that turns each XYZW group into YZWX
/// by rotating the packed 32-bit value right by 8 bits.
/// </summary>
internal readonly struct YZWXShuffle4 : IComponentShuffle
{
    /// <inheritdoc/>
    public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint sBase = ref MemoryMarshal.GetReference(s);
        ref uint dBase = ref MemoryMarshal.GetReference(d);

        // Unsafe.Add is not bounds checked; never run past the shorter span.
        // For valid input (equal lengths) this is exactly s.Length.
        int count = Math.Min(s.Length, d.Length);

        for (int i = 0; i < count; i++)
        {
            uint packed = Unsafe.Add(ref sBase, i);

            // Bytes are listed most significant first.
            // packed          = [W Z Y X]
            // ROTR(8, packed) = [X W Z Y]
            Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24);
        }
    }
}
||||
|
|
||||
|
/// <summary>
/// <see cref="IComponentShuffle"/> that turns each XYZW group into ZYXW
/// by swapping the X and Z bytes of the packed 32-bit value.
/// </summary>
internal readonly struct ZYXWShuffle4 : IComponentShuffle
{
    /// <inheritdoc/>
    public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint sBase = ref MemoryMarshal.GetReference(s);
        ref uint dBase = ref MemoryMarshal.GetReference(d);

        // Unsafe.Add is not bounds checked; never run past the shorter span.
        // For valid input (equal lengths) this is exactly s.Length.
        int count = Math.Min(s.Length, d.Length);

        for (int i = 0; i < count; i++)
        {
            uint packed = Unsafe.Add(ref sBase, i);

            // Bytes are listed most significant first.
            // packed                 = [W Z Y X]
            // tmp1                   = [W 0 Y 0]
            // tmp2                   = [0 Z 0 X]
            // tmp3 = ROTL(16, tmp2)  = [0 X 0 Z]
            // tmp1 | tmp3            = [W X Y Z]
            uint tmp1 = packed & 0xFF00FF00;
            uint tmp2 = packed & 0x00FF00FF;
            uint tmp3 = (tmp2 << 16) | (tmp2 >> 16);

            // tmp1 and tmp3 occupy disjoint bytes, so OR merges them without carries.
            Unsafe.Add(ref dBase, i) = tmp1 | tmp3;
        }
    }
}
||||
|
} |
||||
@ -0,0 +1,141 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Apache License, Version 2.0.
|
||||
|
|
||||
|
using System; |
||||
|
using System.Diagnostics; |
||||
|
using System.Runtime.CompilerServices; |
||||
|
using System.Runtime.InteropServices; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp |
||||
|
{ |
||||
|
internal static partial class SimdUtils
{
    /// <summary>
    /// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
    /// using the control and store the results in <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source span of floats.</param>
    /// <param name="dest">The destination span of floats.</param>
    /// <param name="control">The byte control.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void Shuffle4Channel(
        ReadOnlySpan<float> source,
        Span<float> dest,
        byte control)
    {
        VerifyShuffleSpanInput(source, dest);

#if SUPPORTS_RUNTIME_INTRINSICS
        // The spans are passed by reference; whatever is left in them afterwards
        // is treated as the remainder below.
        HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control);
#endif

        // Deal with the remainder:
        if (source.Length > 0)
        {
            ShuffleRemainder4Channel(source, dest, control);
        }
    }

    /// <summary>
    /// Shuffle 8-bit integers within 128-bit lanes in <paramref name="source"/>
    /// using the control and store the results in <paramref name="dest"/>.
    /// </summary>
    /// <typeparam name="TShuffle">The type of shuffle to perform.</typeparam>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    /// <param name="shuffle">The shuffle providing the control and the fallback implementation.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void Shuffle4Channel<TShuffle>(
        ReadOnlySpan<byte> source,
        Span<byte> dest,
        TShuffle shuffle)
        where TShuffle : struct, IComponentShuffle
    {
        VerifyShuffleSpanInput(source, dest);

#if SUPPORTS_RUNTIME_INTRINSICS
        // The spans are passed by reference; whatever is left in them afterwards
        // is treated as the remainder below.
        HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control);
#endif

        // Deal with the remainder:
        if (source.Length > 0)
        {
            shuffle.RunFallbackShuffle(source, dest);
        }
    }

    /// <summary>
    /// Scalar shuffle of the floats in <paramref name="source"/> into <paramref name="dest"/>,
    /// processing one group of 4 elements per iteration.
    /// </summary>
    /// <param name="source">The source span of floats.</param>
    /// <param name="dest">The destination span of floats.</param>
    /// <param name="control">The byte control.</param>
    public static void ShuffleRemainder4Channel(
        ReadOnlySpan<float> source,
        Span<float> dest,
        byte control)
    {
        ref float sBase = ref MemoryMarshal.GetReference(source);
        ref float dBase = ref MemoryMarshal.GetReference(dest);
        Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);

        // Unsafe.Add performs no bounds checking, so only visit complete groups of
        // 4 elements that exist in BOTH spans. For valid input (equal lengths,
        // divisible by 4) this is exactly source.Length.
        int count = Math.Min(source.Length, dest.Length) & ~3;

        for (int i = 0; i < count; i += 4)
        {
            Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i);
            Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i);
            Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i);
            Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i);
        }
    }

    /// <summary>
    /// Debug-only validation that both spans have the same length
    /// and that the length is divisible by 4.
    /// </summary>
    [Conditional("DEBUG")]
    private static void VerifyShuffleSpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
        where T : struct
    {
        DebugGuard.IsTrue(
            source.Length == dest.Length,
            nameof(source),
            "Input spans must be of same length!");

        DebugGuard.IsTrue(
            source.Length % 4 == 0,
            nameof(source),
            "Input spans must be divisible by 4!");
    }

    /// <summary>
    /// Helpers for packing and unpacking 4-component shuffle control bytes.
    /// </summary>
    public static class Shuffle
    {
        /// <summary>
        /// Packs four indices into a single control byte:
        /// <paramref name="p3"/> in bits 7-6, <paramref name="p2"/> in bits 5-4,
        /// <paramref name="p1"/> in bits 3-2 and <paramref name="p0"/> in bits 1-0.
        /// The values are not masked, so each is expected to be in the range 0-3.
        /// </summary>
        /// <returns>The packed control byte.</returns>
        [MethodImpl(InliningOptions.ShortMethod)]
        public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0)
            => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0);

        /// <summary>
        /// Fills <paramref name="span"/> with byte indices derived from
        /// <paramref name="control"/>: for every group of 4 bytes starting at offset i
        /// the values p0 + i, p1 + i, p2 + i and p3 + i are written (truncated to a byte).
        /// </summary>
        /// <param name="span">The span to fill.</param>
        /// <param name="control">The byte control.</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        public static void MmShuffleSpan(ref Span<byte> span, byte control)
        {
            InverseMmShuffle(
                control,
                out int p3,
                out int p2,
                out int p1,
                out int p0);

            ref byte spanBase = ref MemoryMarshal.GetReference(span);

            // Unsafe.Add is not bounds checked; only write complete groups of 4.
            int count = span.Length & ~3;

            for (int i = 0; i < count; i += 4)
            {
                Unsafe.Add(ref spanBase, i) = (byte)(p0 + i);
                Unsafe.Add(ref spanBase, i + 1) = (byte)(p1 + i);
                Unsafe.Add(ref spanBase, i + 2) = (byte)(p2 + i);
                Unsafe.Add(ref spanBase, i + 3) = (byte)(p3 + i);
            }
        }

        /// <summary>
        /// Unpacks a control byte into its four 2-bit indices; the inverse of
        /// <see cref="MmShuffle(byte, byte, byte, byte)"/>.
        /// </summary>
        /// <param name="control">The byte control.</param>
        /// <param name="p3">Receives bits 7-6.</param>
        /// <param name="p2">Receives bits 5-4.</param>
        /// <param name="p1">Receives bits 3-2.</param>
        /// <param name="p0">Receives bits 1-0.</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        public static void InverseMmShuffle(
            byte control,
            out int p3,
            out int p2,
            out int p1,
            out int p0)
        {
            p3 = (control >> 6) & 0x3;
            p2 = (control >> 4) & 0x3;
            p1 = (control >> 2) & 0x3;
            p0 = control & 0x3;
        }
    }
}
||||
|
} |
||||
@ -0,0 +1,67 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Apache License, Version 2.0.
|
||||
|
|
||||
|
using System; |
||||
|
using BenchmarkDotNet.Attributes; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk |
||||
|
{ |
||||
|
/// <summary>
/// Benchmarks the byte overload of <c>SimdUtils.Shuffle4Channel</c> using the
/// <c>WXYZShuffle4</c> shuffle over buffers of <see cref="Count"/> bytes.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class ShuffleByte4Channel
{
    private byte[] source;
    private byte[] destination;

    /// <summary>
    /// Gets or sets the number of bytes in the benchmarked buffers.
    /// </summary>
    [Params(128, 256, 512, 1024, 2048)]
    public int Count { get; set; }

    /// <summary>
    /// Allocates both buffers and fills the source with pseudo-random bytes,
    /// seeded with <see cref="Count"/> so every run sees the same data.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        int length = this.Count;
        var random = new Random(length);

        this.source = new byte[length];
        random.NextBytes(this.source);
        this.destination = new byte[length];
    }

    [Benchmark]
    public void Shuffle4Channel()
        => SimdUtils.Shuffle4Channel<WXYZShuffle4>(this.source, this.destination, default);
}
||||
|
|
||||
|
// 2020-10-29
|
||||
|
// ##########
|
||||
|
//
|
||||
|
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
|
||||
|
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
|
||||
|
// .NET Core SDK=3.1.403
|
||||
|
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
||||
|
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
||||
|
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
||||
|
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
||||
|
//
|
||||
|
// Runtime=.NET Core 3.1
|
||||
|
//
|
||||
|
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
|
||||
|
// |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - |
|
||||
|
// | | | | | | | | | | | | | |
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - |
|
||||
|
// | | | | | | | | | | | | | |
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - |
|
||||
|
// | | | | | | | | | | | | | |
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - |
|
||||
|
// | | | | | | | | | | | | | |
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - |
|
||||
|
} |
||||
@ -0,0 +1,68 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Apache License, Version 2.0.
|
||||
|
|
||||
|
using System; |
||||
|
using BenchmarkDotNet.Attributes; |
||||
|
using SixLabors.ImageSharp.Tests; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk |
||||
|
{ |
||||
|
/// <summary>
/// Benchmarks the float overload of <c>SimdUtils.Shuffle4Channel</c> using the
/// control byte of <c>WXYZShuffle4</c> over buffers of <see cref="Count"/> floats.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class ShuffleFloat4Channel
{
    private static readonly byte ShuffleControl = default(WXYZShuffle4).Control;
    private float[] source;
    private float[] destination;

    /// <summary>
    /// Gets or sets the number of floats in the benchmarked buffers.
    /// </summary>
    [Params(128, 256, 512, 1024, 2048)]
    public int Count { get; set; }

    /// <summary>
    /// Creates the source data from a generator seeded with <see cref="Count"/>
    /// and allocates a destination buffer of the same length.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        int length = this.Count;
        var random = new Random(length);

        this.source = random.GenerateRandomFloatArray(length, 0, 256);
        this.destination = new float[length];
    }

    [Benchmark]
    public void Shuffle4Channel()
        => SimdUtils.Shuffle4Channel(this.source, this.destination, ShuffleControl);
}
||||
|
|
||||
|
// 2020-10-29
|
||||
|
// ##########
|
||||
|
//
|
||||
|
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
|
||||
|
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
|
||||
|
// .NET Core SDK=3.1.403
|
||||
|
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
||||
|
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
||||
|
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
||||
|
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
||||
|
//
|
||||
|
// Runtime=.NET Core 3.1
|
||||
|
//
|
||||
|
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
|
||||
|
// |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:|
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 128 | 9.818 ns | 0.1457 ns | 0.1292 ns | 0.15 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - |
|
||||
|
// | | | | | | | | | | | | |
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 256 | 15.878 ns | 0.1983 ns | 0.1758 ns | 0.13 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - |
|
||||
|
// | | | | | | | | | | | | |
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 512 | 29.452 ns | 0.3334 ns | 0.3118 ns | 0.11 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - |
|
||||
|
// | | | | | | | | | | | | |
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 1024 | 53.757 ns | 0.3212 ns | 0.2847 ns | 0.11 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - |
|
||||
|
// | | | | | | | | | | | | |
|
||||
|
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - |
|
||||
|
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - |
|
||||
|
} |
||||
@ -0,0 +1,161 @@ |
|||||
|
// Copyright (c) Six Labors.
|
||||
|
// Licensed under the Apache License, Version 2.0.
|
||||
|
|
||||
|
using System; |
||||
|
using SixLabors.ImageSharp.Tests.TestUtilities; |
||||
|
using Xunit; |
||||
|
|
||||
|
namespace SixLabors.ImageSharp.Tests.Common |
||||
|
{ |
||||
|
public partial class SimdUtilsTests
{
    /// <summary>
    /// Verifies the float overload of <c>SimdUtils.Shuffle4Channel</c> against a
    /// straightforward indexed reference implementation.
    /// </summary>
    [Theory]
    [MemberData(nameof(ArraySizesDivisibleBy4))]
    public void BulkShuffleFloat4Channel(int count)
    {
        static void RunTest(string serialized)
        {
            // No need to test multiple shuffle controls as the
            // pipeline is always the same.
            int size = FeatureTestRunner.Deserialize<int>(serialized);
            byte control = default(WZYXShuffle4).Control;

            TestShuffleFloat4Channel(
                size,
                (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control),
                control);
        }

        FeatureTestRunner.RunWithHwIntrinsicsFeature(
            RunTest,
            count,
            HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE);
    }

    /// <summary>
    /// Verifies the byte overload of <c>SimdUtils.Shuffle4Channel</c> for every
    /// specialized shuffle and for several <c>DefaultShuffle4</c> controls.
    /// </summary>
    [Theory]
    [MemberData(nameof(ArraySizesDivisibleBy4))]
    public void BulkShuffleByte4Channel(int count)
    {
        static void RunTest(string serialized)
        {
            // The theory already supplies every array size, so each case below is run
            // exactly once for the deserialized size.
            int size = FeatureTestRunner.Deserialize<int>(serialized);

            // These cannot be expressed as a theory as you cannot
            // use RemoteExecutor within generic methods nor pass
            // IComponentShuffle to the generic utils method.
            WXYZShuffle4 wxyz = default;
            TestShuffleByte4Channel(
                size,
                (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz),
                wxyz.Control);

            WZYXShuffle4 wzyx = default;
            TestShuffleByte4Channel(
                size,
                (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx),
                wzyx.Control);

            YZWXShuffle4 yzwx = default;
            TestShuffleByte4Channel(
                size,
                (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx),
                yzwx.Control);

            ZYXWShuffle4 zyxw = default;
            TestShuffleByte4Channel(
                size,
                (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw),
                zyxw.Control);

            var xwyz = new DefaultShuffle4(2, 1, 3, 0);
            TestShuffleByte4Channel(
                size,
                (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz),
                xwyz.Control);

            var yyyy = new DefaultShuffle4(1, 1, 1, 1);
            TestShuffleByte4Channel(
                size,
                (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy),
                yyyy.Control);

            var wwww = new DefaultShuffle4(3, 3, 3, 3);
            TestShuffleByte4Channel(
                size,
                (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww),
                wwww.Control);
        }

        FeatureTestRunner.RunWithHwIntrinsicsFeature(
            RunTest,
            count,
            HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
    }

    /// <summary>
    /// Runs <paramref name="convert"/> over seeded random floats and compares the output
    /// with a reference shuffle computed by plain array indexing.
    /// </summary>
    /// <param name="count">The number of elements; expected to be divisible by 4.</param>
    /// <param name="convert">The shuffle under test, invoked as (source, destination).</param>
    /// <param name="control">The control byte used to compute the expected result.</param>
    private static void TestShuffleFloat4Channel(
        int count,
        Action<Memory<float>, Memory<float>> convert,
        byte control)
    {
        float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256);
        var result = new float[count];

        float[] expected = new float[count];

        SimdUtils.Shuffle.InverseMmShuffle(
            control,
            out int p3,
            out int p2,
            out int p1,
            out int p0);

        for (int i = 0; i < expected.Length; i += 4)
        {
            expected[i] = source[p0 + i];
            expected[i + 1] = source[p1 + i];
            expected[i + 2] = source[p2 + i];
            expected[i + 3] = source[p3 + i];
        }

        convert(source, result);

        Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5F));
    }

    /// <summary>
    /// Runs <paramref name="convert"/> over seeded random bytes and compares the output
    /// with a reference shuffle computed by plain array indexing.
    /// </summary>
    /// <param name="count">The number of elements; expected to be divisible by 4.</param>
    /// <param name="convert">The shuffle under test, invoked as (source, destination).</param>
    /// <param name="control">The control byte used to compute the expected result.</param>
    private static void TestShuffleByte4Channel(
        int count,
        Action<Memory<byte>, Memory<byte>> convert,
        byte control)
    {
        byte[] source = new byte[count];
        new Random(count).NextBytes(source);
        var result = new byte[count];

        byte[] expected = new byte[count];

        SimdUtils.Shuffle.InverseMmShuffle(
            control,
            out int p3,
            out int p2,
            out int p1,
            out int p0);

        for (int i = 0; i < expected.Length; i += 4)
        {
            expected[i] = source[p0 + i];
            expected[i + 1] = source[p1 + i];
            expected[i + 2] = source[p2 + i];
            expected[i + 3] = source[p3 + i];
        }

        convert(source, result);

        Assert.Equal(expected, result);
    }
}
||||
|
} |
||||
Loading…
Reference in new issue