mirror of https://github.com/SixLabors/ImageSharp
committed by
GitHub
75 changed files with 3210 additions and 904 deletions
@ -0,0 +1,7 @@ |
|||
<?xml version="1.0" encoding="utf-8" ?> |
|||
<RunSettings> |
|||
<RunConfiguration> |
|||
<!--Used in conjunction with ActiveIssueAttribute to skip tests with known issues--> |
|||
<TestCaseFilter>category!=failing</TestCaseFilter> |
|||
</RunConfiguration> |
|||
</RunSettings> |
|||
@ -0,0 +1,165 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Buffers.Binary; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
/// <summary>
/// Contract implemented by the 4-component shuffle types in this file.
/// An implementation exposes a packed control byte and a scalar routine that
/// is used for shuffling on platforms that do not support Hardware Intrinsics.
/// </summary>
internal interface IComponentShuffle
{
    /// <summary>
    /// Gets the packed shuffle control byte.
    /// </summary>
    byte Control { get; }

    /// <summary>
    /// Scalar fallback: shuffles the 8-bit components of <paramref name="source"/>
    /// according to the control and writes the result to <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source span of bytes.</param>
    /// <param name="dest">The destination span of bytes.</param>
    void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
}
|||
|
|||
/// <summary>
/// General-purpose <see cref="IComponentShuffle"/> whose control byte is supplied by the caller,
/// either directly or as four component indices.
/// </summary>
internal readonly struct DefaultShuffle4 : IComponentShuffle
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4"/> struct from four
    /// component indices, packed into a control byte via <c>SimdUtils.Shuffle.MmShuffle</c>.
    /// </summary>
    /// <param name="p3">Source component index copied to destination component 3.</param>
    /// <param name="p2">Source component index copied to destination component 2.</param>
    /// <param name="p1">Source component index copied to destination component 1.</param>
    /// <param name="p0">Source component index copied to destination component 0.</param>
    public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
        : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0))
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4"/> struct
    /// from an already packed control byte.
    /// </summary>
    /// <param name="control">The packed shuffle control.</param>
    public DefaultShuffle4(byte control) => this.Control = control;

    /// <inheritdoc/>
    public byte Control { get; }

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        // Unpack the control byte into the four source indices once, up front.
        SimdUtils.Shuffle.InverseMmShuffle(
            this.Control,
            out int idx3,
            out int idx2,
            out int idx1,
            out int idx0);

        ref byte srcRef = ref MemoryMarshal.GetReference(source);
        ref byte dstRef = ref MemoryMarshal.GetReference(dest);
        int length = source.Length;

        // Walk the data one 4-byte group at a time. There is no bounds checking here:
        // the loop relies on the length being a multiple of 4 and on dest being at least as long.
        for (int offset = 0; offset < length; offset += 4)
        {
            ref byte groupIn = ref Unsafe.Add(ref srcRef, offset);
            ref byte groupOut = ref Unsafe.Add(ref dstRef, offset);

            groupOut = Unsafe.Add(ref groupIn, idx0);
            Unsafe.Add(ref groupOut, 1) = Unsafe.Add(ref groupIn, idx1);
            Unsafe.Add(ref groupOut, 2) = Unsafe.Add(ref groupIn, idx2);
            Unsafe.Add(ref groupOut, 3) = Unsafe.Add(ref groupIn, idx3);
        }
    }
}
|||
|
|||
/// <summary>
/// Shuffle whose control selects source components 3, 0, 1, 2 for destination components 0..3,
/// i.e. a group stored as X Y Z W is written as W X Y Z.
/// </summary>
internal readonly struct WXYZShuffle4 : IComponentShuffle
{
    /// <inheritdoc/>
    public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        // Reinterpret each 4-byte group as one 32-bit value.
        ReadOnlySpan<uint> pixelsIn = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> pixelsOut = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint inRef = ref MemoryMarshal.GetReference(pixelsIn);
        ref uint outRef = ref MemoryMarshal.GetReference(pixelsOut);
        int count = pixelsIn.Length;

        // The shift/or pair below is a rotate-left idiom that the JIT can detect and
        // turn into a single rotate instruction:
        // https://github.com/dotnet/coreclr/pull/1830
        for (int n = 0; n < count; n++)
        {
            uint value = Unsafe.Add(ref inRef, n);

            // Bytes listed most-significant first (assumes a little-endian load):
            // value          = [W Z Y X]
            // ROTL(8, value) = [Z Y X W]
            Unsafe.Add(ref outRef, n) = (value << 8) | (value >> 24);
        }
    }
}
|||
|
|||
/// <summary>
/// Shuffle whose control selects source components 3, 2, 1, 0 for destination components 0..3,
/// i.e. the byte order of every 4-byte group is reversed.
/// </summary>
internal readonly struct WZYXShuffle4 : IComponentShuffle
{
    /// <inheritdoc/>
    public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        // Reinterpret each 4-byte group as one 32-bit value.
        ReadOnlySpan<uint> pixelsIn = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> pixelsOut = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint inRef = ref MemoryMarshal.GetReference(pixelsIn);
        ref uint outRef = ref MemoryMarshal.GetReference(pixelsOut);
        int count = pixelsIn.Length;

        for (int n = 0; n < count; n++)
        {
            // Bytes listed most-significant first:
            // value          = [W Z Y X]
            // REVERSE(value) = [X Y Z W]
            uint value = Unsafe.Add(ref inRef, n);
            Unsafe.Add(ref outRef, n) = BinaryPrimitives.ReverseEndianness(value);
        }
    }
}
|||
|
|||
/// <summary>
/// Shuffle whose control selects source components 1, 2, 3, 0 for destination components 0..3,
/// i.e. a group stored as X Y Z W is written as Y Z W X.
/// </summary>
internal readonly struct YZWXShuffle4 : IComponentShuffle
{
    /// <inheritdoc/>
    public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        // Reinterpret each 4-byte group as one 32-bit value.
        ReadOnlySpan<uint> pixelsIn = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> pixelsOut = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint inRef = ref MemoryMarshal.GetReference(pixelsIn);
        ref uint outRef = ref MemoryMarshal.GetReference(pixelsOut);
        int count = pixelsIn.Length;

        for (int n = 0; n < count; n++)
        {
            uint value = Unsafe.Add(ref inRef, n);

            // Bytes listed most-significant first (assumes a little-endian load):
            // value          = [W Z Y X]
            // ROTR(8, value) = [X W Z Y]  (memory order: Y Z W X)
            Unsafe.Add(ref outRef, n) = (value >> 8) | (value << 24);
        }
    }
}
|||
|
|||
/// <summary>
/// Shuffle whose control selects source components 2, 1, 0, 3 for destination components 0..3,
/// i.e. components 0 and 2 of every 4-byte group are swapped.
/// </summary>
internal readonly struct ZYXWShuffle4 : IComponentShuffle
{
    /// <inheritdoc/>
    public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);

    /// <inheritdoc/>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        // Reinterpret each 4-byte group as one 32-bit value.
        ReadOnlySpan<uint> pixelsIn = MemoryMarshal.Cast<byte, uint>(source);
        Span<uint> pixelsOut = MemoryMarshal.Cast<byte, uint>(dest);
        ref uint inRef = ref MemoryMarshal.GetReference(pixelsIn);
        ref uint outRef = ref MemoryMarshal.GetReference(pixelsOut);
        int count = pixelsIn.Length;

        for (int n = 0; n < count; n++)
        {
            uint value = Unsafe.Add(ref inRef, n);

            // Bytes listed most-significant first (assumes a little-endian load):
            // value                     = [W Z Y X]
            // stay                      = [W 0 Y 0]
            // move                      = [0 Z 0 X]
            // swapped = ROTL(16, move)  = [0 X 0 Z]
            // stay + swapped            = [W X Y Z]
            uint stay = value & 0xFF00FF00;
            uint move = value & 0x00FF00FF;
            uint swapped = (move << 16) | (move >> 16);

            Unsafe.Add(ref outRef, n) = stay + swapped;
        }
    }
}
|||
} |
|||
@ -1,103 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
internal static partial class SimdUtils |
|||
{ |
|||
/// <summary>
/// AVX2 based conversion of normalized floats to saturated bytes.
/// </summary>
public static class Avx2Intrinsics
{
    // Eight 32-bit indices (0, 4, 1, 5, 2, 6, 3, 7) stored as bytes; loaded as the control
    // vector for Avx2.PermuteVar8x32 in NormalizedFloatToByteSaturate.
    private static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };

    /// <summary>
    /// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
    /// </summary>
    /// <param name="source">The source floats; on return, sliced to the unprocessed tail.</param>
    /// <param name="dest">The destination bytes; on return, sliced to the unprocessed tail.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    internal static void NormalizedFloatToByteSaturateReduce(
        ref ReadOnlySpan<float> source,
        ref Span<byte> dest)
    {
        DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

        if (Avx2.IsSupported)
        {
            // Round down to whole 256-bit byte vectors. This is the same unit that
            // NormalizedFloatToByteSaturate verifies and iterates with, so the two
            // methods cannot disagree about the processed length.
            int remainder = ImageMaths.ModuloP2(source.Length, Vector256<byte>.Count);
            int adjustedCount = source.Length - remainder;

            if (adjustedCount > 0)
            {
                NormalizedFloatToByteSaturate(
                    source.Slice(0, adjustedCount),
                    dest.Slice(0, adjustedCount));

                source = source.Slice(adjustedCount);
                dest = dest.Slice(adjustedCount);
            }
        }
    }

    /// <summary>
    /// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
    /// </summary>
    /// <remarks>
    /// Implementation is based on MagicScaler code:
    /// https://github.com/saucecontrol/PhotoSauce/blob/a9bd6e5162d2160419f0cf743fd4f536c079170b/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L453-L477
    /// </remarks>
    /// <param name="source">The source span of floats.</param>
    /// <param name="dest">The destination span of bytes.</param>
    internal static void NormalizedFloatToByteSaturate(
        ReadOnlySpan<float> source,
        Span<byte> dest)
    {
        VerifySpanInput(source, dest, Vector256<byte>.Count);

        // Each iteration consumes four float vectors and produces one byte vector.
        int n = dest.Length / Vector256<byte>.Count;

        ref Vector256<float> sourceBase =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
        ref Vector256<byte> destBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));

        var maxBytes = Vector256.Create(255f);
        ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
        Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);

        for (int i = 0; i < n; i++)
        {
            ref Vector256<float> s = ref Unsafe.Add(ref sourceBase, i * 4);

            Vector256<float> f0 = s;
            Vector256<float> f1 = Unsafe.Add(ref s, 1);
            Vector256<float> f2 = Unsafe.Add(ref s, 2);
            Vector256<float> f3 = Unsafe.Add(ref s, 3);

            Vector256<int> w0 = ConvertToInt32(f0, maxBytes);
            Vector256<int> w1 = ConvertToInt32(f1, maxBytes);
            Vector256<int> w2 = ConvertToInt32(f2, maxBytes);
            Vector256<int> w3 = ConvertToInt32(f3, maxBytes);

            // Narrow int -> short -> byte with saturation, then reorder the packed
            // 32-bit groups with the permute mask before storing.
            Vector256<short> u0 = Avx2.PackSignedSaturate(w0, w1);
            Vector256<short> u1 = Avx2.PackSignedSaturate(w2, w3);
            Vector256<byte> b = Avx2.PackUnsignedSaturate(u0, u1);
            b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte();

            Unsafe.Add(ref destBase, i) = b;
        }
    }

    // Multiplies by the scale and converts the product to 32-bit integers.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector256<int> ConvertToInt32(Vector256<float> vf, Vector256<float> scale)
    {
        vf = Avx.Multiply(vf, scale);
        return Avx.ConvertToVector256Int32(vf);
    }
}
|||
} |
|||
} |
|||
#endif
|
|||
@ -0,0 +1,529 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using System.Runtime.Intrinsics; |
|||
using System.Runtime.Intrinsics.X86; |
|||
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
internal static partial class SimdUtils |
|||
{ |
|||
public static class HwIntrinsics |
|||
{ |
|||
/// <summary>
/// Gets eight 32-bit indices (0, 4, 1, 5, 2, 6, 3, 7) stored as bytes. Used in this class as the
/// control vector for <c>Avx2.PermuteVar8x32</c> after the saturating packs.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[]
{
    0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0,
    2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0
};

/// <summary>
/// Gets eight 32-bit indices (0, 2, 4, 6, 1, 3, 5, 7) stored as bytes: even positions first,
/// then odd positions.
/// </summary>
public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[]
{
    0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0,
    1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0
};
|||
|
|||
/// <summary>
/// Shuffles the leading part of <paramref name="source"/> that fills whole vectors into
/// <paramref name="dest"/>, then advances both spans past the processed elements so that only
/// the remainder is left for the caller. Does nothing when neither AVX nor SSE is supported.
/// </summary>
/// <param name="source">The source span of floats; sliced to the unprocessed tail on return.</param>
/// <param name="dest">The destination span of floats; sliced to the unprocessed tail on return.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4ChannelReduce(
    ref ReadOnlySpan<float> source,
    ref Span<float> dest,
    byte control)
{
    if (!Avx.IsSupported && !Sse.IsSupported)
    {
        return;
    }

    // Elements per vector for the widest instruction set available.
    int vectorLength = Avx.IsSupported
        ? Vector256<float>.Count
        : Vector128<float>.Count;

    int vectorizable = source.Length - ImageMaths.ModuloP2(source.Length, vectorLength);
    if (vectorizable <= 0)
    {
        return;
    }

    Shuffle4Channel(
        source.Slice(0, vectorizable),
        dest.Slice(0, vectorizable),
        control);

    source = source.Slice(vectorizable);
    dest = dest.Slice(vectorizable);
}
|||
|
|||
/// <summary>
/// Shuffles the leading part of <paramref name="source"/> that fills whole vectors into
/// <paramref name="dest"/>, then advances both spans past the processed bytes so that only
/// the remainder is left for the caller. Does nothing when neither AVX2 nor SSSE3 is supported.
/// </summary>
/// <param name="source">The source span of bytes; sliced to the unprocessed tail on return.</param>
/// <param name="dest">The destination span of bytes; sliced to the unprocessed tail on return.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4ChannelReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<byte> dest,
    byte control)
{
    if (!Avx2.IsSupported && !Ssse3.IsSupported)
    {
        return;
    }

    // Bytes per vector for the widest instruction set available.
    int vectorLength = Avx2.IsSupported
        ? Vector256<byte>.Count
        : Vector128<byte>.Count;

    int vectorizable = source.Length - ImageMaths.ModuloP2(source.Length, vectorLength);
    if (vectorizable <= 0)
    {
        return;
    }

    Shuffle4Channel(
        source.Slice(0, vectorizable),
        dest.Slice(0, vectorizable),
        control);

    source = source.Slice(vectorizable);
    dest = dest.Slice(vectorizable);
}
|||
|
|||
// Vectorized float shuffle. Uses Avx.Permute on 256-bit vectors when AVX is supported and
// Sse.Shuffle on 128-bit vectors otherwise. Only whole vectors of dest are processed
// (dest.Length / vector size); the caller slices the spans accordingly.
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4Channel(
    ReadOnlySpan<float> source,
    Span<float> dest,
    byte control)
{
    if (Avx.IsSupported)
    {
        ref Vector256<float> srcRef =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
        ref Vector256<float> dstRef =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));

        int vectorCount = dest.Length / Vector256<float>.Count;
        int unrolledEnd = vectorCount - ImageMaths.Modulo4(vectorCount);

        // Main loop, unrolled four vectors per iteration.
        for (int index = 0; index < unrolledEnd; index += 4)
        {
            ref Vector256<float> s = ref Unsafe.Add(ref srcRef, index);
            ref Vector256<float> d = ref Unsafe.Add(ref dstRef, index);

            d = Avx.Permute(s, control);
            Unsafe.Add(ref d, 1) = Avx.Permute(Unsafe.Add(ref s, 1), control);
            Unsafe.Add(ref d, 2) = Avx.Permute(Unsafe.Add(ref s, 2), control);
            Unsafe.Add(ref d, 3) = Avx.Permute(Unsafe.Add(ref s, 3), control);
        }

        // Up to three vectors left over from the unrolled loop.
        for (int index = unrolledEnd; index < vectorCount; index++)
        {
            Unsafe.Add(ref dstRef, index) = Avx.Permute(Unsafe.Add(ref srcRef, index), control);
        }
    }
    else
    {
        // Sse
        ref Vector128<float> srcRef =
            ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
        ref Vector128<float> dstRef =
            ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));

        int vectorCount = dest.Length / Vector128<float>.Count;
        int unrolledEnd = vectorCount - ImageMaths.Modulo4(vectorCount);

        // Main loop, unrolled four vectors per iteration. Sse.Shuffle takes two operands;
        // the same vector is passed for both.
        for (int index = 0; index < unrolledEnd; index += 4)
        {
            ref Vector128<float> s = ref Unsafe.Add(ref srcRef, index);
            ref Vector128<float> d = ref Unsafe.Add(ref dstRef, index);

            d = Sse.Shuffle(s, s, control);

            Vector128<float> s1 = Unsafe.Add(ref s, 1);
            Unsafe.Add(ref d, 1) = Sse.Shuffle(s1, s1, control);

            Vector128<float> s2 = Unsafe.Add(ref s, 2);
            Unsafe.Add(ref d, 2) = Sse.Shuffle(s2, s2, control);

            Vector128<float> s3 = Unsafe.Add(ref s, 3);
            Unsafe.Add(ref d, 3) = Sse.Shuffle(s3, s3, control);
        }

        // Up to three vectors left over from the unrolled loop.
        for (int index = unrolledEnd; index < vectorCount; index++)
        {
            Vector128<float> tail = Unsafe.Add(ref srcRef, index);
            Unsafe.Add(ref dstRef, index) = Sse.Shuffle(tail, tail, control);
        }
    }
}
|||
|
|||
// Vectorized byte shuffle. Expands the control byte into a per-byte index vector with
// Shuffle.MmShuffleSpan, then applies Avx2.Shuffle (256-bit) or Ssse3.Shuffle (128-bit).
// Only whole vectors of dest are processed; the caller slices the spans accordingly.
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4Channel(
    ReadOnlySpan<byte> source,
    Span<byte> dest,
    byte control)
{
    if (Avx2.IsSupported)
    {
        // The index vector is built on the stack for convenience while the set of
        // shuffle controls offered by the library is still being decided.
        // Static ReadOnlySpan instances could replace this in the future if needed.
        Span<byte> indices = stackalloc byte[Vector256<byte>.Count];
        Shuffle.MmShuffleSpan(ref indices, control);
        Vector256<byte> indexVector = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(indices));

        ref Vector256<byte> srcRef =
            ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(source));
        ref Vector256<byte> dstRef =
            ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));

        int vectorCount = dest.Length / Vector256<byte>.Count;
        int unrolledEnd = vectorCount - ImageMaths.Modulo4(vectorCount);

        // Main loop, unrolled four vectors per iteration.
        for (int index = 0; index < unrolledEnd; index += 4)
        {
            ref Vector256<byte> s = ref Unsafe.Add(ref srcRef, index);
            ref Vector256<byte> d = ref Unsafe.Add(ref dstRef, index);

            d = Avx2.Shuffle(s, indexVector);
            Unsafe.Add(ref d, 1) = Avx2.Shuffle(Unsafe.Add(ref s, 1), indexVector);
            Unsafe.Add(ref d, 2) = Avx2.Shuffle(Unsafe.Add(ref s, 2), indexVector);
            Unsafe.Add(ref d, 3) = Avx2.Shuffle(Unsafe.Add(ref s, 3), indexVector);
        }

        // Up to three vectors left over from the unrolled loop.
        for (int index = unrolledEnd; index < vectorCount; index++)
        {
            Unsafe.Add(ref dstRef, index) = Avx2.Shuffle(Unsafe.Add(ref srcRef, index), indexVector);
        }
    }
    else
    {
        // Ssse3
        Span<byte> indices = stackalloc byte[Vector128<byte>.Count];
        Shuffle.MmShuffleSpan(ref indices, control);
        Vector128<byte> indexVector = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(indices));

        ref Vector128<byte> srcRef =
            ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
        ref Vector128<byte> dstRef =
            ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));

        int vectorCount = dest.Length / Vector128<byte>.Count;
        int unrolledEnd = vectorCount - ImageMaths.Modulo4(vectorCount);

        // Main loop, unrolled four vectors per iteration.
        for (int index = 0; index < unrolledEnd; index += 4)
        {
            ref Vector128<byte> s = ref Unsafe.Add(ref srcRef, index);
            ref Vector128<byte> d = ref Unsafe.Add(ref dstRef, index);

            d = Ssse3.Shuffle(s, indexVector);
            Unsafe.Add(ref d, 1) = Ssse3.Shuffle(Unsafe.Add(ref s, 1), indexVector);
            Unsafe.Add(ref d, 2) = Ssse3.Shuffle(Unsafe.Add(ref s, 2), indexVector);
            Unsafe.Add(ref d, 3) = Ssse3.Shuffle(Unsafe.Add(ref s, 3), indexVector);
        }

        // Up to three vectors left over from the unrolled loop.
        for (int index = unrolledEnd; index < vectorCount; index++)
        {
            Unsafe.Add(ref dstRef, index) = Ssse3.Shuffle(Unsafe.Add(ref srcRef, index), indexVector);
        }
    }
}
|||
|
|||
/// <summary>
/// Computes <c>(vm0 * vm1) + va</c> on <see cref="Vector256{T}"/> operands, using the
/// FMA instruction when it is supported and a separate AVX multiply and add otherwise.
/// </summary>
/// <param name="va">The vector to add to the intermediate result.</param>
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
/// <returns>The <see cref="Vector256{T}"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static Vector256<float> MultiplyAdd(
    in Vector256<float> va,
    in Vector256<float> vm0,
    in Vector256<float> vm1)
{
    if (!Fma.IsSupported)
    {
        Vector256<float> product = Avx.Multiply(vm0, vm1);
        return Avx.Add(product, va);
    }

    return Fma.MultiplyAdd(vm1, vm0, va);
}
|||
|
|||
/// <summary>
/// Runs <see cref="ByteToNormalizedFloat"/> over as many leading elements as fill whole byte
/// vectors, then slices both spans down so that only the remainder is left for the caller.
/// Does nothing when neither AVX2 nor SSE2 is supported.
/// </summary>
/// <param name="source">The source bytes; sliced to the unprocessed tail on return.</param>
/// <param name="dest">The destination floats; sliced to the unprocessed tail on return.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (!Avx2.IsSupported && !Sse2.IsSupported)
    {
        return;
    }

    // Bytes per vector for the widest instruction set available.
    int vectorLength = Avx2.IsSupported
        ? Vector256<byte>.Count
        : Vector128<byte>.Count;

    int vectorizable = source.Length - ImageMaths.ModuloP2(source.Length, vectorLength);
    if (vectorizable <= 0)
    {
        return;
    }

    ByteToNormalizedFloat(source.Slice(0, vectorizable), dest.Slice(0, vectorizable));

    source = source.Slice(vectorizable);
    dest = dest.Slice(vectorizable);
}
|||
|
|||
/// <summary>
/// Implementation <see cref="SimdUtils.ByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
/// Each byte is widened to a 32-bit integer, converted to float and multiplied by 1/255.
/// </summary>
/// <remarks>
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182
/// </remarks>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of floats.</param>
internal static unsafe void ByteToNormalizedFloat(
    ReadOnlySpan<byte> source,
    Span<float> dest)
{
    // The widening loads below take a raw byte*. The span may refer to managed memory, so it
    // is pinned for as long as the pointer is in use; a pointer from Unsafe.AsPointer is not
    // tracked by the GC and could go stale if the underlying object were moved.
    fixed (byte* sourceBase = source)
    {
        if (Avx2.IsSupported)
        {
            VerifySpanInput(source, dest, Vector256<byte>.Count);

            // Each iteration consumes one byte vector's worth of input (32 bytes)
            // and produces four float vectors.
            int n = dest.Length / Vector256<byte>.Count;

            ref Vector256<float> destBase =
                ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));

            var scale = Vector256.Create(1 / (float)byte.MaxValue);

            for (int i = 0; i < n; i++)
            {
                int si = Vector256<byte>.Count * i;
                Vector256<int> i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
                Vector256<int> i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256<int>.Count);
                Vector256<int> i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 2));
                Vector256<int> i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 3));

                Vector256<float> f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
                Vector256<float> f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
                Vector256<float> f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
                Vector256<float> f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));

                ref Vector256<float> d = ref Unsafe.Add(ref destBase, i * 4);

                d = f0;
                Unsafe.Add(ref d, 1) = f1;
                Unsafe.Add(ref d, 2) = f2;
                Unsafe.Add(ref d, 3) = f3;
            }
        }
        else
        {
            // Sse
            VerifySpanInput(source, dest, Vector128<byte>.Count);

            // Each iteration consumes 16 bytes and produces four float vectors.
            int n = dest.Length / Vector128<byte>.Count;

            ref Vector128<float> destBase =
                ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));

            var scale = Vector128.Create(1 / (float)byte.MaxValue);
            Vector128<byte> zero = Vector128<byte>.Zero;

            for (int i = 0; i < n; i++)
            {
                int si = Vector128<byte>.Count * i;

                Vector128<int> i0, i1, i2, i3;
                if (Sse41.IsSupported)
                {
                    i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
                    i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128<int>.Count);
                    i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 2));
                    i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 3));
                }
                else
                {
                    // Without SSE4.1, widen byte -> short -> int by interleaving with zero.
                    Vector128<byte> b = Sse2.LoadVector128(sourceBase + si);
                    Vector128<short> s0 = Sse2.UnpackLow(b, zero).AsInt16();
                    Vector128<short> s1 = Sse2.UnpackHigh(b, zero).AsInt16();

                    i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
                    i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
                    i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
                    i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
                }

                Vector128<float> f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
                Vector128<float> f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
                Vector128<float> f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
                Vector128<float> f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));

                ref Vector128<float> d = ref Unsafe.Add(ref destBase, i * 4);

                d = f0;
                Unsafe.Add(ref d, 1) = f1;
                Unsafe.Add(ref d, 2) = f2;
                Unsafe.Add(ref d, 3) = f3;
            }
        }
    }
}
|||
|
|||
/// <summary>
/// Runs <see cref="NormalizedFloatToByteSaturate"/> over as many leading elements as fill whole
/// byte vectors, then slices both spans down so that only the remainder is left for the caller.
/// Does nothing when neither AVX2 nor SSE2 is supported.
/// </summary>
/// <param name="source">The source floats; sliced to the unprocessed tail on return.</param>
/// <param name="dest">The destination bytes; sliced to the unprocessed tail on return.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (!Avx2.IsSupported && !Sse2.IsSupported)
    {
        return;
    }

    // Bytes per vector for the widest instruction set available.
    int vectorLength = Avx2.IsSupported
        ? Vector256<byte>.Count
        : Vector128<byte>.Count;

    int vectorizable = source.Length - ImageMaths.ModuloP2(source.Length, vectorLength);
    if (vectorizable <= 0)
    {
        return;
    }

    NormalizedFloatToByteSaturate(
        source.Slice(0, vectorizable),
        dest.Slice(0, vectorizable));

    source = source.Slice(vectorizable);
    dest = dest.Slice(vectorizable);
}
|||
|
|||
/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
/// Each float is multiplied by 255, converted to a 32-bit integer and narrowed to a byte with saturation.
/// </summary>
/// <remarks>
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L541-L622
/// </remarks>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of bytes.</param>
internal static void NormalizedFloatToByteSaturate(
    ReadOnlySpan<float> source,
    Span<byte> dest)
{
    if (Avx2.IsSupported)
    {
        VerifySpanInput(source, dest, Vector256<byte>.Count);

        ref Vector256<float> srcRef =
            ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
        ref Vector256<byte> dstRef =
            ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));

        var scale = Vector256.Create((float)byte.MaxValue);
        ref byte maskRef = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
        Vector256<int> permuteMask = Unsafe.As<byte, Vector256<int>>(ref maskRef);

        // Each iteration consumes four float vectors and produces one byte vector.
        int outputVectors = dest.Length / Vector256<byte>.Count;
        for (int index = 0; index < outputVectors; index++)
        {
            ref Vector256<float> group = ref Unsafe.Add(ref srcRef, index * 4);

            Vector256<int> w0 = Avx.ConvertToVector256Int32(Avx.Multiply(scale, group));
            Vector256<int> w1 = Avx.ConvertToVector256Int32(Avx.Multiply(scale, Unsafe.Add(ref group, 1)));
            Vector256<int> w2 = Avx.ConvertToVector256Int32(Avx.Multiply(scale, Unsafe.Add(ref group, 2)));
            Vector256<int> w3 = Avx.ConvertToVector256Int32(Avx.Multiply(scale, Unsafe.Add(ref group, 3)));

            // Narrow int -> short -> byte with saturation, then reorder the packed
            // 32-bit groups with the permute mask before storing.
            Vector256<short> lowHalf = Avx2.PackSignedSaturate(w0, w1);
            Vector256<short> highHalf = Avx2.PackSignedSaturate(w2, w3);
            Vector256<byte> packed = Avx2.PackUnsignedSaturate(lowHalf, highHalf);

            Unsafe.Add(ref dstRef, index) = Avx2.PermuteVar8x32(packed.AsInt32(), permuteMask).AsByte();
        }
    }
    else
    {
        // Sse
        VerifySpanInput(source, dest, Vector128<byte>.Count);

        ref Vector128<float> srcRef =
            ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
        ref Vector128<byte> dstRef =
            ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));

        var scale = Vector128.Create((float)byte.MaxValue);

        // Each iteration consumes four float vectors and produces one byte vector.
        int outputVectors = dest.Length / Vector128<byte>.Count;
        for (int index = 0; index < outputVectors; index++)
        {
            ref Vector128<float> group = ref Unsafe.Add(ref srcRef, index * 4);

            Vector128<int> w0 = Sse2.ConvertToVector128Int32(Sse.Multiply(scale, group));
            Vector128<int> w1 = Sse2.ConvertToVector128Int32(Sse.Multiply(scale, Unsafe.Add(ref group, 1)));
            Vector128<int> w2 = Sse2.ConvertToVector128Int32(Sse.Multiply(scale, Unsafe.Add(ref group, 2)));
            Vector128<int> w3 = Sse2.ConvertToVector128Int32(Sse.Multiply(scale, Unsafe.Add(ref group, 3)));

            // Narrow int -> short -> byte with saturation.
            Vector128<short> lowHalf = Sse2.PackSignedSaturate(w0, w1);
            Vector128<short> highHalf = Sse2.PackSignedSaturate(w2, w3);

            Unsafe.Add(ref dstRef, index) = Sse2.PackUnsignedSaturate(lowHalf, highHalf);
        }
    }
}
|||
} |
|||
} |
|||
} |
|||
#endif
|
|||
@ -0,0 +1,141 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Diagnostics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
|
|||
namespace SixLabors.ImageSharp |
|||
{ |
|||
internal static partial class SimdUtils |
|||
{ |
|||
/// <summary>
/// Shuffles the single-precision (32-bit) floating-point elements of <paramref name="source"/>
/// in groups of four according to <paramref name="control"/> and stores the results in
/// <paramref name="dest"/>. When runtime intrinsics are compiled in, the vectorized path handles
/// as much as it can and the scalar routine handles whatever is left.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of floats.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Channel(
    ReadOnlySpan<float> source,
    Span<float> dest,
    byte control)
{
    VerifyShuffleSpanInput(source, dest);

#if SUPPORTS_RUNTIME_INTRINSICS
    // Slices source/dest down to the part the vectorized path did not process.
    HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control);
#endif

    if (source.Length <= 0)
    {
        return;
    }

    // Deal with the remainder:
    ShuffleRemainder4Channel(source, dest, control);
}
|||
|
|||
/// <summary>
/// Shuffles the 8-bit integers of <paramref name="source"/> in groups of four and stores the
/// results in <paramref name="dest"/>. When runtime intrinsics are compiled in, the vectorized
/// path runs first using the shuffle's control byte; any remaining bytes are handled by the
/// shuffle's own fallback routine.
/// </summary>
/// <typeparam name="TShuffle">The shuffle implementation type.</typeparam>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Channel<TShuffle>(
    ReadOnlySpan<byte> source,
    Span<byte> dest,
    TShuffle shuffle)
    where TShuffle : struct, IComponentShuffle
{
    VerifyShuffleSpanInput(source, dest);

#if SUPPORTS_RUNTIME_INTRINSICS
    // Slices source/dest down to the part the vectorized path did not process.
    HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control);
#endif

    if (source.Length <= 0)
    {
        return;
    }

    // Deal with the remainder:
    shuffle.RunFallbackShuffle(source, dest);
}
|||
|
|||
/// <summary>
/// Scalar shuffle of float data in groups of four: for every group, destination element k
/// receives the source element selected by the k-th 2-bit field of <paramref name="control"/>.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of floats.</param>
/// <param name="control">The byte control.</param>
public static void ShuffleRemainder4Channel(
    ReadOnlySpan<float> source,
    Span<float> dest,
    byte control)
{
    // Unpack the control byte into the four source indices once, up front.
    Shuffle.InverseMmShuffle(control, out int idx3, out int idx2, out int idx1, out int idx0);

    ref float srcRef = ref MemoryMarshal.GetReference(source);
    ref float dstRef = ref MemoryMarshal.GetReference(dest);
    int length = source.Length;

    // There is no bounds checking here: the loop relies on the length being
    // a multiple of 4 and on dest being at least as long as source.
    for (int offset = 0; offset < length; offset += 4)
    {
        ref float groupIn = ref Unsafe.Add(ref srcRef, offset);
        ref float groupOut = ref Unsafe.Add(ref dstRef, offset);

        groupOut = Unsafe.Add(ref groupIn, idx0);
        Unsafe.Add(ref groupOut, 1) = Unsafe.Add(ref groupIn, idx1);
        Unsafe.Add(ref groupOut, 2) = Unsafe.Add(ref groupIn, idx2);
        Unsafe.Add(ref groupOut, 3) = Unsafe.Add(ref groupIn, idx3);
    }
}
|||
|
|||
/// <summary>
/// Debug-only validation of shuffle input: both spans must have the same length,
/// and that length must be a multiple of 4. Calls are removed from non-DEBUG builds.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="source">The source span.</param>
/// <param name="dest">The destination span.</param>
[Conditional("DEBUG")]
private static void VerifyShuffleSpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
    where T : struct
{
    DebugGuard.IsTrue(
        source.Length == dest.Length,
        nameof(source),
        "Input spans must be of same length!");

    DebugGuard.IsTrue(
        source.Length % 4 == 0,
        nameof(source),
        "Input spans must be divisible by 4!");
}
|||
|
|||
/// <summary>
/// Helpers for building and decomposing 4-element shuffle control bytes.
/// </summary>
public static class Shuffle
{
    /// <summary>
    /// Packs four values into a single control byte:
    /// <paramref name="p3"/> is shifted into bits 7-6, <paramref name="p2"/> into bits 5-4,
    /// <paramref name="p1"/> into bits 3-2 and <paramref name="p0"/> occupies the lowest bits.
    /// </summary>
    /// <param name="p3">The value for the highest position.</param>
    /// <param name="p2">The value for the second highest position.</param>
    /// <param name="p1">The value for the second lowest position.</param>
    /// <param name="p0">The value for the lowest position.</param>
    /// <returns>The combined control <see cref="byte"/>.</returns>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0)
    {
        int packed = (p3 << 6) | (p2 << 4) | (p1 << 2) | p0;
        return (byte)packed;
    }

    /// <summary>
    /// Fills <paramref name="span"/>, four bytes at a time, with the element indices described
    /// by <paramref name="control"/>: each group starting at offset <c>i</c> receives
    /// <c>p0 + i</c>, <c>p1 + i</c>, <c>p2 + i</c> and <c>p3 + i</c> (each truncated to a byte).
    /// </summary>
    /// <remarks>
    /// Writes go through <see cref="Unsafe.Add{T}(ref T, int)"/> without bounds checks.
    /// NOTE(review): the result is presumably consumed as a byte shuffle mask; confirm against callers.
    /// </remarks>
    /// <param name="span">The span to fill.</param>
    /// <param name="control">The shuffle control byte.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void MmShuffleSpan(ref Span<byte> span, byte control)
    {
        InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);

        ref byte start = ref MemoryMarshal.GetReference(span);

        int offset = 0;
        while (offset < span.Length)
        {
            Unsafe.Add(ref start, offset) = (byte)(p0 + offset);
            Unsafe.Add(ref start, offset + 1) = (byte)(p1 + offset);
            Unsafe.Add(ref start, offset + 2) = (byte)(p2 + offset);
            Unsafe.Add(ref start, offset + 3) = (byte)(p3 + offset);

            offset += 4;
        }
    }

    /// <summary>
    /// Splits <paramref name="control"/> into its four 2-bit fields.
    /// </summary>
    /// <param name="control">The shuffle control byte.</param>
    /// <param name="p3">Receives bits 7-6.</param>
    /// <param name="p2">Receives bits 5-4.</param>
    /// <param name="p1">Receives bits 3-2.</param>
    /// <param name="p0">Receives bits 1-0.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void InverseMmShuffle(
        byte control,
        out int p3,
        out int p2,
        out int p1,
        out int p0)
    {
        const int FieldMask = 0x3;

        p3 = (control >> 6) & FieldMask;
        p2 = (control >> 4) & FieldMask;
        p1 = (control >> 2) & FieldMask;
        p0 = control & FieldMask;
    }
}
|||
} |
|||
} |
|||
@ -0,0 +1,21 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using BenchmarkDotNet.Attributes; |
|||
using SixLabors.ImageSharp.Formats.Jpeg.Components; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations |
|||
{ |
|||
/// <summary>
/// Benchmarks <c>Block8x8F.AddInPlace</c> with a scalar operand on a default-initialized block.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_AddInPlace
{
    /// <summary>
    /// Adds a constant to every element of a default block.
    /// </summary>
    /// <returns>The scalar operand that was added.</returns>
    [Benchmark]
    public float AddInplace()
    {
        var block = default(Block8x8F);
        float operand = 42F;

        block.AddInPlace(operand);

        return operand;
    }
}
|||
} |
|||
@ -0,0 +1,37 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using BenchmarkDotNet.Attributes; |
|||
using SixLabors.ImageSharp.Formats.Jpeg.Components; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations |
|||
{ |
|||
/// <summary>
/// Benchmarks the block-by-block overload of <c>Block8x8F.MultiplyInPlace</c>.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_MultiplyInPlaceBlock
{
    // Input block shared by all iterations; filled once by Create8x8FloatData.
    private static readonly Block8x8F Source = Create8x8FloatData();

    /// <summary>
    /// Calls <c>MultiplyInPlace</c> on the shared source block, passing a default block by reference.
    /// </summary>
    [Benchmark]
    public void MultiplyInPlaceBlock()
    {
        var dest = default(Block8x8F);
        Source.MultiplyInPlace(ref dest);
    }

    /// <summary>
    /// Builds a block in which the element at row <c>i</c>, column <c>j</c> holds <c>(i * 10) + j</c>.
    /// </summary>
    /// <returns>The populated <see cref="Block8x8F"/>.</returns>
    private static Block8x8F Create8x8FloatData()
    {
        const int Size = 8;
        float[] values = new float[Size * Size];

        for (int index = 0; index < values.Length; index++)
        {
            int row = index / Size;
            int column = index % Size;
            values[index] = (row * 10) + column;
        }

        Block8x8F block = default;
        block.LoadFrom(values);
        return block;
    }
}
|||
} |
|||
@ -0,0 +1,21 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using BenchmarkDotNet.Attributes; |
|||
using SixLabors.ImageSharp.Formats.Jpeg.Components; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations |
|||
{ |
|||
/// <summary>
/// Benchmarks the scalar overload of <c>Block8x8F.MultiplyInPlace</c> on a default-initialized block.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_MultiplyInPlaceScalar
{
    /// <summary>
    /// Multiplies every element of a default block by a constant.
    /// </summary>
    /// <returns>The scalar operand that was used.</returns>
    [Benchmark]
    public float MultiplyInPlaceScalar()
    {
        var block = default(Block8x8F);
        float operand = 42F;

        block.MultiplyInPlace(operand);

        return operand;
    }
}
|||
} |
|||
@ -0,0 +1,37 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using BenchmarkDotNet.Attributes; |
|||
using SixLabors.ImageSharp.Formats.Jpeg.Components; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations |
|||
{ |
|||
/// <summary>
/// Benchmarks <c>Block8x8F.TransposeInto</c>.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Block8x8F_Transpose
{
    // Input block shared by all iterations; filled once by Create8x8FloatData.
    private static readonly Block8x8F Source = Create8x8FloatData();

    /// <summary>
    /// Calls <c>TransposeInto</c> on the shared source block with a default block as destination.
    /// </summary>
    [Benchmark]
    public void TransposeInto()
    {
        Block8x8F dest = default;
        Source.TransposeInto(ref dest);
    }

    /// <summary>
    /// Builds a block in which the element at row <c>i</c>, column <c>j</c> holds <c>(i * 10) + j</c>.
    /// </summary>
    /// <returns>The populated <see cref="Block8x8F"/>.</returns>
    private static Block8x8F Create8x8FloatData()
    {
        const int Size = 8;
        float[] values = new float[Size * Size];

        for (int index = 0; index < values.Length; index++)
        {
            int row = index / Size;
            int column = index % Size;
            values[index] = (row * 10) + column;
        }

        Block8x8F block = default;
        block.LoadFrom(values);
        return block;
    }
}
|||
} |
|||
@ -0,0 +1,68 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using BenchmarkDotNet.Attributes; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk |
|||
{ |
|||
/// <summary>
/// Compares a per-element premultiply loop against <c>Vector4Utilities.Premultiply</c>.
/// </summary>
[Config(typeof(Config.ShortCore31))]
public class PremultiplyVector4
{
    // 2048 pseudo-random vectors (fixed seed) with components in [0, 1).
    private static readonly Vector4[] Vectors = CreateVectors();

    /// <summary>
    /// Baseline: premultiplies each vector individually.
    /// </summary>
    [Benchmark(Baseline = true)]
    public void PremultiplyBaseline()
    {
        ref Vector4 first = ref MemoryMarshal.GetReference<Vector4>(Vectors);

        for (int index = 0; index < Vectors.Length; index++)
        {
            ref Vector4 current = ref Unsafe.Add(ref first, index);
            Premultiply(ref current);
        }
    }

    /// <summary>
    /// Premultiplies the whole array through <c>Vector4Utilities.Premultiply</c>.
    /// </summary>
    [Benchmark]
    public void Premultiply()
    {
        Vector4Utilities.Premultiply(Vectors);
    }

    /// <summary>
    /// Scales all components of <paramref name="source"/> by its W component, then restores W.
    /// </summary>
    /// <param name="source">The vector to premultiply in place.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    private static void Premultiply(ref Vector4 source)
    {
        float alpha = source.W;
        source *= alpha;

        // The multiplication above also scaled W; put the original value back.
        source.W = alpha;
    }

    private static Vector4[] CreateVectors()
        => GenerateRandomVectorArray(new Random(42), 2048, 0, 1);

    /// <summary>
    /// Creates <paramref name="length"/> vectors whose components are drawn from <paramref name="rnd"/>
    /// in X, Y, Z, W order.
    /// </summary>
    private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal)
    {
        var vectors = new Vector4[length];

        for (int index = 0; index < vectors.Length; index++)
        {
            float x = GetRandomFloat(rnd, minVal, maxVal);
            float y = GetRandomFloat(rnd, minVal, maxVal);
            float z = GetRandomFloat(rnd, minVal, maxVal);
            float w = GetRandomFloat(rnd, minVal, maxVal);

            vectors[index] = new Vector4(x, y, z, w);
        }

        return vectors;
    }

    /// <summary>
    /// Maps the next random double of <paramref name="rnd"/> onto the
    /// <paramref name="minVal"/>..<paramref name="maxVal"/> range.
    /// </summary>
    private static float GetRandomFloat(Random rnd, float minVal, float maxVal)
    {
        return ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
    }
}
|||
} |
|||
@ -0,0 +1,67 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using BenchmarkDotNet.Attributes; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk |
|||
{ |
|||
/// <summary>
/// Benchmarks the byte overload of <c>SimdUtils.Shuffle4Channel</c> using <c>WXYZShuffle4</c>.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class ShuffleByte4Channel
{
    private byte[] source;
    private byte[] destination;

    /// <summary>
    /// Gets or sets the number of bytes in the source and destination buffers.
    /// </summary>
    [Params(128, 256, 512, 1024, 2048)]
    public int Count { get; set; }

    /// <summary>
    /// Allocates both buffers and fills the source with random bytes seeded by <see cref="Count"/>.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        int length = this.Count;

        this.source = new byte[length];
        var random = new Random(length);
        random.NextBytes(this.source);

        this.destination = new byte[length];
    }

    /// <summary>
    /// Shuffles the source buffer into the destination buffer.
    /// </summary>
    [Benchmark]
    public void Shuffle4Channel()
        => SimdUtils.Shuffle4Channel<WXYZShuffle4>(this.source, this.destination, default);
}
|||
|
|||
// 2020-10-29
|
|||
// ##########
|
|||
//
|
|||
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
|
|||
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
|
|||
// .NET Core SDK=3.1.403
|
|||
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
|||
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
|||
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
|||
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
|||
//
|
|||
// Runtime=.NET Core 3.1
|
|||
//
|
|||
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
|
|||
// |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:|
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - |
|
|||
// | | | | | | | | | | | | | |
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - |
|
|||
// | | | | | | | | | | | | | |
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - |
|
|||
// | | | | | | | | | | | | | |
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - |
|
|||
// | | | | | | | | | | | | | |
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - |
|
|||
} |
|||
@ -0,0 +1,68 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using BenchmarkDotNet.Attributes; |
|||
using SixLabors.ImageSharp.Tests; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk |
|||
{ |
|||
/// <summary>
/// Benchmarks the float overload of <c>SimdUtils.Shuffle4Channel</c> using the control byte
/// of <c>WXYZShuffle4</c>.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class ShuffleFloat4Channel
{
    // Control byte taken from a default WXYZShuffle4 instance.
    private static readonly byte ShuffleControl = default(WXYZShuffle4).Control;

    private float[] source;
    private float[] destination;

    /// <summary>
    /// Gets or sets the number of floats in the source and destination buffers.
    /// </summary>
    [Params(128, 256, 512, 1024, 2048)]
    public int Count { get; set; }

    /// <summary>
    /// Creates a random source array (seeded by <see cref="Count"/>) and an empty destination.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        int length = this.Count;
        var random = new Random(length);

        this.source = random.GenerateRandomFloatArray(length, 0, 256);
        this.destination = new float[length];
    }

    /// <summary>
    /// Shuffles the source buffer into the destination buffer.
    /// </summary>
    [Benchmark]
    public void Shuffle4Channel()
        => SimdUtils.Shuffle4Channel(this.source, this.destination, ShuffleControl);
}
|||
|
|||
// 2020-10-29
|
|||
// ##########
|
|||
//
|
|||
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
|
|||
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
|
|||
// .NET Core SDK=3.1.403
|
|||
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
|||
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
|||
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
|||
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
|
|||
//
|
|||
// Runtime=.NET Core 3.1
|
|||
//
|
|||
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
|
|||
// |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:|
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 128 | 9.818 ns | 0.1457 ns | 0.1292 ns | 0.15 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - |
|
|||
// | | | | | | | | | | | | |
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 256 | 15.878 ns | 0.1983 ns | 0.1758 ns | 0.13 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - |
|
|||
// | | | | | | | | | | | | |
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 512 | 29.452 ns | 0.3334 ns | 0.3118 ns | 0.11 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - |
|
|||
// | | | | | | | | | | | | |
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 1024 | 53.757 ns | 0.3212 ns | 0.2847 ns | 0.11 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - |
|
|||
// | | | | | | | | | | | | |
|
|||
// | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - |
|
|||
// | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - |
|
|||
// | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - |
|
|||
} |
|||
@ -0,0 +1,68 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Numerics; |
|||
using System.Runtime.CompilerServices; |
|||
using System.Runtime.InteropServices; |
|||
using BenchmarkDotNet.Attributes; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk |
|||
{ |
|||
/// <summary>
/// Compares a per-element un-premultiply loop against <c>Vector4Utilities.UnPremultiply</c>.
/// </summary>
[Config(typeof(Config.ShortCore31))]
public class UnPremultiplyVector4
{
    // 2048 pseudo-random vectors (fixed seed) with components in [0, 1).
    private static readonly Vector4[] Vectors = CreateVectors();

    /// <summary>
    /// Baseline: un-premultiplies each vector individually.
    /// </summary>
    [Benchmark(Baseline = true)]
    public void UnPremultiplyBaseline()
    {
        ref Vector4 first = ref MemoryMarshal.GetReference<Vector4>(Vectors);

        for (int index = 0; index < Vectors.Length; index++)
        {
            ref Vector4 current = ref Unsafe.Add(ref first, index);
            UnPremultiply(ref current);
        }
    }

    /// <summary>
    /// Un-premultiplies the whole array through <c>Vector4Utilities.UnPremultiply</c>.
    /// </summary>
    [Benchmark]
    public void UnPremultiply()
    {
        Vector4Utilities.UnPremultiply(Vectors);
    }

    /// <summary>
    /// Divides all components of <paramref name="source"/> by its W component, then restores W.
    /// </summary>
    /// <param name="source">The vector to un-premultiply in place.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    private static void UnPremultiply(ref Vector4 source)
    {
        float alpha = source.W;
        source /= alpha;

        // The division above also changed W; put the original value back.
        source.W = alpha;
    }

    private static Vector4[] CreateVectors()
        => GenerateRandomVectorArray(new Random(42), 2048, 0, 1);

    /// <summary>
    /// Creates <paramref name="length"/> vectors whose components are drawn from <paramref name="rnd"/>
    /// in X, Y, Z, W order.
    /// </summary>
    private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal)
    {
        var vectors = new Vector4[length];

        for (int index = 0; index < vectors.Length; index++)
        {
            float x = GetRandomFloat(rnd, minVal, maxVal);
            float y = GetRandomFloat(rnd, minVal, maxVal);
            float z = GetRandomFloat(rnd, minVal, maxVal);
            float w = GetRandomFloat(rnd, minVal, maxVal);

            vectors[index] = new Vector4(x, y, z, w);
        }

        return vectors;
    }

    /// <summary>
    /// Maps the next random double of <paramref name="rnd"/> onto the
    /// <paramref name="minVal"/>..<paramref name="maxVal"/> range.
    /// </summary>
    private static float GetRandomFloat(Random rnd, float minVal, float maxVal)
    {
        return ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal;
    }
}
|||
} |
|||
@ -0,0 +1,84 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics.X86; |
|||
#endif
|
|||
using BenchmarkDotNet.Environments; |
|||
using BenchmarkDotNet.Jobs; |
|||
|
|||
namespace SixLabors.ImageSharp.Benchmarks |
|||
{ |
|||
public partial class Config |
|||
{ |
|||
private const string On = "1"; |
|||
private const string Off = "0"; |
|||
|
|||
// See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861
|
|||
// * EnableHWIntrinsic
|
|||
// * EnableSSE
|
|||
// * EnableSSE2
|
|||
// * EnableAES
|
|||
// * EnablePCLMULQDQ
|
|||
// * EnableSSE3
|
|||
// * EnableSSSE3
|
|||
// * EnableSSE41
|
|||
// * EnableSSE42
|
|||
// * EnablePOPCNT
|
|||
// * EnableAVX
|
|||
// * EnableFMA
|
|||
// * EnableAVX2
|
|||
// * EnableBMI1
|
|||
// * EnableBMI2
|
|||
// * EnableLZCNT
|
|||
//
|
|||
// `FeatureSIMD` ends up impacting all SIMD support (including `System.Numerics`) but not things
|
|||
// like `LZCNT`, `BMI1`, or `BMI2`
|
|||
// `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3`
|
|||
private const string EnableAES = "COMPlus_EnableAES"; |
|||
private const string EnableAVX = "COMPlus_EnableAVX"; |
|||
private const string EnableAVX2 = "COMPlus_EnableAVX2"; |
|||
private const string EnableBMI1 = "COMPlus_EnableBMI1"; |
|||
private const string EnableBMI2 = "COMPlus_EnableBMI2"; |
|||
private const string EnableFMA = "COMPlus_EnableFMA"; |
|||
private const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; |
|||
private const string EnableLZCNT = "COMPlus_EnableLZCNT"; |
|||
private const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; |
|||
private const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; |
|||
private const string EnableSSE = "COMPlus_EnableSSE"; |
|||
private const string EnableSSE2 = "COMPlus_EnableSSE2"; |
|||
private const string EnableSSE3 = "COMPlus_EnableSSE3"; |
|||
private const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; |
|||
private const string EnableSSE41 = "COMPlus_EnableSSE41"; |
|||
private const string EnableSSE42 = "COMPlus_EnableSSE42"; |
|||
private const string EnableSSSE3 = "COMPlus_EnableSSSE3"; |
|||
private const string FeatureSIMD = "COMPlus_FeatureSIMD"; |
|||
|
|||
/// <summary>
/// Benchmark configuration that runs each benchmark on .NET Core 3.1 with hardware intrinsics
/// disabled (baseline) and, when <c>SUPPORTS_RUNTIME_INTRINSICS</c> is defined and the host
/// supports them, with the default settings ("2. AVX") and with AVX disabled ("3. SSE").
/// </summary>
public class HwIntrinsics_SSE_AVX : Config
{
    public HwIntrinsics_SSE_AVX()
    {
        // Baseline: both hardware intrinsics and SIMD support are switched off.
        Job noIntrinsics = Job.Default
            .WithRuntime(CoreRuntime.Core31)
            .WithEnvironmentVariables(
                new EnvironmentVariable(EnableHWIntrinsic, Off),
                new EnvironmentVariable(FeatureSIMD, Off))
            .WithId("1. No HwIntrinsics")
            .AsBaseline();

        this.AddJob(noIntrinsics);

#if SUPPORTS_RUNTIME_INTRINSICS
        if (Avx.IsSupported)
        {
            // No environment overrides: everything the host offers is available.
            Job avx = Job.Default
                .WithRuntime(CoreRuntime.Core31)
                .WithId("2. AVX");

            this.AddJob(avx);
        }

        if (Sse.IsSupported)
        {
            // Only AVX is disabled for this job.
            Job sse = Job.Default
                .WithRuntime(CoreRuntime.Core31)
                .WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off))
                .WithId("3. SSE");

            this.AddJob(sse);
        }
#endif
    }
}
|||
} |
|||
} |
|||
@ -0,0 +1,161 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using SixLabors.ImageSharp.Tests.TestUtilities; |
|||
using Xunit; |
|||
|
|||
namespace SixLabors.ImageSharp.Tests.Common |
|||
{ |
|||
public partial class SimdUtilsTests |
|||
{ |
|||
/// <summary>
/// Verifies the float overload of <c>SimdUtils.Shuffle4Channel</c> for the given element count,
/// once per hardware-intrinsics configuration listed in the flags below.
/// </summary>
/// <param name="count">The number of floats to shuffle.</param>
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void BulkShuffleFloat4Channel(int count)
{
    static void RunTest(string serialized)
    {
        int size = FeatureTestRunner.Deserialize<int>(serialized);

        // No need to test multiple shuffle controls as the
        // pipeline is always the same.
        byte control = default(WZYXShuffle4).Control;

        TestShuffleFloat4Channel(
            size,
            (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control),
            control);
    }

    HwIntrinsics features = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE;

    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, count, features);
}
|||
|
|||
/// <summary>
/// Verifies the byte overload of <c>SimdUtils.Shuffle4Channel</c> for the given element count
/// with several shuffle types, once per hardware-intrinsics configuration listed in the flags below.
/// </summary>
/// <param name="count">The number of bytes to shuffle.</param>
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void BulkShuffleByte4Channel(int count)
{
    static void RunTest(string serialized)
    {
        int size = FeatureTestRunner.Deserialize<int>(serialized);

        // These cannot be expressed as a theory as you cannot
        // use RemoteExecutor within generic methods nor pass
        // IComponentShuffle to the generic utils method.
        //
        // Each shuffle is checked exactly once for the deserialized size: the theory
        // already supplies every size, so looping over the sizes again here would only
        // repeat the same deterministic checks.
        WXYZShuffle4 wxyz = default;
        TestShuffleByte4Channel(
            size,
            (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz),
            wxyz.Control);

        WZYXShuffle4 wzyx = default;
        TestShuffleByte4Channel(
            size,
            (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx),
            wzyx.Control);

        YZWXShuffle4 yzwx = default;
        TestShuffleByte4Channel(
            size,
            (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx),
            yzwx.Control);

        ZYXWShuffle4 zyxw = default;
        TestShuffleByte4Channel(
            size,
            (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw),
            zyxw.Control);

        var xwyz = new DefaultShuffle4(2, 1, 3, 0);
        TestShuffleByte4Channel(
            size,
            (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz),
            xwyz.Control);

        var yyyy = new DefaultShuffle4(1, 1, 1, 1);
        TestShuffleByte4Channel(
            size,
            (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy),
            yyyy.Control);

        var wwww = new DefaultShuffle4(3, 3, 3, 3);
        TestShuffleByte4Channel(
            size,
            (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww),
            wwww.Control);
    }

    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        count,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}
|||
|
|||
/// <summary>
/// Runs <paramref name="convert"/> on a random float array of <paramref name="count"/> elements
/// and compares the output with a reference shuffle computed from <paramref name="control"/>.
/// </summary>
/// <param name="count">The number of elements; also used as the random seed.</param>
/// <param name="convert">The shuffle under test, called with (source, destination).</param>
/// <param name="control">The shuffle control byte describing the expected element order.</param>
private static void TestShuffleFloat4Channel(
    int count,
    Action<Memory<float>, Memory<float>> convert,
    byte control)
{
    float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256);
    float[] actual = new float[count];
    float[] expected = new float[count];

    SimdUtils.Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);

    // Reference implementation: shuffle each group of four elements independently.
    int offset = 0;
    while (offset < expected.Length)
    {
        expected[offset] = source[p0 + offset];
        expected[offset + 1] = source[p1 + offset];
        expected[offset + 2] = source[p2 + offset];
        expected[offset + 3] = source[p3 + offset];

        offset += 4;
    }

    convert(source, actual);

    var comparer = new ApproximateFloatComparer(1e-5F);
    Assert.Equal(expected, actual, comparer);
}
|||
|
|||
/// <summary>
/// Runs <paramref name="convert"/> on a random byte array of <paramref name="count"/> elements
/// and compares the output with a reference shuffle computed from <paramref name="control"/>.
/// </summary>
/// <param name="count">The number of elements; also used as the random seed.</param>
/// <param name="convert">The shuffle under test, called with (source, destination).</param>
/// <param name="control">The shuffle control byte describing the expected element order.</param>
private static void TestShuffleByte4Channel(
    int count,
    Action<Memory<byte>, Memory<byte>> convert,
    byte control)
{
    byte[] source = new byte[count];
    var random = new Random(count);
    random.NextBytes(source);

    byte[] actual = new byte[count];
    byte[] expected = new byte[count];

    SimdUtils.Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0);

    // Reference implementation: shuffle each group of four elements independently.
    int offset = 0;
    while (offset < expected.Length)
    {
        expected[offset] = source[p0 + offset];
        expected[offset + 1] = source[p1 + offset];
        expected[offset + 2] = source[p2 + offset];
        expected[offset + 3] = source[p3 + offset];

        offset += 4;
    }

    convert(source, actual);

    Assert.Equal(expected, actual);
}
|||
} |
|||
} |
|||
@ -0,0 +1,319 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Collections.Generic; |
|||
using System.Diagnostics; |
|||
using Microsoft.DotNet.RemoteExecutor; |
|||
using Xunit.Abstractions; |
|||
|
|||
namespace SixLabors.ImageSharp.Tests.TestUtilities |
|||
{ |
|||
/// <summary>
|
|||
/// Allows the testing against specific feature sets.
|
|||
/// </summary>
|
|||
public static class FeatureTestRunner |
|||
{ |
|||
private static readonly char[] SplitChars = new[] { ',', ' ' }; |
|||
|
|||
/// <summary>
/// Allows the deserialization of parameters passed to the feature test.
/// </summary>
/// <remarks>
/// This is required because <see cref="RemoteExecutor"/> does not allow
/// marshalling of fields so we cannot pass a wrapped <see cref="Action{T}"/>
/// allowing automatic deserialization.
/// </remarks>
/// <typeparam name="T">The type to deserialize to.</typeparam>
/// <param name="value">The string value to deserialize.</param>
/// <returns>The <typeparamref name="T"/> value.</returns>
public static T DeserializeForXunit<T>(string value)
    where T : IXunitSerializable
{
    return BasicSerializer.Deserialize<T>(value);
}
|||
|
|||
/// <summary>
/// Allows the deserialization of types implementing <see cref="IConvertible"/>
/// passed to the feature test.
/// </summary>
/// <typeparam name="T">The type to deserialize to.</typeparam>
/// <param name="value">The string value to deserialize.</param>
/// <returns>The <typeparamref name="T"/> value.</returns>
public static T Deserialize<T>(string value)
    where T : IConvertible
{
    object converted = Convert.ChangeType(value, typeof(T));
    return (T)converted;
}
|||
|
|||
/// <summary>
/// Runs the given test <paramref name="action"/> once for every feature entry produced from
/// <paramref name="intrinsics"/>.
/// </summary>
/// <remarks>
/// For <see cref="HwIntrinsics.AllowAll"/> the action is invoked directly in the current process.
/// For every other entry it is invoked through <see cref="RemoteExecutor"/> with the matching
/// <c>COMPlus_*</c> environment variable set to <c>0</c>.
/// Nothing is run when <see cref="RemoteExecutor.IsSupported"/> is <see langword="false"/>.
/// </remarks>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
public static void RunWithHwIntrinsicsFeature(
    Action action,
    HwIntrinsics intrinsics)
{
    if (!RemoteExecutor.IsSupported)
    {
        return;
    }

    foreach (KeyValuePair<HwIntrinsics, string> feature in intrinsics.ToFeatureKeyValueCollection())
    {
        if (feature.Key == HwIntrinsics.AllowAll)
        {
            // Since we are running using the default architecture there is no
            // point creating the overhead of running the action in a separate process.
            action();
            continue;
        }

        var startInfo = new ProcessStartInfo();
        startInfo.Environment[$"COMPlus_{feature.Value}"] = "0";

        var options = new RemoteInvokeOptions { StartInfo = startInfo };
        RemoteExecutor.Invoke(action, options).Dispose();
    }
}
|||
|
|||
/// <summary>
/// Runs the given test <paramref name="action"/> once for every feature entry produced from
/// <paramref name="intrinsics"/>, passing the name of the entry being tested.
/// </summary>
/// <remarks>
/// For <see cref="HwIntrinsics.AllowAll"/> the action is invoked directly in the current process.
/// For every other entry it is invoked through <see cref="RemoteExecutor"/> with the matching
/// <c>COMPlus_*</c> environment variable set to <c>0</c>.
/// Nothing is run when <see cref="RemoteExecutor.IsSupported"/> is <see langword="false"/>.
/// </remarks>
/// <param name="action">
/// The test action to run.
/// The parameter passed will be a string representing the currently testing <see cref="HwIntrinsics"/>.</param>
/// <param name="intrinsics">The intrinsics features.</param>
public static void RunWithHwIntrinsicsFeature(
    Action<string> action,
    HwIntrinsics intrinsics)
{
    if (!RemoteExecutor.IsSupported)
    {
        return;
    }

    foreach (KeyValuePair<HwIntrinsics, string> feature in intrinsics.ToFeatureKeyValueCollection())
    {
        if (feature.Key == HwIntrinsics.AllowAll)
        {
            // Since we are running using the default architecture there is no
            // point creating the overhead of running the action in a separate process.
            action(feature.Key.ToString());
            continue;
        }

        var startInfo = new ProcessStartInfo();
        startInfo.Environment[$"COMPlus_{feature.Value}"] = "0";

        string featureName = feature.Key.ToString();
        var options = new RemoteInvokeOptions { StartInfo = startInfo };
        RemoteExecutor.Invoke(action, featureName, options).Dispose();
    }
}
|||
|
|||
/// <summary>
/// Runs the given test <paramref name="action"/> once for every feature entry produced from
/// <paramref name="intrinsics"/>, passing <paramref name="serializable"/> in serialized form.
/// </summary>
/// <remarks>
/// For <see cref="HwIntrinsics.AllowAll"/> the action is invoked directly in the current process.
/// For every other entry it is invoked through <see cref="RemoteExecutor"/> with the matching
/// <c>COMPlus_*</c> environment variable set to <c>0</c>.
/// Nothing is run when <see cref="RemoteExecutor.IsSupported"/> is <see langword="false"/>.
/// </remarks>
/// <typeparam name="T">The type of the value handed to the test action.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
public static void RunWithHwIntrinsicsFeature<T>(
    Action<string> action,
    HwIntrinsics intrinsics,
    T serializable)
    where T : IXunitSerializable
{
    if (!RemoteExecutor.IsSupported)
    {
        return;
    }

    foreach (KeyValuePair<HwIntrinsics, string> feature in intrinsics.ToFeatureKeyValueCollection())
    {
        if (feature.Key == HwIntrinsics.AllowAll)
        {
            // Since we are running using the default architecture there is no
            // point creating the overhead of running the action in a separate process.
            action(BasicSerializer.Serialize(serializable));
            continue;
        }

        var startInfo = new ProcessStartInfo();
        startInfo.Environment[$"COMPlus_{feature.Value}"] = "0";

        string payload = BasicSerializer.Serialize(serializable);
        var options = new RemoteInvokeOptions { StartInfo = startInfo };
        RemoteExecutor.Invoke(action, payload, options).Dispose();
    }
}
|||
|
|||
/// <summary>
/// Runs the given test <paramref name="action"/> once for every feature entry produced from
/// <paramref name="intrinsics"/>, passing <paramref name="serializable"/> in serialized form
/// followed by the name of the entry being tested.
/// </summary>
/// <remarks>
/// For <see cref="HwIntrinsics.AllowAll"/> the action is invoked directly in the current process.
/// For every other entry it is invoked through <see cref="RemoteExecutor"/> with the matching
/// <c>COMPlus_*</c> environment variable set to <c>0</c>.
/// Nothing is run when <see cref="RemoteExecutor.IsSupported"/> is <see langword="false"/>.
/// </remarks>
/// <typeparam name="T">The type of the value handed to the test action.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
public static void RunWithHwIntrinsicsFeature<T>(
    Action<string, string> action,
    HwIntrinsics intrinsics,
    T serializable)
    where T : IXunitSerializable
{
    if (!RemoteExecutor.IsSupported)
    {
        return;
    }

    foreach (KeyValuePair<HwIntrinsics, string> feature in intrinsics.ToFeatureKeyValueCollection())
    {
        if (feature.Key == HwIntrinsics.AllowAll)
        {
            // Since we are running using the default architecture there is no
            // point creating the overhead of running the action in a separate process.
            action(BasicSerializer.Serialize(serializable), feature.Key.ToString());
            continue;
        }

        var startInfo = new ProcessStartInfo();
        startInfo.Environment[$"COMPlus_{feature.Value}"] = "0";

        string payload = BasicSerializer.Serialize(serializable);
        string featureName = feature.Key.ToString();
        var options = new RemoteInvokeOptions { StartInfo = startInfo };
        RemoteExecutor.Invoke(action, payload, featureName, options).Dispose();
    }
}
|||
|
|||
/// <summary>
|
|||
/// Runs the given test <paramref name="action"/> within an environment
|
|||
/// where the given <paramref name="intrinsics"/> features are applied one at a time.
|
|||
/// </summary>
|
|||
/// <param name="action">The test action to run.</param>
|
|||
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
|
|||
/// <param name="intrinsics">The intrinsics features.</param>
|
|||
public static void RunWithHwIntrinsicsFeature<T>(
    Action<string> action,
    T serializable,
    HwIntrinsics intrinsics)
    where T : IConvertible
{
    // Nothing is run at all (not even in-process) when the remote executor is unsupported.
    if (!RemoteExecutor.IsSupported)
    {
        return;
    }

    foreach (KeyValuePair<HwIntrinsics, string> feature in intrinsics.ToFeatureKeyValueCollection())
    {
        if (feature.Key == HwIntrinsics.AllowAll)
        {
            // Default architecture: a separate process would only add overhead,
            // so the action is invoked directly with the value's string form.
            action(serializable.ToString());
            continue;
        }

        // Switch the feature off for the child process via its COMPlus environment variable.
        var startInfo = new ProcessStartInfo();
        startInfo.Environment[$"COMPlus_{feature.Value}"] = "0";
        var options = new RemoteInvokeOptions { StartInfo = startInfo };

        RemoteExecutor
            .Invoke(action, serializable.ToString(), options)
            .Dispose();
    }
}
|||
|
|||
internal static Dictionary<HwIntrinsics, string> ToFeatureKeyValueCollection(this HwIntrinsics intrinsics)
{
    // Translate every flag named in the given value into its COMPlus equivalent.
    var translated = new Dictionary<HwIntrinsics, string>();
    string[] names = intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries);

    for (int i = 0; i < names.Length; i++)
    {
        string name = names[i];
        var flag = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), name);

        string value;
        if (name == nameof(HwIntrinsics.DisableSIMD))
        {
            value = "FeatureSIMD";
        }
        else if (name == nameof(HwIntrinsics.AllowAll))
        {
            // Not a COMPlus value. The calling method filters this entry out.
            value = nameof(HwIntrinsics.AllowAll);
        }
        else
        {
            // "DisableXyz" is controlled through the "EnableXyz" switch.
            value = name.Replace("Disable", "Enable");
        }

        translated.Add(flag, value);
    }

    return translated;
}
|||
} |
|||
|
|||
/// <summary>
/// See <see href="https://github.com/dotnet/runtime/blob/50ac454d8d8a1915188b2a4bb3fff3b81bf6c0cf/src/coreclr/src/jit/jitconfigvalues.h#L224"/>
/// </summary>
/// <remarks>
/// <see cref="DisableSIMD"/> ends up impacting all SIMD support (including System.Numerics)
/// but not things like <see cref="DisableBMI1"/>, <see cref="DisableBMI2"/>, and <see cref="DisableLZCNT"/>.
/// </remarks>
|
|||
[Flags]
#pragma warning disable RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute).
public enum HwIntrinsics
#pragma warning restore RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute).
{
    // Flags are used so that several values can be passed without a params array.
    // There is deliberately no zero member and no inverse "All" member,
    // since those do not translate to string values.

    // Translated to the "FeatureSIMD" switch.
    DisableSIMD = 1 << 0,

    // Every remaining Disable* member is translated to the matching "Enable*" switch.
    DisableHWIntrinsic = 1 << 1,
    DisableSSE = 1 << 2,
    DisableSSE2 = 1 << 3,
    DisableAES = 1 << 4,
    DisablePCLMULQDQ = 1 << 5,
    DisableSSE3 = 1 << 6,
    DisableSSSE3 = 1 << 7,
    DisableSSE41 = 1 << 8,
    DisableSSE42 = 1 << 9,
    DisablePOPCNT = 1 << 10,
    DisableAVX = 1 << 11,
    DisableFMA = 1 << 12,
    DisableAVX2 = 1 << 13,
    DisableBMI1 = 1 << 14,
    DisableBMI2 = 1 << 15,
    DisableLZCNT = 1 << 16,

    // Not a COMPlus switch: nothing is disabled for this value.
    AllowAll = 1 << 17
}
|||
} |
|||
@ -1,54 +0,0 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
namespace SixLabors.ImageSharp.Tests |
|||
{ |
|||
public static partial class TestEnvironment
{
    /// <summary>
    /// Names of the COMPlus environment variables for hardware features,
    /// together with the values that turn them on or off.
    /// </summary>
    internal static class Features
    {
        public const string On = "1";
        public const string Off = "0";

        // Every environment variable below shares this prefix.
        private const string Prefix = "COMPlus_";

        // See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861
        // for the list of switches: EnableHWIntrinsic, EnableSSE, EnableSSE2, EnableAES,
        // EnablePCLMULQDQ, EnableSSE3, EnableSSSE3, EnableSSE41, EnableSSE42, EnablePOPCNT,
        // EnableAVX, EnableFMA, EnableAVX2, EnableBMI1, EnableBMI2 and EnableLZCNT.
        //
        // `FeatureSIMD` ends up impacting all SIMD support (including `System.Numerics`)
        // but not things like `LZCNT`, `BMI1`, or `BMI2`.
        // `EnableSSE3_4` is a legacy switch that exists for compat and is basically
        // the same as `EnableSSE3`.
        public const string EnableAES = Prefix + "EnableAES";
        public const string EnableAVX = Prefix + "EnableAVX";
        public const string EnableAVX2 = Prefix + "EnableAVX2";
        public const string EnableBMI1 = Prefix + "EnableBMI1";
        public const string EnableBMI2 = Prefix + "EnableBMI2";
        public const string EnableFMA = Prefix + "EnableFMA";
        public const string EnableHWIntrinsic = Prefix + "EnableHWIntrinsic";
        public const string EnableLZCNT = Prefix + "EnableLZCNT";
        public const string EnablePCLMULQDQ = Prefix + "EnablePCLMULQDQ";
        public const string EnablePOPCNT = Prefix + "EnablePOPCNT";
        public const string EnableSSE = Prefix + "EnableSSE";
        public const string EnableSSE2 = Prefix + "EnableSSE2";
        public const string EnableSSE3 = Prefix + "EnableSSE3";
        public const string EnableSSE3_4 = Prefix + "EnableSSE3_4";
        public const string EnableSSE41 = Prefix + "EnableSSE41";
        public const string EnableSSE42 = Prefix + "EnableSSE42";
        public const string EnableSSSE3 = Prefix + "EnableSSSE3";
        public const string FeatureSIMD = Prefix + "FeatureSIMD";
    }
}
|||
} |
|||
@ -0,0 +1,296 @@ |
|||
// Copyright (c) Six Labors.
|
|||
// Licensed under the Apache License, Version 2.0.
|
|||
|
|||
using System; |
|||
using System.Collections.Generic; |
|||
using System.Linq; |
|||
using System.Numerics; |
|||
#if SUPPORTS_RUNTIME_INTRINSICS
|
|||
using System.Runtime.Intrinsics.X86; |
|||
#endif
|
|||
using Xunit; |
|||
using Xunit.Abstractions; |
|||
|
|||
namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests |
|||
{ |
|||
public class FeatureTestRunnerTests
{
    /// <summary>
    /// Gets flag combinations paired with the feature values they are expected
    /// to be translated to, in order.
    /// </summary>
    public static TheoryData<HwIntrinsics, string[]> Intrinsics
    {
        get
        {
            var cases = new TheoryData<HwIntrinsics, string[]>();
            cases.Add(HwIntrinsics.DisableAES | HwIntrinsics.AllowAll, new string[] { "EnableAES", "AllowAll" });
            cases.Add(HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic, new string[] { "FeatureSIMD", "EnableHWIntrinsic" });
            cases.Add(HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" });
            return cases;
        }
    }

    [Theory]
    [MemberData(nameof(Intrinsics))]
    public void ToFeatureCollectionReturnsExpectedResult(HwIntrinsics expectedItrinsics, string[] expectedValues)
    {
        Dictionary<HwIntrinsics, string> features = expectedItrinsics.ToFeatureKeyValueCollection();

        // OR-ing every returned key back together must reproduce the original flags.
        HwIntrinsics actualIntrinsics = features.Keys.Aggregate((combined, key) => combined | key);
        Assert.Equal(expectedItrinsics, actualIntrinsics);

        IEnumerable<string> actualValues = features.Values;
        Assert.Equal(expectedValues, actualValues);
    }

    [Fact]
    public void AllowsAllHwIntrinsicFeatures()
    {
        // Only meaningful on machines where vectors are hardware accelerated to begin with.
        if (Vector.IsHardwareAccelerated)
        {
            FeatureTestRunner.RunWithHwIntrinsicsFeature(
                () => Assert.True(Vector.IsHardwareAccelerated),
                HwIntrinsics.AllowAll);
        }
    }

    [Fact]
    public void CanLimitHwIntrinsicSIMDFeatures()
        => FeatureTestRunner.RunWithHwIntrinsicsFeature(
            () => Assert.False(Vector.IsHardwareAccelerated),
            HwIntrinsics.DisableSIMD);

#if SUPPORTS_RUNTIME_INTRINSICS
    [Fact]
    public void CanLimitHwIntrinsicBaseFeatures()
    {
        // With hardware intrinsics disabled, every instruction set must report as unsupported.
        static void AssertDisabled() => AssertInstructionSetsDisabled(HwIntrinsics.DisableHWIntrinsic);

        FeatureTestRunner.RunWithHwIntrinsicsFeature(
            AssertDisabled,
            HwIntrinsics.DisableHWIntrinsic);
    }

    /// <summary>
    /// Asserts that the instruction set matching <paramref name="feature"/> reports as unsupported.
    /// <see cref="HwIntrinsics.DisableHWIntrinsic"/> checks every instruction set in the table;
    /// values without an entry in the table assert nothing.
    /// </summary>
    /// <param name="feature">The single feature flag under test.</param>
    private static void AssertInstructionSetsDisabled(HwIntrinsics feature)
    {
        (HwIntrinsics Flag, bool IsSupported)[] instructionSets =
        {
            (HwIntrinsics.DisableSSE, Sse.IsSupported),
            (HwIntrinsics.DisableSSE2, Sse2.IsSupported),
            (HwIntrinsics.DisableAES, Aes.IsSupported),
            (HwIntrinsics.DisablePCLMULQDQ, Pclmulqdq.IsSupported),
            (HwIntrinsics.DisableSSE3, Sse3.IsSupported),
            (HwIntrinsics.DisableSSSE3, Ssse3.IsSupported),
            (HwIntrinsics.DisableSSE41, Sse41.IsSupported),
            (HwIntrinsics.DisableSSE42, Sse42.IsSupported),
            (HwIntrinsics.DisablePOPCNT, Popcnt.IsSupported),
            (HwIntrinsics.DisableAVX, Avx.IsSupported),
            (HwIntrinsics.DisableFMA, Fma.IsSupported),
            (HwIntrinsics.DisableAVX2, Avx2.IsSupported),
            (HwIntrinsics.DisableBMI1, Bmi1.IsSupported),
            (HwIntrinsics.DisableBMI2, Bmi2.IsSupported),
            (HwIntrinsics.DisableLZCNT, Lzcnt.IsSupported)
        };

        foreach ((HwIntrinsics flag, bool isSupported) in instructionSets)
        {
            if (feature == HwIntrinsics.DisableHWIntrinsic || feature == flag)
            {
                Assert.False(isSupported);
            }
        }
    }
#endif

    [Fact]
    public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam()
    {
        static void AssertHwIntrinsicsFeatureDisabled(string intrinsic)
        {
            Assert.NotNull(intrinsic);

            var feature = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic);
            if (feature == HwIntrinsics.DisableSIMD)
            {
                Assert.False(Vector.IsHardwareAccelerated);
            }

#if SUPPORTS_RUNTIME_INTRINSICS
            AssertInstructionSetsDisabled(feature);
#endif
        }

        // Exercise every declared flag individually.
        var allFlags = (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics));
        for (int i = 0; i < allFlags.Length; i++)
        {
            FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, allFlags[i]);
        }
    }

    [Fact]
    public void CanLimitHwIntrinsicFeaturesWithSerializableParam()
    {
        static void AssertHwIntrinsicsFeatureDisabled(string serializable)
        {
            Assert.NotNull(serializable);

            // The serialized payload must round-trip back into an instance.
            FakeSerializable roundTripped = FeatureTestRunner.DeserializeForXunit<FakeSerializable>(serializable);
            Assert.NotNull(roundTripped);

#if SUPPORTS_RUNTIME_INTRINSICS
            Assert.False(Sse.IsSupported);
#endif
        }

        var payload = new FakeSerializable();
        FeatureTestRunner.RunWithHwIntrinsicsFeature(
            AssertHwIntrinsicsFeatureDisabled,
            HwIntrinsics.DisableSSE,
            payload);
    }

    [Fact]
    public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams()
    {
        static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic)
        {
            Assert.NotNull(serializable);

            // The serialized payload must round-trip back into an instance.
            FakeSerializable roundTripped = FeatureTestRunner.DeserializeForXunit<FakeSerializable>(serializable);
            Assert.NotNull(roundTripped);

            var feature = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic);
            if (feature == HwIntrinsics.DisableSIMD)
            {
                Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated));
            }

#if SUPPORTS_RUNTIME_INTRINSICS
            AssertInstructionSetsDisabled(feature);
#endif
        }

        // Exercise every declared flag individually.
        var allFlags = (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics));
        for (int i = 0; i < allFlags.Length; i++)
        {
            FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, allFlags[i], new FakeSerializable());
        }
    }

    /// <summary>
    /// Stateless <see cref="IXunitSerializable"/> used as the payload in the tests above.
    /// </summary>
    public class FakeSerializable : IXunitSerializable
    {
        // There is no state to restore.
        public void Deserialize(IXunitSerializationInfo info)
        {
        }

        // There is no state to store.
        public void Serialize(IXunitSerializationInfo info)
        {
        }
    }
}
|||
} |
|||
@ -1 +1 @@ |
|||
Subproject commit 6a003080674d1fedc66292c13ce5a357b2a33083 |
|||
Subproject commit cc6465910d092319ef9bf4e99698a0649996d3c5 |
|||
@ -0,0 +1,3 @@ |
|||
version https://git-lfs.github.com/spec/v1 |
|||
oid sha256:6a9c5cdacc9bedf481c883828de5bfb7902e2bec038fff08830171cf7075e4f9 |
|||
size 870 |
|||
Loading…
Reference in new issue