Browse Source

Enhance NormalizedFloatToByteSaturate

pull/2654/head
James Jackson-South 2 years ago
parent
commit
980347e96f
  1. 20
      src/ImageSharp/Common/Helpers/Numerics.cs
  2. 4
      src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
  3. 4
      src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
  4. 150
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  5. 49
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  6. 87
      src/ImageSharp/Common/Helpers/Vector128Utilities.cs
  7. 39
      src/ImageSharp/Common/Helpers/Vector256Utilities.cs
  8. 37
      src/ImageSharp/Common/Helpers/Vector512Utilities.cs
  9. 16
      src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs
  10. 92
      tests/ImageSharp.Benchmarks/Bulk/FromVector4.cs
  11. 66
      tests/ImageSharp.Benchmarks/Bulk/FromVector4_Rgb24.cs
  12. 5
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
  13. 22
      tests/ImageSharp.Tests/TestUtilities/BasicSerializer.cs
  14. 81
      tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs

20
src/ImageSharp/Common/Helpers/Numerics.cs

@ -1010,6 +1010,26 @@ internal static class Numerics
where TVector : struct
=> (uint)span.Length / (uint)Vector256<TVector>.Count;
/// <summary>
/// Computes how many whole 512-bit vector groups the given span can be divided into.
/// </summary>
/// <typeparam name="TVector">The element type of the vector.</typeparam>
/// <param name="span">The span to measure.</param>
/// <returns>
/// The span length divided (rounding down) by <c>Vector512&lt;TVector&gt;.Count</c>.
/// </returns>
public static nuint Vector512Count<TVector>(this Span<byte> span)
    where TVector : struct
{
    // Both operands are non-negative, so the division is done unsigned.
    uint length = (uint)span.Length;
    uint elementsPerVector = (uint)Vector512<TVector>.Count;
    return length / elementsPerVector;
}
/// <summary>
/// Computes how many whole 512-bit vector groups the given read-only span can be divided into.
/// </summary>
/// <typeparam name="TVector">The element type of the vector.</typeparam>
/// <param name="span">The span to measure.</param>
/// <returns>
/// The span length divided (rounding down) by <c>Vector512&lt;TVector&gt;.Count</c>.
/// </returns>
public static nuint Vector512Count<TVector>(this ReadOnlySpan<byte> span)
    where TVector : struct
{
    // Both operands are non-negative, so the division is done unsigned.
    uint length = (uint)span.Length;
    uint elementsPerVector = (uint)Vector512<TVector>.Count;
    return length / elementsPerVector;
}
/// <summary>
/// Gets the count of vectors that safely fit into the given span.
/// </summary>

4
src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs

@ -95,7 +95,7 @@ internal static partial class SimdUtils
/// </summary>
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
VerifySpanInput(source, dest, Vector<byte>.Count);
DebugVerifySpanInput(source, dest, Vector<byte>.Count);
nuint n = dest.VectorCount<byte>();
@ -130,7 +130,7 @@ internal static partial class SimdUtils
ReadOnlySpan<float> source,
Span<byte> dest)
{
VerifySpanInput(source, dest, Vector<byte>.Count);
DebugVerifySpanInput(source, dest, Vector<byte>.Count);
nuint n = dest.VectorCount<byte>();

4
src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs

@ -69,7 +69,7 @@ internal static partial class SimdUtils
[MethodImpl(InliningOptions.ColdPath)]
internal static void ByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
VerifySpanInput(source, dest, 4);
DebugVerifySpanInput(source, dest, 4);
uint count = (uint)dest.Length / 4;
if (count == 0)
@ -103,7 +103,7 @@ internal static partial class SimdUtils
ReadOnlySpan<float> source,
Span<byte> dest)
{
VerifySpanInput(source, dest, 4);
DebugVerifySpanInput(source, dest, 4);
uint count = (uint)source.Length / 4;
if (count == 0)

150
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -17,8 +17,13 @@ internal static partial class SimdUtils
{
public static class HwIntrinsics
{
#pragma warning disable SA1117 // Parameters should be on same line or separate lines
#pragma warning disable SA1137 // Elements should have the same indentation
[MethodImpl(MethodImplOptions.AggressiveInlining)] // too much IL for JIT to inline, so give a hint
public static Vector256<int> PermuteMaskDeinterleave8x32() => Vector256.Create(0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0).AsInt32();
public static Vector256<int> PermuteMaskDeinterleave8x32() => Vector256.Create(0, 4, 1, 5, 2, 6, 3, 7);
/// <summary>
/// Gets the 32-bit element index vector that the AVX-512 path of
/// <c>NormalizedFloatToByteSaturate</c> passes to <c>Avx512F.PermuteVar16x32</c>
/// after packing its converted values down to bytes.
/// </summary>
/// <remarks>
/// Read as a 4x4 grid, the indices are the transpose of 0..15.
/// NOTE(review): presumably this undoes the per-128-bit-lane ordering left by the
/// pack instructions - confirm against the intrinsic documentation.
/// </remarks>
/// <returns>The <see cref="Vector512{Int32}"/> of permutation indices.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector512<int> PermuteMaskDeinterleave16x32()
    => Vector512.Create(
        0, 4, 8, 12,
        1, 5, 9, 13,
        2, 6, 10, 14,
        3, 7, 11, 15);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<uint> PermuteMaskEvenOdd8x32() => Vector256.Create(0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0).AsUInt32();
@ -38,17 +43,18 @@ internal static partial class SimdUtils
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> ShuffleMaskSlice4Nx16() => Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80);
#pragma warning disable SA1003, SA1116, SA1117 // Parameters should be on same line or separate lines
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<byte> ShuffleMaskShiftAlpha() => Vector256.Create((byte)
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15);
private static Vector256<byte> ShuffleMaskShiftAlpha() => Vector256.Create(
(byte)0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<uint> PermuteMaskShiftAlpha8x32() => Vector256.Create(
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0,
5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0).AsUInt32();
#pragma warning restore SA1003, SA1116, SA1117 // Parameters should be on same line or separate lines
public static Vector256<uint> PermuteMaskShiftAlpha8x32()
=> Vector256.Create(
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0,
5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0).AsUInt32();
#pragma warning restore SA1137 // Elements should have the same indentation
#pragma warning restore SA1117 // Parameters should be on same line or separate lines
/// <summary>
/// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
@ -795,7 +801,7 @@ internal static partial class SimdUtils
{
if (Avx2.IsSupported)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
DebugVerifySpanInput(source, dest, Vector256<byte>.Count);
nuint n = dest.Vector256Count<byte>();
@ -828,7 +834,7 @@ internal static partial class SimdUtils
else
{
// Sse
VerifySpanInput(source, dest, Vector128<byte>.Count);
DebugVerifySpanInput(source, dest, Vector128<byte>.Count);
nuint n = dest.Vector128Count<byte>();
@ -881,17 +887,24 @@ internal static partial class SimdUtils
/// <summary>
/// <see cref="NormalizedFloatToByteSaturate"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
/// <param name="source">The source buffer.</param>
/// <param name="destination">The destination buffer.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturateReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
ref Span<byte> destination)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");
if (Avx2.IsSupported || Sse2.IsSupported)
if (Avx512BW.IsSupported || Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported)
{
int remainder;
if (Avx2.IsSupported)
if (Avx512BW.IsSupported)
{
remainder = Numerics.ModuloP2(source.Length, Vector512<byte>.Count);
}
else if (Avx2.IsSupported)
{
remainder = Numerics.ModuloP2(source.Length, Vector256<byte>.Count);
}
@ -906,10 +919,10 @@ internal static partial class SimdUtils
{
NormalizedFloatToByteSaturate(
source[..adjustedCount],
dest[..adjustedCount]);
destination[..adjustedCount]);
source = source[adjustedCount..];
dest = dest[adjustedCount..];
destination = destination[adjustedCount..];
}
}
}
@ -917,25 +930,59 @@ internal static partial class SimdUtils
/// <summary>
/// Implementation of <see cref="SimdUtils.NormalizedFloatToByteSaturate"/>, which is faster on new .NET runtime.
/// </summary>
/// <param name="source">The source buffer.</param>
/// <param name="destination">The destination buffer.</param>
/// <remarks>
/// Implementation is based on MagicScaler code:
/// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L541-L622
/// </remarks>
internal static void NormalizedFloatToByteSaturate(
ReadOnlySpan<float> source,
Span<byte> dest)
Span<byte> destination)
{
if (Avx2.IsSupported)
if (Avx512BW.IsSupported)
{
VerifySpanInput(source, dest, Vector256<byte>.Count);
DebugVerifySpanInput(source, destination, Vector512<byte>.Count);
nuint n = destination.Vector512Count<byte>();
nuint n = dest.Vector256Count<byte>();
ref Vector512<float> sourceBase = ref Unsafe.As<float, Vector512<float>>(ref MemoryMarshal.GetReference(source));
ref Vector512<byte> destinationBase = ref Unsafe.As<byte, Vector512<byte>>(ref MemoryMarshal.GetReference(destination));
ref Vector256<float> sourceBase =
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
Vector512<float> scale = Vector512.Create((float)byte.MaxValue);
Vector512<int> mask = PermuteMaskDeinterleave16x32();
ref Vector256<byte> destBase =
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
for (nuint i = 0; i < n; i++)
{
ref Vector512<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector512<float> f0 = scale * s;
Vector512<float> f1 = scale * Unsafe.Add(ref s, 1);
Vector512<float> f2 = scale * Unsafe.Add(ref s, 2);
Vector512<float> f3 = scale * Unsafe.Add(ref s, 3);
Vector512<int> w0 = Vector512Utilities.ConvertToInt32RoundToEven(f0);
Vector512<int> w1 = Vector512Utilities.ConvertToInt32RoundToEven(f1);
Vector512<int> w2 = Vector512Utilities.ConvertToInt32RoundToEven(f2);
Vector512<int> w3 = Vector512Utilities.ConvertToInt32RoundToEven(f3);
Vector512<short> u0 = Avx512BW.PackSignedSaturate(w0, w1);
Vector512<short> u1 = Avx512BW.PackSignedSaturate(w2, w3);
Vector512<byte> b = Avx512BW.PackUnsignedSaturate(u0, u1);
b = Avx512F.PermuteVar16x32(b.AsInt32(), mask).AsByte();
Unsafe.Add(ref destinationBase, i) = b;
}
}
else
if (Avx2.IsSupported)
{
DebugVerifySpanInput(source, destination, Vector256<byte>.Count);
nuint n = destination.Vector256Count<byte>();
ref Vector256<float> sourceBase = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(source));
ref Vector256<byte> destinationBase = ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(destination));
Vector256<float> scale = Vector256.Create((float)byte.MaxValue);
Vector256<int> mask = PermuteMaskDeinterleave8x32();
@ -944,36 +991,33 @@ internal static partial class SimdUtils
{
ref Vector256<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector256<float> f0 = Avx.Multiply(scale, s);
Vector256<float> f1 = Avx.Multiply(scale, Unsafe.Add(ref s, 1));
Vector256<float> f2 = Avx.Multiply(scale, Unsafe.Add(ref s, 2));
Vector256<float> f3 = Avx.Multiply(scale, Unsafe.Add(ref s, 3));
Vector256<float> f0 = scale * s;
Vector256<float> f1 = scale * Unsafe.Add(ref s, 1);
Vector256<float> f2 = scale * Unsafe.Add(ref s, 2);
Vector256<float> f3 = scale * Unsafe.Add(ref s, 3);
Vector256<int> w0 = Avx.ConvertToVector256Int32(f0);
Vector256<int> w1 = Avx.ConvertToVector256Int32(f1);
Vector256<int> w2 = Avx.ConvertToVector256Int32(f2);
Vector256<int> w3 = Avx.ConvertToVector256Int32(f3);
Vector256<int> w0 = Vector256Utilities.ConvertToInt32RoundToEven(f0);
Vector256<int> w1 = Vector256Utilities.ConvertToInt32RoundToEven(f1);
Vector256<int> w2 = Vector256Utilities.ConvertToInt32RoundToEven(f2);
Vector256<int> w3 = Vector256Utilities.ConvertToInt32RoundToEven(f3);
Vector256<short> u0 = Avx2.PackSignedSaturate(w0, w1);
Vector256<short> u1 = Avx2.PackSignedSaturate(w2, w3);
Vector256<byte> b = Avx2.PackUnsignedSaturate(u0, u1);
b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte();
Unsafe.Add(ref destBase, i) = b;
Unsafe.Add(ref destinationBase, i) = b;
}
}
else
{
// Sse
VerifySpanInput(source, dest, Vector128<byte>.Count);
nuint n = dest.Vector128Count<byte>();
// Sse, AdvSimd
DebugVerifySpanInput(source, destination, Vector128<byte>.Count);
ref Vector128<float> sourceBase =
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
nuint n = destination.Vector128Count<byte>();
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
ref Vector128<float> sourceBase = ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destinationBase = ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(destination));
Vector128<float> scale = Vector128.Create((float)byte.MaxValue);
@ -981,20 +1025,20 @@ internal static partial class SimdUtils
{
ref Vector128<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector128<float> f0 = Sse.Multiply(scale, s);
Vector128<float> f1 = Sse.Multiply(scale, Unsafe.Add(ref s, 1));
Vector128<float> f2 = Sse.Multiply(scale, Unsafe.Add(ref s, 2));
Vector128<float> f3 = Sse.Multiply(scale, Unsafe.Add(ref s, 3));
Vector128<float> f0 = scale * s;
Vector128<float> f1 = scale * Unsafe.Add(ref s, 1);
Vector128<float> f2 = scale * Unsafe.Add(ref s, 2);
Vector128<float> f3 = scale * Unsafe.Add(ref s, 3);
Vector128<int> w0 = Sse2.ConvertToVector128Int32(f0);
Vector128<int> w1 = Sse2.ConvertToVector128Int32(f1);
Vector128<int> w2 = Sse2.ConvertToVector128Int32(f2);
Vector128<int> w3 = Sse2.ConvertToVector128Int32(f3);
Vector128<int> w0 = Vector128Utilities.ConvertToInt32RoundToEven(f0);
Vector128<int> w1 = Vector128Utilities.ConvertToInt32RoundToEven(f1);
Vector128<int> w2 = Vector128Utilities.ConvertToInt32RoundToEven(f2);
Vector128<int> w3 = Vector128Utilities.ConvertToInt32RoundToEven(f3);
Vector128<short> u0 = Sse2.PackSignedSaturate(w0, w1);
Vector128<short> u1 = Sse2.PackSignedSaturate(w2, w3);
Vector128<short> u0 = Vector128Utilities.PackSignedSaturate(w0, w1);
Vector128<short> u1 = Vector128Utilities.PackSignedSaturate(w2, w3);
Unsafe.Add(ref destBase, i) = Sse2.PackUnsignedSaturate(u0, u1);
Unsafe.Add(ref destinationBase, i) = Vector128Utilities.PackUnsignedSaturate(u0, u1);
}
}
}

49
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -94,35 +94,31 @@ internal static partial class SimdUtils
}
/// <summary>
/// Convert all <see cref="float"/> values normalized into [0..1] from 'source' into 'dest' buffer of <see cref="byte"/>.
/// Convert all <see cref="float"/> values normalized into [0..1] from 'source' into 'destination' buffer of <see cref="byte"/>.
/// The values are scaled up into [0-255] and rounded, overflows are clamped.
/// <paramref name="source"/> should be of the same size as <paramref name="dest"/>,
/// <paramref name="source"/> should be of the same size as <paramref name="destination"/>,
/// but there are no restrictions on the span's length.
/// </summary>
/// <param name="source">The source span of floats</param>
/// <param name="dest">The destination span of bytes</param>
/// <param name="destination">The destination span of bytes</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void NormalizedFloatToByteSaturate(ReadOnlySpan<float> source, Span<byte> dest)
internal static void NormalizedFloatToByteSaturate(ReadOnlySpan<float> source, Span<byte> destination)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
// Also deals with the remainder from previous conversions:
FallbackIntrinsics128.NormalizedFloatToByteSaturateReduce(ref source, ref dest);
DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");
HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref destination);
// Deal with the remainder:
if (source.Length > 0)
{
ConvertNormalizedFloatToByteRemainder(source, dest);
ConvertNormalizedFloatToByteRemainder(source, destination);
}
}
[MethodImpl(InliningOptions.ColdPath)]
private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> dest)
private static void ConvertByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> destination)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref float dBase = ref MemoryMarshal.GetReference(dest);
ref float dBase = ref MemoryMarshal.GetReference(destination);
// There are at most 3 elements at this point, having a for loop is overkill.
// Let's minimize the no. of instructions!
@ -140,23 +136,14 @@ internal static partial class SimdUtils
}
}
[MethodImpl(InliningOptions.ColdPath)]
private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan<float> source, Span<byte> dest)
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan<float> source, Span<byte> destination)
{
ref float sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
switch (source.Length)
ref byte dBase = ref MemoryMarshal.GetReference(destination);
for (int i = 0; i < source.Length; i++)
{
case 3:
Unsafe.Add(ref dBase, 2) = ConvertToByte(Unsafe.Add(ref sBase, 2));
goto case 2;
case 2:
Unsafe.Add(ref dBase, 1) = ConvertToByte(Unsafe.Add(ref sBase, 1));
goto case 1;
case 1:
dBase = ConvertToByte(sBase);
break;
Unsafe.Add(ref dBase, i) = ConvertToByte(Unsafe.Add(ref sBase, i));
}
}
@ -173,7 +160,7 @@ internal static partial class SimdUtils
}
[Conditional("DEBUG")]
private static void VerifySpanInput(ReadOnlySpan<byte> source, Span<float> dest, int shouldBeDivisibleBy)
private static void DebugVerifySpanInput(ReadOnlySpan<byte> source, Span<float> dest, int shouldBeDivisibleBy)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
DebugGuard.IsTrue(
@ -183,11 +170,11 @@ internal static partial class SimdUtils
}
[Conditional("DEBUG")]
private static void VerifySpanInput(ReadOnlySpan<float> source, Span<byte> dest, int shouldBeDivisibleBy)
private static void DebugVerifySpanInput(ReadOnlySpan<float> source, Span<byte> destination, int shouldBeDivisibleBy)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
DebugGuard.IsTrue(source.Length == destination.Length, nameof(source), "Input spans must be of same length!");
DebugGuard.IsTrue(
Numerics.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
Numerics.ModuloP2(destination.Length, shouldBeDivisibleBy) == 0,
nameof(source),
$"length should be divisible by {shouldBeDivisibleBy}!");
}

87
src/ImageSharp/Common/Helpers/Vector128Utilities.cs

@ -26,7 +26,7 @@ internal static class Vector128Utilities
public static bool SupportsShuffleFloat
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get => Sse.IsSupported;
get => Sse.IsSupported || AdvSimd.IsSupported;
}
/// <summary>
@ -62,6 +62,7 @@ internal static class Vector128Utilities
/// <param name="vector">The input vector from which values are selected.</param>
/// <param name="control">The shuffle control byte.</param>
/// <returns>The <see cref="Vector128{Single}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> Shuffle(Vector128<float> vector, [ConstantExpected] byte control)
{
if (Sse.IsSupported)
@ -69,6 +70,17 @@ internal static class Vector128Utilities
return Sse.Shuffle(vector, vector, control);
}
if (AdvSimd.IsSupported)
{
#pragma warning disable CA1857 // A constant is expected for the parameter
Vector128<float> result = Vector128.Create(AdvSimd.Extract(vector, (byte)(control & 0x3)));
result = AdvSimd.Insert(result, 1, AdvSimd.Extract(vector, (byte)((control >> 2) & 0x3)));
result = AdvSimd.Insert(result, 2, AdvSimd.Extract(vector, (byte)((control >> 4) & 0x3)));
result = AdvSimd.Insert(result, 3, AdvSimd.Extract(vector, (byte)((control >> 6) & 0x3)));
#pragma warning restore CA1857 // A constant is expected for the parameter
return result;
}
ThrowUnreachableException();
return default;
}
@ -84,6 +96,7 @@ internal static class Vector128Utilities
/// <returns>
/// A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> Shuffle(Vector128<byte> vector, Vector128<byte> indices)
{
if (Ssse3.IsSupported)
@ -155,6 +168,7 @@ internal static class Vector128Utilities
/// <param name="right">The right hand source vector.</param>
/// <param name="mask">An 8-bit mask used for the operation.</param>
/// <returns>The <see cref="Vector128{Byte}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> AlignRight(Vector128<byte> left, Vector128<byte> right, [ConstantExpected(Max = (byte)15)] byte mask)
{
if (Ssse3.IsSupported)
@ -171,6 +185,77 @@ internal static class Vector128Utilities
return default;
}
/// <summary>
/// Converts each of the 4 single-precision values in a 128-bit vector to a signed 32-bit integer,
/// rounding in a manner equivalent to <see cref="MidpointRounding.ToEven"/>.
/// </summary>
/// <param name="vector">The vector of floating-point values to convert.</param>
/// <returns>The converted <see cref="Vector128{Int32}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> ConvertToInt32RoundToEven(Vector128<float> vector)
{
    if (Sse2.IsSupported)
    {
        return Sse2.ConvertToVector128Int32(vector);
    }
    else if (AdvSimd.IsSupported)
    {
        return AdvSimd.ConvertToInt32RoundToEven(vector);
    }

    // Software path: adding and then subtracting 2^23 (carrying the sign of each input)
    // makes the floating-point addition itself discard the fractional bits, so the
    // final conversion only ever sees integral values.
    // NOTE(review): this relies on default round-to-nearest-even float arithmetic and
    // on input magnitudes below 2^23 - confirm callers stay in that range.
    Vector128<float> signMask = Vector128.Create(-0.0f);
    Vector128<float> twoPow23 = Vector128.Create(8388608.0f);

    Vector128<float> signBits = vector & signMask;
    Vector128<float> bias = signBits | twoPow23;
    Vector128<float> snapped = (vector + bias) - bias;

    // Re-apply the sign bits before converting.
    return Vector128.ConvertToInt32(snapped | signBits);
}
/// <summary>
/// Packs signed 16-bit integers to unsigned 8-bit integers and saturates.
/// </summary>
/// <param name="left">The left hand source vector; its narrowed elements form the lower half of the result.</param>
/// <param name="right">The right hand source vector; its narrowed elements form the upper half of the result.</param>
/// <returns>The <see cref="Vector128{Byte}"/>.</returns>
/// <exception cref="UnreachableException">
/// Thrown when neither <see cref="Sse2"/> nor <see cref="AdvSimd"/> is supported.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector128<short> right)
{
    if (Sse2.IsSupported)
    {
        return Sse2.PackUnsignedSaturate(left, right);
    }

    if (AdvSimd.IsSupported)
    {
        // Narrow 'left' into the lower 64 bits, then narrow 'right' into the upper 64 bits.
        return AdvSimd.ExtractNarrowingSaturateUnsignedUpper(AdvSimd.ExtractNarrowingSaturateUnsignedLower(left), right);
    }

    // No software fallback is provided; callers are expected to check hardware support first.
    ThrowUnreachableException();
    return default;
}
/// <summary>
/// Narrows two vectors of signed 32-bit integers into one vector of signed 16-bit integers,
/// saturating values that do not fit.
/// </summary>
/// <param name="left">The left hand source vector; its narrowed elements form the lower half of the result.</param>
/// <param name="right">The right hand source vector; its narrowed elements form the upper half of the result.</param>
/// <returns>The <see cref="Vector128{Int16}"/>.</returns>
/// <exception cref="UnreachableException">
/// Thrown when neither <see cref="Sse2"/> nor <see cref="AdvSimd"/> is supported.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128<int> right)
{
    if (Sse2.IsSupported)
    {
        return Sse2.PackSignedSaturate(left, right);
    }
    else if (AdvSimd.IsSupported)
    {
        // Narrow 'left' into the lower 64 bits first, then 'right' into the upper 64 bits.
        Vector64<short> narrowedLeft = AdvSimd.ExtractNarrowingSaturateLower(left);
        return AdvSimd.ExtractNarrowingSaturateUpper(narrowedLeft, right);
    }
    else
    {
        // No software fallback is provided for this operation.
        ThrowUnreachableException();
        return default;
    }
}
/// <summary>
/// Always throws an <see cref="UnreachableException"/>.
/// </summary>
/// <exception cref="UnreachableException">Thrown unconditionally.</exception>
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
}

39
src/ImageSharp/Common/Helpers/Vector256Utilities.cs

@ -25,7 +25,7 @@ internal static class Vector256Utilities
public static bool SupportsShuffleFloat
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get => Avx.IsSupported;
get => Avx.IsSupported || Sse.IsSupported;
}
/// <summary>
@ -43,6 +43,7 @@ internal static class Vector256Utilities
/// <param name="vector">The input vector from which values are selected.</param>
/// <param name="control">The shuffle control byte.</param>
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> Shuffle(Vector256<float> vector, [ConstantExpected] byte control)
{
if (Avx.IsSupported)
@ -50,6 +51,13 @@ internal static class Vector256Utilities
return Avx.Shuffle(vector, vector, control);
}
if (Sse.IsSupported)
{
Vector128<float> lower = vector.GetLower();
Vector128<float> upper = vector.GetUpper();
return Vector256.Create(Sse.Shuffle(lower, lower, control), Sse.Shuffle(upper, upper, control));
}
ThrowUnreachableException();
return default;
}
@ -62,6 +70,7 @@ internal static class Vector256Utilities
/// The per-element indices used to select a value from <paramref name="vector" />.
/// </param>
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<byte> Shuffle(Vector256<byte> vector, Vector256<byte> indices)
{
if (Avx2.IsSupported)
@ -73,6 +82,34 @@ internal static class Vector256Utilities
return default;
}
/// <summary>
/// Converts each of the 8 single-precision values in a 256-bit vector to a signed 32-bit integer,
/// rounding in a manner equivalent to <see cref="MidpointRounding.ToEven"/>.
/// </summary>
/// <param name="vector">The vector of floating-point values to convert.</param>
/// <returns>The converted <see cref="Vector256{Int32}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<int> ConvertToInt32RoundToEven(Vector256<float> vector)
{
    if (Avx.IsSupported)
    {
        return Avx.ConvertToVector256Int32(vector);
    }
    else if (Sse2.IsSupported)
    {
        // Convert the two 128-bit halves separately and stitch them back together.
        return Vector256.Create(
            Sse2.ConvertToVector128Int32(vector.GetLower()),
            Sse2.ConvertToVector128Int32(vector.GetUpper()));
    }

    // Software path: adding and then subtracting 2^23 (carrying the sign of each input)
    // makes the floating-point addition itself discard the fractional bits, so the
    // final conversion only ever sees integral values.
    // NOTE(review): this relies on default round-to-nearest-even float arithmetic and
    // on input magnitudes below 2^23 - confirm callers stay in that range.
    Vector256<float> signMask = Vector256.Create(-0.0f);
    Vector256<float> twoPow23 = Vector256.Create(8388608.0f);

    Vector256<float> signBits = vector & signMask;
    Vector256<float> bias = signBits | twoPow23;
    Vector256<float> snapped = (vector + bias) - bias;

    // Re-apply the sign bits before converting.
    return Vector256.ConvertToInt32(snapped | signBits);
}
/// <summary>
/// Always throws an <see cref="UnreachableException"/>.
/// </summary>
/// <exception cref="UnreachableException">Thrown unconditionally.</exception>
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
}

37
src/ImageSharp/Common/Helpers/Vector512Utilities.cs

@ -25,7 +25,7 @@ internal static class Vector512Utilities
public static bool SupportsShuffleFloat
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get => Avx512F.IsSupported;
get => Avx512F.IsSupported || Avx.IsSupported;
}
/// <summary>
@ -51,6 +51,13 @@ internal static class Vector512Utilities
return Avx512F.Shuffle(vector, vector, control);
}
if (Avx.IsSupported)
{
Vector256<float> lower = vector.GetLower();
Vector256<float> upper = vector.GetUpper();
return Vector512.Create(Avx.Shuffle(lower, lower, control), Avx.Shuffle(upper, upper, control));
}
ThrowUnreachableException();
return default;
}
@ -75,6 +82,34 @@ internal static class Vector512Utilities
return default;
}
/// <summary>
/// Converts each of the 16 single-precision values in a 512-bit vector to a signed 32-bit integer,
/// rounding in a manner equivalent to <see cref="MidpointRounding.ToEven"/>.
/// </summary>
/// <param name="vector">The vector of floating-point values to convert.</param>
/// <returns>The converted <see cref="Vector512{Int32}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector512<int> ConvertToInt32RoundToEven(Vector512<float> vector)
{
    if (Avx512F.IsSupported)
    {
        return Avx512F.ConvertToVector512Int32(vector);
    }
    else if (Avx.IsSupported)
    {
        // Convert the two 256-bit halves separately and stitch them back together.
        return Vector512.Create(
            Avx.ConvertToVector256Int32(vector.GetLower()),
            Avx.ConvertToVector256Int32(vector.GetUpper()));
    }

    // Software path: adding and then subtracting 2^23 (carrying the sign of each input)
    // makes the floating-point addition itself discard the fractional bits, so the
    // final conversion only ever sees integral values.
    // NOTE(review): this relies on default round-to-nearest-even float arithmetic and
    // on input magnitudes below 2^23 - confirm callers stay in that range.
    Vector512<float> signMask = Vector512.Create(-0.0f);
    Vector512<float> twoPow23 = Vector512.Create(8388608.0f);

    Vector512<float> signBits = vector & signMask;
    Vector512<float> bias = signBits | twoPow23;
    Vector512<float> snapped = (vector + bias) - bias;

    // Re-apply the sign bits before converting.
    return Vector512.ConvertToInt32(snapped | signBits);
}
/// <summary>
/// Always throws an <see cref="UnreachableException"/>.
/// </summary>
/// <exception cref="UnreachableException">Thrown unconditionally.</exception>
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
}

16
src/ImageSharp/PixelFormats/PixelImplementations/PixelOperations/Rgba32.PixelOperations.cs

@ -20,15 +20,15 @@ public partial struct Rgba32
/// <inheritdoc />
public override void ToVector4(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
ReadOnlySpan<Rgba32> source,
Span<Vector4> destinationVectors,
PixelConversionModifiers modifiers)
{
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationVectors, nameof(destinationVectors));
Guard.DestinationShouldNotBeTooShort(source, destinationVectors, nameof(destinationVectors));
destinationVectors = destinationVectors[..sourcePixels.Length];
destinationVectors = destinationVectors[..source.Length];
SimdUtils.ByteToNormalizedFloat(
MemoryMarshal.Cast<Rgba32, byte>(sourcePixels),
MemoryMarshal.Cast<Rgba32, byte>(source),
MemoryMarshal.Cast<Vector4, float>(destinationVectors));
Vector4Converters.ApplyForwardConversionModifiers(destinationVectors, modifiers);
}
@ -37,16 +37,16 @@ public partial struct Rgba32
public override void FromVector4Destructive(
Configuration configuration,
Span<Vector4> sourceVectors,
Span<Rgba32> destinationPixels,
Span<Rgba32> destination,
PixelConversionModifiers modifiers)
{
Guard.DestinationShouldNotBeTooShort(sourceVectors, destinationPixels, nameof(destinationPixels));
Guard.DestinationShouldNotBeTooShort(sourceVectors, destination, nameof(destination));
destinationPixels = destinationPixels[..sourceVectors.Length];
destination = destination[..sourceVectors.Length];
Vector4Converters.ApplyBackwardConversionModifiers(sourceVectors, modifiers);
SimdUtils.NormalizedFloatToByteSaturate(
MemoryMarshal.Cast<Vector4, float>(sourceVectors),
MemoryMarshal.Cast<Rgba32, byte>(destinationPixels));
MemoryMarshal.Cast<Rgba32, byte>(destination));
}
/// <inheritdoc />

92
tests/ImageSharp.Benchmarks/Bulk/FromVector4.cs

@ -18,9 +18,9 @@ namespace SixLabors.ImageSharp.Benchmarks.Bulk;
public abstract class FromVector4<TPixel>
where TPixel : unmanaged, IPixel<TPixel>
{
protected IMemoryOwner<Vector4> source;
protected IMemoryOwner<Vector4> Source { get; set; }
protected IMemoryOwner<TPixel> destination;
protected IMemoryOwner<TPixel> Destination { get; set; }
protected Configuration Configuration => Configuration.Default;
@ -31,22 +31,22 @@ public abstract class FromVector4<TPixel>
[GlobalSetup]
public void Setup()
{
this.destination = this.Configuration.MemoryAllocator.Allocate<TPixel>(this.Count);
this.source = this.Configuration.MemoryAllocator.Allocate<Vector4>(this.Count);
this.Destination = this.Configuration.MemoryAllocator.Allocate<TPixel>(this.Count);
this.Source = this.Configuration.MemoryAllocator.Allocate<Vector4>(this.Count);
}
[GlobalCleanup]
public void Cleanup()
{
this.destination.Dispose();
this.source.Dispose();
this.Destination.Dispose();
this.Source.Dispose();
}
// [Benchmark]
public void PerElement()
{
ref Vector4 s = ref MemoryMarshal.GetReference(this.source.GetSpan());
ref TPixel d = ref MemoryMarshal.GetReference(this.destination.GetSpan());
ref Vector4 s = ref MemoryMarshal.GetReference(this.Source.GetSpan());
ref TPixel d = ref MemoryMarshal.GetReference(this.Destination.GetSpan());
for (nuint i = 0; i < (uint)this.Count; i++)
{
Unsafe.Add(ref d, i) = TPixel.FromVector4(Unsafe.Add(ref s, i));
@ -55,11 +55,11 @@ public abstract class FromVector4<TPixel>
[Benchmark(Baseline = true)]
public void PixelOperations_Base()
=> new PixelOperations<TPixel>().FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan());
=> new PixelOperations<TPixel>().FromVector4Destructive(this.Configuration, this.Source.GetSpan(), this.Destination.GetSpan());
[Benchmark]
public void PixelOperations_Specialized()
=> PixelOperations<TPixel>.Instance.FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan());
=> PixelOperations<TPixel>.Instance.FromVector4Destructive(this.Configuration, this.Source.GetSpan(), this.Destination.GetSpan());
}
public class FromVector4Rgba32 : FromVector4<Rgba32>
@ -67,8 +67,8 @@ public class FromVector4Rgba32 : FromVector4<Rgba32>
[Benchmark]
public void FallbackIntrinsics128()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.Source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.Destination.GetSpan());
SimdUtils.FallbackIntrinsics128.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
@ -76,8 +76,8 @@ public class FromVector4Rgba32 : FromVector4<Rgba32>
[Benchmark]
public void ExtendedIntrinsic()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.Source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.Destination.GetSpan());
SimdUtils.ExtendedIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
@ -85,8 +85,8 @@ public class FromVector4Rgba32 : FromVector4<Rgba32>
[Benchmark]
public void UseHwIntrinsics()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.Source.GetSpan());
Span<byte> dFloats = MemoryMarshal.Cast<Rgba32, byte>(this.Destination.GetSpan());
SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
@ -96,8 +96,8 @@ public class FromVector4Rgba32 : FromVector4<Rgba32>
[Benchmark]
public void UseAvx2_Grouped()
{
Span<float> src = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());
Span<float> src = MemoryMarshal.Cast<Vector4, float>(this.Source.GetSpan());
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(this.Destination.GetSpan());
nuint n = (uint)dest.Length / (uint)Vector<byte>.Count;
@ -107,7 +107,7 @@ public class FromVector4Rgba32 : FromVector4<Rgba32>
ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);
var maxBytes = Vector256.Create(255f);
Vector256<float> maxBytes = Vector256.Create(255f);
for (nuint i = 0; i < n; i++)
{
@ -137,25 +137,37 @@ public class FromVector4Rgba32 : FromVector4<Rgba32>
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<int> ConvertToInt32(Vector256<float> vf, Vector256<float> scale)
{
vf = Avx.Multiply(scale, vf);
return Avx.ConvertToVector256Int32(vf);
}
// *** RESULTS 2020 March: ***
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.200-preview-014971
// Job-IUZXZT : .NET Core 3.1.2 (CoreCLR 4.700.20.6602, CoreFX 4.700.20.6702), X64 RyuJIT
//
// | Method | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------------------- |------ |-----------:|------------:|----------:|------:|--------:|------:|------:|------:|----------:|
// | FallbackIntrinsics128 | 1024 | 2,952.6 ns | 1,680.77 ns | 92.13 ns | 3.32 | 0.16 | - | - | - | - |
// | BasicIntrinsics256 | 1024 | 1,664.5 ns | 928.11 ns | 50.87 ns | 1.87 | 0.09 | - | - | - | - |
// | ExtendedIntrinsic | 1024 | 890.6 ns | 375.48 ns | 20.58 ns | 1.00 | 0.00 | - | - | - | - |
// | UseAvx2 | 1024 | 299.0 ns | 30.47 ns | 1.67 ns | 0.34 | 0.01 | - | - | - | - |
// | UseAvx2_Grouped | 1024 | 318.1 ns | 48.19 ns | 2.64 ns | 0.36 | 0.01 | - | - | - | - |
// | PixelOperations_Base | 1024 | 8,136.9 ns | 1,834.82 ns | 100.57 ns | 9.14 | 0.26 | - | - | - | 24 B |
// | PixelOperations_Specialized | 1024 | 951.1 ns | 123.93 ns | 6.79 ns | 1.07 | 0.03 | - | - | - | - |
/*
BenchmarkDotNet v0.13.10, Windows 11 (10.0.22631.3085/23H2/2023Update/SunValley3)
11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
.NET SDK 8.0.200-preview.23624.5
[Host] : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Job-YJYLLR : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Runtime=.NET 8.0 Arguments=/p:DebugType=portable IterationCount=3
LaunchCount=1 WarmupCount=3
| Method | Count | Mean | Error | StdDev | Ratio | RatioSD | Allocated | Alloc Ratio |
|---------------------------- |------ |------------:|-------------:|-----------:|------:|--------:|----------:|------------:|
| PixelOperations_Base | 64 | 114.80 ns | 16.459 ns | 0.902 ns | 1.00 | 0.00 | - | NA |
| PixelOperations_Specialized | 64 | 28.91 ns | 80.482 ns | 4.411 ns | 0.25 | 0.04 | - | NA |
| FallbackIntrinsics128 | 64 | 133.60 ns | 23.750 ns | 1.302 ns | 1.16 | 0.02 | - | NA |
| ExtendedIntrinsic | 64 | 40.11 ns | 10.183 ns | 0.558 ns | 0.35 | 0.01 | - | NA |
| UseHwIntrinsics | 64 | 14.71 ns | 4.860 ns | 0.266 ns | 0.13 | 0.00 | - | NA |
| UseAvx2_Grouped | 64 | 20.23 ns | 11.619 ns | 0.637 ns | 0.18 | 0.00 | - | NA |
| | | | | | | | | |
| PixelOperations_Base | 256 | 387.94 ns | 31.591 ns | 1.732 ns | 1.00 | 0.00 | - | NA |
| PixelOperations_Specialized | 256 | 50.93 ns | 22.388 ns | 1.227 ns | 0.13 | 0.00 | - | NA |
| FallbackIntrinsics128 | 256 | 509.72 ns | 249.926 ns | 13.699 ns | 1.31 | 0.04 | - | NA |
| ExtendedIntrinsic | 256 | 140.32 ns | 9.353 ns | 0.513 ns | 0.36 | 0.00 | - | NA |
| UseHwIntrinsics | 256 | 41.99 ns | 16.000 ns | 0.877 ns | 0.11 | 0.00 | - | NA |
| UseAvx2_Grouped | 256 | 63.81 ns | 2.360 ns | 0.129 ns | 0.16 | 0.00 | - | NA |
| | | | | | | | | |
| PixelOperations_Base | 2048 | 2,979.49 ns | 2,023.706 ns | 110.926 ns | 1.00 | 0.00 | - | NA |
| PixelOperations_Specialized | 2048 | 326.19 ns | 19.077 ns | 1.046 ns | 0.11 | 0.00 | - | NA |
| FallbackIntrinsics128 | 2048 | 3,885.95 ns | 411.078 ns | 22.533 ns | 1.31 | 0.05 | - | NA |
| ExtendedIntrinsic | 2048 | 1,078.58 ns | 136.960 ns | 7.507 ns | 0.36 | 0.01 | - | NA |
| UseHwIntrinsics | 2048 | 312.07 ns | 68.662 ns | 3.764 ns | 0.10 | 0.00 | - | NA |
| UseAvx2_Grouped | 2048 | 451.83 ns | 41.742 ns | 2.288 ns | 0.15 | 0.01 | - | NA |
*/
}

66
tests/ImageSharp.Benchmarks/Bulk/FromVector4_Rgb24.cs

@ -7,48 +7,26 @@ using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Benchmarks.Bulk;
[Config(typeof(Config.Short))]
public class FromVector4_Rgb24 : FromVector4<Rgb24>
{
}
public class FromVector4_Rgb24 : FromVector4<Rgb24>;
// 2020-11-02
// ##########
//
// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT
// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT
// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// IterationCount=3 LaunchCount=1 WarmupCount=3
//
// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:|
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 64 | 343.2 ns | 305.91 ns | 16.77 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 64 | 320.8 ns | 19.93 ns | 1.09 ns | 0.94 | 0.05 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 64 | 234.3 ns | 17.98 ns | 0.99 ns | 1.00 | 0.00 | 0.0052 | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 64 | 246.0 ns | 82.34 ns | 4.51 ns | 1.05 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 64 | 222.3 ns | 39.46 ns | 2.16 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 64 | 243.4 ns | 33.58 ns | 1.84 ns | 1.09 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 256 | 824.9 ns | 32.77 ns | 1.80 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 256 | 967.0 ns | 39.09 ns | 2.14 ns | 1.17 | 0.01 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 256 | 756.9 ns | 94.43 ns | 5.18 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 256 | 1,003.3 ns | 3,192.09 ns | 174.97 ns | 1.32 | 0.22 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 256 | 748.6 ns | 248.03 ns | 13.60 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 256 | 437.0 ns | 36.48 ns | 2.00 ns | 0.58 | 0.01 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 2048 | 5,751.6 ns | 704.24 ns | 38.60 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 2048 | 4,391.6 ns | 718.17 ns | 39.37 ns | 0.76 | 0.00 | 0.0153 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 2048 | 6,202.0 ns | 1,815.18 ns | 99.50 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 2048 | 4,225.6 ns | 1,004.03 ns | 55.03 ns | 0.68 | 0.01 | 0.0153 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 2048 | 6,157.1 ns | 2,516.98 ns | 137.96 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 2048 | 1,822.7 ns | 1,764.43 ns | 96.71 ns | 0.30 | 0.02 | 0.0172 | - | - | 72 B |
/*
BenchmarkDotNet v0.13.10, Windows 11 (10.0.22631.3085/23H2/2023Update/SunValley3)
11th Gen Intel Core i7-11370H 3.30GHz, 1 CPU, 8 logical and 4 physical cores
.NET SDK 8.0.200-preview.23624.5
[Host] : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Job-NEHCEM : .NET 8.0.1 (8.0.123.58001), X64 RyuJIT AVX2
Runtime=.NET 8.0 Arguments=/p:DebugType=portable IterationCount=3
LaunchCount=1 WarmupCount=3
| Method | Count | Mean | Error | StdDev | Ratio | Gen0 | Allocated | Alloc Ratio |
|---------------------------- |------ |------------:|----------:|---------:|------:|-------:|----------:|------------:|
| PixelOperations_Base | 64 | 95.87 ns | 13.60 ns | 0.745 ns | 1.00 | - | - | NA |
| PixelOperations_Specialized | 64 | 97.34 ns | 30.34 ns | 1.663 ns | 1.02 | - | - | NA |
| | | | | | | | | |
| PixelOperations_Base | 256 | 337.80 ns | 88.10 ns | 4.829 ns | 1.00 | - | - | NA |
| PixelOperations_Specialized | 256 | 195.07 ns | 30.54 ns | 1.674 ns | 0.58 | 0.0153 | 96 B | NA |
| | | | | | | | | |
| PixelOperations_Base | 2048 | 2,561.79 ns | 162.45 ns | 8.905 ns | 1.00 | - | - | NA |
| PixelOperations_Specialized | 2048 | 741.85 ns | 18.05 ns | 0.989 ns | 0.29 | 0.0153 | 96 B | NA |
*/

5
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -112,6 +112,7 @@ public partial class SimdUtilsTests
public static readonly TheoryData<int> ArraySizesDivisibleBy4 = new() { 0, 4, 8, 28, 1020 };
public static readonly TheoryData<int> ArraySizesDivisibleBy3 = new() { 0, 3, 9, 36, 957 };
public static readonly TheoryData<int> ArraySizesDivisibleBy32 = new() { 0, 32, 512 };
public static readonly TheoryData<int> ArraySizesDivisibleBy64 = new() { 0, 64, 512 };
public static readonly TheoryData<int> ArbitraryArraySizes = new() { 0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520 };
@ -199,7 +200,7 @@ public partial class SimdUtilsTests
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
[MemberData(nameof(ArraySizesDivisibleBy64))]
public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
if (!Sse2.IsSupported)
@ -214,7 +215,7 @@ public partial class SimdUtilsTests
FeatureTestRunner.RunWithHwIntrinsicsFeature(
RunTest,
count,
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX512BW | HwIntrinsics.DisableAVX2);
}
[Theory]

22
tests/ImageSharp.Tests/TestUtilities/BasicSerializer.cs

@ -13,14 +13,14 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities;
/// </summary>
internal class BasicSerializer : IXunitSerializationInfo
{
private readonly Dictionary<string, string> map = new Dictionary<string, string>();
private readonly Dictionary<string, string> map = [];
public const char Separator = ':';
private string DumpToString(Type type)
{
using var ms = new MemoryStream();
using var writer = new StreamWriter(ms);
using MemoryStream ms = new();
using StreamWriter writer = new(ms);
writer.WriteLine(type.FullName);
foreach (KeyValuePair<string, string> kv in this.map)
{
@ -29,16 +29,16 @@ internal class BasicSerializer : IXunitSerializationInfo
writer.Flush();
byte[] data = ms.ToArray();
return System.Convert.ToBase64String(data);
return Convert.ToBase64String(data);
}
private Type LoadDump(string dump)
{
byte[] data = System.Convert.FromBase64String(dump);
byte[] data = Convert.FromBase64String(dump);
using var ms = new MemoryStream(data);
using var reader = new StreamReader(ms);
var type = Type.GetType(reader.ReadLine());
using MemoryStream ms = new(data);
using StreamReader reader = new(ms);
Type type = Type.GetType(reader.ReadLine());
for (string s = reader.ReadLine(); s != null; s = reader.ReadLine())
{
string[] kv = s.Split(Separator);
@ -50,7 +50,7 @@ internal class BasicSerializer : IXunitSerializationInfo
public static string Serialize(IXunitSerializable serializable)
{
var serializer = new BasicSerializer();
BasicSerializer serializer = new();
serializable.Serialize(serializer);
return serializer.DumpToString(serializable.GetType());
}
@ -58,10 +58,10 @@ internal class BasicSerializer : IXunitSerializationInfo
public static T Deserialize<T>(string dump)
where T : IXunitSerializable
{
var serializer = new BasicSerializer();
BasicSerializer serializer = new();
Type type = serializer.LoadDump(dump);
var result = (T)Activator.CreateInstance(type);
T result = (T)Activator.CreateInstance(type);
result.Deserialize(serializer);
return result;
}

81
tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs

@ -2,6 +2,7 @@
// Licensed under the Six Labors Split License.
using System.Diagnostics;
using System.Globalization;
using Microsoft.DotNet.RemoteExecutor;
using Xunit.Abstractions;
@ -12,7 +13,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities;
/// </summary>
public static class FeatureTestRunner
{
private static readonly char[] SplitChars = { ',', ' ' };
private static readonly char[] SplitChars = [',', ' '];
/// <summary>
/// Allows the deserialization of parameters passed to the feature test.
@ -40,7 +41,7 @@ public static class FeatureTestRunner
/// <returns>The <typeparamref name="T"/> value.</returns>
public static T Deserialize<T>(string value)
where T : IConvertible
=> (T)Convert.ChangeType(value, typeof(T));
=> (T)Convert.ChangeType(value, typeof(T), CultureInfo.InvariantCulture);
/// <summary>
/// Runs the given test <paramref name="action"/> within an environment
@ -127,6 +128,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features are applied.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
@ -170,6 +172,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features are applied.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
@ -214,6 +217,8 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features are applied.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <typeparam name="T2">The additional type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="arg1">The value to pass as a parameter to the test action.</param>
@ -261,6 +266,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features are applied.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="intrinsics">The intrinsics features.</param>
/// <param name="arg1">The value to pass as a parameter to the test action.</param>
@ -307,6 +313,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features are applied.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="serializable">The value to pass as a parameter to the test action.</param>
/// <param name="intrinsics">The intrinsics features.</param>
@ -350,6 +357,7 @@ public static class FeatureTestRunner
/// Runs the given test <paramref name="action"/> within an environment
/// where the given <paramref name="intrinsics"/> features are applied.
/// </summary>
/// <typeparam name="T">The type of argument.</typeparam>
/// <param name="action">The test action to run.</param>
/// <param name="arg0">The value to pass as a parameter #0 to the test action.</param>
/// <param name="arg1">The value to pass as a parameter #1 to the test action.</param>
@ -395,10 +403,10 @@ public static class FeatureTestRunner
internal static Dictionary<HwIntrinsics, string> ToFeatureKeyValueCollection(this HwIntrinsics intrinsics)
{
// Loop through and translate the given values into COMPlus equivalents
Dictionary<HwIntrinsics, string> features = new();
Dictionary<HwIntrinsics, string> features = [];
foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries))
{
HwIntrinsics key = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic);
HwIntrinsics key = Enum.Parse<HwIntrinsics>(intrinsic);
switch (intrinsic)
{
case nameof(HwIntrinsics.AllowAll):
@ -418,40 +426,47 @@ public static class FeatureTestRunner
}
/// <summary>
/// See <see href="https://github.com/dotnet/runtime/blob/50ac454d8d8a1915188b2a4bb3fff3b81bf6c0cf/src/coreclr/src/jit/jitconfigvalues.h#L224"/>
/// <remarks>
/// <see cref="DisableSIMD"/> ends up impacting all SIMD support(including System.Numerics)
/// but not things like <see cref="DisableBMI1"/>, <see cref="DisableBMI2"/>, and <see cref="DisableLZCNT"/>.
/// </remarks>
/// See <see href="https://github.com/dotnet/runtime/blob/58601ba7da092fe82bb71d087d30df95472968b6/src/coreclr/jit/jitconfigvalues.h#L315"/>
/// </summary>
[Flags]
#pragma warning disable RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute).
public enum HwIntrinsics
public enum HwIntrinsics : long
#pragma warning restore RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute).
{
// Use flags so we can pass multiple values without using params.
// Don't base on 0 or use inverse for All as that doesn't translate to string values.
DisableHWIntrinsic = 1 << 0,
DisableSSE = 1 << 1,
DisableSSE2 = 1 << 2,
DisableAES = 1 << 3,
DisablePCLMULQDQ = 1 << 4,
DisableSSE3 = 1 << 5,
DisableSSSE3 = 1 << 6,
DisableSSE41 = 1 << 7,
DisableSSE42 = 1 << 8,
DisablePOPCNT = 1 << 9,
DisableAVX = 1 << 10,
DisableFMA = 1 << 11,
DisableAVX2 = 1 << 12,
DisableBMI1 = 1 << 13,
DisableBMI2 = 1 << 14,
DisableLZCNT = 1 << 15,
DisableArm64AdvSimd = 1 << 16,
DisableArm64Crc32 = 1 << 17,
DisableArm64Dp = 1 << 18,
DisableArm64Aes = 1 << 19,
DisableArm64Sha1 = 1 << 20,
DisableArm64Sha256 = 1 << 21,
AllowAll = 1 << 22
DisableHWIntrinsic = 1L << 0,
DisableSSE = 1L << 1,
DisableSSE2 = 1L << 2,
DisableAES = 1L << 3,
DisablePCLMULQDQ = 1L << 4,
DisableSSE3 = 1L << 5,
DisableSSSE3 = 1L << 6,
DisableSSE41 = 1L << 7,
DisableSSE42 = 1L << 8,
DisablePOPCNT = 1L << 9,
DisableAVX = 1L << 10,
DisableFMA = 1L << 11,
DisableAVX2 = 1L << 12,
DisableAVXVNNI = 1L << 13,
DisableAVX512BW = 1L << 14,
DisableAVX512BW_VL = 1L << 15,
DisableAVX512CD = 1L << 16,
DisableAVX512CD_VL = 1L << 17,
DisableAVX512DQ = 1L << 18,
DisableAVX512DQ_VL = 1L << 19,
DisableAVX512F = 1L << 20,
DisableAVX512F_VL = 1L << 21,
DisableAVX512VBMI = 1L << 22,
DisableAVX512VBMI_VL = 1L << 23,
DisableBMI1 = 1L << 24,
DisableBMI2 = 1L << 25,
DisableLZCNT = 1L << 26,
DisableArm64AdvSimd = 1L << 27,
DisableArm64Crc32 = 1L << 28,
DisableArm64Dp = 1L << 29,
DisableArm64Aes = 1L << 30,
DisableArm64Sha1 = 1L << 31,
DisableArm64Sha256 = 1L << 32,
AllowAll = 1L << 33
}

Loading…
Cancel
Save