Browse Source

Bulk conversion of arbitrary-sized Span-s of scalars

af/merge-core
Anton Firszov 8 years ago
parent
commit
81c57a812d
  1. 212
      src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
  2. 90
      src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
  3. 255
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  4. 100
      src/ImageSharp/Common/Tuples/Octet.cs
  5. 2
      src/ImageSharp/Common/Tuples/Vector4Pair.cs
  6. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs
  7. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
  8. 2
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
  9. 12
      src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
  10. 8
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
  11. 27
      tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs
  12. 7
      tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs
  13. 108
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

212
src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs

@ -0,0 +1,212 @@
// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
// ReSharper disable MemberHidesStaticFromOuterClass
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
/// <summary>
/// 256bit / AVX2 intrinsics NOT depending on newer API-s (Vector.Widen, Vector.Narrow, Vector.ConvertTo*)
/// </summary>
public static class BasicIntrinsics256
{
/// <summary>
/// Gets a value indicating whether the SIMD paths of this class are enabled.
/// The value is captured once from <see cref="IsAvx2CompatibleArchitecture"/>;
/// the *Reduce methods of this class convert nothing when it is false.
/// </summary>
public static bool IsAvailable { get; } = IsAvx2CompatibleArchitecture;
/// <summary>
/// Runs <see cref="BulkConvertByteToNormalizedFloat"/> on the longest leading part of the input
/// whose length is a multiple of 8, then advances both spans past the converted part,
/// so that only the unconverted tail (fewer than 8 elements) is left for the caller.
/// Leaves both spans untouched when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source bytes. On return: the part that has not been converted.</param>
/// <param name="dest">The destination floats. On return: the part that has not been written.</param>
internal static void BulkConvertByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");

    if (!IsAvailable)
    {
        return;
    }

    // Largest multiple of 8 that fits into the input.
    int vectorizableLength = source.Length - (source.Length % 8);
    if (vectorizableLength <= 0)
    {
        return;
    }

    ReadOnlySpan<byte> sourceHead = source.Slice(0, vectorizableLength);
    Span<float> destHead = dest.Slice(0, vectorizableLength);
    BulkConvertByteToNormalizedFloat(sourceHead, destHead);

    // Hand the tail back to the caller.
    source = source.Slice(vectorizableLength);
    dest = dest.Slice(vectorizableLength);
}
/// <summary>
/// Convert 'source.Length' <see cref="float"/> values normalized into [0..1] from 'source'
/// into 'dest' buffer of <see cref="byte"/>. The values are scaled up into [0-255] and rounded.
/// Input values are not clamped by this method;
/// see <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> for the clamping variant.
/// The implementation is SIMD optimized and works only with `source.Length` divisible by 8.
/// Based on:
/// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions
/// </summary>
/// <param name="source">The normalized floats to convert. The length must be a multiple of 8.</param>
/// <param name="dest">The buffer receiving the bytes. Must hold at least 'source.Length' elements.</param>
internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
{
    GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
    DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");

    // The loop below writes 'source.Length' bytes through raw references (no bounds checks),
    // so a too short destination would silently corrupt memory.
    DebugGuard.IsTrue(dest.Length >= source.Length, nameof(dest), "dest.Length should be at least source.Length!");

    if (source.Length == 0)
    {
        return;
    }

    ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
    int n = source.Length / 8;

    Vector<float> magick = new Vector<float>(32768.0f);
    Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);

    // need to copy to a temporary struct, because
    // Octet.OfUInt32 temp = Unsafe.As<Vector<float>, Octet.OfUInt32>(ref x)
    // does not work. TODO: This might be a CoreClr bug, need to ask/report
    var temp = default(Octet.OfUInt32);
    ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);

    for (int i = 0; i < n; i++)
    {
        // union { float f; uint32_t i; } u;
        // u.f = 32768.0f + x * (255.0f / 256.0f);
        // return (uint8_t)u.i;
        Vector<float> x = Unsafe.Add(ref srcBase, i);
        x = (x * scale) + magick;

        // Store the 8 floats into 'temp', so their raw bits can be read back as uint-s.
        tempRef = x;

        // Octet.OfByte.LoadFrom() keeps only the lowest byte of each 32 bit element.
        ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
        d.LoadFrom(ref temp);
    }
}
/// <summary>
/// SIMD optimized implementation for <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>.
/// Works only with `dest.Length` divisible by 8.
/// Implementation adapted from:
/// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions
/// http://stackoverflow.com/a/5362789
/// </summary>
/// <param name="source">The bytes to convert. Must hold at least 'dest.Length' elements.</param>
/// <param name="dest">The buffer receiving the normalized floats. The length must be a multiple of 8.</param>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    GuardAvx2(nameof(BulkConvertByteToNormalizedFloat));

    // The checked value is 'dest.Length', so 'dest' is the parameter to report.
    DebugGuard.IsTrue((dest.Length % 8) == 0, nameof(dest), "dest.Length should be divisable by 8!");

    // The loops below read 'dest.Length' bytes through raw references (no bounds checks),
    // so a too short source would be read past its end.
    DebugGuard.IsTrue(source.Length >= dest.Length, nameof(source), "source.Length should be at least dest.Length!");

    var bVec = new Vector<float>(256.0f / 255.0f);
    var magicFloat = new Vector<float>(32768.0f);
    var magicInt = new Vector<uint>(1191182336); // reinterpreted value of 32768.0f
    var mask = new Vector<uint>(255);

    ref Octet.OfByte sourceBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfUInt32 destBaseAsWideOctet = ref Unsafe.As<float, Octet.OfUInt32>(ref MemoryMarshal.GetReference(dest));
    ref Vector<float> destBaseAsFloat = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref destBaseAsWideOctet);

    int n = dest.Length / 8;

    // 1st pass: widen every source byte into a 32 bit unsigned integer,
    // using the destination buffer itself as the storage for the integers.
    for (int i = 0; i < n; i++)
    {
        ref Octet.OfByte s = ref Unsafe.Add(ref sourceBase, i);
        ref Octet.OfUInt32 d = ref Unsafe.Add(ref destBaseAsWideOctet, i);
        d.LoadFrom(ref s);
    }

    // 2nd pass: turn the integers into floats in place, 8 elements at a time.
    for (int i = 0; i < n; i++)
    {
        ref Vector<float> df = ref Unsafe.Add(ref destBaseAsFloat, i);

        // OR-ing the byte into the lowest bits of 32768.0f yields the float (32768 + b / 256).
        var vi = Vector.AsVectorUInt32(df);
        vi &= mask;
        vi |= magicInt;

        // ((32768 + b / 256) - 32768) * (256 / 255) == b / 255
        var vf = Vector.AsVectorSingle(vi);
        vf = (vf - magicFloat) * bVec;

        df = vf;
    }
}
/// <summary>
/// Runs <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> on as many leading elements as possible
/// (the largest multiple of 8), then slices both spans down, keeping only the unconverted remainder
/// (at most 7 elements). Leaves both spans untouched when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source floats. On return: the part that has not been converted.</param>
/// <param name="dest">The destination bytes. On return: the part that has not been written.</param>
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");

    if (IsAvailable)
    {
        // BulkConvertNormalizedFloatToByteClampOverflows() processes the input in groups of 8,
        // therefore 8 is the granularity to use here, the same way as in BulkConvertByteToNormalizedFloatReduce().
        // Using Vector<byte>.Count would leave up to Vector<byte>.Count - 1 elements
        // to the slow scalar loop of the caller.
        int remainder = source.Length % 8;
        int alignedCount = source.Length - remainder;

        if (alignedCount > 0)
        {
            BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, alignedCount), dest.Slice(0, alignedCount));

            source = source.Slice(alignedCount);
            dest = dest.Slice(alignedCount);
        }
    }
}
/// <summary>
/// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps overflown values before conversion:
/// every input value is limited into [0..1] before being scaled up into [0-255].
/// Works only with `source.Length` divisible by 8.
/// </summary>
/// <param name="source">The normalized floats to convert. The length must be a multiple of 8.</param>
/// <param name="dest">The buffer receiving the bytes. Must hold at least 'source.Length' elements.</param>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{
    GuardAvx2(nameof(BulkConvertNormalizedFloatToByteClampOverflows));
    DebugGuard.IsTrue((source.Length % 8) == 0, nameof(source), "source.Length should be divisible by 8!");

    // The loop below writes 'source.Length' bytes through raw references (no bounds checks),
    // so a too short destination would silently corrupt memory.
    DebugGuard.IsTrue(dest.Length >= source.Length, nameof(dest), "dest.Length should be at least source.Length!");

    if (source.Length == 0)
    {
        return;
    }

    ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
    int n = source.Length / 8;

    Vector<float> magick = new Vector<float>(32768.0f);
    Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);

    // need to copy to a temporary struct, because
    // Octet.OfUInt32 temp = Unsafe.As<Vector<float>, Octet.OfUInt32>(ref x)
    // does not work. TODO: This might be a CoreClr bug, need to ask/report
    var temp = default(Octet.OfUInt32);
    ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);

    for (int i = 0; i < n; i++)
    {
        // union { float f; uint32_t i; } u;
        // u.f = 32768.0f + x * (255.0f / 256.0f);
        // return (uint8_t)u.i;
        Vector<float> x = Unsafe.Add(ref srcBase, i);

        // Clamp into [0..1], so only the lowest byte of the result can be affected by the input.
        x = Vector.Max(x, Vector<float>.Zero);
        x = Vector.Min(x, Vector<float>.One);

        x = (x * scale) + magick;

        // Store the 8 floats into 'temp', so their raw bits can be read back as uint-s.
        tempRef = x;

        // Octet.OfByte.LoadFrom() keeps only the lowest byte of each 32 bit element.
        ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
        d.LoadFrom(ref temp);
    }
}
}
}
}

90
src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs

@ -1,8 +1,10 @@
using System; using System;
using System.Diagnostics;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
// ReSharper disable MemberHidesStaticFromOuterClass
namespace SixLabors.ImageSharp namespace SixLabors.ImageSharp
{ {
internal static partial class SimdUtils internal static partial class SimdUtils
@ -18,22 +20,47 @@ namespace SixLabors.ImageSharp
{ {
public static bool IsAvailable { get; } = public static bool IsAvailable { get; } =
#if NETCOREAPP2_1 #if NETCOREAPP2_1
// TODO: Also available in .NET 4.7.2, we need to add a build target! // TODO: Also available in .NET 4.7.2, we need to add a build target!
true; Vector.IsHardwareAccelerated;
#else #else
false; false;
#endif #endif
/// <summary> /// <summary>
/// A variant of <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>, which is faster on new .NET runtime. /// <see cref="BulkConvertByteToNormalizedFloat"/> as much elements as possible, slicing them down (keeping the remainder).
/// </summary>
[Conditional("NETCOREAPP2_1")]
internal static void BulkConvertByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source,
ref Span<float> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
if (IsAvailable)
{
int remainder = source.Length % Vector<byte>.Count;
int alignedCount = source.Length - remainder;
if (alignedCount > 0)
{
BulkConvertByteToNormalizedFloat(source.Slice(0, alignedCount), dest.Slice(0, alignedCount));
source = source.Slice(alignedCount);
dest = dest.Slice(alignedCount);
}
}
}
/// <summary>
/// A variant of <see cref="BasicIntrinsics256.BulkConvertByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
/// </summary> /// </summary>
// ReSharper disable once MemberHidesStaticFromOuterClass // ReSharper disable once MemberHidesStaticFromOuterClass
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{ {
Guard.IsTrue( DebugGuard.IsTrue(
dest.Length % Vector<byte>.Count == 0, dest.Length % Vector<byte>.Count == 0,
nameof(source), nameof(source),
"dest.Length should be divisable by Vector<byte>.Count!"); "dest.Length should be divisible by Vector<byte>.Count!");
int n = dest.Length / Vector<byte>.Count; int n = dest.Length / Vector<byte>.Count;
@ -63,34 +90,52 @@ namespace SixLabors.ImageSharp
} }
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] /// <summary>
private static Vector<float> ConvertToSingle(Vector<uint> u, Vector<float> scale) /// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as much elements as possible, slicing them down (keeping the remainder).
/// </summary>
[Conditional("NETCOREAPP2_1")]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{ {
Vector<int> vi = Vector.AsVectorInt32(u); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
Vector<float> v = Vector.ConvertToSingle(vi);
v *= scale; if (IsAvailable)
return v; {
int remainder = source.Length % Vector<byte>.Count;
int alignedCount = source.Length - remainder;
if (alignedCount > 0)
{
BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, alignedCount), dest.Slice(0, alignedCount));
source = source.Slice(alignedCount);
dest = dest.Slice(alignedCount);
}
}
} }
/// <summary> /// <summary>
/// A variant of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime. /// A variant of <see cref="BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime.
/// </summary> /// </summary>
/// <remarks> /// <remarks>
/// It does NOT worth yet to utilize this method (2018 Oct). /// It does NOT worth yet to utilize this method (2018 Oct).
/// See benchmark results for the "PackFromVector4_Rgba32" benchmark! /// See benchmark results for the "PackFromVector4_Rgba32" benchmark!
/// TODO: Check again later! /// TODO: Check again later!
/// </remarks> /// </remarks>
// ReSharper disable once MemberHidesStaticFromOuterClass internal static void BulkConvertNormalizedFloatToByteClampOverflows(
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest) ReadOnlySpan<float> source,
Span<byte> dest)
{ {
Guard.IsTrue( DebugGuard.IsTrue(
dest.Length % Vector<byte>.Count == 0, dest.Length % Vector<byte>.Count == 0,
nameof(dest), nameof(dest),
"dest.Length should be divisable by Vector<byte>.Count!"); "dest.Length should be divisible by Vector<byte>.Count!");
int n = dest.Length / Vector<byte>.Count; int n = dest.Length / Vector<byte>.Count;
ref Vector<float> sourceBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source)); ref Vector<float> sourceBase =
ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
ref Vector<byte> destBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(dest)); ref Vector<byte> destBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(dest));
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
@ -126,6 +171,15 @@ namespace SixLabors.ImageSharp
Vector<int> vi = Vector.ConvertToInt32(vf); Vector<int> vi = Vector.ConvertToInt32(vf);
return Vector.AsVectorUInt32(vi); return Vector.AsVectorUInt32(vi);
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToSingle(Vector<uint> u, Vector<float> scale)
{
Vector<int> vi = Vector.AsVectorInt32(u);
Vector<float> v = Vector.ConvertToSingle(vi);
v *= scale;
return v;
}
} }
} }
} }

255
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -6,6 +6,9 @@ using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp namespace SixLabors.ImageSharp
{ {
/// <summary> /// <summary>
@ -16,7 +19,8 @@ namespace SixLabors.ImageSharp
/// <summary> /// <summary>
/// Gets a value indicating whether the code is being executed on AVX2 CPU where both float and integer registers are of size 256 byte. /// Gets a value indicating whether the code is being executed on AVX2 CPU where both float and integer registers are of size 256 byte.
/// </summary> /// </summary>
public static bool IsAvx2CompatibleArchitecture { get; } = Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8; public static bool IsAvx2CompatibleArchitecture { get; } =
Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8;
internal static void GuardAvx2(string operation) internal static void GuardAvx2(string operation)
{ {
@ -57,236 +61,61 @@ namespace SixLabors.ImageSharp
} }
/// <summary> /// <summary>
/// Convert 'source.Length' <see cref="float"/> values normalized into [0..1] from 'source' into 'dest' buffer of <see cref="byte"/> values. /// Converts `dest.Length` <see cref="byte"/>-s to <see cref="float"/>-s normalized into [0..1].
/// The values are scaled up into [0-255] and rounded. /// <paramref name="source"/> should be the of the same size as <paramref name="dest"/>,
/// The implementation is SIMD optimized and works only with `source.Length` divisible by <see cref="Vector{UInt32}.Count"/>. /// but there are no restrictions on the span's length.
/// Based on:
/// <see>
/// <cref>http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions</cref>
/// </see>
/// </summary>
internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
{
GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");
if (source.Length == 0)
{
return;
}
ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 8;
Vector<float> magick = new Vector<float>(32768.0f);
Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
// need to copy to a temporary struct, because
// SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
// does not work. TODO: This might be a CoreClr bug, need to ask/report
var temp = default(Octet.OfUInt32);
ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
for (int i = 0; i < n; i++)
{
// union { float f; uint32_t i; } u;
// u.f = 32768.0f + x * (255.0f / 256.0f);
// return (uint8_t)u.i;
Vector<float> x = Unsafe.Add(ref srcBase, i);
x = (x * scale) + magick;
tempRef = x;
ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
d.LoadFrom(ref temp);
}
}
/// <summary>
/// Converts `dest.Length` bytes to <see cref="byte"/>-s to <see cref="float"/>-s normalized into [0..1]
/// The implementation is SIMD optimized and works only with `dest.Length` divisible by <see cref="Vector{UInt32}.Count"/>.
/// Implementation adapted from:
/// <see>
/// <cref>http://stackoverflow.com/a/5362789</cref>
/// </see>
/// </summary> /// </summary>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{ {
GuardAvx2(nameof(BulkConvertByteToNormalizedFloat)); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
DebugGuard.IsTrue((dest.Length % Vector<float>.Count) == 0, nameof(source), "dest.Length should be divisable by Vector<float>.Count!");
var bVec = new Vector<float>(256.0f / 255.0f);
var magicFloat = new Vector<float>(32768.0f);
var magicInt = new Vector<uint>(1191182336); // reinterpreded value of 32768.0f
var mask = new Vector<uint>(255);
ref Octet.OfByte sourceBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(source)); ExtendedIntrinsics.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
ref Octet.OfUInt32 destBaseAsWideOctet = ref Unsafe.As<float, Octet.OfUInt32>(ref MemoryMarshal.GetReference(dest)); BasicIntrinsics256.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
ref Vector<float> destBaseAsFloat = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref destBaseAsWideOctet);
int n = dest.Length / 8;
for (int i = 0; i < n; i++)
{
ref Octet.OfByte s = ref Unsafe.Add(ref sourceBase, i);
ref Octet.OfUInt32 d = ref Unsafe.Add(ref destBaseAsWideOctet, i);
d.LoadFrom(ref s);
}
for (int i = 0; i < n; i++) // Deal with the remainder:
int count = source.Length;
if (count > 0)
{ {
ref Vector<float> df = ref Unsafe.Add(ref destBaseAsFloat, i); // TODO: Do we need to optimize anything on this? (There are at most 7 remainders)
ref byte sBase = ref MemoryMarshal.GetReference(source);
var vi = Vector.AsVectorUInt32(df); ref float dBase = ref MemoryMarshal.GetReference(dest);
vi &= mask; for (int i = 0; i < count; i++)
vi |= magicInt; {
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, i) / 255f;
var vf = Vector.AsVectorSingle(vi); }
vf = (vf - magicFloat) * bVec;
df = vf;
} }
} }
/// <summary> /// <summary>
/// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps overflown values before conversion. /// Convert 'source.Length' <see cref="float"/> values normalized into [0..1] from 'source' into 'dest' buffer of <see cref="byte"/>.
/// The values are scaled up into [0-255] and rounded, overflows are clamped.
/// <paramref name="source"/> should be the of the same size as <paramref name="dest"/>,
/// but there are no restrictions on the span's length.
/// </summary> /// </summary>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest) internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{ {
GuardAvx2(nameof(BulkConvertNormalizedFloatToByteClampOverflows)); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");
if (source.Length == 0)
{
return;
}
ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 8;
Vector<float> magick = new Vector<float>(32768.0f);
Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
// need to copy to a temporary struct, because
// SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
// does not work. TODO: This might be a CoreClr bug, need to ask/report
var temp = default(Octet.OfUInt32);
ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
for (int i = 0; i < n; i++)
{
// union { float f; uint32_t i; } u;
// u.f = 32768.0f + x * (255.0f / 256.0f);
// return (uint8_t)u.i;
Vector<float> x = Unsafe.Add(ref srcBase, i);
x = Vector.Max(x, Vector<float>.Zero);
x = Vector.Min(x, Vector<float>.One);
x = (x * scale) + magick;
tempRef = x;
ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
d.LoadFrom(ref temp);
}
}
// TODO: Replace these with T4-d library level tuples!
internal static class Octet
{
[StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))]
public struct OfUInt32
{
[FieldOffset(0 * sizeof(uint))]
public uint V0;
[FieldOffset(1 * sizeof(uint))]
public uint V1;
[FieldOffset(2 * sizeof(uint))]
public uint V2;
[FieldOffset(3 * sizeof(uint))]
public uint V3;
[FieldOffset(4 * sizeof(uint))]
public uint V4;
[FieldOffset(5 * sizeof(uint))]
public uint V5;
[FieldOffset(6 * sizeof(uint))] ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
public uint V6; BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
[FieldOffset(7 * sizeof(uint))] // Deal with the remainder:
public uint V7; int count = source.Length;
if (count > 0)
public override string ToString()
{
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
}
[MethodImpl(InliningOptions.ShortMethod)]
public void LoadFrom(ref OfByte src)
{
this.V0 = src.V0;
this.V1 = src.V1;
this.V2 = src.V2;
this.V3 = src.V3;
this.V4 = src.V4;
this.V5 = src.V5;
this.V6 = src.V6;
this.V7 = src.V7;
}
}
[StructLayout(LayoutKind.Explicit, Size = 8)]
public struct OfByte
{ {
[FieldOffset(0)] ref float sBase = ref MemoryMarshal.GetReference(source);
public byte V0; ref byte dBase = ref MemoryMarshal.GetReference(dest);
[FieldOffset(1)]
public byte V1;
[FieldOffset(2)]
public byte V2;
[FieldOffset(3)]
public byte V3;
[FieldOffset(4)]
public byte V4;
[FieldOffset(5)]
public byte V5;
[FieldOffset(6)]
public byte V6;
[FieldOffset(7)]
public byte V7;
public override string ToString()
{
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
}
[MethodImpl(InliningOptions.ShortMethod)] for (int i = 0; i < count; i++)
public void LoadFrom(ref OfUInt32 src)
{ {
this.V0 = (byte)src.V0; // TODO: Do we need to optimize anything on this? (There are at most 7 remainders)
this.V1 = (byte)src.V1; float f = Unsafe.Add(ref sBase, i);
this.V2 = (byte)src.V2; f *= 255f;
this.V3 = (byte)src.V3; f += 0.5f;
this.V4 = (byte)src.V4; f = MathF.Max(0, f);
this.V5 = (byte)src.V5; f = MathF.Min(255f, f);
this.V6 = (byte)src.V6;
this.V7 = (byte)src.V7; Unsafe.Add(ref dBase, i) = (byte)f;
} }
} }
} }

100
src/ImageSharp/Common/Tuples/Octet.cs

@ -0,0 +1,100 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Tuples
{
internal static class Octet
{
/// <summary>
/// Value type holding 8 <see cref="uint"/> elements at consecutive 4 byte offsets (32 bytes in total).
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 32)]
public struct OfUInt32
{
    /// <summary>Element 0, located at byte offset 0.</summary>
    [FieldOffset(0)]
    public uint V0;

    /// <summary>Element 1, located at byte offset 4.</summary>
    [FieldOffset(4)]
    public uint V1;

    /// <summary>Element 2, located at byte offset 8.</summary>
    [FieldOffset(8)]
    public uint V2;

    /// <summary>Element 3, located at byte offset 12.</summary>
    [FieldOffset(12)]
    public uint V3;

    /// <summary>Element 4, located at byte offset 16.</summary>
    [FieldOffset(16)]
    public uint V4;

    /// <summary>Element 5, located at byte offset 20.</summary>
    [FieldOffset(20)]
    public uint V5;

    /// <summary>Element 6, located at byte offset 24.</summary>
    [FieldOffset(24)]
    public uint V6;

    /// <summary>Element 7, located at byte offset 28.</summary>
    [FieldOffset(28)]
    public uint V7;

    /// <summary>
    /// Fills all 8 elements from the matching elements of <paramref name="src"/>, widening each byte to <see cref="uint"/>.
    /// </summary>
    /// <param name="src">The octet of bytes to read from.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void LoadFrom(ref OfByte src)
    {
        this.V0 = src.V0;
        this.V1 = src.V1;
        this.V2 = src.V2;
        this.V3 = src.V3;
        this.V4 = src.V4;
        this.V5 = src.V5;
        this.V6 = src.V6;
        this.V7 = src.V7;
    }

    /// <summary>
    /// Formats the 8 elements as a comma separated list in square brackets.
    /// </summary>
    /// <returns>The string representation of the elements.</returns>
    public override string ToString() =>
        string.Format(
            "[{0},{1},{2},{3},{4},{5},{6},{7}]",
            this.V0,
            this.V1,
            this.V2,
            this.V3,
            this.V4,
            this.V5,
            this.V6,
            this.V7);
}
/// <summary>
/// Value type holding 8 <see cref="byte"/> elements at consecutive byte offsets (8 bytes in total).
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(byte))]
public struct OfByte
{
    /// <summary>Element 0.</summary>
    [FieldOffset(0 * sizeof(byte))]
    public byte V0;

    /// <summary>Element 1.</summary>
    [FieldOffset(1 * sizeof(byte))]
    public byte V1;

    /// <summary>Element 2.</summary>
    [FieldOffset(2 * sizeof(byte))]
    public byte V2;

    /// <summary>Element 3.</summary>
    [FieldOffset(3 * sizeof(byte))]
    public byte V3;

    /// <summary>Element 4.</summary>
    [FieldOffset(4 * sizeof(byte))]
    public byte V4;

    /// <summary>Element 5.</summary>
    [FieldOffset(5 * sizeof(byte))]
    public byte V5;

    /// <summary>Element 6.</summary>
    [FieldOffset(6 * sizeof(byte))]
    public byte V6;

    /// <summary>Element 7.</summary>
    [FieldOffset(7 * sizeof(byte))]
    public byte V7;

    /// <summary>
    /// Fills all 8 elements from the matching elements of <paramref name="src"/>,
    /// casting each <see cref="uint"/> down to <see cref="byte"/>.
    /// </summary>
    /// <param name="src">The octet of 32 bit integers to read from.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void LoadFrom(ref OfUInt32 src)
    {
        this.V0 = (byte)src.V0;
        this.V1 = (byte)src.V1;
        this.V2 = (byte)src.V2;
        this.V3 = (byte)src.V3;
        this.V4 = (byte)src.V4;
        this.V5 = (byte)src.V5;
        this.V6 = (byte)src.V6;
        this.V7 = (byte)src.V7;
    }

    /// <summary>
    /// Formats the 8 elements as a comma separated list in square brackets.
    /// </summary>
    /// <returns>The string representation of the elements.</returns>
    public override string ToString() =>
        string.Format(
            "[{0},{1},{2},{3},{4},{5},{6},{7}]",
            this.V0,
            this.V1,
            this.V2,
            this.V3,
            this.V4,
            this.V5,
            this.V6,
            this.V7);
}
}
}

2
src/ImageSharp/Common/Tuples/Vector4Pair.cs

@ -2,7 +2,7 @@
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Common.Tuples namespace SixLabors.ImageSharp.Tuples
{ {
/// <summary> /// <summary>
/// Its faster to process multiple Vector4-s together, so let's pair them! /// Its faster to process multiple Vector4-s together, so let's pair them!

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs

@ -6,7 +6,7 @@ using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Common.Tuples; using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{ {

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs

@ -6,7 +6,7 @@ using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Common.Tuples; using SixLabors.ImageSharp.Tuples;
// ReSharper disable ImpureMethodCallOnReadonlyValueField // ReSharper disable ImpureMethodCallOnReadonlyValueField
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters

2
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

@ -6,8 +6,8 @@ using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Numerics; using System.Numerics;
using SixLabors.ImageSharp.Common.Tuples;
using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.Tuples;
using SixLabors.Memory; using SixLabors.Memory;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters

12
src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs

@ -37,7 +37,7 @@ namespace SixLabors.ImageSharp.PixelFormats
} }
else else
{ {
ConvertToVector4UsingStandardIntrinsics(sourceColors, destinationVectors, count); ConvertToVector4UsingBasicIntrinsics(sourceColors, destinationVectors, count);
} }
} }
@ -58,7 +58,7 @@ namespace SixLabors.ImageSharp.PixelFormats
} }
else else
{ {
ConvertFromVector4StandardIntrinsics(sourceVectors, destinationColors, count); ConvertFromVector4BasicIntrinsics(sourceVectors, destinationColors, count);
} }
} }
@ -112,7 +112,7 @@ namespace SixLabors.ImageSharp.PixelFormats
} }
} }
private static void ConvertToVector4UsingStandardIntrinsics( private static void ConvertToVector4UsingBasicIntrinsics(
ReadOnlySpan<Rgba32> sourceColors, ReadOnlySpan<Rgba32> sourceColors,
Span<Vector4> destinationVectors, Span<Vector4> destinationVectors,
int count) int count)
@ -125,7 +125,7 @@ namespace SixLabors.ImageSharp.PixelFormats
ReadOnlySpan<byte> rawSrc = MemoryMarshal.Cast<Rgba32, byte>(sourceColors); ReadOnlySpan<byte> rawSrc = MemoryMarshal.Cast<Rgba32, byte>(sourceColors);
Span<float> rawDest = MemoryMarshal.Cast<Vector4, float>(destinationVectors.Slice(0, alignedCount)); Span<float> rawDest = MemoryMarshal.Cast<Vector4, float>(destinationVectors.Slice(0, alignedCount));
SimdUtils.BulkConvertByteToNormalizedFloat(rawSrc, rawDest); SimdUtils.BasicIntrinsics256.BulkConvertByteToNormalizedFloat(rawSrc, rawDest);
} }
if (remainder > 0) if (remainder > 0)
@ -155,7 +155,7 @@ namespace SixLabors.ImageSharp.PixelFormats
} }
} }
private static void ConvertFromVector4StandardIntrinsics(ReadOnlySpan<Vector4> sourceVectors, Span<Rgba32> destinationColors, int count) private static void ConvertFromVector4BasicIntrinsics(ReadOnlySpan<Vector4> sourceVectors, Span<Rgba32> destinationColors, int count)
{ {
int remainder = count % 2; int remainder = count % 2;
int alignedCount = count - remainder; int alignedCount = count - remainder;
@ -165,7 +165,7 @@ namespace SixLabors.ImageSharp.PixelFormats
ReadOnlySpan<float> rawSrc = MemoryMarshal.Cast<Vector4, float>(sourceVectors.Slice(0, alignedCount)); ReadOnlySpan<float> rawSrc = MemoryMarshal.Cast<Vector4, float>(sourceVectors.Slice(0, alignedCount));
Span<byte> rawDest = MemoryMarshal.Cast<Rgba32, byte>(destinationColors); Span<byte> rawDest = MemoryMarshal.Cast<Rgba32, byte>(destinationColors);
SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(rawSrc, rawDest); SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows(rawSrc, rawDest);
} }
if (remainder > 0) if (remainder > 0)

8
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs

@ -30,8 +30,8 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Params( [Params(
//64, //64,
//256, //256,
//512, 512
2048 //1024
)] )]
public int Count { get; set; } public int Count { get; set; }
@ -117,7 +117,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(sBytes, dFloats); SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(sBytes, dFloats);
} }
//[Benchmark] [Benchmark]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops() public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops()
{ {
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan()); Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
@ -159,7 +159,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
} }
} }
//[Benchmark] [Benchmark]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_ConvertInSameLoop() public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_ConvertInSameLoop()
{ {
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan()); Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());

27
tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs

@ -5,6 +5,7 @@ using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
{ {
[Config(typeof(Config.ShortClr))]
public class UInt32ToSingle public class UInt32ToSingle
{ {
private float[] data; private float[] data;
@ -66,8 +67,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
Unsafe.Add(ref bf, i) = v; Unsafe.Add(ref bf, i) = v;
} }
} }
// This code is not correct at all, it's just here as reference
[Benchmark] [Benchmark]
public void StandardSimdFromInt() public void StandardSimdFromInt()
{ {
@ -86,5 +86,28 @@ namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
Unsafe.Add(ref bf, i) = v; Unsafe.Add(ref bf, i) = v;
} }
} }
/// <summary>
/// Benchmark variant that converts the buffer in place through a single <see cref="Vector{T}"/> reference:
/// every <c>Vector&lt;float&gt;</c>-sized chunk of <c>data</c> is reinterpreted (bit cast) as
/// <c>Vector&lt;int&gt;</c>, converted to <c>Vector&lt;float&gt;</c>, multiplied by 1/255 and written back
/// to the same location.
/// Only the first <c>Count / Vector&lt;float&gt;.Count</c> whole vectors are processed; any trailing
/// elements are left untouched.
/// </summary>
[Benchmark]
public void StandardSimdFromInt_RefCast()
{
    int n = Count / Vector<float>.Count;

    // View the float buffer as a sequence of Vector<float> blocks.
    ref Vector<float> bf = ref Unsafe.As<float, Vector<float>>(ref this.data[0]);
    var scale = new Vector<float>(1f / 255f);

    for (int i = 0; i < n; i++)
    {
        ref Vector<float> fRef = ref Unsafe.Add(ref bf, i);

        // Reinterpret the stored bits as Int32 lanes (no numeric conversion yet) ...
        Vector<int> du = Vector.AsVectorInt32(fRef);

        // ... then convert the integer lanes to floats and scale them.
        Vector<float> v = Vector.ConvertToSingle(du);
        v *= scale;

        // Store the result over the source block.
        fRef = v;
    }
}
} }
} }

7
tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs

@ -3,8 +3,11 @@ using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
{ {
[Config(typeof(Config.ShortClr))]
public class WidenBytesToUInt32 public class WidenBytesToUInt32
{ {
private byte[] source; private byte[] source;
@ -25,8 +28,8 @@ namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
{ {
const int N = Count / 8; const int N = Count / 8;
ref SimdUtils.Octet.OfByte sBase = ref Unsafe.As<byte, SimdUtils.Octet.OfByte>(ref this.source[0]); ref Octet.OfByte sBase = ref Unsafe.As<byte, Octet.OfByte>(ref this.source[0]);
ref SimdUtils.Octet.OfUInt32 dBase = ref Unsafe.As<uint, SimdUtils.Octet.OfUInt32>(ref this.dest[0]); ref Octet.OfUInt32 dBase = ref Unsafe.As<uint, Octet.OfUInt32>(ref this.dest[0]);
for (int i = 0; i < N; i++) for (int i = 0; i < N; i++)
{ {

108
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -62,7 +62,7 @@ namespace SixLabors.ImageSharp.Tests.Common
{ {
float[] data = new float[Vector<float>.Count]; float[] data = new float[Vector<float>.Count];
var rnd = new Random(); var rnd = new Random(seed);
for (int i = 0; i < Vector<float>.Count; i++) for (int i = 0; i < Vector<float>.Count; i++)
{ {
@ -118,7 +118,7 @@ namespace SixLabors.ImageSharp.Tests.Common
[InlineData(1, 8)] [InlineData(1, 8)]
[InlineData(2, 16)] [InlineData(2, 16)]
[InlineData(3, 128)] [InlineData(3, 128)]
public void BulkConvertNormalizedFloatToByte_WithRoundedData(int seed, int count) public void BasicIntrinsics_BulkConvertNormalizedFloatToByte_WithRoundedData(int seed, int count)
{ {
if (this.SkipOnNonAvx2()) if (this.SkipOnNonAvx2())
{ {
@ -130,7 +130,7 @@ namespace SixLabors.ImageSharp.Tests.Common
byte[] dest = new byte[count]; byte[] dest = new byte[count];
SimdUtils.BulkConvertNormalizedFloatToByte(normalized, dest); SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByte(normalized, dest);
byte[] expected = orig.Select(f => (byte)(f)).ToArray(); byte[] expected = orig.Select(f => (byte)(f)).ToArray();
@ -142,7 +142,7 @@ namespace SixLabors.ImageSharp.Tests.Common
[InlineData(1, 8)] [InlineData(1, 8)]
[InlineData(2, 16)] [InlineData(2, 16)]
[InlineData(3, 128)] [InlineData(3, 128)]
public void BulkConvertNormalizedFloatToByte_WithNonRoundedData(int seed, int count) public void BasicIntrinsics_BulkConvertNormalizedFloatToByte_WithNonRoundedData(int seed, int count)
{ {
if (this.SkipOnNonAvx2()) if (this.SkipOnNonAvx2())
{ {
@ -153,87 +153,113 @@ namespace SixLabors.ImageSharp.Tests.Common
byte[] dest = new byte[count]; byte[] dest = new byte[count];
SimdUtils.BulkConvertNormalizedFloatToByte(source, dest); SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByte(source, dest);
byte[] expected = source.Select(f => (byte)Math.Round(f * 255f)).ToArray(); byte[] expected = source.Select(f => (byte)Math.Round(f * 255f)).ToArray();
Assert.Equal(expected, dest); Assert.Equal(expected, dest);
} }
// Theory data: element counts that are all multiples of 8, including the empty (0) case.
public static readonly TheoryData<int> ArraySizesDivisibleBy8 = new TheoryData<int> { 0, 8, 16, 1024 };
// Theory data: element counts that are all multiples of 32, including the empty (0) case.
public static readonly TheoryData<int> ArraySizesDivisibleBy32 = new TheoryData<int> { 0, 32, 512 };
// Theory data: element counts with no particular alignment. Covers the empty case, very small
// sizes, values on both sides of 8/16/64/256/512, and every size from 512 through 520.
// Each size appears exactly once so that every generated theory case is distinct.
public static readonly TheoryData<int> ArbitraryArraySizes =
    new TheoryData<int>
    {
        0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520,
    };
[Theory] [Theory]
[InlineData(1, 0)] [MemberData(nameof(ArraySizesDivisibleBy8))]
[InlineData(2, 32)] public void BasicIntrinsics_BulkConvertByteToNormalizedFloat(int count)
[InlineData(3, 128)]
public void BulkConvertByteToNormalizedFloat(int seed, int count)
{ {
if (this.SkipOnNonAvx2()) if (this.SkipOnNonAvx2())
{ {
return; return;
} }
byte[] source = new Random(seed).GenerateRandomByteArray(count); TestImpl_BulkConvertByteToNormalizedFloat(
float[] result = new float[count]; count,
float[] expected = source.Select(b => (float)b / 255f).ToArray(); (s, d) => SimdUtils.BasicIntrinsics256.BulkConvertByteToNormalizedFloat(s.Span, d.Span));
SimdUtils.BulkConvertByteToNormalizedFloat(source, result);
Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5f));
} }
[Theory] [Theory]
[InlineData(1, 0)] [MemberData(nameof(ArraySizesDivisibleBy32))]
[InlineData(2, 32)] public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat(int count)
[InlineData(3, 128)] {
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat(int seed, int count) TestImpl_BulkConvertByteToNormalizedFloat(
count,
(s, d) => SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(s.Span, d.Span));
}
/// <summary>
/// Runs the shared byte-to-normalized-float test routine against
/// <c>SimdUtils.BulkConvertByteToNormalizedFloat</c> for each size in <see cref="ArbitraryArraySizes"/>.
/// </summary>
[Theory]
[MemberData(nameof(ArbitraryArraySizes))]
public void BulkConvertByteToNormalizedFloat(int count)
{
    // Adapt the Span-based API to the Memory-based callback expected by the test routine.
    Action<Memory<byte>, Memory<float>> convert =
        (src, dst) => SimdUtils.BulkConvertByteToNormalizedFloat(src.Span, dst.Span);

    TestImpl_BulkConvertByteToNormalizedFloat(count, convert);
}
private static void TestImpl_BulkConvertByteToNormalizedFloat(
int count,
Action<Memory<byte>, Memory<float>> convert)
{ {
byte[] source = new Random(seed).GenerateRandomByteArray(count); byte[] source = new Random(count).GenerateRandomByteArray(count);
float[] result = new float[count]; float[] result = new float[count];
float[] expected = source.Select(b => (float)b / 255f).ToArray(); float[] expected = source.Select(b => (float)b / 255f).ToArray();
convert(source, result);
SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(source, result);
Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5f)); Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5f));
} }
public static readonly TheoryData<int> BulkConvertNormalizedFloatToByteClampOverflows_Data =
new TheoryData<int>
{
0, 64, 1024
};
[Theory] [Theory]
[MemberData(nameof(BulkConvertNormalizedFloatToByteClampOverflows_Data))] [MemberData(nameof(ArraySizesDivisibleBy8))]
public void BulkConvertNormalizedFloatToByteClampOverflows(int count) public void BasicIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count)
{ {
if (this.SkipOnNonAvx2()) if (this.SkipOnNonAvx2())
{ {
return; return;
} }
float[] source = new Random(count).GenerateRandomFloatArray(count, -0.1f, 1.2f); TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count,
byte[] expected = source.Select(NormalizedFloatToByte).ToArray(); (s, d) => SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span)
byte[] actual = new byte[count]; );
SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(source, actual);
Assert.Equal(expected, actual);
} }
[Theory] [Theory]
[MemberData(nameof(BulkConvertNormalizedFloatToByteClampOverflows_Data))] [MemberData(nameof(ArraySizesDivisibleBy32))]
public void ExtendedIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count) public void ExtendedIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count,
(s, d) => SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span)
);
}
/// <summary>
/// Runs the shared normalized-float-to-byte (clamping) test routine against
/// <c>SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows</c> for each size in
/// <see cref="ArbitraryArraySizes"/>.
/// </summary>
[Theory]
[MemberData(nameof(ArbitraryArraySizes))]
public void BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
    // Adapt the Span-based API to the Memory-based callback expected by the test routine.
    Action<Memory<float>, Memory<byte>> convert =
        (src, dst) => SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(src.Span, dst.Span);

    TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, convert);
}
private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
int count,
Action<Memory<float>, Memory<byte>> convert)
{ {
float[] source = new Random(count).GenerateRandomFloatArray(count, -0.1f, 1.2f); float[] source = new Random(count).GenerateRandomFloatArray(count, -0.1f, 1.2f);
byte[] expected = source.Select(NormalizedFloatToByte).ToArray(); byte[] expected = source.Select(NormalizedFloatToByte).ToArray();
byte[] actual = new byte[count]; byte[] actual = new byte[count];
SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(source, actual); convert(source, actual);
Assert.Equal(expected, actual); Assert.Equal(expected, actual);
} }
private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, f * 255f + 0.5f)); private static byte NormalizedFloatToByte(float f) => (byte)Math.Min(255f, Math.Max(0f, f * 255f + 0.5f));
[Theory] [Theory]

Loading…
Cancel
Save