Browse Source

Merge pull request #742 from SixLabors/af/simd-conversion

Clean up and optimize byte<->float and Rgba32 <-> Vector4 conversion
pull/746/head
Anton Firsov 7 years ago
committed by GitHub
parent
commit
6f5ebbbd6f
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 232
      src/ImageSharp/Common/Extensions/SimdUtils.cs
  2. 40
      src/ImageSharp/Common/Helpers/ImageMaths.cs
  3. 215
      src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
  4. 178
      src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
  5. 151
      src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
  6. 185
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  7. 109
      src/ImageSharp/Common/Tuples/Octet.cs
  8. 12
      src/ImageSharp/Common/Tuples/Vector4Pair.cs
  9. 4
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs
  10. 4
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs
  11. 6
      src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs
  12. 144
      src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
  13. 68
      tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
  14. 166
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
  15. 8
      tests/ImageSharp.Benchmarks/General/BasicMath/Abs.cs
  16. 70
      tests/ImageSharp.Benchmarks/General/BasicMath/ClampFloat.cs
  17. 12
      tests/ImageSharp.Benchmarks/General/BasicMath/ClampInt32IntoByte.cs
  18. 23
      tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoConstant.cs
  19. 32
      tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoVariable.cs
  20. 3
      tests/ImageSharp.Benchmarks/General/BasicMath/Pow.cs
  21. 19
      tests/ImageSharp.Benchmarks/General/Modulus.cs
  22. 113
      tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs
  23. 64
      tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs
  24. 146
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
  25. 68
      tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs
  26. 1340
      tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs
  27. 16
      tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs
  28. 2
      tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs

232
src/ImageSharp/Common/Extensions/SimdUtils.cs

@ -1,232 +0,0 @@
// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <summary>
/// Various extension and utility methods for <see cref="Vector4"/> and <see cref="Vector{T}"/> utilizing SIMD capabilities
/// </summary>
internal static class SimdUtils
{
/// <summary>
/// Gets a value indicating whether <see cref="Vector{T}"/> holds exactly 8 elements for both
/// <see cref="float"/> and <see cref="int"/>, i.e. whether both vector widths are 256 bits.
/// </summary>
public static bool IsAvx2CompatibleArchitecture
{
    get
    {
        // 8 lanes of a 32-bit element type == 256 bits.
        const int LanesIn256Bits = 8;
        return Vector<float>.Count == LanesIn256Bits && Vector<int>.Count == LanesIn256Bits;
    }
}
/// <summary>
/// Throws when <see cref="IsAvx2CompatibleArchitecture"/> is false.
/// </summary>
/// <param name="operation">Name of the guarded operation; used as the prefix of the exception message.</param>
/// <exception cref="NotSupportedException">The current environment is not AVX2 compatible.</exception>
internal static void GuardAvx2(string operation)
{
    if (IsAvx2CompatibleArchitecture)
    {
        return;
    }

    throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
}
/// <summary>
/// Pushes every component of 'v' away from zero by up to one half, so that a subsequent
/// truncating conversion to <see cref="int"/> behaves like rounding.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector4 PseudoRound(this Vector4 v)
{
    // Clamping to [-1, 1] yields -1 / +1 for components with magnitude >= 1
    // and the component itself for smaller magnitudes.
    Vector4 lowerBound = new Vector4(-1);
    Vector4 upperBound = new Vector4(1);
    Vector4 offset = Vector4.Clamp(v, lowerBound, upperBound) * 0.5f;
    return v + offset;
}
/// <summary>
/// Rounds every element of 'x' to the nearest integer, with <see cref="MidpointRounding.ToEven"/> semantics.
/// Source:
/// <see>
/// <cref>https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110</cref>
/// </see>
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector<float> FastRound(this Vector<float> x)
{
    // 0x80000000 in every lane: isolates the sign bit of each float.
    Vector<float> signMask = Vector.AsVectorSingle(new Vector<int>(int.MinValue));

    // 2^23 carrying the sign of the corresponding element. Adding it pushes the
    // fractional bits out of the mantissa; subtracting it again leaves the rounded value.
    Vector<float> signedMagic = (signMask & x) | new Vector<float>(8388608.0f);

    Vector<float> shifted = x + signedMagic;
    return shifted - signedMagic;
}
/// <summary>
/// Converts 'source.Length' <see cref="float"/> values normalized into [0..1] from 'source' into the 'dest' buffer of <see cref="byte"/> values.
/// The values are scaled up into [0-255] and rounded. No clamping is applied by this method.
/// Requires an AVX2 compatible environment (see <see cref="GuardAvx2"/>); 'source.Length' is expected to be
/// divisible by Vector&lt;float&gt;.Count (checked through DebugGuard only). The length of 'dest' is not validated here.
/// Based on:
/// <see>
/// <cref>http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions</cref>
/// </see>
/// </summary>
/// <param name="source">The normalized float values to convert.</param>
/// <param name="dest">The buffer receiving one byte per source element.</param>
/// <exception cref="NotSupportedException">Thrown by <see cref="GuardAvx2"/> when the environment is not AVX2 compatible.</exception>
internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
{
GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");
if (source.Length == 0)
{
return;
}
// Reinterpret the spans as sequences of 8-element groups: 8 floats per Vector<float>, 8 bytes per Octet.OfByte.
ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 8;
// At magnitude 32768 (2^15) the spacing of single precision floats is 1/256, so after adding
// x * (255 / 256) the lowest 8 mantissa bits hold x * 255 rounded to an integer.
Vector<float> magick = new Vector<float>(32768.0f);
Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
// need to copy to a temporary struct, because
// SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
// does not work. TODO: This might be a CoreClr bug, need to ask/report
var temp = default(Octet.OfUInt32);
// 'tempRef' aliases 'temp': writing a Vector<float> through it exposes the raw float bits as 8 uints.
ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
for (int i = 0; i < n; i++)
{
// Scalar equivalent of one lane:
// union { float f; uint32_t i; } u;
// u.f = 32768.0f + x * (255.0f / 256.0f);
// return (uint8_t)u.i;
Vector<float> x = Unsafe.Add(ref srcBase, i);
x = (x * scale) + magick;
tempRef = x;
ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
// Keeps only the lowest byte of each of the 8 uints.
d.LoadFrom(ref temp);
}
}
/// <summary>
/// Same as <see cref="BulkConvertNormalizedFloatToByte"/> but clamps the input values into [0..1] before conversion.
/// Requires an AVX2 compatible environment (see <see cref="GuardAvx2"/>); 'source.Length' is expected to be
/// divisible by Vector&lt;float&gt;.Count (checked through DebugGuard only). The length of 'dest' is not validated here.
/// </summary>
/// <param name="source">The float values to convert.</param>
/// <param name="dest">The buffer receiving one byte per source element.</param>
/// <exception cref="NotSupportedException">Thrown by <see cref="GuardAvx2"/> when the environment is not AVX2 compatible.</exception>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{
    // Report THIS operation in the exception message (previously the name of the non-clamping overload was passed).
    GuardAvx2(nameof(BulkConvertNormalizedFloatToByteClampOverflows));
    DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!");

    if (source.Length == 0)
    {
        return;
    }

    // Reinterpret the spans as sequences of 8-element groups: 8 floats per Vector<float>, 8 bytes per Octet.OfByte.
    ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
    int n = source.Length / 8;

    // At magnitude 32768 (2^15) the spacing of single precision floats is 1/256, so after adding
    // x * (255 / 256) the lowest 8 mantissa bits hold x * 255 rounded to an integer.
    Vector<float> magick = new Vector<float>(32768.0f);
    Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);

    // need to copy to a temporary struct, because
    // SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
    // does not work. TODO: This might be a CoreClr bug, need to ask/report
    var temp = default(Octet.OfUInt32);
    ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);

    for (int i = 0; i < n; i++)
    {
        // Scalar equivalent of one lane (after clamping):
        // union { float f; uint32_t i; } u;
        // u.f = 32768.0f + x * (255.0f / 256.0f);
        // return (uint8_t)u.i;
        Vector<float> x = Unsafe.Add(ref srcBase, i);

        // Clamp into [0..1] so that only the lowest mantissa byte can be affected.
        x = Vector.Max(x, Vector<float>.Zero);
        x = Vector.Min(x, Vector<float>.One);
        x = (x * scale) + magick;
        tempRef = x;

        ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);

        // Keeps only the lowest byte of each of the 8 uints.
        d.LoadFrom(ref temp);
    }
}
// TODO: Replace these with T4-d library level tuples!
internal static class Octet
{
/// <summary>
/// Eight <see cref="uint"/> values laid out contiguously (32 bytes in total, 4 bytes per field).
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 32)]
public struct OfUInt32
{
    [FieldOffset(0)]
    public uint V0;

    [FieldOffset(4)]
    public uint V1;

    [FieldOffset(8)]
    public uint V2;

    [FieldOffset(12)]
    public uint V3;

    [FieldOffset(16)]
    public uint V4;

    [FieldOffset(20)]
    public uint V5;

    [FieldOffset(24)]
    public uint V6;

    [FieldOffset(28)]
    public uint V7;

    /// <summary>
    /// Formats the eight values as a comma separated list in square brackets.
    /// </summary>
    public override string ToString() =>
        $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
}
/// <summary>
/// Eight <see cref="byte"/> values laid out contiguously (8 bytes in total).
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 8)]
public struct OfByte
{
    [FieldOffset(0)]
    public byte V0;

    [FieldOffset(1)]
    public byte V1;

    [FieldOffset(2)]
    public byte V2;

    [FieldOffset(3)]
    public byte V3;

    [FieldOffset(4)]
    public byte V4;

    [FieldOffset(5)]
    public byte V5;

    [FieldOffset(6)]
    public byte V6;

    [FieldOffset(7)]
    public byte V7;

    /// <summary>
    /// Formats the eight values as a comma separated list in square brackets.
    /// </summary>
    public override string ToString() =>
        $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";

    /// <summary>
    /// Fills this instance from 'i' by casting each of its eight <see cref="uint"/> values to <see cref="byte"/>.
    /// </summary>
    /// <param name="i">The source values.</param>
    public void LoadFrom(ref OfUInt32 i)
    {
        this.V0 = (byte)i.V0;
        this.V1 = (byte)i.V1;
        this.V2 = (byte)i.V2;
        this.V3 = (byte)i.V3;
        this.V4 = (byte)i.V4;
        this.V5 = (byte)i.V5;
        this.V6 = (byte)i.V6;
        this.V7 = (byte)i.V7;
    }
}
}
}
}

40
src/ImageSharp/Common/Helpers/ImageMaths.cs

@ -39,6 +39,28 @@ namespace SixLabors.ImageSharp
return (a / GreatestCommonDivisor(a, b)) * b;
}
/// <summary>
/// Returns the two lowest bits of <paramref name="x"/>, which equals <paramref name="x"/> % 4 for non-negative input.
/// For negative input the result is still in [0..3] and may therefore differ from the <c>%</c> operator.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static int Modulo4(int x)
{
    const int LowBitsMask = 3;
    return x & LowBitsMask;
}
/// <summary>
/// Returns the three lowest bits of <paramref name="x"/>, which equals <paramref name="x"/> % 8 for non-negative input.
/// For negative input the result is still in [0..7] and may therefore differ from the <c>%</c> operator.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static int Modulo8(int x)
{
    const int LowBitsMask = 7;
    return x & LowBitsMask;
}
/// <summary>
/// Fast (x mod m) calculation implemented as a bit mask with (<paramref name="m"/> - 1).
/// The result is only a modulus when <paramref name="m"/> is a power of 2; this is not validated.
/// </summary>
/// <param name="x">The value to reduce.</param>
/// <param name="m">The modulus; should be a power of 2.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static int ModuloP2(int x, int m) => x & (m - 1);
/// <summary>
/// Returns the absolute value of a 32-bit signed integer. Uses bit shifting to speed up the operation.
/// </summary>
@ -46,7 +68,7 @@ namespace SixLabors.ImageSharp
/// A number that is greater than <see cref="int.MinValue"/>, but less than or equal to <see cref="int.MaxValue"/>
/// </param>
/// <returns>The <see cref="int"/></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[MethodImpl(InliningOptions.ShortMethod)]
public static int FastAbs(int x)
{
int y = x >> 31;
@ -58,7 +80,7 @@ namespace SixLabors.ImageSharp
/// </summary>
/// <param name="x">A single-precision floating-point number</param>
/// <returns>The number <paramref name="x" /> raised to the power of 2.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[MethodImpl(InliningOptions.ShortMethod)]
public static float Pow2(float x) => x * x;
/// <summary>
@ -66,7 +88,7 @@ namespace SixLabors.ImageSharp
/// </summary>
/// <param name="x">A single-precision floating-point number</param>
/// <returns>The number <paramref name="x" /> raised to the power of 3.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[MethodImpl(InliningOptions.ShortMethod)]
public static float Pow3(float x) => x * x * x;
/// <summary>
@ -77,7 +99,7 @@ namespace SixLabors.ImageSharp
/// <returns>
/// The <see cref="int"/>
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[MethodImpl(InliningOptions.ShortMethod)]
public static int GetBitsNeededForColorDepth(int colors) => Math.Max(1, (int)Math.Ceiling(Math.Log(colors, 2)));
/// <summary>
@ -85,7 +107,7 @@ namespace SixLabors.ImageSharp
/// </summary>
/// <param name="bitDepth">The bit depth.</param>
/// <returns>The <see cref="int"/></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[MethodImpl(InliningOptions.ShortMethod)]
public static int GetColorCountForBitDepth(int bitDepth) => 1 << bitDepth;
/// <summary>
@ -94,7 +116,7 @@ namespace SixLabors.ImageSharp
/// <param name="x">The x provided to G(x).</param>
/// <param name="sigma">The spread of the blur.</param>
/// <returns>The Gaussian G(x)</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[MethodImpl(InliningOptions.ShortMethod)]
public static float Gaussian(float x, float sigma)
{
const float Numerator = 1.0f;
@ -117,7 +139,7 @@ namespace SixLabors.ImageSharp
/// <returns>
/// The sine cardinal of <paramref name="f" />.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[MethodImpl(InliningOptions.ShortMethod)]
public static float SinC(float f)
{
if (MathF.Abs(f) > Constants.Epsilon)
@ -140,7 +162,7 @@ namespace SixLabors.ImageSharp
/// <returns>
/// The <see cref="float"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[MethodImpl(InliningOptions.ShortMethod)]
public static float GetBcValue(float x, float b, float c)
{
if (x < 0F)
@ -176,7 +198,7 @@ namespace SixLabors.ImageSharp
/// <returns>
/// The bounding <see cref="Rectangle"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[MethodImpl(InliningOptions.ShortMethod)]
public static Rectangle GetBoundingRectangle(Point topLeft, Point bottomRight) => new Rectangle(topLeft.X, topLeft.Y, bottomRight.X - topLeft.X, bottomRight.Y - topLeft.Y);
/// <summary>

215
src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs

@ -0,0 +1,215 @@
// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Tuples;
// ReSharper disable MemberHidesStaticFromOuterClass
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
/// <summary>
/// Implementation with 256bit / AVX2 intrinsics NOT depending on newer API-s (Vector.Widen etc.)
/// </summary>
public static class BasicIntrinsics256
{
/// <summary>
/// Gets a value indicating whether this implementation can be used.
/// Initialized once from <see cref="IsAvx2CompatibleArchitecture"/>.
/// </summary>
public static bool IsAvailable { get; } = IsAvx2CompatibleArchitecture;
/// <summary>
/// Runs <see cref="BulkConvertByteToNormalizedFloat"/> on the longest leading part of the input whose length
/// is a multiple of 8, then advances both spans so that only the unprocessed remainder is left in them.
/// Leaves the spans untouched when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source bytes; holds the unprocessed remainder on return.</param>
/// <param name="dest">The destination floats; holds the unwritten remainder on return.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (!IsAvailable)
    {
        return;
    }

    int bulkLength = source.Length - ImageMaths.Modulo8(source.Length);
    if (bulkLength <= 0)
    {
        // Fewer than 8 elements: nothing to do for this implementation.
        return;
    }

    ReadOnlySpan<byte> bulkSource = source.Slice(0, bulkLength);
    Span<float> bulkDest = dest.Slice(0, bulkLength);
    BulkConvertByteToNormalizedFloat(bulkSource, bulkDest);

    source = source.Slice(bulkLength);
    dest = dest.Slice(bulkLength);
}
/// <summary>
/// Runs <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> on the longest leading part of the input whose
/// length is a multiple of 8, then advances both spans so that only the unprocessed remainder is left in them.
/// Leaves the spans untouched when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source floats; holds the unprocessed remainder on return.</param>
/// <param name="dest">The destination bytes; holds the unwritten remainder on return.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (!IsAvailable)
    {
        return;
    }

    int bulkLength = source.Length - ImageMaths.Modulo8(source.Length);
    if (bulkLength <= 0)
    {
        // Fewer than 8 elements: nothing to do for this implementation.
        return;
    }

    ReadOnlySpan<float> bulkSource = source.Slice(0, bulkLength);
    Span<byte> bulkDest = dest.Slice(0, bulkLength);
    BulkConvertNormalizedFloatToByteClampOverflows(bulkSource, bulkDest);

    source = source.Slice(bulkLength);
    dest = dest.Slice(bulkLength);
}
/// <summary>
/// SIMD optimized implementation for <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>.
/// Works only with span Length divisible by 8.
/// Each byte b is converted to b / 255 in two passes over 'dest': first the bytes are widened into the
/// destination memory as 32 bit integers, then those integers are turned into normalized floats in place.
/// Implementation adapted from:
/// http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions
/// http://stackoverflow.com/a/536278
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of floats.</param>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
// NOTE(review): both Verify* helpers are defined outside this view; presumably they check for AVX2 support
// and that the spans have equal length divisible by 8 - confirm against their definitions.
VerifyIsAvx2Compatible(nameof(BulkConvertByteToNormalizedFloat));
VerifySpanInput(source, dest, 8);
var bVec = new Vector<float>(256.0f / 255.0f);
var magicFloat = new Vector<float>(32768.0f);
var magicInt = new Vector<uint>(1191182336); // reinterpreted value of 32768.0f (0x47000000)
var mask = new Vector<uint>(255);
// View the source as groups of 8 bytes and the destination as groups of 8 x 32 bit values;
// the destination is addressed both as integer octets and as float vectors over the same memory.
ref Octet.OfByte sourceBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(source));
ref Octet.OfUInt32 destBaseAsWideOctet = ref Unsafe.As<float, Octet.OfUInt32>(ref MemoryMarshal.GetReference(dest));
ref Vector<float> destBaseAsFloat = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref destBaseAsWideOctet);
int n = dest.Length / 8;
// Pass 1: store each group of 8 source bytes into the destination memory as 8 uints.
// NOTE(review): Octet.OfUInt32.LoadFrom is defined outside this view; presumably it widens each byte - confirm.
for (int i = 0; i < n; i++)
{
ref Octet.OfByte s = ref Unsafe.Add(ref sourceBase, i);
ref Octet.OfUInt32 d = ref Unsafe.Add(ref destBaseAsWideOctet, i);
d.LoadFrom(ref s);
}
// Pass 2: convert the stored integers to normalized floats in place.
for (int i = 0; i < n; i++)
{
ref Vector<float> df = ref Unsafe.Add(ref destBaseAsFloat, i);
var vi = Vector.AsVectorUInt32(df);
// Keep the low 8 bits and OR them into the bit pattern of 32768.0f: at that magnitude one
// mantissa step is 1/256, so the resulting float equals 32768 + b / 256.
vi &= mask;
vi |= magicInt;
var vf = Vector.AsVectorSingle(vi);
// (b / 256) * (256 / 255) == b / 255
vf = (vf - magicFloat) * bVec;
df = vf;
}
}
/// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> which is faster on older runtimes.
/// Input values are clamped into [0..1], scaled up into [0-255] and rounded.
/// The spans are processed in groups of 8 elements.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of bytes.</param>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{
// NOTE(review): both Verify* helpers are defined outside this view; presumably they check for AVX2 support
// and that the spans have equal length divisible by 8 - confirm against their definitions.
VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByteClampOverflows));
VerifySpanInput(source, dest, 8);
if (source.Length == 0)
{
return;
}
// Reinterpret the spans as sequences of 8-element groups: 8 floats per Vector<float>, 8 bytes per Octet.OfByte.
ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 8;
// At magnitude 32768 (2^15) the spacing of single precision floats is 1/256, so after adding
// x * (255 / 256) the lowest 8 mantissa bits hold x * 255 rounded to an integer.
Vector<float> magick = new Vector<float>(32768.0f);
Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
// need to copy to a temporary struct, because
// SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
// does not work. TODO: This might be a CoreClr bug, need to ask/report
var temp = default(Octet.OfUInt32);
// 'tempRef' aliases 'temp': writing a Vector<float> through it exposes the raw float bits as 8 uints.
ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
for (int i = 0; i < n; i++)
{
// Scalar equivalent of one lane (after clamping):
// union { float f; uint32_t i; } u;
// u.f = 32768.0f + x * (255.0f / 256.0f);
// return (uint8_t)u.i;
Vector<float> x = Unsafe.Add(ref srcBase, i);
// Clamp into [0..1] before applying the magic number trick.
x = Vector.Max(x, Vector<float>.Zero);
x = Vector.Min(x, Vector<float>.One);
x = (x * scale) + magick;
tempRef = x;
ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
// NOTE(review): Octet.OfByte.LoadFrom is defined outside this view; presumably it keeps the lowest byte of each uint - confirm.
d.LoadFrom(ref temp);
}
}
/// <summary>
/// Convert all <see cref="float"/> values normalized into [0..1] from 'source'
/// into 'dest' buffer of <see cref="byte"/>. The values are scaled up into [0-255] and rounded.
/// No clamping is applied by this method.
/// This implementation is SIMD optimized and works only when span Length is divisible by 8.
/// Based on:
/// <see>
/// <cref>http://lolengine.net/blog/2011/3/20/understanding-fast-float-integer-conversions</cref>
/// </see>
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of bytes.</param>
internal static void BulkConvertNormalizedFloatToByte(ReadOnlySpan<float> source, Span<byte> dest)
{
// NOTE(review): both Verify* helpers are defined outside this view; presumably they check for AVX2 support
// and that the spans have equal length divisible by 8 - confirm against their definitions.
VerifyIsAvx2Compatible(nameof(BulkConvertNormalizedFloatToByte));
VerifySpanInput(source, dest, 8);
if (source.Length == 0)
{
return;
}
// Reinterpret the spans as sequences of 8-element groups: 8 floats per Vector<float>, 8 bytes per Octet.OfByte.
ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 8;
// At magnitude 32768 (2^15) the spacing of single precision floats is 1/256, so after adding
// x * (255 / 256) the lowest 8 mantissa bits hold x * 255 rounded to an integer.
Vector<float> magick = new Vector<float>(32768.0f);
Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
// need to copy to a temporary struct, because
// SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
// does not work. TODO: This might be a CoreClr bug, need to ask/report
var temp = default(Octet.OfUInt32);
// 'tempRef' aliases 'temp': writing a Vector<float> through it exposes the raw float bits as 8 uints.
ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
for (int i = 0; i < n; i++)
{
// Scalar equivalent of one lane:
// union { float f; uint32_t i; } u;
// u.f = 32768.0f + x * (255.0f / 256.0f);
// return (uint8_t)u.i;
Vector<float> x = Unsafe.Add(ref srcBase, i);
x = (x * scale) + magick;
tempRef = x;
ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
// NOTE(review): Octet.OfByte.LoadFrom is defined outside this view; presumably it keeps the lowest byte of each uint - confirm.
d.LoadFrom(ref temp);
}
}
}
}
}

178
src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs

@ -0,0 +1,178 @@
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// ReSharper disable MemberHidesStaticFromOuterClass
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
/// <summary>
/// Implementation methods based on newer <see cref="Vector{T}"/> API-s (Vector.Widen, Vector.Narrow, Vector.ConvertTo*).
/// Accelerated only on RyuJIT having dotnet/coreclr#10662 merged (.NET Core 2.1+ .NET 4.7.2+)
/// See:
/// https://github.com/dotnet/coreclr/pull/10662
/// API Proposal:
/// https://github.com/dotnet/corefx/issues/15957
/// </summary>
public static class ExtendedIntrinsics
{
/// <summary>
/// Gets a value indicating whether this implementation can be used.
/// When compiled with NETCOREAPP2_1 defined this is the value of <see cref="Vector.IsHardwareAccelerated"/>;
/// for every other build target it is always false.
/// </summary>
public static bool IsAvailable { get; } =
#if NETCOREAPP2_1
// TODO: Also available in .NET 4.7.2, we need to add a build target!
Vector.IsHardwareAccelerated;
#else
false;
#endif
/// <summary>
/// Runs <see cref="BulkConvertByteToNormalizedFloat"/> on the longest leading part of the input whose length is a
/// multiple of Vector&lt;byte&gt;.Count, then advances both spans so that only the unprocessed remainder is left in them.
/// Leaves the spans untouched when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source bytes; holds the unprocessed remainder on return.</param>
/// <param name="dest">The destination floats; holds the unwritten remainder on return.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (!IsAvailable)
    {
        return;
    }

    int bulkLength = source.Length - ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);
    if (bulkLength <= 0)
    {
        // Less than one full Vector<byte>: nothing to do for this implementation.
        return;
    }

    ReadOnlySpan<byte> bulkSource = source.Slice(0, bulkLength);
    Span<float> bulkDest = dest.Slice(0, bulkLength);
    BulkConvertByteToNormalizedFloat(bulkSource, bulkDest);

    source = source.Slice(bulkLength);
    dest = dest.Slice(bulkLength);
}
/// <summary>
/// Runs <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> on the longest leading part of the input whose length
/// is a multiple of Vector&lt;byte&gt;.Count, then advances both spans so that only the unprocessed remainder is left in them.
/// Leaves the spans untouched when <see cref="IsAvailable"/> is false.
/// </summary>
/// <param name="source">The source floats; holds the unprocessed remainder on return.</param>
/// <param name="dest">The destination bytes; holds the unwritten remainder on return.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    if (!IsAvailable)
    {
        return;
    }

    int bulkLength = source.Length - ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);
    if (bulkLength <= 0)
    {
        // Less than one full Vector<byte>: nothing to do for this implementation.
        return;
    }

    ReadOnlySpan<float> bulkSource = source.Slice(0, bulkLength);
    Span<byte> bulkDest = dest.Slice(0, bulkLength);
    BulkConvertNormalizedFloatToByteClampOverflows(bulkSource, bulkDest);

    source = source.Slice(bulkLength);
    dest = dest.Slice(bulkLength);
}
/// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/>, which is faster on new RyuJIT runtime.
/// Every Vector&lt;byte&gt; of the source is widened twice (byte -> ushort -> uint) and converted into
/// four consecutive Vector&lt;float&gt; values of the destination.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of floats.</param>
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    VerifySpanInput(source, dest, Vector<byte>.Count);

    int blockCount = dest.Length / Vector<byte>.Count;

    ref Vector<byte> byteBlocks = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
    ref Vector<float> floatBlocks = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));

    for (int block = 0; block < blockCount; block++)
    {
        Vector<byte> packed = Unsafe.Add(ref byteBlocks, block);

        Vector.Widen(packed, out Vector<ushort> lowerHalf, out Vector<ushort> upperHalf);
        Vector.Widen(lowerHalf, out Vector<uint> quarter0, out Vector<uint> quarter1);
        Vector.Widen(upperHalf, out Vector<uint> quarter2, out Vector<uint> quarter3);

        // One byte vector expands into four float vectors.
        ref Vector<float> target = ref Unsafe.Add(ref floatBlocks, block * 4);
        target = ConvertToSingle(quarter0);
        Unsafe.Add(ref target, 1) = ConvertToSingle(quarter1);
        Unsafe.Add(ref target, 2) = ConvertToSingle(quarter2);
        Unsafe.Add(ref target, 3) = ConvertToSingle(quarter3);
    }
}
/// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime.
/// Four consecutive Vector&lt;float&gt; values of the source are converted and narrowed twice
/// (uint -> ushort -> byte) into a single Vector&lt;byte&gt; of the destination.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of bytes.</param>
internal static void BulkConvertNormalizedFloatToByteClampOverflows(
    ReadOnlySpan<float> source,
    Span<byte> dest)
{
    VerifySpanInput(source, dest, Vector<byte>.Count);

    int blockCount = dest.Length / Vector<byte>.Count;

    ref Vector<float> floatBlocks = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
    ref Vector<byte> byteBlocks = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(dest));

    for (int block = 0; block < blockCount; block++)
    {
        // Four float vectors collapse into one byte vector.
        ref Vector<float> first = ref Unsafe.Add(ref floatBlocks, block * 4);

        Vector<uint> quarter0 = ConvertToUInt32(first);
        Vector<uint> quarter1 = ConvertToUInt32(Unsafe.Add(ref first, 1));
        Vector<uint> quarter2 = ConvertToUInt32(Unsafe.Add(ref first, 2));
        Vector<uint> quarter3 = ConvertToUInt32(Unsafe.Add(ref first, 3));

        Vector<ushort> lowerHalf = Vector.Narrow(quarter0, quarter1);
        Vector<ushort> upperHalf = Vector.Narrow(quarter2, quarter3);

        Unsafe.Add(ref byteBlocks, block) = Vector.Narrow(lowerHalf, upperHalf);
    }
}
/// <summary>
/// Scales the elements of 'vf' by 255, adds 0.5, clamps the result into [0..255]
/// and converts it to 32 bit integers.
/// </summary>
/// <param name="vf">The normalized float values.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<uint> ConvertToUInt32(Vector<float> vf)
{
    var byteMax = new Vector<float>(255f);

    // The +0.5 offset turns the truncating integer conversion below into rounding.
    Vector<float> scaled = (vf * byteMax) + new Vector<float>(0.5f);
    Vector<float> clamped = Vector.Min(Vector.Max(scaled, Vector<float>.Zero), byteMax);

    return Vector.AsVectorUInt32(Vector.ConvertToInt32(clamped));
}
/// <summary>
/// Converts the elements of 'u' to <see cref="float"/> and multiplies them by 1/255.
/// </summary>
/// <param name="u">The integer values; reinterpreted as signed 32 bit integers for the conversion.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToSingle(Vector<uint> u)
{
    Vector<float> asFloat = Vector.ConvertToSingle(Vector.AsVectorInt32(u));
    return asFloat * new Vector<float>(1f / 255f);
}
}
}
}

151
src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs

@ -0,0 +1,151 @@
// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// ReSharper disable MemberHidesStaticFromOuterClass
namespace SixLabors.ImageSharp
{
internal static partial class SimdUtils
{
/// <summary>
/// Fallback implementation based on <see cref="Vector4"/> (128bit).
/// For <see cref="Vector4"/>, efficient software fallback implementations are present,
/// and we hope that even mono's JIT is able to emit SIMD instructions for that type :P
/// </summary>
public static class FallbackIntrinsics128
{
/// <summary>
/// Runs <see cref="BulkConvertByteToNormalizedFloat"/> on the longest leading part of the input whose length
/// is a multiple of 4, then advances both spans so that only the unprocessed remainder is left in them.
/// </summary>
/// <param name="source">The source bytes; holds the unprocessed remainder on return.</param>
/// <param name="dest">The destination floats; holds the unwritten remainder on return.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce(
    ref ReadOnlySpan<byte> source,
    ref Span<float> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    int bulkLength = source.Length - ImageMaths.Modulo4(source.Length);
    if (bulkLength <= 0)
    {
        // Fewer than 4 elements: everything is left to the caller.
        return;
    }

    ReadOnlySpan<byte> bulkSource = source.Slice(0, bulkLength);
    Span<float> bulkDest = dest.Slice(0, bulkLength);
    BulkConvertByteToNormalizedFloat(bulkSource, bulkDest);

    source = source.Slice(bulkLength);
    dest = dest.Slice(bulkLength);
}
/// <summary>
/// Runs <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> on the longest leading part of the input whose
/// length is a multiple of 4, then advances both spans so that only the unprocessed remainder is left in them.
/// </summary>
/// <param name="source">The source floats; holds the unprocessed remainder on return.</param>
/// <param name="dest">The destination bytes; holds the unwritten remainder on return.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
    ref ReadOnlySpan<float> source,
    ref Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    int bulkLength = source.Length - ImageMaths.Modulo4(source.Length);
    if (bulkLength <= 0)
    {
        // Fewer than 4 elements: everything is left to the caller.
        return;
    }

    ReadOnlySpan<float> bulkSource = source.Slice(0, bulkLength);
    Span<byte> bulkDest = dest.Slice(0, bulkLength);
    BulkConvertNormalizedFloatToByteClampOverflows(bulkSource, bulkDest);

    source = source.Slice(bulkLength);
    dest = dest.Slice(bulkLength);
}
/// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/> using <see cref="Vector4"/>.
/// Processes the input in groups of 4 elements, multiplying every byte value by 1/255.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of floats.</param>
[MethodImpl(InliningOptions.ColdPath)]
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
    VerifySpanInput(source, dest, 4);

    int quadCount = dest.Length / 4;
    if (quadCount == 0)
    {
        return;
    }

    const float Scale = 1f / 255f;

    ref ByteVector4 sourceQuads = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(source));
    ref Vector4 destQuads = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(dest));

    for (int quad = 0; quad < quadCount; quad++)
    {
        ref ByteVector4 packed = ref Unsafe.Add(ref sourceQuads, quad);
        var widened = new Vector4(packed.X, packed.Y, packed.Z, packed.W);
        Unsafe.Add(ref destQuads, quad) = widened * Scale;
    }
}
/// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> using <see cref="Vector4"/>.
/// Processes the input in groups of 4 elements: every value is scaled by 255, offset by 0.5,
/// clamped into [0..255] and cast to <see cref="byte"/>.
/// </summary>
/// <param name="source">The source span of floats.</param>
/// <param name="dest">The destination span of bytes.</param>
[MethodImpl(InliningOptions.ColdPath)]
internal static void BulkConvertNormalizedFloatToByteClampOverflows(
    ReadOnlySpan<float> source,
    Span<byte> dest)
{
    VerifySpanInput(source, dest, 4);

    int quadCount = source.Length / 4;
    if (quadCount == 0)
    {
        return;
    }

    var half = new Vector4(0.5f);
    var maxBytes = new Vector4(255f);

    ref Vector4 sourceQuads = ref Unsafe.As<float, Vector4>(ref MemoryMarshal.GetReference(source));
    ref ByteVector4 destQuads = ref Unsafe.As<byte, ByteVector4>(ref MemoryMarshal.GetReference(dest));

    for (int quad = 0; quad < quadCount; quad++)
    {
        Vector4 scaled = (Unsafe.Add(ref sourceQuads, quad) * maxBytes) + half;

        // I'm not sure if Vector4.Clamp() is properly implemented with intrinsics.
        Vector4 clamped = Vector4.Min(maxBytes, Vector4.Max(Vector4.Zero, scaled));

        ref ByteVector4 packed = ref Unsafe.Add(ref destQuads, quad);
        packed.X = (byte)clamped.X;
        packed.Y = (byte)clamped.Y;
        packed.Z = (byte)clamped.Z;
        packed.W = (byte)clamped.W;
    }
}
/// <summary>
/// Four consecutive bytes; used to read or write groups of 4 bytes of a span through
/// a reinterpreted reference. The sequential layout and the field order must be preserved.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
private struct ByteVector4
{
// First byte of the group.
public byte X;
// Second byte of the group.
public byte Y;
// Third byte of the group.
public byte Z;
// Fourth byte of the group.
public byte W;
}
}
}
}

185
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -0,0 +1,185 @@
// Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp
{
/// <summary>
/// Various extension and utility methods for <see cref="Vector4"/> and <see cref="Vector{T}"/> utilizing SIMD capabilities
/// </summary>
internal static partial class SimdUtils
{
/// <summary>
/// Gets a value indicating whether <see cref="Vector"/> operations are hardware accelerated and both
/// the float and the integer vectors hold 8 elements, i.e. the registers are 256 bits wide (AVX2).
/// </summary>
public static bool IsAvx2CompatibleArchitecture { get; } =
Vector.IsHardwareAccelerated && Vector<float>.Count == 8 && Vector<int>.Count == 8;
/// <summary>
/// Pushes every component of 'v' away from zero by up to one half, so that a subsequent
/// truncating conversion to <see cref="int"/> behaves like rounding.
/// </summary>
/// <param name="v">The vector</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector4 PseudoRound(this Vector4 v)
{
    // Clamping to [-1, 1] yields -1 / +1 for components with magnitude >= 1
    // and the component itself for smaller magnitudes.
    Vector4 lowerBound = new Vector4(-1);
    Vector4 upperBound = new Vector4(1);
    Vector4 offset = Vector4.Clamp(v, lowerBound, upperBound) * 0.5f;
    return v + offset;
}
/// <summary>
/// Rounds every element of 'v' to the nearest integer, with <see cref="MidpointRounding.ToEven"/> semantics.
/// Source:
/// <see>
/// <cref>https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110</cref>
/// </see>
/// </summary>
/// <param name="v">The vector</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector<float> FastRound(this Vector<float> v)
{
    // 0x80000000 in every lane: isolates the sign bit of each float.
    Vector<float> signMask = Vector.AsVectorSingle(new Vector<int>(int.MinValue));

    // 2^23 carrying the sign of the corresponding element. Adding it pushes the
    // fractional bits out of the mantissa; subtracting it again leaves the rounded value.
    Vector<float> signedMagic = (signMask & v) | new Vector<float>(8388608.0f);

    Vector<float> shifted = v + signedMagic;
    return shifted - signedMagic;
}
/// <summary>
/// Converts all input <see cref="byte"/>-s to <see cref="float"/>-s normalized into [0..1].
/// <paramref name="source"/> should be of the same size as <paramref name="dest"/>,
/// but there are no restrictions on the span's length.
/// The work is split between implementations: a wide SIMD implementation first, then the
/// <see cref="Vector4"/> based fallback, then a scalar step for the elements that are still left.
/// </summary>
/// <param name="source">The source span of bytes</param>
/// <param name="dest">The destination span of floats</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");
// Each *Reduce call converts as many leading elements as it can and re-slices both
// spans to the unconverted remainder, so the order of the calls below matters.
#if NETCOREAPP2_1
ExtendedIntrinsics.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
#else
BasicIntrinsics256.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
#endif
// Handles everything left over in groups of 4 (all of the input when the step above did nothing).
FallbackIntrinsics128.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
// Deal with the remainder:
if (source.Length > 0)
{
// NOTE(review): 'Conver...' looks like a typo of 'Convert...'; renaming requires changing the definition as well.
ConverByteToNormalizedFloatRemainder(source, dest);
}
}
/// <summary>
/// Converts all <see cref="float"/> values normalized into [0..1] from 'source' into the 'dest' buffer of <see cref="byte"/>.
/// The values are scaled up into [0-255] and rounded, overflows are clamped.
/// <paramref name="source"/> should be of the same size as <paramref name="dest"/>,
/// but there are no restrictions on the span's length.
/// </summary>
/// <param name="source">The source span of floats</param>
/// <param name="dest">The destination span of bytes</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    // Both spans are handed over by reference to the "Reduce" methods.
    // NOTE(review): presumably each of them converts the part it can handle and shrinks
    // the spans to whatever is left - their bodies are not visible here, confirm.
#if NETCOREAPP2_1
    ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
#else
    BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
#endif
    FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);

    if (source.Length == 0)
    {
        return;
    }

    // Whatever is still left is converted element by element.
    ConvertNormalizedFloatToByteRemainder(source, dest);
}
/// <summary>
/// Converts the last 1, 2 or 3 bytes of <paramref name="source"/> into floats by dividing each by 255.
/// Spans of any other length are left untouched.
/// </summary>
/// <param name="source">The remaining source bytes</param>
/// <param name="dest">The remaining destination floats</param>
[MethodImpl(InliningOptions.ColdPath)]
private static void ConverByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> dest)
{
    int count = source.Length;
    if (count < 1 || count > 3)
    {
        return;
    }

    ref byte firstByte = ref MemoryMarshal.GetReference(source);
    ref float firstFloat = ref MemoryMarshal.GetReference(dest);

    // At most 3 elements are handled, so the writes are unrolled by hand
    // (highest index first) instead of paying for a loop.
    if (count == 3)
    {
        Unsafe.Add(ref firstFloat, 2) = Unsafe.Add(ref firstByte, 2) / 255f;
    }

    if (count >= 2)
    {
        Unsafe.Add(ref firstFloat, 1) = Unsafe.Add(ref firstByte, 1) / 255f;
    }

    firstFloat = firstByte / 255f;
}
/// <summary>
/// Converts the last 1, 2 or 3 floats of <paramref name="source"/> into bytes using <see cref="ConvertToByte"/>.
/// Spans of any other length are left untouched.
/// </summary>
/// <param name="source">The remaining source floats</param>
/// <param name="dest">The remaining destination bytes</param>
[MethodImpl(InliningOptions.ColdPath)]
private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan<float> source, Span<byte> dest)
{
    int count = source.Length;
    if (count < 1 || count > 3)
    {
        return;
    }

    ref float firstFloat = ref MemoryMarshal.GetReference(source);
    ref byte firstByte = ref MemoryMarshal.GetReference(dest);

    // Hand-unrolled, highest index first.
    if (count == 3)
    {
        Unsafe.Add(ref firstByte, 2) = ConvertToByte(Unsafe.Add(ref firstFloat, 2));
    }

    if (count >= 2)
    {
        Unsafe.Add(ref firstByte, 1) = ConvertToByte(Unsafe.Add(ref firstFloat, 1));
    }

    firstByte = ConvertToByte(firstFloat);
}
/// <summary>
/// Scales <paramref name="f"/> by 255, adds 0.5, clamps the result into [0, 255] and truncates it to a <see cref="byte"/>.
/// </summary>
/// <param name="f">The value to convert</param>
/// <returns>The converted byte value.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static byte ConvertToByte(float f)
{
    // The +0.5 makes the truncating cast below behave like rounding for in-range values.
    float scaled = (f * 255f) + 0.5f;
    return (byte)ComparableExtensions.Clamp(scaled, 0, 255f);
}
/// <summary>
/// Debug-only check that throws when <see cref="IsAvx2CompatibleArchitecture"/> is false.
/// </summary>
/// <param name="operation">The name of the operation, used in the exception message</param>
[Conditional("DEBUG")]
private static void VerifyIsAvx2Compatible(string operation)
{
    if (IsAvx2CompatibleArchitecture)
    {
        return;
    }

    throw new NotSupportedException($"{operation} is supported only on AVX2 CPU!");
}
/// <summary>
/// Debug-only validation of the spans passed to a bulk byte-to-float conversion:
/// both spans must have the same length, and that length must leave no remainder
/// when reduced by <paramref name="shouldBeDivisibleBy"/> through <c>ImageMaths.ModuloP2</c>.
/// </summary>
/// <param name="source">The source span of bytes</param>
/// <param name="dest">The destination span of floats</param>
/// <param name="shouldBeDivisibleBy">The required divisor of the span length (presumably a power of two, given the ModuloP2 helper - confirm)</param>
[Conditional("DEBUG")]
private static void VerifySpanInput(ReadOnlySpan<byte> source, Span<float> dest, int shouldBeDivisibleBy)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    // The lengths are equal at this point, so checking 'dest' covers 'source' as well.
    DebugGuard.IsTrue(
        ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
        nameof(source),
        $"length should be divisible by {shouldBeDivisibleBy}!");
}
/// <summary>
/// Debug-only validation of the spans passed to a bulk float-to-byte conversion:
/// both spans must have the same length, and that length must leave no remainder
/// when reduced by <paramref name="shouldBeDivisibleBy"/> through <c>ImageMaths.ModuloP2</c>.
/// </summary>
/// <param name="source">The source span of floats</param>
/// <param name="dest">The destination span of bytes</param>
/// <param name="shouldBeDivisibleBy">The required divisor of the span length (presumably a power of two, given the ModuloP2 helper - confirm)</param>
[Conditional("DEBUG")]
private static void VerifySpanInput(ReadOnlySpan<float> source, Span<byte> dest, int shouldBeDivisibleBy)
{
    DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!");

    // The lengths are equal at this point, so checking 'dest' covers 'source' as well.
    DebugGuard.IsTrue(
        ImageMaths.ModuloP2(dest.Length, shouldBeDivisibleBy) == 0,
        nameof(source),
        $"length should be divisible by {shouldBeDivisibleBy}!");
}
}
}

109
src/ImageSharp/Common/Tuples/Octet.cs

@ -0,0 +1,109 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Tuples
{
/// <summary>
/// Groups value tuples that hold exactly 8 elements of a given primitive type.
/// </summary>
internal static class Octet
{
    /// <summary>
    /// Tuple of 8 <see cref="uint"/> values stored back to back (explicit layout, 8 * sizeof(uint) bytes).
    /// </summary>
    [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))]
    public struct OfUInt32
    {
        [FieldOffset(0 * sizeof(uint))]
        public uint V0;

        [FieldOffset(1 * sizeof(uint))]
        public uint V1;

        [FieldOffset(2 * sizeof(uint))]
        public uint V2;

        [FieldOffset(3 * sizeof(uint))]
        public uint V3;

        [FieldOffset(4 * sizeof(uint))]
        public uint V4;

        [FieldOffset(5 * sizeof(uint))]
        public uint V5;

        [FieldOffset(6 * sizeof(uint))]
        public uint V6;

        [FieldOffset(7 * sizeof(uint))]
        public uint V7;

        /// <inheritdoc />
        public override string ToString() =>
            $"{nameof(Octet)}.{nameof(OfUInt32)}({this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7})";

        /// <summary>
        /// Copies the 8 bytes of <paramref name="src"/> into this tuple, widening each one to <see cref="uint"/>.
        /// </summary>
        /// <param name="src">The tuple of bytes to read from</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        public void LoadFrom(ref OfByte src)
        {
            this.V0 = src.V0;
            this.V1 = src.V1;
            this.V2 = src.V2;
            this.V3 = src.V3;
            this.V4 = src.V4;
            this.V5 = src.V5;
            this.V6 = src.V6;
            this.V7 = src.V7;
        }
    }

    /// <summary>
    /// Tuple of 8 <see cref="byte"/> values stored back to back (explicit layout, 8 bytes).
    /// </summary>
    [StructLayout(LayoutKind.Explicit, Size = 8)]
    public struct OfByte
    {
        [FieldOffset(0)]
        public byte V0;

        [FieldOffset(1)]
        public byte V1;

        [FieldOffset(2)]
        public byte V2;

        [FieldOffset(3)]
        public byte V3;

        [FieldOffset(4)]
        public byte V4;

        [FieldOffset(5)]
        public byte V5;

        [FieldOffset(6)]
        public byte V6;

        [FieldOffset(7)]
        public byte V7;

        /// <inheritdoc />
        public override string ToString() =>
            $"{nameof(Octet)}.{nameof(OfByte)}({this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7})";

        /// <summary>
        /// Copies the 8 values of <paramref name="src"/> into this tuple.
        /// Each value is cast to <see cref="byte"/>, so only its lowest 8 bits are kept.
        /// </summary>
        /// <param name="src">The tuple of unsigned integers to read from</param>
        [MethodImpl(InliningOptions.ShortMethod)]
        public void LoadFrom(ref OfUInt32 src)
        {
            this.V0 = (byte)src.V0;
            this.V1 = (byte)src.V1;
            this.V2 = (byte)src.V2;
            this.V3 = (byte)src.V3;
            this.V4 = (byte)src.V4;
            this.V5 = (byte)src.V5;
            this.V6 = (byte)src.V6;
            this.V7 = (byte)src.V7;
        }
    }
}
}

12
src/ImageSharp/Common/Tuples/Vector4Pair.cs

@ -2,11 +2,12 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp.Common.Tuples
namespace SixLabors.ImageSharp.Tuples
{
/// <summary>
/// It's faster to process multiple Vector4-s together, so let's pair them!
/// On AVX2 this pair should be convertible to <see cref="Vector{T}"/> of <see cref="float"/>!
/// TODO: Investigate defining this as union with an Octet.OfSingle type.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal struct Vector4Pair
@ -15,8 +16,6 @@ namespace SixLabors.ImageSharp.Common.Tuples
public Vector4 B;
private static readonly Vector4 Scale = new Vector4(1 / 255f);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void MultiplyInplace(float value)
{
@ -52,8 +51,9 @@ namespace SixLabors.ImageSharp.Common.Tuples
b = b.FastRound();
// Downscale by 1/255
this.A *= Scale;
this.B *= Scale;
var scale = new Vector4(1 / 255f);
this.A *= scale;
this.B *= scale;
}
/// <summary>
@ -74,7 +74,7 @@ namespace SixLabors.ImageSharp.Common.Tuples
public override string ToString()
{
return $"{this.A}, {this.B}";
return $"{nameof(Vector4Pair)}({this.A}, {this.B})";
}
}
}

4
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimd.cs

@ -6,7 +6,7 @@ using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Common.Tuples;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
{
@ -109,7 +109,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Collect(ref r, ref g, ref b);
destination.Pack(ref r, ref g, ref b);
}
}
}

4
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs

@ -6,7 +6,7 @@ using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Common.Tuples;
using SixLabors.ImageSharp.Tuples;
// ReSharper disable ImpureMethodCallOnReadonlyValueField
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
@ -102,7 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values in the expected (r0,g0,b0,1), (r1,g1,b1,1) ... order:
ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i);
destination.Collect(ref rr, ref gg, ref bb);
destination.Pack(ref rr, ref gg, ref bb);
}
}
}

6
src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs

@ -6,8 +6,8 @@ using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using SixLabors.ImageSharp.Common.Tuples;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.Tuples;
using SixLabors.Memory;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
@ -157,9 +157,9 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters
public Vector4 V0, V1, V2, V3, V4, V5, V6, V7;
/// <summary>
/// Collect (r0,r1...r8) (g0,g1...g8) (b0,b1...b8) vector values in the expected (r0,g0,g1,1), (r1,g1,g2,1) ... order.
/// Pack (r0,r1...r7) (g0,g1...g7) (b0,b1...b7) vector values as (r0,g0,b0,1), (r1,g1,b1,1) ...
/// </summary>
public void Collect(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b)
public void Pack(ref Vector4Pair r, ref Vector4Pair g, ref Vector4Pair b)
{
this.V0.X = r.A.X;
this.V0.Y = g.A.X;

144
src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs

@ -3,7 +3,6 @@
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.Memory;
@ -19,100 +18,18 @@ namespace SixLabors.ImageSharp.PixelFormats
/// </summary>
internal partial class PixelOperations : PixelOperations<Rgba32>
{
/// <summary>
/// SIMD optimized bulk implementation of <see cref="IPixel.PackFromVector4(Vector4)"/>
/// that works only with `count` divisible by <see cref="Vector{UInt32}.Count"/>.
/// </summary>
/// <param name="sourceColors">The <see cref="Span{T}"/> to the source colors.</param>
/// <param name="destVectors">The <see cref="Span{T}"/> to the destination vectors.</param>
/// <param name="count">The number of pixels to convert.</param>
/// <remarks>
/// Implementation adapted from:
/// <see>
/// <cref>http://stackoverflow.com/a/5362789</cref>
/// </see>
/// TODO: We can replace this implementation in the future using new Vector API-s:
/// <see>
/// <cref>https://github.com/dotnet/corefx/issues/15957</cref>
/// </see>
/// </remarks>
internal static void ToVector4SimdAligned(ReadOnlySpan<Rgba32> sourceColors, Span<Vector4> destVectors, int count)
{
if (!Vector.IsHardwareAccelerated)
{
throw new InvalidOperationException(
"Rgba32.PixelOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!");
}
DebugGuard.IsTrue(
count % Vector<uint>.Count == 0,
nameof(count),
"Argument 'count' should divisible by Vector<uint>.Count!");
var bVec = new Vector<float>(256.0f / 255.0f);
var magicFloat = new Vector<float>(32768.0f);
var magicInt = new Vector<uint>(1191182336); // reinterpreded value of 32768.0f
var mask = new Vector<uint>(255);
int unpackedRawCount = count * 4;
ref uint sourceBase = ref Unsafe.As<Rgba32, uint>(ref MemoryMarshal.GetReference(sourceColors));
ref UnpackedRGBA destBaseAsUnpacked = ref Unsafe.As<Vector4, UnpackedRGBA>(ref MemoryMarshal.GetReference(destVectors));
ref Vector<uint> destBaseAsUInt = ref Unsafe.As<UnpackedRGBA, Vector<uint>>(ref destBaseAsUnpacked);
ref Vector<float> destBaseAsFloat = ref Unsafe.As<UnpackedRGBA, Vector<float>>(ref destBaseAsUnpacked);
for (int i = 0; i < count; i++)
{
uint sVal = Unsafe.Add(ref sourceBase, i);
ref UnpackedRGBA dst = ref Unsafe.Add(ref destBaseAsUnpacked, i);
// This call is the bottleneck now:
dst.Load(sVal);
}
int numOfVectors = unpackedRawCount / Vector<uint>.Count;
for (int i = 0; i < numOfVectors; i++)
{
Vector<uint> vi = Unsafe.Add(ref destBaseAsUInt, i);
vi &= mask;
vi |= magicInt;
var vf = Vector.AsVectorSingle(vi);
vf = (vf - magicFloat) * bVec;
Unsafe.Add(ref destBaseAsFloat, i) = vf;
}
}
/// <inheritdoc />
internal override void ToVector4(ReadOnlySpan<Rgba32> sourceColors, Span<Vector4> destinationVectors, int count)
{
Guard.MustBeSizedAtLeast(sourceColors, count, nameof(sourceColors));
Guard.MustBeSizedAtLeast(destinationVectors, count, nameof(destinationVectors));
if (count < 256 || !Vector.IsHardwareAccelerated)
{
// Not worth bothering with SIMD:
base.ToVector4(sourceColors, destinationVectors, count);
return;
}
int remainder = count % Vector<uint>.Count;
int alignedCount = count - remainder;
sourceColors = sourceColors.Slice(0, count);
destinationVectors = destinationVectors.Slice(0, count);
if (alignedCount > 0)
{
ToVector4SimdAligned(sourceColors, destinationVectors, alignedCount);
}
if (remainder > 0)
{
sourceColors = sourceColors.Slice(alignedCount);
destinationVectors = destinationVectors.Slice(alignedCount);
base.ToVector4(sourceColors, destinationVectors, remainder);
}
SimdUtils.BulkConvertByteToNormalizedFloat(
MemoryMarshal.Cast<Rgba32, byte>(sourceColors),
MemoryMarshal.Cast<Vector4, float>(destinationVectors));
}
/// <inheritdoc />
@ -120,29 +37,12 @@ namespace SixLabors.ImageSharp.PixelFormats
{
GuardSpans(sourceVectors, nameof(sourceVectors), destinationColors, nameof(destinationColors), count);
if (!SimdUtils.IsAvx2CompatibleArchitecture)
{
base.PackFromVector4(sourceVectors, destinationColors, count);
return;
}
int remainder = count % 2;
int alignedCount = count - remainder;
sourceVectors = sourceVectors.Slice(0, count);
destinationColors = destinationColors.Slice(0, count);
if (alignedCount > 0)
{
ReadOnlySpan<float> flatSrc = MemoryMarshal.Cast<Vector4, float>(sourceVectors.Slice(0, alignedCount));
Span<byte> flatDest = MemoryMarshal.Cast<Rgba32, byte>(destinationColors);
SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(flatSrc, flatDest);
}
if (remainder > 0)
{
// actually: remainder == 1
int lastIdx = count - 1;
destinationColors[lastIdx].PackFromVector4(sourceVectors[lastIdx]);
}
SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(
MemoryMarshal.Cast<Vector4, float>(sourceVectors),
MemoryMarshal.Cast<Rgba32, byte>(destinationColors));
}
/// <inheritdoc />
@ -172,30 +72,6 @@ namespace SixLabors.ImageSharp.PixelFormats
sourcePixels.Slice(0, count).CopyTo(dest);
}
/// <summary>
/// Value type to store <see cref="Rgba32"/>-s unpacked into multiple <see cref="uint"/>-s.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
private struct UnpackedRGBA
{
private uint r;
private uint g;
private uint b;
private uint a;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Load(uint p)
{
this.r = p;
this.g = p >> GreenShift;
this.b = p >> BlueShift;
this.a = p >> AlphaShift;
}
}
}
}
}

68
tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs

@ -3,6 +3,7 @@
// ReSharper disable InconsistentNaming
using System;
using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
@ -19,11 +20,14 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
public abstract class PackFromVector4<TPixel>
where TPixel : struct, IPixel<TPixel>
{
private IMemoryOwner<Vector4> source;
protected IMemoryOwner<Vector4> source;
private IMemoryOwner<TPixel> destination;
protected IMemoryOwner<TPixel> destination;
[Params(16, 128, 512)]
[Params(
64,
2048
)]
public int Count { get; set; }
[GlobalSetup]
@ -40,7 +44,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
this.source.Dispose();
}
[Benchmark(Baseline = true)]
//[Benchmark]
public void PerElement()
{
ref Vector4 s = ref MemoryMarshal.GetReference(this.source.GetSpan());
@ -53,13 +57,13 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
}
[Benchmark]
public void CommonBulk()
public void PixelOperations_Base()
{
new PixelOperations<TPixel>().PackFromVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
[Benchmark]
public void OptimizedBulk()
public void PixelOperations_Specialized()
{
PixelOperations<TPixel>.Instance.PackFromVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
@ -67,6 +71,58 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
public class PackFromVector4_Rgba32 : PackFromVector4<Rgba32>
{
/// <summary>
/// Benchmarks the FallbackIntrinsics128 float-to-byte conversion on the flattened source/destination buffers.
/// </summary>
[Benchmark]
public void FallbackIntrinsics128()
{
    // View the Vector4 source as raw floats and the Rgba32 destination as raw bytes.
    Span<float> sourceFloats = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
    Span<byte> destBytes = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());

    SimdUtils.FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflows(sourceFloats, destBytes);
}
/// <summary>
/// Baseline: benchmarks the BasicIntrinsics256 float-to-byte conversion on the flattened source/destination buffers.
/// </summary>
[Benchmark(Baseline = true)]
public void BasicIntrinsics256()
{
    // View the Vector4 source as raw floats and the Rgba32 destination as raw bytes.
    Span<float> sourceFloats = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
    Span<byte> destBytes = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());

    SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows(sourceFloats, destBytes);
}
/// <summary>
/// Benchmarks the ExtendedIntrinsics float-to-byte conversion on the flattened source/destination buffers.
/// </summary>
[Benchmark]
public void ExtendedIntrinsic()
{
    // View the Vector4 source as raw floats and the Rgba32 destination as raw bytes.
    Span<float> sourceFloats = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
    Span<byte> destBytes = MemoryMarshal.Cast<Rgba32, byte>(this.destination.GetSpan());

    SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(sourceFloats, destBytes);
}
// RESULTS (2018 October):
// Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
// ---------------------------- |-------- |------ |-------------:|-------------:|------------:|-------:|---------:|-------:|----------:|
// FallbackIntrinsics128 | Clr | 64 | 340.38 ns | 22.319 ns | 1.2611 ns | 1.41 | 0.01 | - | 0 B |
// BasicIntrinsics256 | Clr | 64 | 240.79 ns | 11.421 ns | 0.6453 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsic | Clr | 64 | 199.09 ns | 124.239 ns | 7.0198 ns | 0.83 | 0.02 | - | 0 B |
// PixelOperations_Base | Clr | 64 | 647.99 ns | 24.003 ns | 1.3562 ns | 2.69 | 0.01 | 0.0067 | 24 B |
// PixelOperations_Specialized | Clr | 64 | 259.79 ns | 13.391 ns | 0.7566 ns | 1.08 | 0.00 | - | 0 B | <--- ceremonial overhead has been minimized!
// | | | | | | | | | |
// FallbackIntrinsics128 | Core | 64 | 234.64 ns | 12.320 ns | 0.6961 ns | 1.58 | 0.00 | - | 0 B |
// BasicIntrinsics256 | Core | 64 | 148.87 ns | 2.794 ns | 0.1579 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsic | Core | 64 | 94.06 ns | 10.015 ns | 0.5659 ns | 0.63 | 0.00 | - | 0 B |
// PixelOperations_Base | Core | 64 | 573.52 ns | 31.865 ns | 1.8004 ns | 3.85 | 0.01 | 0.0067 | 24 B |
// PixelOperations_Specialized | Core | 64 | 117.21 ns | 13.264 ns | 0.7494 ns | 0.79 | 0.00 | - | 0 B |
// | | | | | | | | | |
// FallbackIntrinsics128 | Clr | 2048 | 6,735.93 ns | 2,139.340 ns | 120.8767 ns | 1.71 | 0.03 | - | 0 B |
// BasicIntrinsics256 | Clr | 2048 | 3,929.29 ns | 334.027 ns | 18.8731 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsic | Clr | 2048 | 2,226.01 ns | 130.525 ns | 7.3749 ns |!! 0.57 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock!
// PixelOperations_Base | Clr | 2048 | 16,760.84 ns | 367.800 ns | 20.7814 ns | 4.27 | 0.02 | - | 24 B | <--- Extra copies using "Vector4 TPixel.ToVector4()"
// PixelOperations_Specialized | Clr | 2048 | 3,986.03 ns | 237.238 ns | 13.4044 ns | 1.01 | 0.00 | - | 0 B | <--- can't yet detect whether ExtendedIntrinsics are available :(
// | | | | | | | | | |
// FallbackIntrinsics128 | Core | 2048 | 6,644.65 ns | 2,677.090 ns | 151.2605 ns | 1.69 | 0.05 | - | 0 B |
// BasicIntrinsics256 | Core | 2048 | 3,923.70 ns | 1,971.760 ns | 111.4081 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsic | Core | 2048 | 2,092.32 ns | 375.657 ns | 21.2253 ns |!! 0.53 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock!
// PixelOperations_Base | Core | 2048 | 16,875.73 ns | 1,271.957 ns | 71.8679 ns | 4.30 | 0.10 | - | 24 B |
// PixelOperations_Specialized | Core | 2048 | 2,129.92 ns | 262.888 ns | 14.8537 ns |!! 0.54 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock!
}
}

166
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs

@ -6,8 +6,14 @@
using System.Buffers;
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Attributes.Jobs;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
@ -17,11 +23,17 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
public abstract class ToVector4<TPixel>
where TPixel : struct, IPixel<TPixel>
{
private IMemoryOwner<TPixel> source;
protected IMemoryOwner<TPixel> source;
private IMemoryOwner<Vector4> destination;
protected IMemoryOwner<Vector4> destination;
[Params(64, 300, 1024)]
[Params(
64,
//256,
//512,
//1024,
2048
)]
public int Count { get; set; }
[GlobalSetup]
@ -38,7 +50,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
this.destination.Dispose();
}
[Benchmark(Baseline = true)]
//[Benchmark]
public void PerElement()
{
Span<TPixel> s = this.source.GetSpan();
@ -46,25 +58,163 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
for (int i = 0; i < this.Count; i++)
{
TPixel c = s[i];
d[i] = c.ToVector4();
d[i] = s[i].ToVector4();
}
}
[Benchmark]
public void CommonBulk()
public void PixelOperations_Base()
{
new PixelOperations<TPixel>().ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
[Benchmark]
public void OptimizedBulk()
public void PixelOperations_Specialized()
{
PixelOperations<TPixel>.Instance.ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
}
[Config(typeof(Config.ShortClr))]
public class ToVector4_Rgba32 : ToVector4<Rgba32>
{
/// <summary>
/// Benchmarks the FallbackIntrinsics128 byte-to-float conversion on the flattened source/destination buffers.
/// </summary>
[Benchmark]
public void FallbackIntrinsics128()
{
    // View the Rgba32 source as raw bytes and the Vector4 destination as raw floats.
    Span<byte> sourceBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
    Span<float> destFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());

    SimdUtils.FallbackIntrinsics128.BulkConvertByteToNormalizedFloat(sourceBytes, destFloats);
}
/// <summary>
/// Baseline: benchmarks the BasicIntrinsics256 byte-to-float conversion on the flattened source/destination buffers.
/// </summary>
[Benchmark(Baseline = true)]
public void BasicIntrinsics256()
{
    // View the Rgba32 source as raw bytes and the Vector4 destination as raw floats.
    Span<byte> sourceBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
    Span<float> destFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());

    SimdUtils.BasicIntrinsics256.BulkConvertByteToNormalizedFloat(sourceBytes, destFloats);
}
/// <summary>
/// Benchmarks the ExtendedIntrinsics byte-to-float conversion on the flattened source/destination buffers.
/// </summary>
[Benchmark]
public void ExtendedIntrinsics()
{
    // View the Rgba32 source as raw bytes and the Vector4 destination as raw floats.
    Span<byte> sourceBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
    Span<float> destFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());

    SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(sourceBytes, destFloats);
}
//[Benchmark]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops()
{
    // Experimental variant: the first loop widens bytes to 32-bit integers stored in the destination,
    // the second loop converts those integers in place to floats scaled by 1/255.
    Span<byte> sourceBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
    Span<float> destFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());

    ref Vector<byte> sourceVectors = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference((ReadOnlySpan<byte>)sourceBytes));
    ref Vector<float> destVectors = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(destFloats));
    ref Vector<uint> destVectorsAsUInt = ref Unsafe.As<Vector<float>, Vector<uint>>(ref destVectors);

    // Loop 1: every byte vector widens into four uint vectors.
    int byteVectorCount = destFloats.Length / Vector<byte>.Count;

    for (int i = 0; i < byteVectorCount; i++)
    {
        Vector.Widen(Unsafe.Add(ref sourceVectors, i), out Vector<ushort> lowShorts, out Vector<ushort> highShorts);
        Vector.Widen(lowShorts, out Vector<uint> w0, out Vector<uint> w1);
        Vector.Widen(highShorts, out Vector<uint> w2, out Vector<uint> w3);

        int offset = i * 4;
        Unsafe.Add(ref destVectorsAsUInt, offset) = w0;
        Unsafe.Add(ref destVectorsAsUInt, offset + 1) = w1;
        Unsafe.Add(ref destVectorsAsUInt, offset + 2) = w2;
        Unsafe.Add(ref destVectorsAsUInt, offset + 3) = w3;
    }

    // Loop 2: reinterpret each destination vector as integers, convert to float and scale.
    int floatVectorCount = destFloats.Length / Vector<float>.Count;
    var scale = new Vector<float>(1f / 255f);

    for (int i = 0; i < floatVectorCount; i++)
    {
        ref Vector<float> lane = ref Unsafe.Add(ref destVectors, i);
        lane = Vector.ConvertToSingle(Vector.AsVectorInt32(lane)) * scale;
    }
}
//[Benchmark]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_ConvertInSameLoop()
{
    // Experimental variant: widening, int-to-float conversion and scaling all happen in a single loop.
    Span<byte> sourceBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
    Span<float> destFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());

    ref Vector<byte> sourceVectors = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference((ReadOnlySpan<byte>)sourceBytes));
    ref Vector<float> destVectors = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(destFloats));

    int byteVectorCount = destFloats.Length / Vector<byte>.Count;
    var scale = new Vector<float>(1f / 255f);

    for (int i = 0; i < byteVectorCount; i++)
    {
        // One byte vector widens into four uint vectors.
        Vector.Widen(Unsafe.Add(ref sourceVectors, i), out Vector<ushort> lowShorts, out Vector<ushort> highShorts);
        Vector.Widen(lowShorts, out Vector<uint> w0, out Vector<uint> w1);
        Vector.Widen(highShorts, out Vector<uint> w2, out Vector<uint> w3);

        Vector<float> f0 = ConvertToNormalizedSingle(w0, scale);
        Vector<float> f1 = ConvertToNormalizedSingle(w1, scale);
        Vector<float> f2 = ConvertToNormalizedSingle(w2, scale);
        Vector<float> f3 = ConvertToNormalizedSingle(w3, scale);

        int offset = i * 4;
        Unsafe.Add(ref destVectors, offset) = f0;
        Unsafe.Add(ref destVectors, offset + 1) = f1;
        Unsafe.Add(ref destVectors, offset + 2) = f2;
        Unsafe.Add(ref destVectors, offset + 3) = f3;
    }
}
/// <summary>
/// Reinterprets the lanes of <paramref name="u"/> as signed integers, converts them to floats
/// and multiplies the result by <paramref name="scale"/>.
/// </summary>
/// <param name="u">The vector of unsigned integers</param>
/// <param name="scale">The per-lane multiplier</param>
/// <returns>The scaled vector of floats.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToNormalizedSingle(Vector<uint> u, Vector<float> scale)
{
    Vector<float> converted = Vector.ConvertToSingle(Vector.AsVectorInt32(u));
    return converted * scale;
}
// RESULTS (2018 October):
//
// Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
// ---------------------------- |-------- |------ |------------:|-------------:|------------:|-------:|---------:|-------:|----------:|
// FallbackIntrinsics128 | Clr | 64 | 287.62 ns | 6.026 ns | 0.3405 ns | 1.19 | 0.00 | - | 0 B |
// BasicIntrinsics256 | Clr | 64 | 240.83 ns | 10.585 ns | 0.5981 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsics | Clr | 64 | 168.28 ns | 11.478 ns | 0.6485 ns | 0.70 | 0.00 | - | 0 B |
// PixelOperations_Base | Clr | 64 | 334.08 ns | 38.048 ns | 2.1498 ns | 1.39 | 0.01 | 0.0072 | 24 B |
// PixelOperations_Specialized | Clr | 64 | 255.41 ns | 10.939 ns | 0.6181 ns | 1.06 | 0.00 | - | 0 B | <--- ceremonial overhead has been minimized!
// | | | | | | | | | |
// FallbackIntrinsics128 | Core | 64 | 183.29 ns | 8.931 ns | 0.5046 ns | 1.32 | 0.00 | - | 0 B |
// BasicIntrinsics256 | Core | 64 | 139.18 ns | 7.633 ns | 0.4313 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsics | Core | 64 | 66.29 ns | 16.366 ns | 0.9247 ns | 0.48 | 0.01 | - | 0 B |
// PixelOperations_Base | Core | 64 | 257.75 ns | 16.959 ns | 0.9582 ns | 1.85 | 0.01 | 0.0072 | 24 B |
// PixelOperations_Specialized | Core | 64 | 90.14 ns | 9.955 ns | 0.5625 ns | 0.65 | 0.00 | - | 0 B |
// | | | | | | | | | |
// FallbackIntrinsics128 | Clr | 2048 | 5,011.84 ns | 347.991 ns | 19.6621 ns | 1.22 | 0.01 | - | 0 B |
// BasicIntrinsics256 | Clr | 2048 | 4,119.35 ns | 720.153 ns | 40.6900 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsics | Clr | 2048 | 1,195.29 ns | 164.389 ns | 9.2883 ns |!! 0.29 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock!
// PixelOperations_Base | Clr | 2048 | 6,820.58 ns | 823.433 ns | 46.5255 ns | 1.66 | 0.02 | - | 24 B |
// PixelOperations_Specialized | Clr | 2048 | 4,203.53 ns | 176.714 ns | 9.9847 ns | 1.02 | 0.01 | - | 0 B | <--- can't yet detect whether ExtendedIntrinsics are available :(
// | | | | | | | | | |
// FallbackIntrinsics128 | Core | 2048 | 5,017.89 ns | 4,021.533 ns | 227.2241 ns | 1.24 | 0.05 | - | 0 B |
// BasicIntrinsics256 | Core | 2048 | 4,046.51 ns | 1,150.390 ns | 64.9992 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsics | Core | 2048 | 1,130.59 ns | 832.588 ns | 47.0427 ns |!! 0.28 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock!
// PixelOperations_Base | Core | 2048 | 6,752.68 ns | 272.820 ns | 15.4148 ns | 1.67 | 0.02 | - | 24 B |
// PixelOperations_Specialized | Core | 2048 | 1,126.13 ns | 79.192 ns | 4.4745 ns |!! 0.28 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock!
}
}

8
tests/ImageSharp.Benchmarks/General/Abs.cs → tests/ImageSharp.Benchmarks/General/BasicMath/Abs.cs

@ -1,9 +1,9 @@
namespace SixLabors.ImageSharp.Benchmarks.General
{
using System;
using System;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
public class Abs
{
[Params(-1, 1)]

70
tests/ImageSharp.Benchmarks/General/BasicMath/ClampFloat.cs

@ -0,0 +1,70 @@
using System;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
/// <summary>
/// Compares clamping a float with <see cref="Math.Min(float, float)"/>/<see cref="Math.Max(float, float)"/>
/// against clamping with plain comparisons.
/// </summary>
public class ClampFloat
{
    private static readonly float[] Values = { -10, -5, -3, -1.5f, -0.5f, 0f, 1f, 1.5f, 2.5f, 3, 10 };

    private readonly float min = -1.5f;

    private readonly float max = 2.5f;

    [Benchmark(Baseline = true)]
    public float UsingMathF()
    {
        // The sum is returned so the clamped values are actually consumed.
        float sum = 0;

        foreach (float value in Values)
        {
            sum += ClampUsingMathF(value, this.min, this.max);
        }

        return sum;
    }

    [Benchmark]
    public float UsingBranching()
    {
        // The sum is returned so the clamped values are actually consumed.
        float sum = 0;

        foreach (float value in Values)
        {
            sum += ClampUsingBranching(value, this.min, this.max);
        }

        return sum;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float ClampUsingMathF(float x, float min, float max)
    {
        float raised = Math.Max(min, x);
        return Math.Min(max, raised);
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float ClampUsingBranching(float x, float min, float max)
    {
        // The upper bound is tested first, then the lower bound.
        return x >= max ? max : (x <= min ? min : x);
    }

    // RESULTS:
    //          Method |     Mean |     Error |    StdDev | Scaled |
    // --------------- |---------:|----------:|----------:|-------:|
    //      UsingMathF | 30.37 ns | 0.3764 ns | 0.3337 ns |   1.00 |
    //  UsingBranching | 18.66 ns | 0.1043 ns | 0.0871 ns |   0.61 |
}
}

12
tests/ImageSharp.Benchmarks/General/Clamp.cs → tests/ImageSharp.Benchmarks/General/BasicMath/ClampInt32IntoByte.cs

@ -3,14 +3,14 @@
// Licensed under the Apache License, Version 2.0.
// </copyright>
namespace SixLabors.ImageSharp.Benchmarks.General
{
using System;
using System.Runtime.CompilerServices;
using System;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Attributes;
public class Clamp
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
public class ClampInt32IntoByte
{
[Params(-1, 0, 255, 256)]
public int Value { get; set; }

23
tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoConstant.cs

@ -0,0 +1,23 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Attributes.Jobs;
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
/// <summary>
/// Compares the % operator against <c>ImageMaths.Modulo8</c> for the constant divisor 8.
/// </summary>
[LongRunJob]
public class ModuloPowerOfTwoConstant
{
    private readonly int value = 42;

    [Benchmark(Baseline = true)]
    public int Standard() => this.value % 8;

    [Benchmark]
    public int Bitwise() => ImageMaths.Modulo8(this.value);
}
}

32
tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoVariable.cs

@ -0,0 +1,32 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Attributes.Jobs;
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
/// <summary>
/// Compares the % operator against <c>ImageMaths.ModuloP2</c> for a divisor that is read from a field.
/// </summary>
[LongRunJob]
public class ModuloPowerOfTwoVariable
{
    private readonly int value = 42;

    private readonly int m = 32;

    [Benchmark(Baseline = true)]
    public int Standard() => this.value % this.m;

    [Benchmark]
    public int Bitwise() => ImageMaths.ModuloP2(this.value, this.m);

    // RESULTS:
    //
    //    Method |      Mean |     Error |    StdDev |    Median | Scaled | ScaledSD |
    // --------- |----------:|----------:|----------:|----------:|-------:|---------:|
    //  Standard | 1.2465 ns | 0.0093 ns | 0.0455 ns | 1.2423 ns |   1.00 |     0.00 |
    //   Bitwise | 0.0265 ns | 0.0103 ns | 0.0515 ns | 0.0000 ns |   0.02 |     0.04 |
}
}

3
tests/ImageSharp.Benchmarks/General/Pow.cs → tests/ImageSharp.Benchmarks/General/BasicMath/Pow.cs

@ -1,7 +1,8 @@
using System;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.General
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
public class Pow
{

19
tests/ImageSharp.Benchmarks/General/Modulus.cs

@ -1,19 +0,0 @@
namespace SixLabors.ImageSharp.Benchmarks.General
{
    using BenchmarkDotNet.Attributes;

    /// <summary>
    /// Compares a remainder written with <c>%</c> against the equivalent bit mask written with <c>&amp;</c>.
    /// Both expressions use literal operands only, so each one is a C# constant expression.
    /// </summary>
    public class Modulus
    {
        /// <summary>Baseline: <c>255 % 256</c>.</summary>
        [Benchmark(Baseline = true, Description = "Standard Modulus using %")]
        public int StandardModulus() => 255 % 256;

        /// <summary>Candidate: <c>255 &amp; 255</c>.</summary>
        [Benchmark(Description = "Bitwise Modulus using &")]
        public int BitwiseModulus() => 255 & 255;
    }
}

113
tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs

@ -0,0 +1,113 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
{
/// <summary>
/// Benchmarks several ways of converting, in place, a buffer whose 32-bit lanes are
/// treated as integers into floats scaled down by 255.
/// </summary>
[Config(typeof(Config.ShortClr))]
public class UInt32ToSingle
{
// Working buffer; every benchmark reads and overwrites it in place.
private float[] data;
// Number of 32-bit elements in the buffer.
private const int Count = 32;
/// <summary>Allocates the (zero-initialized) working buffer.</summary>
[GlobalSetup]
public void Setup()
{
this.data = new float[Count];
}
/// <summary>
/// Baseline: integer -> float conversion using bit manipulation instead of a convert instruction.
/// The low 8 bits of each lane are OR-ed into the bit pattern of 32768.0f, where one mantissa
/// step equals 1/256, giving 32768 + x/256. Subtracting 32768 and multiplying by 256/255
/// leaves x/255.
/// </summary>
[Benchmark(Baseline = true)]
public void MagicMethod()
{
ref Vector<float> b = ref Unsafe.As<float, Vector<float>>(ref this.data[0]);
// Number of whole Vector<float> chunks in the buffer.
int n = Count / Vector<float>.Count;
var bVec = new Vector<float>(256.0f / 255.0f);
var magicFloat = new Vector<float>(32768.0f);
var magicInt = new Vector<uint>(1191182336); // bit pattern of 32768.0f reinterpreted as uint (0x47000000)
var mask = new Vector<uint>(255);
for (int i = 0; i < n; i++)
{
// Reference C snippet for the float -> byte direction of the same trick
// (the statements below perform the inverse, integer -> float):
// union { float f; uint32_t i; } u;
// u.f = 32768.0f + x * (255.0f / 256.0f);
// return (uint8_t)u.i;
ref Vector<float> df = ref Unsafe.Add(ref b, i);
// Reinterpret the lanes as uint (no numeric conversion).
var vi = Vector.AsVectorUInt32(df);
// Keep only the low 8 bits of each lane, then splice them into the mantissa of 32768.0f.
vi &= mask;
vi |= magicInt;
var vf = Vector.AsVectorSingle(vi);
vf = (vf - magicFloat) * bVec;
// Write the result back through the ref into the buffer.
df = vf;
}
}
/// <summary>
/// Reads the buffer as <c>Vector&lt;uint&gt;</c>, converts with <c>Vector.ConvertToSingle</c>,
/// multiplies by 1/255 and stores the result back at the same position.
/// </summary>
[Benchmark]
public void StandardSimd()
{
int n = Count / Vector<float>.Count;
ref Vector<float> bf = ref Unsafe.As<float, Vector<float>>(ref this.data[0]);
// Second view over the same memory, typed as uint vectors.
ref Vector<uint> bu = ref Unsafe.As<Vector<float>, Vector<uint>>(ref bf);
var scale = new Vector<float>(1f / 255f);
for (int i = 0; i < n; i++)
{
Vector<uint> u = Unsafe.Add(ref bu, i);
Vector<float> v = Vector.ConvertToSingle(u);
v *= scale;
Unsafe.Add(ref bf, i) = v;
}
}
/// <summary>
/// Same as <see cref="StandardSimd"/>, but the buffer is read as <c>Vector&lt;int&gt;</c>
/// (signed) before calling <c>Vector.ConvertToSingle</c>.
/// </summary>
[Benchmark]
public void StandardSimdFromInt()
{
int n = Count / Vector<float>.Count;
ref Vector<float> bf = ref Unsafe.As<float, Vector<float>>(ref this.data[0]);
// Second view over the same memory, typed as int vectors.
ref Vector<int> bu = ref Unsafe.As<Vector<float>, Vector<int>>(ref bf);
var scale = new Vector<float>(1f / 255f);
for (int i = 0; i < n; i++)
{
Vector<int> u = Unsafe.Add(ref bu, i);
Vector<float> v = Vector.ConvertToSingle(u);
v *= scale;
Unsafe.Add(ref bf, i) = v;
}
}
/// <summary>
/// Signed-int variant that takes a single ref to each float vector, reinterprets it with
/// <c>Vector.AsVectorInt32</c>, and writes the converted value back through the same ref.
/// </summary>
[Benchmark]
public void StandardSimdFromInt_RefCast()
{
int n = Count / Vector<float>.Count;
ref Vector<float> bf = ref Unsafe.As<float, Vector<float>>(ref this.data[0]);
// NOTE(review): 'bu' is declared here but never read in this method.
ref Vector<int> bu = ref Unsafe.As<Vector<float>, Vector<int>>(ref bf);
var scale = new Vector<float>(1f / 255f);
for (int i = 0; i < n; i++)
{
ref Vector<float> fRef = ref Unsafe.Add(ref bf, i);
Vector<int> du = Vector.AsVectorInt32(fRef);
Vector<float> v = Vector.ConvertToSingle(du);
v *= scale;
fRef = v;
}
}
}
}

64
tests/ImageSharp.Benchmarks/General/Vectorization/WidenBytesToUInt32.cs

@ -0,0 +1,64 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Tuples;
namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
{
/// <summary>
/// Benchmarks copying a byte buffer into a uint buffer of the same element count,
/// comparing the <c>Octet</c> tuple helpers against <c>Vector.Widen</c>.
/// </summary>
[Config(typeof(Config.ShortClr))]
public class WidenBytesToUInt32
{
// Input bytes (left zero-initialized by Setup).
private byte[] source;
// Output buffer with one uint per input byte.
private uint[] dest;
// Number of elements in both buffers.
private const int Count = 64;
/// <summary>Allocates the source and destination buffers.</summary>
[GlobalSetup]
public void Setup()
{
this.source = new byte[Count];
this.dest = new uint[Count];
}
/// <summary>
/// Baseline: walks both buffers in <c>Count / 8</c> steps, calling
/// <c>Octet.OfUInt32.LoadFrom</c> with the matching <c>Octet.OfByte</c>.
/// </summary>
[Benchmark(Baseline = true)]
public void Standard()
{
// NOTE(review): the divisor 8 presumably matches the element count of an Octet — confirm against Octet.cs.
const int N = Count / 8;
ref Octet.OfByte sBase = ref Unsafe.As<byte, Octet.OfByte>(ref this.source[0]);
ref Octet.OfUInt32 dBase = ref Unsafe.As<uint, Octet.OfUInt32>(ref this.dest[0]);
for (int i = 0; i < N; i++)
{
Unsafe.Add(ref dBase, i).LoadFrom(ref Unsafe.Add(ref sBase, i));
}
}
/// <summary>
/// Widens each <c>Vector&lt;byte&gt;</c> to two <c>Vector&lt;ushort&gt;</c> and then to four
/// <c>Vector&lt;uint&gt;</c>, storing the four results consecutively in the destination.
/// </summary>
[Benchmark]
public void Simd()
{
// Number of whole Vector<byte> chunks in the source.
int n = Count / Vector<byte>.Count;
ref Vector<byte> sBase = ref Unsafe.As<byte, Vector<byte>>(ref this.source[0]);
ref Vector<uint> dBase = ref Unsafe.As<uint, Vector<uint>>(ref this.dest[0]);
for (int i = 0; i < n; i++)
{
Vector<byte> b = Unsafe.Add(ref sBase, i);
// byte -> ushort (two halves), then each half ushort -> uint (two halves each).
Vector.Widen(b, out Vector<ushort> s0, out Vector<ushort> s1);
Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1);
Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3);
// Each source vector produces four destination vectors, hence the stride of 4.
ref Vector<uint> d = ref Unsafe.Add(ref dBase, i * 4);
d = w0;
Unsafe.Add(ref d, 1) = w1;
Unsafe.Add(ref d, 2) = w2;
Unsafe.Add(ref d, 3) = w3;
}
}
}
}

146
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -62,7 +62,7 @@ namespace SixLabors.ImageSharp.Tests.Common
{
float[] data = new float[Vector<float>.Count];
var rnd = new Random();
var rnd = new Random(seed);
for (int i = 0; i < Vector<float>.Count; i++)
{
@ -118,7 +118,7 @@ namespace SixLabors.ImageSharp.Tests.Common
[InlineData(1, 8)]
[InlineData(2, 16)]
[InlineData(3, 128)]
public void BulkConvertNormalizedFloatToByte_WithRoundedData(int seed, int count)
public void BasicIntrinsics256_BulkConvertNormalizedFloatToByte_WithRoundedData(int seed, int count)
{
if (this.SkipOnNonAvx2())
{
@ -130,7 +130,7 @@ namespace SixLabors.ImageSharp.Tests.Common
byte[] dest = new byte[count];
SimdUtils.BulkConvertNormalizedFloatToByte(normalized, dest);
SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByte(normalized, dest);
byte[] expected = orig.Select(f => (byte)(f)).ToArray();
@ -142,7 +142,7 @@ namespace SixLabors.ImageSharp.Tests.Common
[InlineData(1, 8)]
[InlineData(2, 16)]
[InlineData(3, 128)]
public void BulkConvertNormalizedFloatToByte_WithNonRoundedData(int seed, int count)
public void BasicIntrinsics256_BulkConvertNormalizedFloatToByte_WithNonRoundedData(int seed, int count)
{
if (this.SkipOnNonAvx2())
{
@ -153,39 +153,147 @@ namespace SixLabors.ImageSharp.Tests.Common
byte[] dest = new byte[count];
SimdUtils.BulkConvertNormalizedFloatToByte(source, dest);
SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByte(source, dest);
byte[] expected = source.Select(f => (byte)Math.Round(f * 255f)).ToArray();
Assert.Equal(expected, dest);
}
private static float Clamp255(float x) => Math.Min(255f, Math.Max(0f, x));
public static readonly TheoryData<int> ArraySizesDivisibleBy8 = new TheoryData<int> { 0, 8, 16, 1024 };
public static readonly TheoryData<int> ArraySizesDivisibleBy4 = new TheoryData<int> { 0, 4, 8, 28, 1020 };
public static readonly TheoryData<int> ArraySizesDivisibleBy32 = new TheoryData<int> { 0, 32, 512 };
/// <summary>
/// Array lengths with no divisibility constraint, used by the theories that exercise the
/// general-purpose conversion entry points. Each length is listed once: a repeated value
/// would only produce a duplicate theory row.
/// </summary>
public static readonly TheoryData<int> ArbitraryArraySizes =
    new TheoryData<int>
    {
        0, 1, 2, 3, 4, 7, 8, 9, 15, 16, 17, 63, 64, 255, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520,
    };
[Theory]
[InlineData(1, 0)]
[InlineData(1, 8)]
[InlineData(2, 16)]
[InlineData(3, 128)]
public void BulkConvertNormalizedFloatToByteClampOverflows(int seed, int count)
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void FallbackIntrinsics128_BulkConvertByteToNormalizedFloat(int count)
{
TestImpl_BulkConvertByteToNormalizedFloat(
count,
(s, d) => SimdUtils.FallbackIntrinsics128.BulkConvertByteToNormalizedFloat(s.Span, d.Span));
}
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy8))]
public void BasicIntrinsics256_BulkConvertByteToNormalizedFloat(int count)
{
if (this.SkipOnNonAvx2())
{
return;
}
float[] orig = new Random(seed).GenerateRandomRoundedFloatArray(count, -50, 444);
float[] normalized = orig.Select(f => f / 255f).ToArray();
TestImpl_BulkConvertByteToNormalizedFloat(
count,
(s, d) => SimdUtils.BasicIntrinsics256.BulkConvertByteToNormalizedFloat(s.Span, d.Span));
}
/// <summary>
/// Runs the shared byte -> normalized float check against the <c>ExtendedIntrinsics</c> implementation.
/// </summary>
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat(int count)
{
    Action<Memory<byte>, Memory<float>> convert =
        (s, d) => SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(s.Span, d.Span);

    TestImpl_BulkConvertByteToNormalizedFloat(count, convert);
}
byte[] dest = new byte[count];
/// <summary>
/// Runs the shared byte -> normalized float check against the top-level <c>SimdUtils</c> entry point
/// for array lengths with no divisibility constraint.
/// </summary>
[Theory]
[MemberData(nameof(ArbitraryArraySizes))]
public void BulkConvertByteToNormalizedFloat(int count)
{
    Action<Memory<byte>, Memory<float>> convert =
        (s, d) => SimdUtils.BulkConvertByteToNormalizedFloat(s.Span, d.Span);

    TestImpl_BulkConvertByteToNormalizedFloat(count, convert);
}
SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(normalized, dest);
private static void TestImpl_BulkConvertByteToNormalizedFloat(
int count,
Action<Memory<byte>, Memory<float>> convert)
{
byte[] source = new Random(count).GenerateRandomByteArray(count);
float[] result = new float[count];
float[] expected = source.Select(b => (float)b / 255f).ToArray();
byte[] expected = orig.Select(f => (byte)Clamp255(f)).ToArray();
convert(source, result);
Assert.Equal(expected, dest);
Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5f));
}
/// <summary>
/// Runs the shared normalized float -> byte (clamping) check against the
/// <c>FallbackIntrinsics128</c> implementation.
/// </summary>
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void FallbackIntrinsics128_BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
    Action<Memory<float>, Memory<byte>> convert =
        (s, d) => SimdUtils.FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span);

    TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, convert);
}
/// <summary>
/// Runs the shared normalized float -> byte (clamping) check against the
/// <c>BasicIntrinsics256</c> implementation, unless <c>SkipOnNonAvx2</c> reports that it should be skipped.
/// </summary>
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy8))]
public void BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
    if (!this.SkipOnNonAvx2())
    {
        Action<Memory<float>, Memory<byte>> convert =
            (s, d) => SimdUtils.BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span);

        TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, convert);
    }
}
/// <summary>
/// Runs the shared normalized float -> byte (clamping) check against the
/// <c>ExtendedIntrinsics</c> implementation.
/// </summary>
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy32))]
public void ExtendedIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
    Action<Memory<float>, Memory<byte>> convert =
        (s, d) => SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span);

    TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, convert);
}
/// <summary>
/// Runs the shared normalized float -> byte (clamping) check against the top-level
/// <c>SimdUtils</c> entry point. For lengths 1..9 the check is repeated with 20 further seeds.
/// </summary>
[Theory]
[MemberData(nameof(ArbitraryArraySizes))]
public void BulkConvertNormalizedFloatToByteClampOverflows(int count)
{
    Action<Memory<float>, Memory<byte>> convert =
        (s, d) => SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span);

    // First pass uses the default seed (derived from count by the helper).
    TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, convert);

    // Only short, non-empty inputs get the extra stress passes.
    if (count <= 0 || count >= 10)
    {
        return;
    }

    // Seeds 42..61 inclusive.
    for (int seed = 42; seed < 62; seed++)
    {
        TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, convert, seed);
    }
}
/// <summary>
/// Generates <paramref name="count"/> random floats in the range passed to
/// <c>GenerateRandomFloatArray</c> (-0.2 .. 1.2), converts them with <paramref name="convert"/>,
/// and asserts the bytes equal the scalar reference <c>NormalizedFloatToByte</c>.
/// </summary>
/// <param name="count">Number of elements to generate and convert.</param>
/// <param name="convert">Conversion under test; receives the source floats and the destination bytes.</param>
/// <param name="seed">Random seed; any value that is not positive is replaced by <paramref name="count"/>.</param>
private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
    int count,
    Action<Memory<float>, Memory<byte>> convert, int seed = -1)
{
    if (seed <= 0)
    {
        seed = count;
    }

    var rnd = new Random(seed);
    float[] source = rnd.GenerateRandomFloatArray(count, -0.2f, 1.2f);

    // The reference result is computed before the conversion under test runs.
    byte[] expected = source.Select(NormalizedFloatToByte).ToArray();
    byte[] actual = new byte[count];

    convert(source, actual);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Scalar reference conversion: scales by 255, adds 0.5, clamps to [0, 255] and truncates to a byte.
/// </summary>
private static byte NormalizedFloatToByte(float f)
{
    float lowerBounded = Math.Max(0f, f * 255f + 0.5f);
    return (byte)Math.Min(255f, lowerBounded);
}
[Theory]
[InlineData(0)]
[InlineData(7)]
@ -211,7 +319,7 @@ namespace SixLabors.ImageSharp.Tests.Common
float[] source = { 0, 7, 42, 255, 0.5f, 1.1f, 2.6f, 16f };
var expected = source.Select(f => (byte)Math.Round(f)).ToArray();
byte[] expected = source.Select(f => (byte)Math.Round(f)).ToArray();
source = source.Select(f => f / 255f).ToArray();
@ -245,8 +353,6 @@ namespace SixLabors.ImageSharp.Tests.Common
iiRef = x;
//Tuple8.OfUInt32 ii = Unsafe.As<Vector<float>, Tuple8.OfUInt32>(ref x);
ref Tuple8.OfByte d = ref MemoryMarshal.Cast<byte, Tuple8.OfByte>(dest)[0];
d.LoadFrom(ref ii);

68
tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs

@ -9,6 +9,74 @@ namespace SixLabors.ImageSharp.Tests.Helpers
public class ImageMathsTests
{
/// <summary>Checks that <c>ImageMaths.Modulo4</c> agrees with <c>x % 4</c>.</summary>
[Theory]
[InlineData(0)]
[InlineData(1)]
[InlineData(2)]
[InlineData(3)]
[InlineData(4)]
[InlineData(100)]
[InlineData(123)]
[InlineData(53436353)]
public void Modulo4(int x)
{
    int expected = x % 4;

    Assert.Equal(expected, ImageMaths.Modulo4(x));
}
/// <summary>Checks that <c>ImageMaths.Modulo8</c> agrees with <c>x % 8</c>.</summary>
[Theory]
[InlineData(0)]
[InlineData(1)]
[InlineData(2)]
[InlineData(6)]
[InlineData(7)]
[InlineData(8)]
[InlineData(100)]
[InlineData(123)]
[InlineData(53436353)]
[InlineData(975)]
public void Modulo8(int x)
{
    int expected = x % 8;

    Assert.Equal(expected, ImageMaths.Modulo8(x));
}
/// <summary>
/// Checks that <c>ImageMaths.ModuloP2</c> agrees with <c>x % m</c> for the listed
/// power-of-two values of <paramref name="m"/>.
/// </summary>
[Theory]
[InlineData(0, 2)]
[InlineData(1, 2)]
[InlineData(2, 2)]
[InlineData(0, 4)]
[InlineData(3, 4)]
[InlineData(5, 4)]
[InlineData(5, 8)]
[InlineData(8, 8)]
[InlineData(8, 16)]
[InlineData(15, 16)]
[InlineData(17, 16)]
[InlineData(17, 32)]
[InlineData(31, 32)]
[InlineData(32, 32)]
[InlineData(33, 32)]
public void Modulo2P(int x, int m)
{
    int expected = x % m;

    Assert.Equal(expected, ImageMaths.ModuloP2(x, m));
}
/// <summary>
/// Checks the float <c>Clamp</c> extension for values inside, on, and outside the
/// [<paramref name="min"/>, <paramref name="max"/>] range.
/// </summary>
[Theory]
[InlineData(0, 0, 0, 0)]
[InlineData(0.5f, 0, 1, 0.5f)]
[InlineData(-0.5f, -0.1f, 10, -0.1f)]
[InlineData(-0.05f, -0.1f, 10, -0.05f)]
[InlineData(9.9f, -0.1f, 10, 9.9f)]
[InlineData(10f, -0.1f, 10, 10f)]
[InlineData(10.1f, -0.1f, 10, 10f)]
public void Clamp(float x, float min, float max, float expected)
{
    Assert.Equal(expected, x.Clamp(min, max));
}
[Fact]
public void FasAbsResultMatchesMath()
{

1340
tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs

File diff suppressed because it is too large

16
tests/ImageSharp.Tests/TestUtilities/TestDataGenerator.cs

@ -33,20 +33,28 @@ namespace SixLabors.ImageSharp.Tests
return values;
}
public static float[] GenerateRandomRoundedFloatArray(this Random rnd, int length, int minVal, int maxValExclusive)
public static float[] GenerateRandomRoundedFloatArray(this Random rnd, int length, float minVal, float maxVal)
{
float[] values = new float[length];
for (int i = 0; i < length; i++)
{
int val = rnd.Next(minVal, maxValExclusive);
values[i] = (float)val;
values[i] = (float) Math.Round(rnd.GetRandomFloat(minVal, maxVal));
}
return values;
}
private static float GetRandomFloat(Random rnd, float minVal, float maxVal)
/// <summary>
/// Creates a byte array of the requested length and fills it using <see cref="Random.NextBytes(byte[])"/>.
/// </summary>
/// <param name="rnd">Random source.</param>
/// <param name="length">Number of bytes to produce.</param>
/// <returns>The newly allocated, randomly filled array.</returns>
public static byte[] GenerateRandomByteArray(this Random rnd, int length)
{
    var buffer = new byte[length];
    rnd.NextBytes(buffer);
    return buffer;
}
/// <summary>
/// Draws one value from <see cref="Random.NextDouble"/>, narrows it to float, and maps it
/// linearly onto the span between <paramref name="minVal"/> and <paramref name="maxVal"/>.
/// </summary>
private static float GetRandomFloat(this Random rnd, float minVal, float maxVal)
{
    float unit = (float)rnd.NextDouble();
    return unit * (maxVal - minVal) + minVal;
}

2
tests/ImageSharp.Tests/TestUtilities/Tests/TestEnvironmentTests.cs

@ -3,6 +3,8 @@
using System;
using System.IO;
using System.Reflection;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.Formats;

Loading…
Cancel
Save