Browse Source

minimize ceremonial overhead

in BulkConvertByteToNormalizedFloat() and BulkConvertNormalizedFloatToByteClampOverflows()
af/merge-core
Anton Firszov 8 years ago
parent
commit
9b0ee6fb2a
  1. 45
      src/ImageSharp/Common/Helpers/ImageMaths.cs
  2. 91
      src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs
  3. 96
      src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
  4. 56
      src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs
  5. 84
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  6. 50
      tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
  7. 50
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
  8. 8
      tests/ImageSharp.Benchmarks/General/BasicMath/Abs.cs
  9. 10
      tests/ImageSharp.Benchmarks/General/BasicMath/Clamp.cs
  10. 23
      tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoConstant.cs
  11. 32
      tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoVariable.cs
  12. 3
      tests/ImageSharp.Benchmarks/General/BasicMath/Pow.cs
  13. 19
      tests/ImageSharp.Benchmarks/General/Modulus.cs
  14. 17
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
  15. 94
      tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs

45
src/ImageSharp/Common/Helpers/ImageMaths.cs

@ -39,22 +39,31 @@ namespace SixLabors.ImageSharp
return (a / GreatestCommonDivisor(a, b)) * b; return (a / GreatestCommonDivisor(a, b)) * b;
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] /// <summary>
public static int Modulo4(int a) => a & 3; /// Calculates <paramref name="x"/> % 4
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static int Modulo4(int x) => x & 3;
[MethodImpl(MethodImplOptions.AggressiveInlining)] /// <summary>
public static int Modulo8(int a) => a & 7; /// Calculates <paramref name="x"/> % 8
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static int Modulo8(int x) => x & 7;
/// <summary> /// <summary>
/// Fast (mod m) calculator, /// Fast (x mod m) calculator, with the restriction that
/// where <paramref name="m"/> should be a power of 2. /// <paramref name="m"/> should be a power of 2.
/// </summary> /// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static int ModuloP2(int a, int m) public static int ModuloP2(int x, int m)
{ {
return a & (m - 1); return x & (m - 1);
} }
/// <summary>
/// Limits <paramref name="x"/> from below by <paramref name="min"/> and then from above by <paramref name="max"/>.
/// No check is made that <paramref name="min"/> is less than or equal to <paramref name="max"/>.
/// </summary>
/// <param name="x">The value to clamp.</param>
/// <param name="min">The lower bound, applied first.</param>
/// <param name="max">The upper bound, applied last.</param>
/// <returns>The clamped <see cref="float"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static float Clamp(float x, float min, float max)
{
    float boundedBelow = Math.Max(min, x);
    return Math.Min(max, boundedBelow);
}
/// <summary> /// <summary>
/// Returns the absolute value of a 32-bit signed integer. Uses bit shifting to speed up the operation. /// Returns the absolute value of a 32-bit signed integer. Uses bit shifting to speed up the operation.
/// </summary> /// </summary>
@ -62,7 +71,7 @@ namespace SixLabors.ImageSharp
/// A number that is greater than <see cref="int.MinValue"/>, but less than or equal to <see cref="int.MaxValue"/> /// A number that is greater than <see cref="int.MinValue"/>, but less than or equal to <see cref="int.MaxValue"/>
/// </param> /// </param>
/// <returns>The <see cref="int"/></returns> /// <returns>The <see cref="int"/></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static int FastAbs(int x) public static int FastAbs(int x)
{ {
int y = x >> 31; int y = x >> 31;
@ -74,7 +83,7 @@ namespace SixLabors.ImageSharp
/// </summary> /// </summary>
/// <param name="x">A single-precision floating-point number</param> /// <param name="x">A single-precision floating-point number</param>
/// <returns>The number <paramref name="x" /> raised to the power of 2.</returns> /// <returns>The number <paramref name="x" /> raised to the power of 2.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static float Pow2(float x) => x * x; public static float Pow2(float x) => x * x;
/// <summary> /// <summary>
@ -82,7 +91,7 @@ namespace SixLabors.ImageSharp
/// </summary> /// </summary>
/// <param name="x">A single-precision floating-point number</param> /// <param name="x">A single-precision floating-point number</param>
/// <returns>The number <paramref name="x" /> raised to the power of 3.</returns> /// <returns>The number <paramref name="x" /> raised to the power of 3.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static float Pow3(float x) => x * x * x; public static float Pow3(float x) => x * x * x;
/// <summary> /// <summary>
@ -93,7 +102,7 @@ namespace SixLabors.ImageSharp
/// <returns> /// <returns>
/// The <see cref="int"/> /// The <see cref="int"/>
/// </returns> /// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static int GetBitsNeededForColorDepth(int colors) => Math.Max(1, (int)Math.Ceiling(Math.Log(colors, 2))); public static int GetBitsNeededForColorDepth(int colors) => Math.Max(1, (int)Math.Ceiling(Math.Log(colors, 2)));
/// <summary> /// <summary>
@ -101,7 +110,7 @@ namespace SixLabors.ImageSharp
/// </summary> /// </summary>
/// <param name="bitDepth">The bit depth.</param> /// <param name="bitDepth">The bit depth.</param>
/// <returns>The <see cref="int"/></returns> /// <returns>The <see cref="int"/></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static int GetColorCountForBitDepth(int bitDepth) => 1 << bitDepth; public static int GetColorCountForBitDepth(int bitDepth) => 1 << bitDepth;
/// <summary> /// <summary>
@ -110,7 +119,7 @@ namespace SixLabors.ImageSharp
/// <param name="x">The x provided to G(x).</param> /// <param name="x">The x provided to G(x).</param>
/// <param name="sigma">The spread of the blur.</param> /// <param name="sigma">The spread of the blur.</param>
/// <returns>The Gaussian G(x)</returns> /// <returns>The Gaussian G(x)</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static float Gaussian(float x, float sigma) public static float Gaussian(float x, float sigma)
{ {
const float Numerator = 1.0f; const float Numerator = 1.0f;
@ -133,7 +142,7 @@ namespace SixLabors.ImageSharp
/// <returns> /// <returns>
/// The sine cardinal of <paramref name="f" />. /// The sine cardinal of <paramref name="f" />.
/// </returns> /// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static float SinC(float f) public static float SinC(float f)
{ {
if (MathF.Abs(f) > Constants.Epsilon) if (MathF.Abs(f) > Constants.Epsilon)
@ -156,7 +165,7 @@ namespace SixLabors.ImageSharp
/// <returns> /// <returns>
/// The <see cref="float"/>. /// The <see cref="float"/>.
/// </returns> /// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static float GetBcValue(float x, float b, float c) public static float GetBcValue(float x, float b, float c)
{ {
if (x < 0F) if (x < 0F)
@ -192,7 +201,7 @@ namespace SixLabors.ImageSharp
/// <returns> /// <returns>
/// The bounding <see cref="Rectangle"/>. /// The bounding <see cref="Rectangle"/>.
/// </returns> /// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(InliningOptions.ShortMethod)]
public static Rectangle GetBoundingRectangle(Point topLeft, Point bottomRight) => new Rectangle(topLeft.X, topLeft.Y, bottomRight.X - topLeft.X, bottomRight.Y - topLeft.Y); public static Rectangle GetBoundingRectangle(Point topLeft, Point bottomRight) => new Rectangle(topLeft.X, topLeft.Y, bottomRight.X - topLeft.X, bottomRight.Y - topLeft.Y);
/// <summary> /// <summary>

91
src/ImageSharp/Common/Helpers/SimdUtils.BasicIntrinsics256.cs

@ -21,28 +21,58 @@ namespace SixLabors.ImageSharp
public static bool IsAvailable { get; } = IsAvx2CompatibleArchitecture; public static bool IsAvailable { get; } = IsAvx2CompatibleArchitecture;
/// <summary> /// <summary>
/// <see cref="BulkConvertByteToNormalizedFloat"/> as much elements as possible, slicing them down (keeping the remainder). /// <see cref="BulkConvertByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce( internal static void BulkConvertByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source, ref ReadOnlySpan<byte> source,
ref Span<float> dest) ref Span<float> dest)
{ {
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!"); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
if (IsAvailable) if (!IsAvailable)
{ {
int remainder = source.Length % 8; return;
int alignedCount = source.Length - remainder; }
if (alignedCount > 0) int remainder = ImageMaths.Modulo8(source.Length);
{ int adjustedCount = source.Length - remainder;
BulkConvertByteToNormalizedFloat(
source.Slice(0, alignedCount), if (adjustedCount > 0)
dest.Slice(0, alignedCount)); {
BulkConvertByteToNormalizedFloat(
source = source.Slice(alignedCount); source.Slice(0, adjustedCount),
dest = dest.Slice(alignedCount); dest.Slice(0, adjustedCount));
}
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
}
}
/// <summary>
/// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
if (!IsAvailable)
{
return;
}
int remainder = ImageMaths.Modulo8(source.Length);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount));
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
} }
} }
@ -57,7 +87,7 @@ namespace SixLabors.ImageSharp
{ {
GuardAvx2(nameof(BulkConvertByteToNormalizedFloat)); GuardAvx2(nameof(BulkConvertByteToNormalizedFloat));
DebugGuard.IsTrue((dest.Length % 8) == 0, nameof(source), "dest.Length should be divisable by 8!"); DebugGuard.IsTrue(ImageMaths.Modulo8(dest.Length) == 0, nameof(source), "dest.Length should be divisable by 8!");
var bVec = new Vector<float>(256.0f / 255.0f); var bVec = new Vector<float>(256.0f / 255.0f);
var magicFloat = new Vector<float>(32768.0f); var magicFloat = new Vector<float>(32768.0f);
@ -93,30 +123,6 @@ namespace SixLabors.ImageSharp
} }
} }
/// <summary>
/// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as much elements as possible, slicing them down (keeping the remainder).
/// </summary>
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
if (IsAvailable)
{
int remainder = source.Length % Vector<byte>.Count;
int alignedCount = source.Length - remainder;
if (alignedCount > 0)
{
BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, alignedCount), dest.Slice(0, alignedCount));
source = source.Slice(alignedCount);
dest = dest.Slice(alignedCount);
}
}
}
/// <summary> /// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> which is faster on older runtimes. /// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> which is faster on older runtimes.
/// </summary> /// </summary>
@ -124,7 +130,7 @@ namespace SixLabors.ImageSharp
{ {
GuardAvx2(nameof(BulkConvertNormalizedFloatToByteClampOverflows)); GuardAvx2(nameof(BulkConvertNormalizedFloatToByteClampOverflows));
DebugGuard.IsTrue((source.Length % 8) == 0, nameof(source), "source.Length should be divisible by 8!"); DebugGuard.IsTrue(ImageMaths.Modulo8(source.Length) == 0, nameof(source), "source.Length should be divisible by 8!");
if (source.Length == 0) if (source.Length == 0)
{ {
@ -174,7 +180,10 @@ namespace SixLabors.ImageSharp
{ {
GuardAvx2(nameof(BulkConvertNormalizedFloatToByte)); GuardAvx2(nameof(BulkConvertNormalizedFloatToByte));
DebugGuard.IsTrue((source.Length % Vector<float>.Count) == 0, nameof(source), "source.Length should be divisable by Vector<float>.Count!"); DebugGuard.IsTrue(
ImageMaths.Modulo8(source.Length) == 0,
nameof(source),
"source.Length should be divisible by 8!");
if (source.Length == 0) if (source.Length == 0)
{ {

96
src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs

@ -28,27 +28,58 @@ namespace SixLabors.ImageSharp
#endif #endif
/// <summary> /// <summary>
/// <see cref="BulkConvertByteToNormalizedFloat"/> as much elements as possible, slicing them down (keeping the remainder). /// <see cref="BulkConvertByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[Conditional("NETCOREAPP2_1")] [MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce( internal static void BulkConvertByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source, ref ReadOnlySpan<byte> source,
ref Span<float> dest) ref Span<float> dest)
{ {
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!"); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
if (IsAvailable) if (!IsAvailable)
{ {
int remainder = source.Length % Vector<byte>.Count; return;
int alignedCount = source.Length - remainder; }
int remainder = ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);
int adjustedCount = source.Length - remainder;
if (alignedCount > 0) if (adjustedCount > 0)
{ {
BulkConvertByteToNormalizedFloat(source.Slice(0, alignedCount), dest.Slice(0, alignedCount)); BulkConvertByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount));
source = source.Slice(alignedCount); source = source.Slice(adjustedCount);
dest = dest.Slice(alignedCount); dest = dest.Slice(adjustedCount);
} }
}
/// <summary>
/// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
if (!IsAvailable)
{
return;
}
int remainder = ImageMaths.ModuloP2(source.Length, Vector<byte>.Count);
int adjustedCount = source.Length - remainder;
if (adjustedCount > 0)
{
BulkConvertNormalizedFloatToByteClampOverflows(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount));
source = source.Slice(adjustedCount);
dest = dest.Slice(adjustedCount);
} }
} }
@ -58,7 +89,7 @@ namespace SixLabors.ImageSharp
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{ {
DebugGuard.IsTrue( DebugGuard.IsTrue(
dest.Length % Vector<byte>.Count == 0, ImageMaths.ModuloP2(dest.Length, Vector<byte>.Count) == 0,
nameof(source), nameof(source),
"dest.Length should be divisible by Vector<byte>.Count!"); "dest.Length should be divisible by Vector<byte>.Count!");
@ -67,8 +98,6 @@ namespace SixLabors.ImageSharp
ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source)); ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest)); ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));
var scale = new Vector<float>(1f / 255f);
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
{ {
Vector<byte> b = Unsafe.Add(ref sourceBase, i); Vector<byte> b = Unsafe.Add(ref sourceBase, i);
@ -77,10 +106,10 @@ namespace SixLabors.ImageSharp
Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1); Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1);
Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3); Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3);
Vector<float> f0 = ConvertToSingle(w0, scale); Vector<float> f0 = ConvertToSingle(w0);
Vector<float> f1 = ConvertToSingle(w1, scale); Vector<float> f1 = ConvertToSingle(w1);
Vector<float> f2 = ConvertToSingle(w2, scale); Vector<float> f2 = ConvertToSingle(w2);
Vector<float> f3 = ConvertToSingle(w3, scale); Vector<float> f3 = ConvertToSingle(w3);
ref Vector<float> d = ref Unsafe.Add(ref destBase, i * 4); ref Vector<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0; d = f0;
@ -90,31 +119,6 @@ namespace SixLabors.ImageSharp
} }
} }
/// <summary>
/// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as much elements as possible, slicing them down (keeping the remainder).
/// </summary>
[Conditional("NETCOREAPP2_1")]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
ref ReadOnlySpan<float> source,
ref Span<byte> dest)
{
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
if (IsAvailable)
{
int remainder = source.Length % Vector<byte>.Count;
int alignedCount = source.Length - remainder;
if (alignedCount > 0)
{
BulkConvertNormalizedFloatToByteClampOverflows(source.Slice(0, alignedCount), dest.Slice(0, alignedCount));
source = source.Slice(alignedCount);
dest = dest.Slice(alignedCount);
}
}
}
/// <summary> /// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime. /// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime.
/// </summary> /// </summary>
@ -123,7 +127,7 @@ namespace SixLabors.ImageSharp
Span<byte> dest) Span<byte> dest)
{ {
DebugGuard.IsTrue( DebugGuard.IsTrue(
dest.Length % Vector<byte>.Count == 0, ImageMaths.ModuloP2(dest.Length, Vector<byte>.Count) == 0,
nameof(dest), nameof(dest),
"dest.Length should be divisible by Vector<byte>.Count!"); "dest.Length should be divisible by Vector<byte>.Count!");
@ -168,11 +172,11 @@ namespace SixLabors.ImageSharp
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToSingle(Vector<uint> u, Vector<float> scale) private static Vector<float> ConvertToSingle(Vector<uint> u)
{ {
Vector<int> vi = Vector.AsVectorInt32(u); Vector<int> vi = Vector.AsVectorInt32(u);
Vector<float> v = Vector.ConvertToSingle(vi); Vector<float> v = Vector.ConvertToSingle(vi);
v *= scale; v *= new Vector<float>(1f / 255f);
return v; return v;
} }
} }

56
src/ImageSharp/Common/Helpers/SimdUtils.FallbackIntrinsics128.cs

@ -1,71 +1,81 @@
using System; // Copyright (c) Six Labors and contributors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
// ReSharper disable MemberHidesStaticFromOuterClass
namespace SixLabors.ImageSharp namespace SixLabors.ImageSharp
{ {
internal static partial class SimdUtils internal static partial class SimdUtils
{ {
/// <summary> /// <summary>
/// Fallback implementation based on <see cref="Vector4"/> (128bit). /// Fallback implementation based on <see cref="Vector4"/> (128bit).
/// For <see cref="Vector4"/>, efficient software fallback implementations are present /// For <see cref="Vector4"/>, efficient software fallback implementations are present,
/// + maybe even mono can emit intrinsics for that type :P /// and we hope that even mono's JIT is able to emit SIMD instructions for that type :P
/// </summary> /// </summary>
public static class FallbackIntrinsics128 public static class FallbackIntrinsics128
{ {
/// <summary> /// <summary>
/// <see cref="BulkConvertByteToNormalizedFloat"/> as much elements as possible, slicing them down (keeping the remainder). /// <see cref="BulkConvertByteToNormalizedFloat"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloatReduce( internal static void BulkConvertByteToNormalizedFloatReduce(
ref ReadOnlySpan<byte> source, ref ReadOnlySpan<byte> source,
ref Span<float> dest) ref Span<float> dest)
{ {
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!"); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
int remainder = source.Length % 4; int remainder = ImageMaths.Modulo4(source.Length);
int alignedCount = source.Length - remainder; int adjustedCount = source.Length - remainder;
if (alignedCount > 0) if (adjustedCount > 0)
{ {
BulkConvertByteToNormalizedFloat( BulkConvertByteToNormalizedFloat(
source.Slice(0, alignedCount), source.Slice(0, adjustedCount),
dest.Slice(0, alignedCount)); dest.Slice(0, adjustedCount));
source = source.Slice(alignedCount); source = source.Slice(adjustedCount);
dest = dest.Slice(alignedCount); dest = dest.Slice(adjustedCount);
} }
} }
/// <summary> /// <summary>
/// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as much elements as possible, slicing them down (keeping the remainder). /// <see cref="BulkConvertNormalizedFloatToByteClampOverflows"/> as many elements as possible, slicing them down (keeping the remainder).
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce( internal static void BulkConvertNormalizedFloatToByteClampOverflowsReduce(
ref ReadOnlySpan<float> source, ref ReadOnlySpan<float> source,
ref Span<byte> dest) ref Span<byte> dest)
{ {
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!"); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
int remainder = source.Length % 4; int remainder = ImageMaths.Modulo4(source.Length);
int alignedCount = source.Length - remainder; int adjustedCount = source.Length - remainder;
if (alignedCount > 0) if (adjustedCount > 0)
{ {
BulkConvertNormalizedFloatToByteClampOverflows( BulkConvertNormalizedFloatToByteClampOverflows(
source.Slice(0, alignedCount), source.Slice(0, adjustedCount),
dest.Slice(0, alignedCount)); dest.Slice(0, adjustedCount));
source = source.Slice(alignedCount); source = source.Slice(adjustedCount);
dest = dest.Slice(alignedCount); dest = dest.Slice(adjustedCount);
} }
} }
/// <summary> /// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/> using <see cref="Vector4"/>. /// Implementation of <see cref="SimdUtils.BulkConvertByteToNormalizedFloat"/> using <see cref="Vector4"/>.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ColdPath)]
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{ {
DebugGuard.IsTrue((dest.Length % 4) == 0, nameof(dest), "dest.Length should be divisible by 4!"); DebugGuard.IsTrue(
ImageMaths.Modulo4(dest.Length) == 0,
nameof(dest),
"dest.Length should be divisible by 4!");
int count = dest.Length / 4; int count = dest.Length / 4;
if (count == 0) if (count == 0)
@ -94,11 +104,15 @@ namespace SixLabors.ImageSharp
/// <summary> /// <summary>
/// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> using <see cref="Vector4"/>. /// Implementation of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/> using <see cref="Vector4"/>.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ColdPath)]
internal static void BulkConvertNormalizedFloatToByteClampOverflows( internal static void BulkConvertNormalizedFloatToByteClampOverflows(
ReadOnlySpan<float> source, ReadOnlySpan<float> source,
Span<byte> dest) Span<byte> dest)
{ {
DebugGuard.IsTrue((source.Length % 4) == 0, nameof(source), "source.Length should be divisible by 4!"); DebugGuard.IsTrue(
ImageMaths.Modulo4(source.Length) == 0,
nameof(source),
"source.Length should be divisible by 4!");
int count = source.Length / 4; int count = source.Length / 4;
if (count == 0) if (count == 0)

84
src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System; using System;
using System.Diagnostics;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
@ -61,25 +62,22 @@ namespace SixLabors.ImageSharp
/// </summary> /// </summary>
/// <param name="source">The source span of bytes</param> /// <param name="source">The source span of bytes</param>
/// <param name="dest">The destination span of floats</param> /// <param name="dest">The destination span of floats</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest) internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{ {
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!"); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
#if NETCOREAPP2_1
ExtendedIntrinsics.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest); ExtendedIntrinsics.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
#else
BasicIntrinsics256.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest); BasicIntrinsics256.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
#endif
FallbackIntrinsics128.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest); FallbackIntrinsics128.BulkConvertByteToNormalizedFloatReduce(ref source, ref dest);
// Deal with the remainder: // Deal with the remainder:
int count = source.Length; if (source.Length > 0)
if (count > 0)
{ {
// TODO: Do we need to optimize anything on this? (There are at most 7 remainders) ConverByteToNormalizedFloatRemainder(source, dest);
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref float dBase = ref MemoryMarshal.GetReference(dest);
for (int i = 0; i < count; i++)
{
Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, i) / 255f;
}
} }
} }
@ -91,35 +89,71 @@ namespace SixLabors.ImageSharp
/// </summary> /// </summary>
/// <param name="source">The source span of floats</param> /// <param name="source">The source span of floats</param>
/// <param name="dest">The destination span of bytes</param> /// <param name="dest">The destination span of bytes</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest) internal static void BulkConvertNormalizedFloatToByteClampOverflows(ReadOnlySpan<float> source, Span<byte> dest)
{ {
DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!"); DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same size!");
#if NETCOREAPP2_1
ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest); ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
#else
BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest); BasicIntrinsics256.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
#endif
FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest); FallbackIntrinsics128.BulkConvertNormalizedFloatToByteClampOverflowsReduce(ref source, ref dest);
// Deal with the remainder: // Deal with the remainder:
int count = source.Length; if (source.Length > 0)
if (count > 0)
{ {
ref float sBase = ref MemoryMarshal.GetReference(source); ConvertNormalizedFloatToByteRemainder(source, dest);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
for (int i = 0; i < count; i++)
{
// TODO: Do we need to optimize anything on this? (There are at most 7 remainders)
float f = Unsafe.Add(ref sBase, i);
f *= 255f;
f += 0.5f;
f = MathF.Max(0, f);
f = MathF.Min(255f, f);
Unsafe.Add(ref dBase, i) = (byte)f;
}
} }
} }
/// <summary>
/// Converts the trailing bytes left over after the bulk passes into floats by dividing each by 255.
/// Only lengths 1, 2 and 3 are handled; any other length leaves <paramref name="dest"/> untouched.
/// </summary>
/// <param name="source">The remaining source bytes.</param>
/// <param name="dest">The destination floats, written at the same indices as <paramref name="source"/>.</param>
[MethodImpl(InliningOptions.ColdPath)]
private static void ConverByteToNormalizedFloatRemainder(ReadOnlySpan<byte> source, Span<float> dest)
{
    ref byte src = ref MemoryMarshal.GetReference(source);
    ref float dst = ref MemoryMarshal.GetReference(dest);

    int length = source.Length;
    if (length < 1 || length > 3)
    {
        return;
    }

    // The caller has already run the 4-element fallback pass, so no more than 3 elements
    // are expected here. The conversions are unrolled by hand (highest index first)
    // instead of looping, to keep the instruction count low.
    if (length == 3)
    {
        Unsafe.Add(ref dst, 2) = Unsafe.Add(ref src, 2) / 255f;
    }

    if (length >= 2)
    {
        Unsafe.Add(ref dst, 1) = Unsafe.Add(ref src, 1) / 255f;
    }

    dst = src / 255f;
}
/// <summary>
/// Converts the trailing floats left over after the bulk passes into bytes, one call to
/// <see cref="ConvertToByte"/> per element.
/// Only lengths 1, 2 and 3 are handled; any other length leaves <paramref name="dest"/> untouched.
/// </summary>
/// <param name="source">The remaining source floats.</param>
/// <param name="dest">The destination bytes, written at the same indices as <paramref name="source"/>.</param>
[MethodImpl(InliningOptions.ColdPath)]
private static void ConvertNormalizedFloatToByteRemainder(ReadOnlySpan<float> source, Span<byte> dest)
{
    ref float src = ref MemoryMarshal.GetReference(source);
    ref byte dst = ref MemoryMarshal.GetReference(dest);

    int length = source.Length;
    if (length < 1 || length > 3)
    {
        return;
    }

    // Hand-unrolled, highest index first; each step is independent of the others.
    if (length == 3)
    {
        Unsafe.Add(ref dst, 2) = ConvertToByte(Unsafe.Add(ref src, 2));
    }

    if (length >= 2)
    {
        Unsafe.Add(ref dst, 1) = ConvertToByte(Unsafe.Add(ref src, 1));
    }

    dst = ConvertToByte(src);
}
/// <summary>
/// Scales <paramref name="f"/> by 255, adds 0.5, clamps the result to [0, 255]
/// and casts it to a <see cref="byte"/>.
/// </summary>
/// <param name="f">The float value to convert.</param>
/// <returns>The resulting <see cref="byte"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static byte ConvertToByte(float f)
{
    float scaled = (f * 255f) + 0.5f;
    return (byte)ImageMaths.Clamp(scaled, 0, 255f);
}
[Conditional("DEBUG")]
private static void GuardAvx2(string operation) private static void GuardAvx2(string operation)
{ {
if (!IsAvx2CompatibleArchitecture) if (!IsAvx2CompatibleArchitecture)

50
tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs

@ -99,30 +99,30 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
} }
// RESULTS (2018 October): // RESULTS (2018 October):
// Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated | // Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
// ------------------------------------------------------------------ |-------- |------ |-------------:|-------------:|-----------:|-------:|---------:|-------:|----------:| // ---------------------------- |-------- |------ |-------------:|-------------:|------------:|-------:|---------:|-------:|----------:|
// BasicBulk | Clr | 64 | 581.62 ns | 33.625 ns | 1.8999 ns | 2.27 | 0.02 | - | 0 B | // FallbackIntrinsics128 | Clr | 64 | 340.38 ns | 22.319 ns | 1.2611 ns | 1.41 | 0.01 | - | 0 B |
// BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 64 | 256.66 ns | 45.153 ns | 2.5512 ns | 1.00 | 0.00 | - | 0 B | // BasicIntrinsics256 | Clr | 64 | 240.79 ns | 11.421 ns | 0.6453 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 64 | 201.92 ns | 30.161 ns | 1.7042 ns | 0.79 | 0.01 | - | 0 B | // ExtendedIntrinsic | Clr | 64 | 199.09 ns | 124.239 ns | 7.0198 ns | 0.83 | 0.02 | - | 0 B |
// PixelOperations_Base | Clr | 64 | 665.01 ns | 13.032 ns | 0.7363 ns | 2.59 | 0.02 | 0.0067 | 24 B | // PixelOperations_Base | Clr | 64 | 647.99 ns | 24.003 ns | 1.3562 ns | 2.69 | 0.01 | 0.0067 | 24 B |
// PixelOperations_Specialized | Clr | 64 | 295.14 ns | 26.335 ns | 1.4880 ns | 1.15 | 0.01 | - | 0 B | // PixelOperations_Specialized | Clr | 64 | 259.79 ns | 13.391 ns | 0.7566 ns | 1.08 | 0.00 | - | 0 B | <--- ceremonial overhead has been minimized!
// | | | | | | | | | | // | | | | | | | | | |
// BasicBulk | Core | 64 | 513.22 ns | 91.110 ns | 5.1479 ns | 3.19 | 0.03 | - | 0 B | // FallbackIntrinsics128 | Core | 64 | 234.64 ns | 12.320 ns | 0.6961 ns | 1.58 | 0.00 | - | 0 B |
// BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows | Core | 64 | 160.76 ns | 2.760 ns | 0.1559 ns | 1.00 | 0.00 | - | 0 B | // BasicIntrinsics256 | Core | 64 | 148.87 ns | 2.794 ns | 0.1579 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Core | 64 | 95.98 ns | 10.077 ns | 0.5694 ns | 0.60 | 0.00 | - | 0 B | // ExtendedIntrinsic | Core | 64 | 94.06 ns | 10.015 ns | 0.5659 ns | 0.63 | 0.00 | - | 0 B |
// PixelOperations_Base | Core | 64 | 591.74 ns | 49.856 ns | 2.8170 ns | 3.68 | 0.01 | 0.0067 | 24 B | // PixelOperations_Base | Core | 64 | 573.52 ns | 31.865 ns | 1.8004 ns | 3.85 | 0.01 | 0.0067 | 24 B |
// PixelOperations_Specialized | Core | 64 | 149.11 ns | 4.485 ns | 0.2534 ns | 0.93 | 0.00 | - | 0 B | // PixelOperations_Specialized | Core | 64 | 117.21 ns | 13.264 ns | 0.7494 ns | 0.79 | 0.00 | - | 0 B |
// | | | | | | | | | | // | | | | | | | | | |
// BasicBulk | Clr | 2048 | 15,345.85 ns | 1,213.551 ns | 68.5679 ns | 3.90 | 0.01 | - | 0 B | // FallbackIntrinsics128 | Clr | 2048 | 6,735.93 ns | 2,139.340 ns | 120.8767 ns | 1.71 | 0.03 | - | 0 B |
// BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 3,939.49 ns | 71.101 ns | 4.0173 ns | 1.00 | 0.00 | - | 0 B | // BasicIntrinsics256 | Clr | 2048 | 3,929.29 ns | 334.027 ns | 18.8731 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 2,272.61 ns | 110.671 ns | 6.2531 ns | 0.58 | 0.00 | - | 0 B | // ExtendedIntrinsic | Clr | 2048 | 2,226.01 ns | 130.525 ns | 7.3749 ns |!! 0.57 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock!
// PixelOperations_Base | Clr | 2048 | 17,422.47 ns | 811.733 ns | 45.8644 ns | 4.42 | 0.01 | - | 24 B | // PixelOperations_Base | Clr | 2048 | 16,760.84 ns | 367.800 ns | 20.7814 ns | 4.27 | 0.02 | - | 24 B | <--- Extra copies using "Vector4 TPixel.ToVector4()"
// PixelOperations_Specialized | Clr | 2048 | 3,984.26 ns | 110.352 ns | 6.2351 ns | 1.01 | 0.00 | - | 0 B | // PixelOperations_Specialized | Clr | 2048 | 3,986.03 ns | 237.238 ns | 13.4044 ns | 1.01 | 0.00 | - | 0 B | <--- can't yet detect whether ExtendedIntrinsics are available :(
// | | | | | | | | | | // | | | | | | | | | |
// BasicBulk | Core | 2048 | 14,950.43 ns | 699.309 ns | 39.5123 ns | 3.76 | 0.02 | - | 0 B | // FallbackIntrinsics128 | Core | 2048 | 6,644.65 ns | 2,677.090 ns | 151.2605 ns | 1.69 | 0.05 | - | 0 B |
// BasicIntrinsics256_BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 3,978.28 ns | 481.105 ns | 27.1833 ns | 1.00 | 0.00 | - | 0 B | // BasicIntrinsics256 | Core | 2048 | 3,923.70 ns | 1,971.760 ns | 111.4081 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 2,169.54 ns | 75.606 ns | 4.2719 ns | !!0.55!| 0.00 | - | 0 B | // ExtendedIntrinsic | Core | 2048 | 2,092.32 ns | 375.657 ns | 21.2253 ns |!! 0.53 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock!
// PixelOperations_Base | Core | 2048 | 18,403.62 ns | 1,494.056 ns | 84.4169 ns | 4.63 | 0.03 | - | 24 B | // PixelOperations_Base | Core | 2048 | 16,875.73 ns | 1,271.957 ns | 71.8679 ns | 4.30 | 0.10 | - | 24 B |
// PixelOperations_Specialized | Core | 2048 | 2,227.60 ns | 486.761 ns | 27.5029 ns | !!0.56!| 0.01 | - | 0 B | // PixelOperations_Specialized | Core | 2048 | 2,129.92 ns | 262.888 ns | 14.8537 ns |!! 0.54 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock!
} }
} }

50
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs

@ -191,30 +191,30 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
// RESULTS (2018 October): // RESULTS (2018 October):
// //
// Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated | // Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
// ---------------------------------------------------- |-------- |------ |------------:|-------------:|-----------:|-------:|---------:|-------:|----------:| // ---------------------------- |-------- |------ |------------:|-------------:|------------:|-------:|---------:|-------:|----------:|
// BasicBulk | Clr | 64 | 267.40 ns | 30.711 ns | 1.7352 ns | 1.07 | 0.01 | - | 0 B | // FallbackIntrinsics128 | Clr | 64 | 287.62 ns | 6.026 ns | 0.3405 ns | 1.19 | 0.00 | - | 0 B |
// BasicIntrinsics256_BulkConvertByteToNormalizedFloat | Clr | 64 | 249.97 ns | 33.838 ns | 1.9119 ns | 1.00 | 0.00 | - | 0 B | // BasicIntrinsics256 | Clr | 64 | 240.83 ns | 10.585 ns | 0.5981 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsics_BulkConvertByteToNormalizedFloat | Clr | 64 | 176.97 ns | 5.221 ns | 0.2950 ns | 0.71 | 0.00 | - | 0 B | // ExtendedIntrinsics | Clr | 64 | 168.28 ns | 11.478 ns | 0.6485 ns | 0.70 | 0.00 | - | 0 B |
// PixelOperations_Base | Clr | 64 | 349.70 ns | 104.331 ns | 5.8949 ns | 1.40 | 0.02 | 0.0072 | 24 B | // PixelOperations_Base | Clr | 64 | 334.08 ns | 38.048 ns | 2.1498 ns | 1.39 | 0.01 | 0.0072 | 24 B |
// PixelOperations_Specialized | Clr | 64 | 288.31 ns | 26.833 ns | 1.5161 ns | 1.15 | 0.01 | - | 0 B | // PixelOperations_Specialized | Clr | 64 | 255.41 ns | 10.939 ns | 0.6181 ns | 1.06 | 0.00 | - | 0 B | <--- ceremonial overhead has been minimized!
// | | | | | | | | | | // | | | | | | | | | |
// BasicBulk | Core | 64 | 185.36 ns | 30.051 ns | 1.6979 ns | 1.26 | 0.01 | - | 0 B | // FallbackIntrinsics128 | Core | 64 | 183.29 ns | 8.931 ns | 0.5046 ns | 1.32 | 0.00 | - | 0 B |
// BasicIntrinsics256_BulkConvertByteToNormalizedFloat | Core | 64 | 146.84 ns | 12.674 ns | 0.7161 ns | 1.00 | 0.00 | - | 0 B | // BasicIntrinsics256 | Core | 64 | 139.18 ns | 7.633 ns | 0.4313 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsics_BulkConvertByteToNormalizedFloat | Core | 64 | 67.31 ns | 2.542 ns | 0.1436 ns | 0.46 | 0.00 | - | 0 B | // ExtendedIntrinsics | Core | 64 | 66.29 ns | 16.366 ns | 0.9247 ns | 0.48 | 0.01 | - | 0 B |
// PixelOperations_Base | Core | 64 | 272.03 ns | 94.419 ns | 5.3348 ns | 1.85 | 0.03 | 0.0072 | 24 B | // PixelOperations_Base | Core | 64 | 257.75 ns | 16.959 ns | 0.9582 ns | 1.85 | 0.01 | 0.0072 | 24 B |
// PixelOperations_Specialized | Core | 64 | 121.91 ns | 31.477 ns | 1.7785 ns | 0.83 | 0.01 | - | 0 B | // PixelOperations_Specialized | Core | 64 | 90.14 ns | 9.955 ns | 0.5625 ns | 0.65 | 0.00 | - | 0 B |
// | | | | | | | | | | // | | | | | | | | | |
// BasicBulk | Clr | 2048 | 5,133.04 ns | 284.052 ns | 16.0494 ns | 1.21 | 0.01 | - | 0 B | // FallbackIntrinsics128 | Clr | 2048 | 5,011.84 ns | 347.991 ns | 19.6621 ns | 1.22 | 0.01 | - | 0 B |
// BasicIntrinsics256_BulkConvertByteToNormalizedFloat | Clr | 2048 | 4,248.58 ns | 1,095.887 ns | 61.9196 ns | 1.00 | 0.00 | - | 0 B | // BasicIntrinsics256 | Clr | 2048 | 4,119.35 ns | 720.153 ns | 40.6900 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsics_BulkConvertByteToNormalizedFloat | Clr | 2048 | 1,214.02 ns | 184.349 ns | 10.4160 ns | 0.29 | 0.00 | - | 0 B | // ExtendedIntrinsics | Clr | 2048 | 1,195.29 ns | 164.389 ns | 9.2883 ns |!! 0.29 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock!
// PixelOperations_Base | Clr | 2048 | 7,096.04 ns | 362.350 ns | 20.4734 ns | 1.67 | 0.02 | - | 24 B | // PixelOperations_Base | Clr | 2048 | 6,820.58 ns | 823.433 ns | 46.5255 ns | 1.66 | 0.02 | - | 24 B |
// PixelOperations_Specialized | Clr | 2048 | 4,314.19 ns | 204.964 ns | 11.5809 ns | 1.02 | 0.01 | - | 0 B | // PixelOperations_Specialized | Clr | 2048 | 4,203.53 ns | 176.714 ns | 9.9847 ns | 1.02 | 0.01 | - | 0 B | <--- can't yet detect whether ExtendedIntrinsics are available :(
// | | | | | | | | | | // | | | | | | | | | |
// BasicBulk | Core | 2048 | 5,038.38 ns | 223.282 ns | 12.6158 ns | 1.20 | 0.01 | - | 0 B | // FallbackIntrinsics128 | Core | 2048 | 5,017.89 ns | 4,021.533 ns | 227.2241 ns | 1.24 | 0.05 | - | 0 B |
// BasicIntrinsics256_BulkConvertByteToNormalizedFloat | Core | 2048 | 4,199.17 ns | 897.985 ns | 50.7378 ns | 1.00 | 0.00 | - | 0 B | // BasicIntrinsics256 | Core | 2048 | 4,046.51 ns | 1,150.390 ns | 64.9992 ns | 1.00 | 0.00 | - | 0 B |
// ExtendedIntrinsics_BulkConvertByteToNormalizedFloat | Core | 2048 | 1,113.86 ns | 64.799 ns | 3.6613 ns | !!0.27!| 0.00 | - | 0 B | // ExtendedIntrinsics | Core | 2048 | 1,130.59 ns | 832.588 ns | 47.0427 ns |!! 0.28 | 0.01 | - | 0 B | <--- ExtendedIntrinsics rock!
// PixelOperations_Base | Core | 2048 | 7,015.00 ns | 920.083 ns | 51.9864 ns | 1.67 | 0.02 | - | 24 B | // PixelOperations_Base | Core | 2048 | 6,752.68 ns | 272.820 ns | 15.4148 ns | 1.67 | 0.02 | - | 24 B |
// PixelOperations_Specialized | Core | 2048 | 1,176.59 ns | 256.955 ns | 14.5184 ns | !!0.28!| 0.00 | - | 0 B | // PixelOperations_Specialized | Core | 2048 | 1,126.13 ns | 79.192 ns | 4.4745 ns |!! 0.28 | 0.00 | - | 0 B | <--- ExtendedIntrinsics rock!
} }
} }

8
tests/ImageSharp.Benchmarks/General/Abs.cs → tests/ImageSharp.Benchmarks/General/BasicMath/Abs.cs

@ -1,9 +1,9 @@
namespace SixLabors.ImageSharp.Benchmarks.General using System;
{
using System;
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
public class Abs public class Abs
{ {
[Params(-1, 1)] [Params(-1, 1)]

10
tests/ImageSharp.Benchmarks/General/Clamp.cs → tests/ImageSharp.Benchmarks/General/BasicMath/Clamp.cs

@ -3,13 +3,13 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
// </copyright> // </copyright>
namespace SixLabors.ImageSharp.Benchmarks.General using System;
{ using System.Runtime.CompilerServices;
using System;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
public class Clamp public class Clamp
{ {
[Params(-1, 0, 255, 256)] [Params(-1, 0, 255, 256)]

23
tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoConstant.cs

@ -0,0 +1,23 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Attributes.Jobs;
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
    /// <summary>
    /// Benchmarks the remainder operator with a constant divisor of 8
    /// against <c>ImageMaths.Modulo8</c>.
    /// </summary>
    [LongRunJob]
    public class ModuloPowerOfTwoConstant
    {
        // The operand is read from an instance field rather than written as a literal.
        private readonly int value = 42;

        /// <summary>
        /// Baseline: the built-in <c>%</c> operator.
        /// </summary>
        [Benchmark(Baseline = true)]
        public int Standard() => this.value % 8;

        /// <summary>
        /// Candidate: the helper from <c>ImageMaths</c>.
        /// </summary>
        [Benchmark]
        public int Bitwise() => ImageMaths.Modulo8(this.value);
    }
}

32
tests/ImageSharp.Benchmarks/General/BasicMath/ModuloPowerOfTwoVariable.cs

@ -0,0 +1,32 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Attributes.Jobs;
namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{
    /// <summary>
    /// Benchmarks the remainder operator with a divisor held in a field
    /// against <c>ImageMaths.ModuloP2</c>.
    /// </summary>
    [LongRunJob]
    public class ModuloPowerOfTwoVariable
    {
        // Both operands are read from instance fields rather than written as literals.
        private readonly int value = 42;

        private readonly int m = 32;

        /// <summary>
        /// Baseline: the built-in <c>%</c> operator.
        /// </summary>
        [Benchmark(Baseline = true)]
        public int Standard() => this.value % this.m;

        /// <summary>
        /// Candidate: the helper from <c>ImageMaths</c>.
        /// </summary>
        [Benchmark]
        public int Bitwise() => ImageMaths.ModuloP2(this.value, this.m);

        // RESULTS:
        //
        //    Method |      Mean |     Error |    StdDev |    Median | Scaled | ScaledSD |
        // --------- |----------:|----------:|----------:|----------:|-------:|---------:|
        //  Standard | 1.2465 ns | 0.0093 ns | 0.0455 ns | 1.2423 ns |   1.00 |     0.00 |
        //   Bitwise | 0.0265 ns | 0.0103 ns | 0.0515 ns | 0.0000 ns |   0.02 |     0.04 |
    }
}

3
tests/ImageSharp.Benchmarks/General/Pow.cs → tests/ImageSharp.Benchmarks/General/BasicMath/Pow.cs

@ -1,7 +1,8 @@
using System; using System;
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.General namespace SixLabors.ImageSharp.Benchmarks.General.BasicMath
{ {
public class Pow public class Pow
{ {

19
tests/ImageSharp.Benchmarks/General/Modulus.cs

@ -1,19 +0,0 @@
namespace SixLabors.ImageSharp.Benchmarks.General
{
    using BenchmarkDotNet.Attributes;

    /// <summary>
    /// Compares the remainder operator with a power-of-two divisor (256)
    /// against the equivalent bit mask (255).
    /// </summary>
    public class Modulus
    {
        // The operand lives in an instance field on purpose: "255 % 256" and "255 & 255"
        // are constant expressions that the C# compiler evaluates at compile time,
        // so benchmarking the literals would only measure returning the constant 255.
        private readonly int value = 255;

        /// <summary>
        /// Baseline: the built-in <c>%</c> operator.
        /// </summary>
        /// <returns>The remainder of the stored operand divided by 256.</returns>
        [Benchmark(Baseline = true, Description = "Standard Modulus using %")]
        public int StandardModulus()
        {
            return this.value % 256;
        }

        /// <summary>
        /// Candidate: masking with <c>&amp;</c>.
        /// </summary>
        /// <returns>The stored operand masked with 255.</returns>
        [Benchmark(Description = "Bitwise Modulus using &")]
        public int BitwiseModulus()
        {
            return this.value & 255;
        }
    }
}

17
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -264,13 +264,26 @@ namespace SixLabors.ImageSharp.Tests.Common
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count, TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(count,
(s, d) => SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span) (s, d) => SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span)
); );
// for small values, let's stress test the implementation a bit:
if (count > 0 && count < 10)
{
for (int i = 0; i < 20; i++)
{
TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
count,
(s, d) => SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(s.Span, d.Span),
i + 42);
}
}
} }
private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( private static void TestImpl_BulkConvertNormalizedFloatToByteClampOverflows(
int count, int count,
Action<Memory<float>, Memory<byte>> convert) Action<Memory<float>, Memory<byte>> convert, int seed = -1)
{ {
float[] source = new Random(count).GenerateRandomFloatArray(count, -0.1f, 1.2f); seed = seed > 0 ? seed : count;
float[] source = new Random(seed).GenerateRandomFloatArray(count, -0.2f, 1.2f);
byte[] expected = source.Select(NormalizedFloatToByte).ToArray(); byte[] expected = source.Select(NormalizedFloatToByte).ToArray();
byte[] actual = new byte[count]; byte[] actual = new byte[count];

94
tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs

@ -10,56 +10,70 @@ namespace SixLabors.ImageSharp.Tests.Helpers
public class ImageMathsTests public class ImageMathsTests
{ {
[Theory] [Theory]
[InlineData(0, 0)] [InlineData(0)]
[InlineData(1, 1)] [InlineData(1)]
[InlineData(2, 2)] [InlineData(2)]
[InlineData(3, 3)] [InlineData(3)]
[InlineData(4, 0)] [InlineData(4)]
[InlineData(100, 0)] [InlineData(100)]
[InlineData(123, 3)] [InlineData(123)]
[InlineData(53436353, 1)] [InlineData(53436353)]
public void Modulo4(int a, int expected) public void Modulo4(int x)
{ {
int actual = ImageMaths.Modulo4(a); int actual = ImageMaths.Modulo4(x);
Assert.Equal(expected, actual); Assert.Equal(x % 4, actual);
} }
[Theory] [Theory]
[InlineData(0, 0)] [InlineData(0)]
[InlineData(1, 1)] [InlineData(1)]
[InlineData(2)]
[InlineData(6)]
[InlineData(7)]
[InlineData(8)]
[InlineData(100)]
[InlineData(123)]
[InlineData(53436353)]
[InlineData(975)]
public void Modulo8(int x)
{
int actual = ImageMaths.Modulo8(x);
Assert.Equal(x % 8, actual);
}
[Theory]
[InlineData(0, 2)]
[InlineData(1, 2)]
[InlineData(2, 2)] [InlineData(2, 2)]
[InlineData(6, 6)] [InlineData(0, 4)]
[InlineData(7, 7)] [InlineData(3, 4)]
[InlineData(8, 0)] [InlineData(5, 4)]
[InlineData(100, 4)] [InlineData(5, 8)]
[InlineData(123, 3)] [InlineData(8, 8)]
[InlineData(53436353, 1)] [InlineData(8, 16)]
[InlineData(975, 7)] [InlineData(15, 16)]
public void Modulo8(int a, int expected) [InlineData(17, 16)]
[InlineData(17, 32)]
[InlineData(31, 32)]
[InlineData(32, 32)]
[InlineData(33, 32)]
public void Modulo2P(int x, int m)
{ {
int actual = ImageMaths.Modulo8(a); int actual = ImageMaths.ModuloP2(x, m);
Assert.Equal(expected, actual); Assert.Equal(x % m, actual);
} }
[Theory] [Theory]
[InlineData(0, 2, 0)] [InlineData(0, 0, 0, 0)]
[InlineData(1, 2, 1)] [InlineData(0.5f, 0, 1, 0.5f)]
[InlineData(2, 2, 0)] [InlineData(-0.5f, -0.1f, 10, -0.1f)]
[InlineData(0, 4, 0)] [InlineData(-0.05f, -0.1f, 10, -0.05f)]
[InlineData(3, 4, 3)] [InlineData(9.9f, -0.1f, 10, 9.9f)]
[InlineData(5, 4, 1)] [InlineData(10f, -0.1f, 10, 10f)]
[InlineData(5, 8, 5)] [InlineData(10.1f, -0.1f, 10, 10f)]
[InlineData(8, 8, 0)] public void Clamp(float x, float min, float max, float expected)
[InlineData(8, 16, 8)]
[InlineData(15, 16, 15)]
[InlineData(17, 16, 1)]
[InlineData(17, 32, 17)]
[InlineData(31, 32, 31)]
[InlineData(32, 32, 0)]
[InlineData(33, 32, 1)]
public void Modulo2P(int a, int m, int expected)
{ {
int actual = ImageMaths.ModuloP2(a, m); float actual = ImageMaths.Clamp(x, min, max);
Assert.Equal(expected, actual); Assert.Equal(expected, actual);
} }

Loading…
Cancel
Save