mirror of https://github.com/SixLabors/ImageSharp
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1126 lines
44 KiB
1126 lines
44 KiB
// Copyright (c) Six Labors.
|
|
// Licensed under the Six Labors Split License.
|
|
|
|
using System.Numerics;
|
|
using System.Runtime.CompilerServices;
|
|
using System.Runtime.InteropServices;
|
|
using System.Runtime.Intrinsics;
|
|
using System.Runtime.Intrinsics.X86;
|
|
|
|
namespace SixLabors.ImageSharp;
|
|
|
|
/// <summary>
|
|
/// Provides optimized static methods for trigonometric, logarithmic,
|
|
/// and other common mathematical functions.
|
|
/// </summary>
|
|
internal static class Numerics
|
|
{
|
|
public const int BlendAlphaControl = 0b_10_00_10_00;
|
|
private const int ShuffleAlphaControl = 0b_11_11_11_11;
|
|
|
|
/// <summary>
|
|
/// Determine the Greatest CommonDivisor (GCD) of two numbers.
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int GreatestCommonDivisor(int a, int b)
|
|
{
|
|
while (b != 0)
|
|
{
|
|
int temp = b;
|
|
b = a % b;
|
|
a = temp;
|
|
}
|
|
|
|
return a;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Determine the Least Common Multiple (LCM) of two numbers.
|
|
/// See https://en.wikipedia.org/wiki/Least_common_multiple#Reduction_by_the_greatest_common_divisor.
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int LeastCommonMultiple(int a, int b)
|
|
=> a / GreatestCommonDivisor(a, b) * b;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 2
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int Modulo2(int x) => x & 1;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 4
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int Modulo4(int x) => x & 3;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 4
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static nint Modulo4(nint x) => x & 3;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 4
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static nuint Modulo4(nuint x) => x & 3;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 8
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int Modulo8(int x) => x & 7;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 8
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static nint Modulo8(nint x) => x & 7;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 64
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int Modulo64(int x) => x & 63;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 64
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static nint Modulo64(nint x) => x & 63;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 256
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int Modulo256(int x) => x & 255;
|
|
|
|
/// <summary>
|
|
/// Calculates <paramref name="x"/> % 256
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static nint Modulo256(nint x) => x & 255;
|
|
|
|
/// <summary>
|
|
/// Fast (x mod m) calculator, with the restriction that
|
|
/// <paramref name="m"/> should be power of 2.
|
|
/// </summary>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int ModuloP2(int x, int m) => x & (m - 1);
|
|
|
|
/// <summary>
|
|
/// Returns the absolute value of a 32-bit signed integer.
|
|
/// Uses bit shifting to speed up the operation compared to <see cref="Math"/>.
|
|
/// </summary>
|
|
/// <param name="x">
|
|
/// A number that is greater than <see cref="int.MinValue"/>, but less than
|
|
/// or equal to <see cref="int.MaxValue"/>
|
|
/// </param>
|
|
/// <returns>The <see cref="int"/></returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int Abs(int x)
|
|
{
|
|
int y = x >> 31;
|
|
return (x ^ y) - y;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns a specified number raised to the power of 2
|
|
/// </summary>
|
|
/// <param name="x">A single-precision floating-point number</param>
|
|
/// <returns>The number <paramref name="x" /> raised to the power of 2.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static float Pow2(float x) => x * x;
|
|
|
|
/// <summary>
|
|
/// Returns a specified number raised to the power of 3
|
|
/// </summary>
|
|
/// <param name="x">A single-precision floating-point number</param>
|
|
/// <returns>The number <paramref name="x" /> raised to the power of 3.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static float Pow3(float x) => x * x * x;
|
|
|
|
/// <summary>
|
|
/// Returns a specified number raised to the power of 3
|
|
/// </summary>
|
|
/// <param name="x">A double-precision floating-point number</param>
|
|
/// <returns>The number <paramref name="x" /> raised to the power of 3.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static double Pow3(double x) => x * x * x;
|
|
|
|
/// <summary>
|
|
/// Implementation of 1D Gaussian G(x) function
|
|
/// </summary>
|
|
/// <param name="x">The x provided to G(x).</param>
|
|
/// <param name="sigma">The spread of the blur.</param>
|
|
/// <returns>The Gaussian G(x)</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static float Gaussian(float x, float sigma)
|
|
{
|
|
const float numerator = 1.0f;
|
|
float denominator = MathF.Sqrt(2 * MathF.PI) * sigma;
|
|
|
|
float exponentNumerator = -x * x;
|
|
float exponentDenominator = 2 * Pow2(sigma);
|
|
|
|
float left = numerator / denominator;
|
|
float right = MathF.Exp(exponentNumerator / exponentDenominator);
|
|
|
|
return left * right;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the result of a normalized sine cardinal function for the given value.
|
|
/// SinC(x) = sin(pi*x)/(pi*x).
|
|
/// </summary>
|
|
/// <param name="f">A single-precision floating-point number to calculate the result for.</param>
|
|
/// <returns>
|
|
/// The sine cardinal of <paramref name="f" />.
|
|
/// </returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static float SinC(float f)
|
|
{
|
|
if (MathF.Abs(f) > Constants.Epsilon)
|
|
{
|
|
f *= MathF.PI;
|
|
float result = MathF.Sin(f) / f;
|
|
return MathF.Abs(result) < Constants.Epsilon ? 0F : result;
|
|
}
|
|
|
|
return 1F;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the value clamped to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="value">The value to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
/// <returns>The clamped <see cref="byte"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static byte Clamp(byte value, byte min, byte max)
|
|
{
|
|
// Order is important here as someone might set min to higher than max.
|
|
if (value > max)
|
|
{
|
|
return max;
|
|
}
|
|
|
|
if (value < min)
|
|
{
|
|
return min;
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the value clamped to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="value">The value to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
/// <returns>The clamped <see cref="uint"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static uint Clamp(uint value, uint min, uint max)
|
|
{
|
|
if (value > max)
|
|
{
|
|
return max;
|
|
}
|
|
|
|
if (value < min)
|
|
{
|
|
return min;
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the value clamped to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="value">The value to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
/// <returns>The clamped <see cref="uint"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int Clamp(int value, int min, int max)
|
|
{
|
|
if (value > max)
|
|
{
|
|
return max;
|
|
}
|
|
|
|
if (value < min)
|
|
{
|
|
return min;
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the value clamped to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="value">The value to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
/// <returns>The clamped <see cref="float"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static float Clamp(float value, float min, float max)
|
|
{
|
|
if (value > max)
|
|
{
|
|
return max;
|
|
}
|
|
|
|
if (value < min)
|
|
{
|
|
return min;
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the value clamped to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="value">The value to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
/// <returns>The clamped <see cref="double"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static double Clamp(double value, double min, double max)
|
|
{
|
|
if (value > max)
|
|
{
|
|
return max;
|
|
}
|
|
|
|
if (value < min)
|
|
{
|
|
return min;
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Returns the value clamped to the inclusive range of min and max.
|
|
/// 5x Faster than <see cref="Vector4.Clamp(Vector4, Vector4, Vector4)"/>
|
|
/// on platforms < NET 5.
|
|
/// </summary>
|
|
/// <param name="value">The value to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
/// <returns>The clamped <see cref="Vector4"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static Vector4 Clamp(Vector4 value, Vector4 min, Vector4 max)
|
|
=> Vector4.Min(Vector4.Max(value, min), max);
|
|
|
|
/// <summary>
|
|
/// Clamps the span values to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="span">The span containing the values to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void Clamp(Span<byte> span, byte min, byte max)
|
|
{
|
|
Span<byte> remainder = span[ClampReduce(span, min, max)..];
|
|
|
|
if (remainder.Length > 0)
|
|
{
|
|
ref byte remainderStart = ref MemoryMarshal.GetReference(remainder);
|
|
ref byte remainderEnd = ref Unsafe.Add(ref remainderStart, (uint)remainder.Length);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
|
|
{
|
|
remainderStart = Clamp(remainderStart, min, max);
|
|
|
|
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Clamps the span values to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="span">The span containing the values to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void Clamp(Span<uint> span, uint min, uint max)
|
|
{
|
|
Span<uint> remainder = span[ClampReduce(span, min, max)..];
|
|
|
|
if (remainder.Length > 0)
|
|
{
|
|
ref uint remainderStart = ref MemoryMarshal.GetReference(remainder);
|
|
ref uint remainderEnd = ref Unsafe.Add(ref remainderStart, (uint)remainder.Length);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
|
|
{
|
|
remainderStart = Clamp(remainderStart, min, max);
|
|
|
|
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Clamps the span values to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="span">The span containing the values to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void Clamp(Span<int> span, int min, int max)
|
|
{
|
|
Span<int> remainder = span[ClampReduce(span, min, max)..];
|
|
|
|
if (remainder.Length > 0)
|
|
{
|
|
ref int remainderStart = ref MemoryMarshal.GetReference(remainder);
|
|
ref int remainderEnd = ref Unsafe.Add(ref remainderStart, (uint)remainder.Length);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
|
|
{
|
|
remainderStart = Clamp(remainderStart, min, max);
|
|
|
|
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Clamps the span values to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="span">The span containing the values to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void Clamp(Span<float> span, float min, float max)
|
|
{
|
|
Span<float> remainder = span[ClampReduce(span, min, max)..];
|
|
|
|
if (remainder.Length > 0)
|
|
{
|
|
ref float remainderStart = ref MemoryMarshal.GetReference(remainder);
|
|
ref float remainderEnd = ref Unsafe.Add(ref remainderStart, (uint)remainder.Length);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
|
|
{
|
|
remainderStart = Clamp(remainderStart, min, max);
|
|
|
|
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Clamps the span values to the inclusive range of min and max.
|
|
/// </summary>
|
|
/// <param name="span">The span containing the values to clamp.</param>
|
|
/// <param name="min">The minimum inclusive value.</param>
|
|
/// <param name="max">The maximum inclusive value.</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void Clamp(Span<double> span, double min, double max)
|
|
{
|
|
Span<double> remainder = span[ClampReduce(span, min, max)..];
|
|
|
|
if (remainder.Length > 0)
|
|
{
|
|
ref double remainderStart = ref MemoryMarshal.GetReference(remainder);
|
|
ref double remainderEnd = ref Unsafe.Add(ref remainderStart, (uint)remainder.Length);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref remainderStart, ref remainderEnd))
|
|
{
|
|
remainderStart = Clamp(remainderStart, min, max);
|
|
|
|
remainderStart = ref Unsafe.Add(ref remainderStart, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
private static int ClampReduce<T>(Span<T> span, T min, T max)
|
|
where T : unmanaged
|
|
{
|
|
if (Vector.IsHardwareAccelerated && span.Length >= Vector<T>.Count)
|
|
{
|
|
int remainder = ModuloP2(span.Length, Vector<T>.Count);
|
|
int adjustedCount = span.Length - remainder;
|
|
|
|
if (adjustedCount > 0)
|
|
{
|
|
ClampImpl(span[..adjustedCount], min, max);
|
|
}
|
|
|
|
return adjustedCount;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
private static void ClampImpl<T>(Span<T> span, T min, T max)
|
|
where T : unmanaged
|
|
{
|
|
ref T sRef = ref MemoryMarshal.GetReference(span);
|
|
Vector<T> vmin = new(min);
|
|
Vector<T> vmax = new(max);
|
|
|
|
nint n = (nint)(uint)span.Length / Vector<T>.Count;
|
|
nint m = Modulo4(n);
|
|
nint u = n - m;
|
|
|
|
ref Vector<T> vs0 = ref Unsafe.As<T, Vector<T>>(ref MemoryMarshal.GetReference(span));
|
|
ref Vector<T> vs1 = ref Unsafe.Add(ref vs0, 1);
|
|
ref Vector<T> vs2 = ref Unsafe.Add(ref vs0, 2);
|
|
ref Vector<T> vs3 = ref Unsafe.Add(ref vs0, 3);
|
|
ref Vector<T> vsEnd = ref Unsafe.Add(ref vs0, u);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd))
|
|
{
|
|
vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax);
|
|
vs1 = Vector.Min(Vector.Max(vmin, vs1), vmax);
|
|
vs2 = Vector.Min(Vector.Max(vmin, vs2), vmax);
|
|
vs3 = Vector.Min(Vector.Max(vmin, vs3), vmax);
|
|
|
|
vs0 = ref Unsafe.Add(ref vs0, 4);
|
|
vs1 = ref Unsafe.Add(ref vs1, 4);
|
|
vs2 = ref Unsafe.Add(ref vs2, 4);
|
|
vs3 = ref Unsafe.Add(ref vs3, 4);
|
|
}
|
|
|
|
if (m > 0)
|
|
{
|
|
vs0 = ref vsEnd;
|
|
vsEnd = ref Unsafe.Add(ref vsEnd, m);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref vs0, ref vsEnd))
|
|
{
|
|
vs0 = Vector.Min(Vector.Max(vmin, vs0), vmax);
|
|
|
|
vs0 = ref Unsafe.Add(ref vs0, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Pre-multiplies the "x", "y", "z" components of a vector by its "w" component leaving the "w" component intact.
|
|
/// </summary>
|
|
/// <param name="source">The <see cref="Vector4"/> to premultiply</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void Premultiply(ref Vector4 source)
|
|
{
|
|
// Load into a local variable to prevent accessing the source from memory multiple times.
|
|
Vector4 src = source;
|
|
Vector4 alpha = PermuteW(src);
|
|
source = WithW(src * alpha, alpha);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Bulk variant of <see cref="Premultiply(ref Vector4)"/>
|
|
/// </summary>
|
|
/// <param name="vectors">The span of vectors</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void Premultiply(Span<Vector4> vectors)
|
|
{
|
|
if (Avx.IsSupported && vectors.Length >= 2)
|
|
{
|
|
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
|
|
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
|
|
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (uint)vectors.Length / 2u);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
|
|
{
|
|
Vector256<float> source = vectorsBase;
|
|
Vector256<float> alpha = Avx.Permute(source, ShuffleAlphaControl);
|
|
vectorsBase = Avx.Blend(Avx.Multiply(source, alpha), source, BlendAlphaControl);
|
|
vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
|
|
}
|
|
|
|
if (Modulo2(vectors.Length) != 0)
|
|
{
|
|
// Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
|
|
Premultiply(ref MemoryMarshal.GetReference(vectors[^1..]));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
|
|
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, (uint)vectors.Length);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
|
|
{
|
|
Premultiply(ref vectorsStart);
|
|
|
|
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Reverses the result of premultiplying a vector via <see cref="Premultiply(ref Vector4)"/>.
|
|
/// </summary>
|
|
/// <param name="source">The <see cref="Vector4"/> to premultiply</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void UnPremultiply(ref Vector4 source)
|
|
{
|
|
Vector4 alpha = PermuteW(source);
|
|
UnPremultiply(ref source, alpha);
|
|
}
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void UnPremultiply(ref Vector4 source, Vector4 alpha)
|
|
{
|
|
if (alpha == Vector4.Zero)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// Divide source by alpha if alpha is nonzero, otherwise set all components to match the source value
|
|
// Blend the result with the alpha vector to ensure that the alpha component is unchanged
|
|
source = WithW(source / alpha, alpha);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Bulk variant of <see cref="UnPremultiply(ref Vector4)"/>
|
|
/// </summary>
|
|
/// <param name="vectors">The span of vectors</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void UnPremultiply(Span<Vector4> vectors)
|
|
{
|
|
if (Avx.IsSupported && vectors.Length >= 2)
|
|
{
|
|
// Divide by 2 as 4 elements per Vector4 and 8 per Vector256<float>
|
|
ref Vector256<float> vectorsBase = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
|
|
ref Vector256<float> vectorsLast = ref Unsafe.Add(ref vectorsBase, (uint)vectors.Length / 2u);
|
|
Vector256<float> epsilon = Vector256.Create(Constants.Epsilon);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast))
|
|
{
|
|
Vector256<float> source = vectorsBase;
|
|
Vector256<float> alpha = Avx.Permute(source, ShuffleAlphaControl);
|
|
vectorsBase = UnPremultiply(source, alpha);
|
|
vectorsBase = ref Unsafe.Add(ref vectorsBase, 1);
|
|
}
|
|
|
|
if (Modulo2(vectors.Length) != 0)
|
|
{
|
|
// Vector4 fits neatly in pairs. Any overlap has to be equal to 1.
|
|
UnPremultiply(ref MemoryMarshal.GetReference(vectors[^1..]));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ref Vector4 vectorsStart = ref MemoryMarshal.GetReference(vectors);
|
|
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsStart, (uint)vectors.Length);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref vectorsStart, ref vectorsEnd))
|
|
{
|
|
UnPremultiply(ref vectorsStart);
|
|
|
|
vectorsStart = ref Unsafe.Add(ref vectorsStart, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static Vector256<float> UnPremultiply(Vector256<float> source, Vector256<float> alpha)
|
|
{
|
|
// Check if alpha is zero to avoid division by zero
|
|
Vector256<float> zeroMask = Avx.CompareEqual(alpha, Vector256<float>.Zero);
|
|
|
|
// Divide source by alpha if alpha is nonzero, otherwise set all components to match the source value
|
|
Vector256<float> result = Avx.BlendVariable(Avx.Divide(source, alpha), source, zeroMask);
|
|
|
|
// Blend the result with the alpha vector to ensure that the alpha component is unchanged
|
|
return Avx.Blend(result, alpha, BlendAlphaControl);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Permutes the given vector return a new instance with all the values set to <see cref="Vector4.W"/>.
|
|
/// </summary>
|
|
/// <param name="value">The vector.</param>
|
|
/// <returns>The <see cref="Vector4"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static Vector4 PermuteW(Vector4 value)
|
|
{
|
|
if (Sse.IsSupported)
|
|
{
|
|
return Sse.Shuffle(value.AsVector128(), value.AsVector128(), ShuffleAlphaControl).AsVector4();
|
|
}
|
|
|
|
return new Vector4(value.W);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Sets the W component of the given vector <paramref name="value"/> to the given value from <paramref name="w"/>.
|
|
/// </summary>
|
|
/// <param name="value">The vector to set.</param>
|
|
/// <param name="w">The vector containing the W value.</param>
|
|
/// <returns>The <see cref="Vector4"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static Vector4 WithW(Vector4 value, Vector4 w)
|
|
{
|
|
if (Sse41.IsSupported)
|
|
{
|
|
return Sse41.Insert(value.AsVector128(), w.AsVector128(), 0b11_11_0000).AsVector4();
|
|
}
|
|
|
|
if (Sse.IsSupported)
|
|
{
|
|
// Create tmp as <w[3], w[0], value[2], value[0]>
|
|
// Then return <value[0], value[1], tmp[2], tmp[0]> (which is <value[0], value[1], value[2], w[3]>)
|
|
Vector128<float> tmp = Sse.Shuffle(w.AsVector128(), value.AsVector128(), 0b00_10_00_11);
|
|
return Sse.Shuffle(value.AsVector128(), tmp, 0b00_10_01_00).AsVector4();
|
|
}
|
|
|
|
value.W = w.W;
|
|
return value;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Calculates the cube pow of all the XYZ channels of the input vectors.
|
|
/// </summary>
|
|
/// <param name="vectors">The span of vectors</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
|
|
{
|
|
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
|
|
ref Vector4 endRef = ref Unsafe.Add(ref baseRef, (uint)vectors.Length);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref baseRef, ref endRef))
|
|
{
|
|
Vector4 v = baseRef;
|
|
Vector4 a = PermuteW(v);
|
|
|
|
// Fast path for the default gamma exposure, which is 3. In this case we can skip
|
|
// calling Math.Pow 3 times (one per component), as the method is an internal call and
|
|
// introduces quite a bit of overhead. Instead, we can just manually multiply the whole
|
|
// pixel in Vector4 format 3 times, and then restore the alpha channel before copying it
|
|
// back to the target index in the temporary span. The whole iteration will get completely
|
|
// inlined and traslated into vectorized instructions, with much better performance.
|
|
v = v * v * v;
|
|
v = WithW(v, a);
|
|
|
|
baseRef = v;
|
|
baseRef = ref Unsafe.Add(ref baseRef, 1);
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Calculates the cube root of all the XYZ channels of the input vectors.
|
|
/// </summary>
|
|
/// <param name="vectors">The span of vectors</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static unsafe void CubeRootOnXYZ(Span<Vector4> vectors)
|
|
{
|
|
if (Sse41.IsSupported)
|
|
{
|
|
ref Vector128<float> vectors128Ref = ref Unsafe.As<Vector4, Vector128<float>>(ref MemoryMarshal.GetReference(vectors));
|
|
ref Vector128<float> vectors128End = ref Unsafe.Add(ref vectors128Ref, (uint)vectors.Length);
|
|
|
|
Vector128<int> v128_341 = Vector128.Create(341);
|
|
Vector128<int> v128_negativeZero = Vector128.Create(-0.0f).AsInt32();
|
|
Vector128<int> v128_one = Vector128.Create(1.0f).AsInt32();
|
|
|
|
Vector128<float> v128_13rd = Vector128.Create(1 / 3f);
|
|
Vector128<float> v128_23rds = Vector128.Create(2 / 3f);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref vectors128Ref, ref vectors128End))
|
|
{
|
|
Vector128<float> vecx = vectors128Ref;
|
|
Vector128<int> veax = vecx.AsInt32();
|
|
|
|
// If we can use SSE41 instructions, we can vectorize the entire cube root calculation, and also execute it
|
|
// directly on 32 bit floating point values. What follows is a vectorized implementation of this method:
|
|
// https://www.musicdsp.org/en/latest/Other/206-fast-cube-root-square-root-and-reciprocal-for-x86-sse-cpus.html.
|
|
// Furthermore, after the initial setup in vectorized form, we're doing two Newton approximations here
|
|
// using a different succession (the same used below), which should be less unstable due to not having cube pow.
|
|
veax = Sse2.AndNot(v128_negativeZero, veax);
|
|
veax = Sse2.Subtract(veax, v128_one);
|
|
veax = Sse2.ShiftRightArithmetic(veax, 10);
|
|
veax = Sse41.MultiplyLow(veax, v128_341);
|
|
veax = Sse2.Add(veax, v128_one);
|
|
veax = Sse2.AndNot(v128_negativeZero, veax);
|
|
veax = Sse2.Or(veax, Sse2.And(vecx.AsInt32(), v128_negativeZero));
|
|
|
|
Vector128<float> y4 = veax.AsSingle();
|
|
|
|
if (Fma.IsSupported)
|
|
{
|
|
y4 = Fma.MultiplyAdd(v128_23rds, y4, Sse.Multiply(v128_13rd, Sse.Divide(vecx, Sse.Multiply(y4, y4))));
|
|
y4 = Fma.MultiplyAdd(v128_23rds, y4, Sse.Multiply(v128_13rd, Sse.Divide(vecx, Sse.Multiply(y4, y4))));
|
|
}
|
|
else
|
|
{
|
|
y4 = Sse.Add(Sse.Multiply(v128_23rds, y4), Sse.Multiply(v128_13rd, Sse.Divide(vecx, Sse.Multiply(y4, y4))));
|
|
y4 = Sse.Add(Sse.Multiply(v128_23rds, y4), Sse.Multiply(v128_13rd, Sse.Divide(vecx, Sse.Multiply(y4, y4))));
|
|
}
|
|
|
|
y4 = Sse41.Insert(y4, vecx, 0xF0);
|
|
|
|
vectors128Ref = y4;
|
|
vectors128Ref = ref Unsafe.Add(ref vectors128Ref, 1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ref Vector4 vectorsRef = ref MemoryMarshal.GetReference(vectors);
|
|
ref Vector4 vectorsEnd = ref Unsafe.Add(ref vectorsRef, (uint)vectors.Length);
|
|
|
|
// Fallback with scalar preprocessing and vectorized approximation steps
|
|
while (Unsafe.IsAddressLessThan(ref vectorsRef, ref vectorsEnd))
|
|
{
|
|
Vector4 v = vectorsRef;
|
|
|
|
double
|
|
x64 = v.X,
|
|
y64 = v.Y,
|
|
z64 = v.Z;
|
|
float a = v.W;
|
|
|
|
ulong
|
|
xl = *(ulong*)&x64,
|
|
yl = *(ulong*)&y64,
|
|
zl = *(ulong*)&z64;
|
|
|
|
// Here we use a trick to compute the starting value x0 for the cube root. This is because doing
|
|
// pow(x, 1 / gamma) is the same as the gamma-th root of x, and since gamme is 3 in this case,
|
|
// this means what we actually want is to find the cube root of our clamped values.
|
|
// For more info on the constant below, see:
|
|
// https://community.intel.com/t5/Intel-C-Compiler/Fast-approximate-of-transcendental-operations/td-p/1044543.
|
|
// Here we perform the same trick on all RGB channels separately to help the CPU execute them in paralle, and
|
|
// store the alpha channel to preserve it. Then we set these values to the fields of a temporary 128-bit
|
|
// register, and use it to accelerate two steps of the Newton approximation using SIMD.
|
|
xl = 0x2a9f8a7be393b600 + (xl / 3);
|
|
yl = 0x2a9f8a7be393b600 + (yl / 3);
|
|
zl = 0x2a9f8a7be393b600 + (zl / 3);
|
|
|
|
Vector4 y4;
|
|
y4.X = (float)*(double*)&xl;
|
|
y4.Y = (float)*(double*)&yl;
|
|
y4.Z = (float)*(double*)&zl;
|
|
y4.W = 0;
|
|
|
|
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
|
|
y4 = (2 / 3f * y4) + (1 / 3f * (v / (y4 * y4)));
|
|
y4.W = a;
|
|
|
|
vectorsRef = y4;
|
|
vectorsRef = ref Unsafe.Add(ref vectorsRef, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Performs a linear interpolation between two values based on the given weighting.
|
|
/// </summary>
|
|
/// <param name="value1">The first value.</param>
|
|
/// <param name="value2">The second value.</param>
|
|
/// <param name="amount">Values between 0 and 1 that indicates the weight of <paramref name="value2"/>.</param>
|
|
/// <returns>The <see cref="Vector256{Single}"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static Vector256<float> Lerp(
|
|
in Vector256<float> value1,
|
|
in Vector256<float> value2,
|
|
in Vector256<float> amount)
|
|
{
|
|
Vector256<float> diff = Avx.Subtract(value2, value1);
|
|
if (Fma.IsSupported)
|
|
{
|
|
return Fma.MultiplyAdd(diff, amount, value1);
|
|
}
|
|
else
|
|
{
|
|
return Avx.Add(Avx.Multiply(diff, amount), value1);
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Performs a linear interpolation between two values based on the given weighting.
|
|
/// </summary>
|
|
/// <param name="value1">The first value.</param>
|
|
/// <param name="value2">The second value.</param>
|
|
/// <param name="amount">A value between 0 and 1 that indicates the weight of <paramref name="value2"/>.</param>
|
|
/// <returns>The <see cref="float"/>.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static float Lerp(float value1, float value2, float amount)
|
|
=> ((value2 - value1) * amount) + value1;
|
|
|
|
/// <summary>
|
|
/// Accumulates 8-bit integers into <paramref name="accumulator"/> by
|
|
/// widening them to 32-bit integers and performing four additions.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// <c>byte(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)</c>
|
|
/// is widened and added onto <paramref name="accumulator"/> as such:
|
|
/// <code>
|
|
/// accumulator += i32(1, 2, 3, 4);
|
|
/// accumulator += i32(5, 6, 7, 8);
|
|
/// accumulator += i32(9, 10, 11, 12);
|
|
/// accumulator += i32(13, 14, 15, 16);
|
|
/// </code>
|
|
/// </remarks>
|
|
/// <param name="accumulator">The accumulator destination.</param>
|
|
/// <param name="values">The values to accumulate.</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
|
|
{
|
|
Vector.Widen(values, out Vector<ushort> shortLow, out Vector<ushort> shortHigh);
|
|
|
|
Vector.Widen(shortLow, out Vector<uint> intLow, out Vector<uint> intHigh);
|
|
accumulator += intLow;
|
|
accumulator += intHigh;
|
|
|
|
Vector.Widen(shortHigh, out intLow, out intHigh);
|
|
accumulator += intLow;
|
|
accumulator += intHigh;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Reduces elements of the vector into one sum.
|
|
/// </summary>
|
|
/// <param name="accumulator">The accumulator to reduce.</param>
|
|
/// <returns>The sum of all elements.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int ReduceSum(Vector256<int> accumulator)
|
|
{
|
|
// Add upper lane to lower lane.
|
|
Vector128<int> vsum = Sse2.Add(accumulator.GetLower(), accumulator.GetUpper());
|
|
|
|
// Add odd to even.
|
|
vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_11_01_01));
|
|
|
|
// Add high to low.
|
|
vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10));
|
|
|
|
return Sse2.ConvertToInt32(vsum);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Reduces even elements of the vector into one sum.
|
|
/// </summary>
|
|
/// <param name="accumulator">The accumulator to reduce.</param>
|
|
/// <returns>The sum of even elements.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int EvenReduceSum(Vector128<int> accumulator)
|
|
{
|
|
// Add high to low.
|
|
Vector128<int> vsum = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0b_11_10_11_10));
|
|
|
|
return Sse2.ConvertToInt32(vsum);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Reduces even elements of the vector into one sum.
|
|
/// </summary>
|
|
/// <param name="accumulator">The accumulator to reduce.</param>
|
|
/// <returns>The sum of even elements.</returns>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static int EvenReduceSum(Vector256<int> accumulator)
|
|
{
|
|
Vector128<int> vsum = Sse2.Add(accumulator.GetLower(), accumulator.GetUpper()); // add upper lane to lower lane
|
|
vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10)); // add high to low
|
|
|
|
// Vector128<int>.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
|
|
return Sse2.ConvertToInt32(vsum);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Fast division with ceiling for <see cref="uint"/> numbers.
|
|
/// </summary>
|
|
/// <param name="value">Divident value.</param>
|
|
/// <param name="divisor">Divisor value.</param>
|
|
/// <returns>Ceiled division result.</returns>
|
|
public static uint DivideCeil(uint value, uint divisor) => (value + divisor - 1) / divisor;
|
|
|
|
/// <summary>
|
|
/// Tells whether input value is outside of the given range.
|
|
/// </summary>
|
|
/// <param name="value">Value.</param>
|
|
/// <param name="min">Minimum value, inclusive.</param>
|
|
/// <param name="max">Maximum value, inclusive.</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static bool IsOutOfRange(int value, int min, int max)
|
|
=> (uint)(value - min) > (uint)(max - min);
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint VectorCount<TVector>(this Span<byte> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint Vector128Count<TVector>(this Span<byte> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector128<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint Vector128Count<TVector>(this ReadOnlySpan<byte> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector128<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint Vector256Count<TVector>(this Span<byte> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector256<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint Vector256Count<TVector>(this ReadOnlySpan<byte> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector256<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint Vector512Count<TVector>(this Span<byte> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector512<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint Vector512Count<TVector>(this ReadOnlySpan<byte> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector512<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint VectorCount<TVector>(this Span<float> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint Vector128Count<TVector>(this Span<float> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector128<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint Vector256Count<TVector>(this Span<float> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector256<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into length.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="length">The given length.</param>
|
|
/// <returns>Count of vectors that safely fit into the length.</returns>
|
|
public static nuint Vector256Count<TVector>(int length)
|
|
where TVector : struct
|
|
=> (uint)length / (uint)Vector256<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into the given span.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="span">The given span.</param>
|
|
/// <returns>Count of vectors that safely fit into the span.</returns>
|
|
public static nuint Vector512Count<TVector>(this Span<float> span)
|
|
where TVector : struct
|
|
=> (uint)span.Length / (uint)Vector512<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Gets the count of vectors that safely fit into length.
|
|
/// </summary>
|
|
/// <typeparam name="TVector">The type of the vector.</typeparam>
|
|
/// <param name="length">The given length.</param>
|
|
/// <returns>Count of vectors that safely fit into the length.</returns>
|
|
public static nuint Vector512Count<TVector>(int length)
|
|
where TVector : struct
|
|
=> (uint)length / (uint)Vector512<TVector>.Count;
|
|
|
|
/// <summary>
|
|
/// Normalizes the values in a given <see cref="Span{T}"/>.
|
|
/// </summary>
|
|
/// <param name="span">The sequence of <see cref="float"/> values to normalize.</param>
|
|
/// <param name="sum">The sum of the values in <paramref name="span"/>.</param>
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static void Normalize(Span<float> span, float sum)
|
|
{
|
|
if (Vector256.IsHardwareAccelerated)
|
|
{
|
|
ref float startRef = ref MemoryMarshal.GetReference(span);
|
|
ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~7);
|
|
Vector256<float> sum256 = Vector256.Create(sum);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref startRef, ref endRef))
|
|
{
|
|
Unsafe.As<float, Vector256<float>>(ref startRef) /= sum256;
|
|
startRef = ref Unsafe.Add(ref startRef, (nuint)8);
|
|
}
|
|
|
|
if ((span.Length & 7) >= 4)
|
|
{
|
|
Unsafe.As<float, Vector128<float>>(ref startRef) /= sum256.GetLower();
|
|
startRef = ref Unsafe.Add(ref startRef, (nuint)4);
|
|
}
|
|
|
|
endRef = ref Unsafe.Add(ref startRef, span.Length & 3);
|
|
|
|
while (Unsafe.IsAddressLessThan(ref startRef, ref endRef))
|
|
{
|
|
startRef /= sum;
|
|
startRef = ref Unsafe.Add(ref startRef, (nuint)1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int i = 0; i < span.Length; i++)
|
|
{
|
|
span[i] /= sum;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|