Browse Source

Add XPlat V128 SubtractSaturate

pull/2933/head
James Jackson-South 9 months ago
parent
commit
cfad39b8fb
  1. 143
      src/ImageSharp/Common/Helpers/Vector128Utilities.cs

143
src/ImageSharp/Common/Helpers/Vector128Utilities.cs

@ -402,40 +402,6 @@ internal static class Vector128_
public static Vector128<T> Clamp<T>(Vector128<T> value, Vector128<T> min, Vector128<T> max)
=> Vector128.Min(Vector128.Max(value, min), max);
/// <summary>
/// Multiply the packed 16-bit integers in <paramref name="left"/> and <paramref name="right"/>, producing
/// intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in the result.
/// </summary>
/// <param name="left">
/// The first vector containing packed 16-bit integers to multiply.
/// </param>
/// <param name="right">
/// The second vector containing packed 16-bit integers to multiply.
/// </param>
/// <returns>
/// A vector containing the low 16 bits of the products of the packed 16-bit integers
/// from <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> MultiplyLow(Vector128<short> left, Vector128<short> right)
{
if (Sse2.IsSupported)
{
return Sse2.MultiplyLow(left, right);
}
// Widen each half of the short vectors into two int vectors
(Vector128<int> leftLower, Vector128<int> leftUpper) = Vector128.Widen(left);
(Vector128<int> rightLower, Vector128<int> rightUpper) = Vector128.Widen(right);
// Elementwise multiply: each int lane now holds the full 32-bit product
Vector128<int> prodLo = leftLower * rightLower;
Vector128<int> prodHi = leftUpper * rightUpper;
// Narrow the two int vectors back into one short vector
return Vector128.Narrow(prodLo, prodHi);
}
/// <summary>
/// Multiply packed signed 16-bit integers in <paramref name="left"/> and <paramref name="right"/>, producing
/// intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and
@ -450,6 +416,7 @@ internal static class Vector128_
/// <returns>
/// A vector containing the results of multiplying and adding adjacent pairs of packed signed 16-bit integers
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> MultiplyAddAdjacent(Vector128<short> left, Vector128<short> right)
{
if (Sse2.IsSupported)
@ -470,12 +437,12 @@ internal static class Vector128_
{
// Widen each half of the short vectors into two int vectors
(Vector128<int> leftLower, Vector128<int> leftUpper) = Vector128.Widen(left);
(Vector128<int> rightLower, Vector128<int> rightUpper) = Vector128.Widen(right);
(Vector128<int> leftLo, Vector128<int> leftHi) = Vector128.Widen(left);
(Vector128<int> rightLo, Vector128<int> rightHi) = Vector128.Widen(right);
// Elementwise multiply: each int lane now holds the full 32-bit product
Vector128<int> prodLo = leftLower * rightLower;
Vector128<int> prodHi = leftUpper * rightUpper;
Vector128<int> prodLo = leftLo * rightLo;
Vector128<int> prodHi = leftHi * rightHi;
// Extract the low and high parts of the products shuffling them to form a result we can add together.
// Use out-of-bounds to zero out the unused lanes.
@ -488,6 +455,40 @@ internal static class Vector128_
}
}
/// <summary>
/// Multiply the packed 16-bit integers in <paramref name="left"/> and <paramref name="right"/>, producing
/// intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in the result.
/// </summary>
/// <param name="left">
/// The first vector containing packed 16-bit integers to multiply.
/// </param>
/// <param name="right">
/// The second vector containing packed 16-bit integers to multiply.
/// </param>
/// <returns>
/// A vector containing the low 16 bits of the products of the packed 16-bit integers
/// from <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> MultiplyLow(Vector128<short> left, Vector128<short> right)
{
if (Sse2.IsSupported)
{
return Sse2.MultiplyLow(left, right);
}
// Widen each half of the short vectors into two int vectors
(Vector128<int> leftLo, Vector128<int> leftHi) = Vector128.Widen(left);
(Vector128<int> rightLo, Vector128<int> rightHi) = Vector128.Widen(right);
// Elementwise multiply: each int lane now holds the full 32-bit product
Vector128<int> prodLo = leftLo * rightLo;
Vector128<int> prodHi = leftHi * rightHi;
// Narrow the two int vectors back into one short vector
return Vector128.Narrow(prodLo, prodHi);
}
/// <summary>
/// Multiply the packed 16-bit integers in <paramref name="left"/> and <paramref name="right"/>, producing
/// intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in the result.
@ -511,12 +512,12 @@ internal static class Vector128_
}
// Widen each half of the short vectors into two int vectors
(Vector128<int> leftLower, Vector128<int> leftUpper) = Vector128.Widen(left);
(Vector128<int> rightLower, Vector128<int> rightUpper) = Vector128.Widen(right);
(Vector128<int> leftLo, Vector128<int> leftHi) = Vector128.Widen(left);
(Vector128<int> rightLo, Vector128<int> rightHi) = Vector128.Widen(right);
// Elementwise multiply: each int lane now holds the full 32-bit product
Vector128<int> prodLo = leftLower * rightLower;
Vector128<int> prodHi = leftUpper * rightUpper;
Vector128<int> prodLo = leftLo * rightLo;
Vector128<int> prodHi = leftHi * rightHi;
// Arithmetic shift right by 16 bits to extract the high word
prodLo >>= 16;
@ -540,6 +541,7 @@ internal static class Vector128_
/// A vector containing the unpacked and interleaved 64-bit integers from the high
/// halves of <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> UnpackHigh(Vector128<long> left, Vector128<long> right)
{
if (Sse2.IsSupported)
@ -569,6 +571,7 @@ internal static class Vector128_
/// A vector containing the unpacked and interleaved 64-bit integers from the low
/// halves of <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> UnpackLow(Vector128<long> left, Vector128<long> right)
{
if (Sse2.IsSupported)
@ -598,6 +601,7 @@ internal static class Vector128_
/// A vector containing the unpacked and interleaved 32-bit integers from the high
/// halves of <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> UnpackHigh(Vector128<int> left, Vector128<int> right)
{
if (Sse2.IsSupported)
@ -628,6 +632,7 @@ internal static class Vector128_
/// A vector containing the unpacked and interleaved 32-bit integers from the low
/// halves of <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> UnpackLow(Vector128<int> left, Vector128<int> right)
{
if (Sse2.IsSupported)
@ -658,6 +663,7 @@ internal static class Vector128_
/// A vector containing the unpacked and interleaved 16-bit integers from the high
/// halves of <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> UnpackHigh(Vector128<short> left, Vector128<short> right)
{
if (Sse2.IsSupported)
@ -688,6 +694,7 @@ internal static class Vector128_
/// A vector containing the unpacked and interleaved 16-bit integers from the low
/// halves of <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> UnpackLow(Vector128<short> left, Vector128<short> right)
{
if (Sse2.IsSupported)
@ -718,6 +725,7 @@ internal static class Vector128_
/// A vector containing the unpacked and interleaved 8-bit integers from the low
/// halves of <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> UnpackLow(Vector128<byte> left, Vector128<byte> right)
{
if (Sse2.IsSupported)
@ -736,6 +744,57 @@ internal static class Vector128_
Vector128.Shuffle(unpacked, Vector128.Create((byte)0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15)));
}
/// <summary>
/// Subtract packed unsigned 8-bit integers in <paramref name="right"/> from packed unsigned 8-bit integers
/// in <paramref name="left"/> using saturation, and store the results.
/// </summary>
/// <param name="left">
/// The first vector containing packed unsigned 8-bit integers to subtract from.
/// </param>
/// <param name="right">
/// The second vector containing packed unsigned 8-bit integers to subtract.
/// </param>
/// <returns>
/// A vector containing the results of subtracting packed unsigned 8-bit integers
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> SubtractSaturate(Vector128<byte> left, Vector128<byte> right)
{
if (Sse2.IsSupported)
{
return Sse2.SubtractSaturate(left, right);
}
if (AdvSimd.IsSupported)
{
return AdvSimd.SubtractSaturate(left, right);
}
if (PackedSimd.IsSupported)
{
return PackedSimd.SubtractSaturate(left, right);
}
// Widen inputs to 16-bit to safely compute unsigned differences without underflow
(Vector128<ushort> leftLo, Vector128<ushort> leftHi) = Vector128.Widen(left);
(Vector128<ushort> rightLo, Vector128<ushort> rightHi) = Vector128.Widen(right);
// Subtract
Vector128<ushort> diffLo = leftLo - rightLo;
Vector128<ushort> diffHi = leftHi - rightHi;
// Mask lanes where left >= right to preserve the result
// All other lanes are zeroed (saturate to 0)
Vector128<ushort> maskLo = Vector128.GreaterThanOrEqual(leftLo, rightLo).AsUInt16();
Vector128<ushort> maskHi = Vector128.GreaterThanOrEqual(leftHi, rightHi).AsUInt16();
diffLo &= maskLo;
diffHi &= maskHi;
// Narrow back to bytes
return Vector128.Narrow(diffLo, diffHi);
}
[DoesNotReturn]
private static void ThrowUnreachableException() => throw new UnreachableException();
}

Loading…
Cancel
Save