Browse Source

Update based on feedback

pull/2933/head
James Jackson-South 11 months ago
parent
commit
362707343f
  1. 15
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  2. 276
      src/ImageSharp/Common/Helpers/Vector128Utilities.cs
  3. 29
      src/ImageSharp/Common/Helpers/Vector256Utilities.cs
  4. 7
      src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs
  5. 7
      src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
  6. 8
      src/ImageSharp/Formats/Webp/WebpCommonUtils.cs

15
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -375,6 +375,11 @@ internal static partial class SimdUtils
}
else if (Vector256.IsHardwareAccelerated)
{
// ShufflePerLane performs per-128-bit-lane shuffling using Avx2.Shuffle (vpshufb).
// MMShuffleSpan generates indices in the range [0, 31] and never sets bit 7 in any byte,
// so the shuffle will not zero elements. Because vpshufb uses only the low 4 bits (b[i] & 0x0F)
// for indexing within each lane, and ignores the upper bits unless bit 7 is set,
// this usage is guaranteed to remain within-lane and non-zeroing.
Span<byte> temp = stackalloc byte[Vector256<byte>.Count];
Shuffle.MMShuffleSpan(ref temp, control);
Vector256<byte> mask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(temp));
@ -391,17 +396,17 @@ internal static partial class SimdUtils
ref Vector256<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector256<byte> vd0 = ref Unsafe.Add(ref destinationBase, i);
vd0 = Vector256_.ShuffleNative(vs0, mask);
Unsafe.Add(ref vd0, (nuint)1) = Vector256_.ShuffleNative(Unsafe.Add(ref vs0, (nuint)1), mask);
Unsafe.Add(ref vd0, (nuint)2) = Vector256_.ShuffleNative(Unsafe.Add(ref vs0, (nuint)2), mask);
Unsafe.Add(ref vd0, (nuint)3) = Vector256_.ShuffleNative(Unsafe.Add(ref vs0, (nuint)3), mask);
vd0 = Vector256_.ShufflePerLane(vs0, mask);
Unsafe.Add(ref vd0, (nuint)1) = Vector256_.ShufflePerLane(Unsafe.Add(ref vs0, (nuint)1), mask);
Unsafe.Add(ref vd0, (nuint)2) = Vector256_.ShufflePerLane(Unsafe.Add(ref vs0, (nuint)2), mask);
Unsafe.Add(ref vd0, (nuint)3) = Vector256_.ShufflePerLane(Unsafe.Add(ref vs0, (nuint)3), mask);
}
if (m > 0)
{
for (nuint i = u; i < n; i++)
{
Unsafe.Add(ref destinationBase, i) = Vector256_.ShuffleNative(Unsafe.Add(ref sourceBase, i), mask);
Unsafe.Add(ref destinationBase, i) = Vector256_.ShufflePerLane(Unsafe.Add(ref sourceBase, i), mask);
}
}
}

276
src/ImageSharp/Common/Helpers/Vector128Utilities.cs

@ -47,8 +47,10 @@ internal static class Vector128_
return AdvSimd.FusedAddRoundedHalving(left, right);
}
// Portable fallback: (a + b + 1) >> 1
return (left + right + Vector128.Create((byte)1)) >> 1;
// Account for potential 9th bit to ensure correct rounded result.
return Vector128.Narrow(
(Vector128.WidenLower(left) + Vector128.WidenLower(right) + Vector128<ushort>.One) >> 1,
(Vector128.WidenUpper(left) + Vector128.WidenUpper(right) + Vector128<ushort>.One) >> 1);
}
/// <summary>
@ -117,13 +119,17 @@ internal static class Vector128_
}
// Don't use InverseMMShuffle here as we want to avoid the cast.
Vector64<short> indices = Vector64.Create(
(short)(control & 0x3),
(short)((control >> 2) & 0x3),
(short)((control >> 4) & 0x3),
(short)((control >> 6) & 0x3));
return Vector128.Create(value.GetLower(), Vector64.Shuffle(value.GetUpper(), indices));
Vector128<short> indices = Vector128.Create(
0,
1,
2,
3,
(short)((control & 0x3) + 4),
(short)(((control >> 2) & 0x3) + 4),
(short)(((control >> 4) & 0x3) + 4),
(short)(((control >> 6) & 0x3) + 4));
return Vector128.Shuffle(value, indices);
}
/// <summary>
@ -144,13 +150,17 @@ internal static class Vector128_
}
// Don't use InverseMMShuffle here as we want to avoid the cast.
Vector64<short> indices = Vector64.Create(
(short)(control & 0x3),
(short)((control >> 2) & 0x3),
(short)((control >> 4) & 0x3),
(short)((control >> 6) & 0x3));
return Vector128.Create(Vector64.Shuffle(value.GetLower(), indices), value.GetUpper());
Vector128<short> indices = Vector128.Create(
(short)(control & 0x3),
(short)((control >> 2) & 0x3),
(short)((control >> 4) & 0x3),
(short)((control >> 6) & 0x3),
4,
5,
6,
7);
return Vector128.Shuffle(value, indices);
}
/// <summary>
@ -237,28 +247,13 @@ internal static class Vector128_
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> ShiftLeftLogical(Vector128<short> value, [ConstantExpected] byte count)
{
if (Sse2.IsSupported)
{
return Sse2.ShiftLeftLogical(value, count);
}
// Zero lanes where count >= 16 to match SSE2
if (count >= 16)
{
return Vector128<short>.Zero;
}
if (AdvSimd.IsSupported)
{
return AdvSimd.ShiftLogical(value, Vector128.Create((short)count));
}
if (PackedSimd.IsSupported)
{
return PackedSimd.ShiftLeft(value, count);
}
return Vector128.ShiftLeft(value, count);
return value << count;
}
/// <summary>
@ -536,6 +531,11 @@ internal static class Vector128_
Vector128<int> prodLo = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower());
Vector128<int> prodHi = AdvSimd.MultiplyWideningLower(left.GetUpper(), right.GetUpper());
if (AdvSimd.Arm64.IsSupported)
{
return AdvSimd.Arm64.AddPairwise(prodLo, prodHi);
}
Vector128<long> v0 = AdvSimd.AddPairwiseWidening(prodLo);
Vector128<long> v1 = AdvSimd.AddPairwiseWidening(prodHi);
@ -587,50 +587,26 @@ internal static class Vector128_
return AdvSimd.Arm64.AddPairwise(left, right);
}
// Extract the low and high parts of the products shuffling them to form a result we can add together.
// Use out-of-bounds to zero out the unused lanes.
Vector128<short> even = Vector128.Create(0, 2, 4, 6, 8, 8, 8, 8);
Vector128<short> odd = Vector128.Create(1, 3, 5, 7, 8, 8, 8, 8);
Vector128<short> v0 = Vector128.Shuffle(right, even);
Vector128<short> v1 = Vector128.Shuffle(right, odd);
Vector128<short> v2 = Vector128.Shuffle(left, even);
Vector128<short> v3 = Vector128.Shuffle(left, odd);
return v0 + v1 + v2 + v3;
}
/// <summary>
/// Multiply the packed 16-bit integers in <paramref name="left"/> and <paramref name="right"/>, producing
/// intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in the result.
/// </summary>
/// <param name="left">
/// The first vector containing packed 16-bit integers to multiply.
/// </param>
/// <param name="right">
/// The second vector containing packed 16-bit integers to multiply.
/// </param>
/// <returns>
/// A vector containing the low 16 bits of the products of the packed 16-bit integers
/// from <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> MultiplyLow(Vector128<short> left, Vector128<short> right)
{
if (Sse2.IsSupported)
if (AdvSimd.IsSupported)
{
return Sse2.MultiplyLow(left, right);
}
Vector128<int> v0 = AdvSimd.AddPairwiseWidening(left);
Vector128<int> v1 = AdvSimd.AddPairwiseWidening(right);
// Widen each half of the short vectors into two int vectors
(Vector128<int> leftLo, Vector128<int> leftHi) = Vector128.Widen(left);
(Vector128<int> rightLo, Vector128<int> rightHi) = Vector128.Widen(right);
return Vector128.Narrow(v0, v1);
}
// Elementwise multiply: each int lane now holds the full 32-bit product
Vector128<int> prodLo = leftLo * rightLo;
Vector128<int> prodHi = leftHi * rightHi;
{
// Extract the low and high parts of the products shuffling them to form a result we can add together.
// Use out-of-bounds to zero out the unused lanes.
Vector128<short> even = Vector128.Create(0, 2, 4, 6, 8, 8, 8, 8);
Vector128<short> odd = Vector128.Create(1, 3, 5, 7, 8, 8, 8, 8);
Vector128<short> v0 = Vector128.Shuffle(right, even);
Vector128<short> v1 = Vector128.Shuffle(right, odd);
Vector128<short> v2 = Vector128.Shuffle(left, even);
Vector128<short> v3 = Vector128.Shuffle(left, odd);
// Narrow the two int vectors back into one short vector
return Vector128.Narrow(prodLo, prodHi);
return v0 + v1 + v2 + v3;
}
}
/// <summary>
@ -655,20 +631,33 @@ internal static class Vector128_
return Sse2.MultiplyHigh(left, right);
}
// Widen each half of the short vectors into two int vectors
(Vector128<int> leftLo, Vector128<int> leftHi) = Vector128.Widen(left);
(Vector128<int> rightLo, Vector128<int> rightHi) = Vector128.Widen(right);
if (AdvSimd.IsSupported)
{
Vector128<int> prodLo = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower());
Vector128<int> prodHi = AdvSimd.MultiplyWideningUpper(left, right);
prodLo >>= 16;
prodHi >>= 16;
return Vector128.Narrow(prodLo, prodHi);
}
{
// Widen each half of the short vectors into two int vectors
(Vector128<int> leftLo, Vector128<int> leftHi) = Vector128.Widen(left);
(Vector128<int> rightLo, Vector128<int> rightHi) = Vector128.Widen(right);
// Elementwise multiply: each int lane now holds the full 32-bit product
Vector128<int> prodLo = leftLo * rightLo;
Vector128<int> prodHi = leftHi * rightHi;
// Elementwise multiply: each int lane now holds the full 32-bit product
Vector128<int> prodLo = leftLo * rightLo;
Vector128<int> prodHi = leftHi * rightHi;
// Arithmetic shift right by 16 bits to extract the high word
prodLo >>= 16;
prodHi >>= 16;
// Arithmetic shift right by 16 bits to extract the high word
prodLo >>= 16;
prodHi >>= 16;
// Narrow the two int vectors back into one short vector
return Vector128.Narrow(prodLo, prodHi);
// Narrow the two int vectors back into one short vector
return Vector128.Narrow(prodLo, prodHi);
}
}
/// <summary>
@ -693,20 +682,33 @@ internal static class Vector128_
return Sse2.MultiplyHigh(left, right);
}
// Widen each half of the short vectors into two uint vectors
(Vector128<uint> leftLo, Vector128<uint> leftHi) = Vector128.Widen(left);
(Vector128<uint> rightLo, Vector128<uint> rightHi) = Vector128.Widen(right);
if (AdvSimd.IsSupported)
{
Vector128<uint> prodLo = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower());
Vector128<uint> prodHi = AdvSimd.MultiplyWideningUpper(left, right);
// Elementwise multiply: each int lane now holds the full 32-bit product
Vector128<uint> prodLo = leftLo * rightLo;
Vector128<uint> prodHi = leftHi * rightHi;
prodLo >>= 16;
prodHi >>= 16;
// Arithmetic shift right by 16 bits to extract the high word
prodLo >>= 16;
prodHi >>= 16;
return Vector128.Narrow(prodLo, prodHi);
}
{
// Widen each half of the ushort vectors into two uint vectors
(Vector128<uint> leftLo, Vector128<uint> leftHi) = Vector128.Widen(left);
(Vector128<uint> rightLo, Vector128<uint> rightHi) = Vector128.Widen(right);
// Narrow the two int vectors back into one short vector
return Vector128.Narrow(prodLo, prodHi);
// Elementwise multiply: each uint lane now holds the full 32-bit product
Vector128<uint> prodLo = leftLo * rightLo;
Vector128<uint> prodHi = leftHi * rightHi;
// Logical shift right by 16 bits (the lanes are unsigned) to extract the high word
prodLo >>= 16;
prodHi >>= 16;
// Narrow the two uint vectors back into one ushort vector
return Vector128.Narrow(prodLo, prodHi);
}
}
/// <summary>
@ -1363,90 +1365,4 @@ internal static class Vector128_
// Narrow back to signed bytes
return Vector128.Narrow(diffLo, diffHi);
}
/// <summary>
/// Create mask from the most significant bit of each 8-bit element in <paramref name="value"/>, and store the result.
/// </summary>
/// <param name="value">
/// The vector containing packed 8-bit integers from which to create the mask.
/// </param>
/// <returns>
/// A 16-bit integer mask where bit <c>i</c> is the most significant bit of element <c>i</c>
/// in <paramref name="value"/>. The upper 16 bits of the returned <see cref="int"/> are always zero.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int MoveMask(Vector128<byte> value)
{
    // Delegate to the runtime's cross-platform helper instead of maintaining separate
    // SSE2 / AdvSimd / portable code paths that all compute the same per-element MSB mask.
    // The result never exceeds 0xFFFF for a 16-element vector, so the cast to int cannot overflow.
    return (int)value.ExtractMostSignificantBits();
}
}

29
src/ImageSharp/Common/Helpers/Vector256Utilities.cs

@ -39,14 +39,17 @@ internal static class Vector256_
/// </param>
/// <returns>The <see cref="Vector256{Byte}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<byte> ShuffleNative(Vector256<byte> vector, Vector256<byte> indices)
public static Vector256<byte> ShufflePerLane(Vector256<byte> vector, Vector256<byte> indices)
{
if (Avx2.IsSupported)
{
return Avx2.Shuffle(vector, indices);
}
return Vector256.Shuffle(vector, indices);
Vector128<byte> indicesLo = indices.GetLower();
Vector128<byte> lower = Vector128_.ShuffleNative(vector.GetLower(), indicesLo);
Vector128<byte> upper = Vector128_.ShuffleNative(vector.GetUpper(), indicesLo);
return Vector256.Create(lower, upper);
}
/// <summary>
@ -458,26 +461,4 @@ internal static class Vector256_
Vector128_.SubtractSaturate(left.GetLower(), right.GetLower()),
Vector128_.SubtractSaturate(left.GetUpper(), right.GetUpper()));
}
/// <summary>
/// Create mask from the most significant bit of each 8-bit element in <paramref name="value"/>, and store the result.
/// </summary>
/// <param name="value">
/// The vector containing packed 8-bit integers from which to create the mask.
/// </param>
/// <returns>
/// A 32-bit integer mask where bit <c>i</c> is the most significant bit of element <c>i</c>
/// in <paramref name="value"/>. When element 31 has its most significant bit set the result is negative;
/// a vector with every most significant bit set yields <c>-1</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int MoveMask(Vector256<byte> value)
{
    // Delegate to the runtime's cross-platform helper rather than stitching two 128-bit masks together.
    // The unchecked cast reinterprets the 32 mask bits as a signed value, matching the
    // int-returning contract of Avx2.MoveMask even if the assembly is compiled with overflow checks.
    return unchecked((int)value.ExtractMostSignificantBits());
}
}

7
src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs

@ -16,6 +16,9 @@ internal static class ColorSpaceTransformUtils
{
const int span = 16;
Span<ushort> values = stackalloc ushort[span];
// These shuffle masks are safe for use with Avx2.Shuffle because every index stays within its own 128-bit lane (0–15 in the lower lane, 16–31 in the upper lane),
// and every unused byte is set to 0xFF (bit 7 set), which zeroes that byte per the vpshufb specification. This guarantees lane-local shuffling with no cross-lane violations.
Vector256<byte> collectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255);
Vector256<byte> collectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30);
Vector256<byte> collectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
@ -33,8 +36,8 @@ internal static class ColorSpaceTransformUtils
nuint input1Idx = x + (span / 2);
Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector256<byte> r0 = Vector256_.ShuffleNative(input0, collectColorBlueTransformsShuffleLowMask256);
Vector256<byte> r1 = Vector256_.ShuffleNative(input1, collectColorBlueTransformsShuffleHighMask256);
Vector256<byte> r0 = Vector256_.ShufflePerLane(input0, collectColorBlueTransformsShuffleLowMask256);
Vector256<byte> r1 = Vector256_.ShufflePerLane(input1, collectColorBlueTransformsShuffleHighMask256);
Vector256<byte> r = r0 | r1;
Vector256<byte> gb0 = input0 & collectColorBlueTransformsGreenBlueMask256;
Vector256<byte> gb1 = input1 & collectColorBlueTransformsGreenBlueMask256;

7
src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

@ -97,6 +97,9 @@ internal static unsafe class LosslessUtils
{
if (Vector256.IsHardwareAccelerated && pixelData.Length >= 8)
{
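// The `255` entries have the high bit (0x80) set in the control byte, so the shuffle writes zero to those bytes
// (bytes 1 and 3 of every 4-byte pixel, i.e. the green and alpha positions); bytes 0 and 2 receive a copy of the pixel's green byte.
// Each index points into its own 128-bit lane (0–15 in the lower lane, 16–31 in the upper lane), so this is safe for per-lane shuffle.
// None of the index bytes has the high bit set, and vpshufb only uses the low 4 bits of each index, satisfying AVX2 lane rules.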
Vector256<byte> addGreenToBlueAndRedMask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
nuint numPixels = (uint)pixelData.Length;
nuint i = 0;
@ -104,7 +107,7 @@ internal static unsafe class LosslessUtils
{
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
Vector256<byte> in0g0g = Vector256_.ShuffleNative(input, addGreenToBlueAndRedMask);
Vector256<byte> in0g0g = Vector256_.ShufflePerLane(input, addGreenToBlueAndRedMask);
Vector256<byte> output = input + in0g0g;
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
i += 8;
@ -168,7 +171,7 @@ internal static unsafe class LosslessUtils
{
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
Vector256<byte> in0g0g = Vector256_.ShuffleNative(input, subtractGreenFromBlueAndRedMask);
Vector256<byte> in0g0g = Vector256_.ShufflePerLane(input, subtractGreenFromBlueAndRedMask);
Vector256<byte> output = input - in0g0g;
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
i += 8;

8
src/ImageSharp/Formats/Webp/WebpCommonUtils.cs

@ -44,8 +44,8 @@ internal static class WebpCommonUtils
Vector256<short> c1 = Vector256_.PackSignedSaturate(b2, b3).AsInt16();
Vector256<byte> d = Vector256_.PackSignedSaturate(c0, c1).AsByte();
Vector256<byte> bits = Vector256.Equals(d, all0x80Vector256);
int mask = Vector256_.MoveMask(bits);
if (mask != -1)
uint mask = bits.ExtractMostSignificantBits();
if (mask != 0xFFFF_FFFF)
{
return true;
}
@ -138,7 +138,7 @@ internal static class WebpCommonUtils
Vector128<short> c1 = Vector128_.PackSignedSaturate(b2, b3).AsInt16();
Vector128<byte> d = Vector128_.PackSignedSaturate(c0, c1).AsByte();
Vector128<byte> bits = Vector128.Equals(d, Vector128.Create((byte)0x80).AsByte());
int mask = Vector128_.MoveMask(bits);
uint mask = bits.ExtractMostSignificantBits();
return mask != 0xFFFF;
}
@ -153,7 +153,7 @@ internal static class WebpCommonUtils
Vector128<short> c = Vector128_.PackSignedSaturate(b0, b1).AsInt16();
Vector128<byte> d = Vector128_.PackSignedSaturate(c, c).AsByte();
Vector128<byte> bits = Vector128.Equals(d, Vector128.Create((byte)0x80).AsByte());
int mask = Vector128_.MoveMask(bits);
uint mask = bits.ExtractMostSignificantBits();
return mask != 0xFFFF;
}
}

Loading…
Cancel
Save