diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 6fef043169..ff5ea5de33 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -375,6 +375,11 @@ internal static partial class SimdUtils } else if (Vector256.IsHardwareAccelerated) { + // ShufflePerLane performs per-128-bit-lane shuffling using Avx2.Shuffle (vpshufb). + // MMShuffleSpan generates indices in the range [0, 31] and never sets bit 7 in any byte, + // so the shuffle will not zero elements. Because vpshufb uses only the low 4 bits (b[i] & 0x0F) + // for indexing within each lane, and ignores the upper bits unless bit 7 is set, + // this usage is guaranteed to remain within-lane and non-zeroing. Span temp = stackalloc byte[Vector256.Count]; Shuffle.MMShuffleSpan(ref temp, control); Vector256 mask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); @@ -391,17 +396,17 @@ internal static partial class SimdUtils ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); ref Vector256 vd0 = ref Unsafe.Add(ref destinationBase, i); - vd0 = Vector256_.ShuffleNative(vs0, mask); - Unsafe.Add(ref vd0, (nuint)1) = Vector256_.ShuffleNative(Unsafe.Add(ref vs0, (nuint)1), mask); - Unsafe.Add(ref vd0, (nuint)2) = Vector256_.ShuffleNative(Unsafe.Add(ref vs0, (nuint)2), mask); - Unsafe.Add(ref vd0, (nuint)3) = Vector256_.ShuffleNative(Unsafe.Add(ref vs0, (nuint)3), mask); + vd0 = Vector256_.ShufflePerLane(vs0, mask); + Unsafe.Add(ref vd0, (nuint)1) = Vector256_.ShufflePerLane(Unsafe.Add(ref vs0, (nuint)1), mask); + Unsafe.Add(ref vd0, (nuint)2) = Vector256_.ShufflePerLane(Unsafe.Add(ref vs0, (nuint)2), mask); + Unsafe.Add(ref vd0, (nuint)3) = Vector256_.ShufflePerLane(Unsafe.Add(ref vs0, (nuint)3), mask); } if (m > 0) { for (nuint i = u; i < n; i++) { - Unsafe.Add(ref destinationBase, i) = Vector256_.ShuffleNative(Unsafe.Add(ref sourceBase, i), mask); + Unsafe.Add(ref destinationBase, i) = Vector256_.ShufflePerLane(Unsafe.Add(ref sourceBase, i), mask); } } } diff --git a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs index a3b8e0156e..7eac4f58c4 100644 --- a/src/ImageSharp/Common/Helpers/Vector128Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector128Utilities.cs @@ -47,8 +47,10 @@ internal static class Vector128_ return AdvSimd.FusedAddRoundedHalving(left, right); } - // Portable fallback: (a + b + 1) >> 1 - return (left + right + Vector128.Create((byte)1)) >> 1; + // Account for potential 9th bit to ensure correct rounded result. + return Vector128.Narrow( + (Vector128.WidenLower(left) + Vector128.WidenLower(right) + Vector128.One) >> 1, + (Vector128.WidenUpper(left) + Vector128.WidenUpper(right) + Vector128.One) >> 1); } /// @@ -117,13 +119,17 @@ internal static class Vector128_ } // Don't use InverseMMShuffle here as we want to avoid the cast. - Vector64 indices = Vector64.Create( - (short)(control & 0x3), - (short)((control >> 2) & 0x3), - (short)((control >> 4) & 0x3), - (short)((control >> 6) & 0x3)); - - return Vector128.Create(value.GetLower(), Vector64.Shuffle(value.GetUpper(), indices)); + Vector128 indices = Vector128.Create( + 0, + 1, + 2, + 3, + (short)((control & 0x3) + 4), + (short)(((control >> 2) & 0x3) + 4), + (short)(((control >> 4) & 0x3) + 4), + (short)(((control >> 6) & 0x3) + 4)); + + return Vector128.Shuffle(value, indices); } /// @@ -144,13 +150,17 @@ internal static class Vector128_ } // Don't use InverseMMShuffle here as we want to avoid the cast. - Vector64 indices = Vector64.Create( - (short)(control & 0x3), - (short)((control >> 2) & 0x3), - (short)((control >> 4) & 0x3), - (short)((control >> 6) & 0x3)); - - return Vector128.Create(Vector64.Shuffle(value.GetLower(), indices), value.GetUpper()); + Vector128 indices = Vector128.Create( + (short)(control & 0x3), + (short)((control >> 2) & 0x3), + (short)((control >> 4) & 0x3), + (short)((control >> 6) & 0x3), + 4, + 5, + 6, + 7); + + return Vector128.Shuffle(value, indices); } /// @@ -237,28 +247,13 @@ internal static class Vector128_ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 ShiftLeftLogical(Vector128 value, [ConstantExpected] byte count) { - if (Sse2.IsSupported) - { - return Sse2.ShiftLeftLogical(value, count); - } - // Zero lanes where count >= 16 to match SSE2 if (count >= 16) { return Vector128.Zero; } - if (AdvSimd.IsSupported) - { - return AdvSimd.ShiftLogical(value, Vector128.Create((short)count)); - } - - if (PackedSimd.IsSupported) - { - return PackedSimd.ShiftLeft(value, count); - } - - return Vector128.ShiftLeft(value, count); + return value << count; } /// @@ -536,6 +531,11 @@ internal static class Vector128_ Vector128 prodLo = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower()); Vector128 prodHi = AdvSimd.MultiplyWideningLower(left.GetUpper(), right.GetUpper()); + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.AddPairwise(prodLo, prodHi); + } + Vector128 v0 = AdvSimd.AddPairwiseWidening(prodLo); Vector128 v1 = AdvSimd.AddPairwiseWidening(prodHi); @@ -587,50 +587,26 @@ internal static class Vector128_ return AdvSimd.Arm64.AddPairwise(left, right); } - // Extract the low and high parts of the products shuffling them to form a result we can add together. - // Use out-of-bounds to zero out the unused lanes. - Vector128 even = Vector128.Create(0, 2, 4, 6, 8, 8, 8, 8); - Vector128 odd = Vector128.Create(1, 3, 5, 7, 8, 8, 8, 8); - Vector128 v0 = Vector128.Shuffle(right, even); - Vector128 v1 = Vector128.Shuffle(right, odd); - Vector128 v2 = Vector128.Shuffle(left, even); - Vector128 v3 = Vector128.Shuffle(left, odd); - - return v0 + v1 + v2 + v3; - } - - /// - /// Multiply the packed 16-bit integers in and , producing - /// intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in the result. - /// - /// - /// The first vector containing packed 16-bit integers to multiply. - /// - /// - /// The second vector containing packed 16-bit integers to multiply. - /// - /// - /// A vector containing the low 16 bits of the products of the packed 16-bit integers - /// from and . - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 MultiplyLow(Vector128 left, Vector128 right) - { - if (Sse2.IsSupported) + if (AdvSimd.IsSupported) { - return Sse2.MultiplyLow(left, right); - } + Vector128 v0 = AdvSimd.AddPairwiseWidening(left); + Vector128 v1 = AdvSimd.AddPairwiseWidening(right); - // Widen each half of the short vectors into two int vectors - (Vector128 leftLo, Vector128 leftHi) = Vector128.Widen(left); - (Vector128 rightLo, Vector128 rightHi) = Vector128.Widen(right); + return Vector128.Narrow(v0, v1); + } - // Elementwise multiply: each int lane now holds the full 32-bit product - Vector128 prodLo = leftLo * rightLo; - Vector128 prodHi = leftHi * rightHi; + { + // Extract the low and high parts of the products shuffling them to form a result we can add together. + // Use out-of-bounds to zero out the unused lanes. + Vector128 even = Vector128.Create(0, 2, 4, 6, 8, 8, 8, 8); + Vector128 odd = Vector128.Create(1, 3, 5, 7, 8, 8, 8, 8); + Vector128 v0 = Vector128.Shuffle(right, even); + Vector128 v1 = Vector128.Shuffle(right, odd); + Vector128 v2 = Vector128.Shuffle(left, even); + Vector128 v3 = Vector128.Shuffle(left, odd); - // Narrow the two int vectors back into one short vector - return Vector128.Narrow(prodLo, prodHi); + return v0 + v1 + v2 + v3; + } } /// @@ -655,20 +631,33 @@ internal static class Vector128_ return Sse2.MultiplyHigh(left, right); } - // Widen each half of the short vectors into two int vectors - (Vector128 leftLo, Vector128 leftHi) = Vector128.Widen(left); - (Vector128 rightLo, Vector128 rightHi) = Vector128.Widen(right); + if (AdvSimd.IsSupported) + { + Vector128 prodLo = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower()); + Vector128 prodHi = AdvSimd.MultiplyWideningUpper(left, right); + + prodLo >>= 16; + prodHi >>= 16; + + return Vector128.Narrow(prodLo, prodHi); + } + + { + // Widen each half of the short vectors into two int vectors + (Vector128 leftLo, Vector128 leftHi) = Vector128.Widen(left); + (Vector128 rightLo, Vector128 rightHi) = Vector128.Widen(right); - // Elementwise multiply: each int lane now holds the full 32-bit product - Vector128 prodLo = leftLo * rightLo; - Vector128 prodHi = leftHi * rightHi; + // Elementwise multiply: each int lane now holds the full 32-bit product + Vector128 prodLo = leftLo * rightLo; + Vector128 prodHi = leftHi * rightHi; - // Arithmetic shift right by 16 bits to extract the high word - prodLo >>= 16; - prodHi >>= 16; + // Arithmetic shift right by 16 bits to extract the high word + prodLo >>= 16; + prodHi >>= 16; - // Narrow the two int vectors back into one short vector - return Vector128.Narrow(prodLo, prodHi); + // Narrow the two int vectors back into one short vector + return Vector128.Narrow(prodLo, prodHi); + } } /// @@ -693,20 +682,33 @@ internal static class Vector128_ return Sse2.MultiplyHigh(left, right); } - // Widen each half of the short vectors into two uint vectors - (Vector128 leftLo, Vector128 leftHi) = Vector128.Widen(left); - (Vector128 rightLo, Vector128 rightHi) = Vector128.Widen(right); + if (AdvSimd.IsSupported) + { + Vector128 prodLo = AdvSimd.MultiplyWideningLower(left.GetLower(), right.GetLower()); + Vector128 prodHi = AdvSimd.MultiplyWideningUpper(left, right); - // Elementwise multiply: each int lane now holds the full 32-bit product - Vector128 prodLo = leftLo * rightLo; - Vector128 prodHi = leftHi * rightHi; + prodLo >>= 16; + prodHi >>= 16; - // Arithmetic shift right by 16 bits to extract the high word - prodLo >>= 16; - prodHi >>= 16; + return Vector128.Narrow(prodLo, prodHi); + } + + { + // Widen each half of the short vectors into two uint vectors + (Vector128 leftLo, Vector128 leftHi) = Vector128.Widen(left); + (Vector128 rightLo, Vector128 rightHi) = Vector128.Widen(right); - // Narrow the two int vectors back into one short vector - return Vector128.Narrow(prodLo, prodHi); + // Elementwise multiply: each int lane now holds the full 32-bit product + Vector128 prodLo = leftLo * rightLo; + Vector128 prodHi = leftHi * rightHi; + + // Arithmetic shift right by 16 bits to extract the high word + prodLo >>= 16; + prodHi >>= 16; + + // Narrow the two int vectors back into one short vector + return Vector128.Narrow(prodLo, prodHi); + } } /// @@ -1363,90 +1365,4 @@ internal static class Vector128_ // Narrow back to signed bytes return Vector128.Narrow(diffLo, diffHi); } - - /// - /// Create mask from the most significant bit of each 8-bit element in , and store the result. - /// - /// - /// The vector containing packed 8-bit integers from which to create the mask. - /// - /// - /// A 16-bit integer mask where each bit corresponds to the most significant bit of each 8-bit element - /// in . - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int MoveMask(Vector128 value) - { - if (Sse2.IsSupported) - { - return Sse2.MoveMask(value); - } - - // AdvSimd versions ported from Stack Overflow answer: - // https://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon - if (AdvSimd.Arm64.IsSupported) - { - // Shift values to align each MSB to its corresponding bit in the output - Vector128 shift = Vector128.Create(-7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0); - - // Mask to isolate MSBs - Vector128 msbMask = Vector128.Create((byte)0x80); - Vector128 masked = value & msbMask; - - // Shift each MSB into the correct bit position - Vector128 shifted = AdvSimd.ShiftLogical(masked.AsSByte(), shift).AsByte(); - - // Sum lanes: lower 8 go into bits 0–7, upper 8 go into bits 8–15 - byte lo = AdvSimd.Arm64.AddAcross(shifted.GetLower()).ToScalar(); - byte hi = AdvSimd.Arm64.AddAcross(shifted.GetUpper()).ToScalar(); - - return lo + (hi << 8); - } - - if (AdvSimd.IsSupported) - { - Vector128 powers = Vector128.Create(1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128); - Vector128 msbMask = Vector128.Create((byte)0x80); - Vector128 normalized = AdvSimd.CompareEqual(value & msbMask, msbMask); // 0xFF or 0x00 - Vector128 masked = normalized & powers; - - Vector128 sum8 = AdvSimd.AddPairwiseWidening(masked); - Vector128 sum16 = AdvSimd.AddPairwiseWidening(sum8); - Vector128 sum32 = AdvSimd.AddPairwiseWidening(sum16); - - // Extract lower 8 bits of each 64-bit lane - byte lo = sum32.AsByte().GetElement(0); - byte hi = sum32.AsByte().GetElement(8); - - return (hi << 8) | lo; - } - - { - // Step 1: isolate MSBs - Vector128 msbMask = Vector128.Create((byte)0x80); - Vector128 masked = value & msbMask; - - // Step 2: shift each byte so MSB lands in bit position [0..15] - // i.e. convert: 0x80 → 1 << i - Vector128 bitShifts = Vector128.Create((ushort)1, 2, 4, 8, 16, 32, 64, 128); - Vector128 bitShiftsHigh = Vector128.Create(256, 512, 1024, 2048, 4096, 8192, 16384, 32768); - - // Step 3: widen to ushort - (Vector128 lo, Vector128 hi) = Vector128.Widen(masked); - - // Step 4: compare > 0 to get 0xFFFF where MSB was set - lo = Vector128.ConditionalSelect(Vector128.Equals(lo, Vector128.Zero), Vector128.Zero, bitShifts); - hi = Vector128.ConditionalSelect(Vector128.Equals(hi, Vector128.Zero), Vector128.Zero, bitShiftsHigh); - - // Step 5: bitwise OR the two halves - Vector128 maskVector = lo | hi; - - // Step 6: horizontal OR reduction via shuffles - maskVector |= Vector128.Shuffle(maskVector, Vector128.Create((ushort)4, 5, 6, 7, 0, 1, 2, 3)); - maskVector |= Vector128.Shuffle(maskVector, Vector128.Create((ushort)2, 3, 0, 1, 6, 7, 4, 5)); - maskVector |= Vector128.Shuffle(maskVector, Vector128.Create((ushort)1, 0, 3, 2, 5, 4, 7, 6)); - - return maskVector.ToScalar(); - } - } } diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs index 4769df2b0b..14ac13dd8d 100644 --- a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs @@ -39,14 +39,17 @@ internal static class Vector256_ /// /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ShuffleNative(Vector256 vector, Vector256 indices) + public static Vector256 ShufflePerLane(Vector256 vector, Vector256 indices) { if (Avx2.IsSupported) { return Avx2.Shuffle(vector, indices); } - return Vector256.Shuffle(vector, indices); + Vector128 indicesLo = indices.GetLower(); + Vector128 lower = Vector128_.ShuffleNative(vector.GetLower(), indicesLo); + Vector128 upper = Vector128_.ShuffleNative(vector.GetUpper(), indicesLo); + return Vector256.Create(lower, upper); } /// @@ -458,26 +461,4 @@ internal static class Vector256_ Vector128_.SubtractSaturate(left.GetLower(), right.GetLower()), Vector128_.SubtractSaturate(left.GetUpper(), right.GetUpper())); } - - /// - /// Create mask from the most significant bit of each 8-bit element in , and store the result. - /// - /// - /// The vector containing packed 8-bit integers from which to create the mask. - /// - /// - /// A 16-bit integer mask where each bit corresponds to the most significant bit of each 8-bit element - /// in . - /// - public static int MoveMask(Vector256 value) - { - if (Avx2.IsSupported) - { - return Avx2.MoveMask(value); - } - - int loMask = Vector128_.MoveMask(value.GetLower()); - int hiMask = Vector128_.MoveMask(value.GetUpper()); - return loMask | (hiMask << 16); - } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs index a0930c75b0..c701d56d3f 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/ColorSpaceTransformUtils.cs @@ -16,6 +16,9 @@ internal static class ColorSpaceTransformUtils { const int span = 16; Span values = stackalloc ushort[span]; + + // These shuffle masks are safe for use with Avx2.Shuffle because all indices are within their respective 128-bit lanes (0–15 for the low mask, 16–31 for the high mask), + // and all disabled lanes are set to 0xFF to zero those bytes per the vpshufb specification. This guarantees lane-local shuffling with no cross-lane violations. Vector256 collectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255); Vector256 collectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255, 255, 18, 255, 22, 255, 26, 255, 30); Vector256 collectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); @@ -33,8 +36,8 @@ internal static class ColorSpaceTransformUtils nuint input1Idx = x + (span / 2); Vector256 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); Vector256 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); - Vector256 r0 = Vector256_.ShuffleNative(input0, collectColorBlueTransformsShuffleLowMask256); - Vector256 r1 = Vector256_.ShuffleNative(input1, collectColorBlueTransformsShuffleHighMask256); + Vector256 r0 = Vector256_.ShufflePerLane(input0, collectColorBlueTransformsShuffleLowMask256); + Vector256 r1 = Vector256_.ShufflePerLane(input1, collectColorBlueTransformsShuffleHighMask256); Vector256 r = r0 | r1; Vector256 gb0 = input0 & collectColorBlueTransformsGreenBlueMask256; Vector256 gb1 = input1 & collectColorBlueTransformsGreenBlueMask256; diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 8cc9fd05b9..e573097e53 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -97,6 +97,9 @@ internal static unsafe class LosslessUtils { if (Vector256.IsHardwareAccelerated && pixelData.Length >= 8) { + // The `255` values disable the write for alpha (A), since 0x80 is set in the control byte (high bit set). + // Each byte index is within its respective 128-bit lane (0–15 and 16–31), so this is safe for per-lane shuffle. + // The high bits are not set for the index bytes, and the values are always < 16 per lane, satisfying AVX2 lane rules. Vector256 addGreenToBlueAndRedMask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); nuint numPixels = (uint)pixelData.Length; nuint i = 0; @@ -104,7 +107,7 @@ internal static unsafe class LosslessUtils { ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector256 input = Unsafe.As>(ref pos).AsByte(); - Vector256 in0g0g = Vector256_.ShuffleNative(input, addGreenToBlueAndRedMask); + Vector256 in0g0g = Vector256_.ShufflePerLane(input, addGreenToBlueAndRedMask); Vector256 output = input + in0g0g; Unsafe.As>(ref pos) = output.AsUInt32(); i += 8; @@ -168,7 +171,7 @@ internal static unsafe class LosslessUtils { ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); Vector256 input = Unsafe.As>(ref pos).AsByte(); - Vector256 in0g0g = Vector256_.ShuffleNative(input, subtractGreenFromBlueAndRedMask); + Vector256 in0g0g = Vector256_.ShufflePerLane(input, subtractGreenFromBlueAndRedMask); Vector256 output = input - in0g0g; Unsafe.As>(ref pos) = output.AsUInt32(); i += 8; diff --git a/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs b/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs index b08fe15f51..acfa26b4ff 100644 --- a/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs +++ b/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs @@ -44,8 +44,8 @@ internal static class WebpCommonUtils Vector256 c1 = Vector256_.PackSignedSaturate(b2, b3).AsInt16(); Vector256 d = Vector256_.PackSignedSaturate(c0, c1).AsByte(); Vector256 bits = Vector256.Equals(d, all0x80Vector256); - int mask = Vector256_.MoveMask(bits); - if (mask != -1) + uint mask = bits.ExtractMostSignificantBits(); + if (mask != 0xFFFF_FFFF) { return true; } @@ -138,7 +138,7 @@ internal static class WebpCommonUtils Vector128 c1 = Vector128_.PackSignedSaturate(b2, b3).AsInt16(); Vector128 d = Vector128_.PackSignedSaturate(c0, c1).AsByte(); Vector128 bits = Vector128.Equals(d, Vector128.Create((byte)0x80).AsByte()); - int mask = Vector128_.MoveMask(bits); + uint mask = bits.ExtractMostSignificantBits(); return mask != 0xFFFF; } @@ -153,7 +153,7 @@ internal static class WebpCommonUtils Vector128 c = Vector128_.PackSignedSaturate(b0, b1).AsInt16(); Vector128 d = Vector128_.PackSignedSaturate(c, c).AsByte(); Vector128 bits = Vector128.Equals(d, Vector128.Create((byte)0x80).AsByte()); - int mask = Vector128_.MoveMask(bits); + uint mask = bits.ExtractMostSignificantBits(); return mask != 0xFFFF; } }