Browse Source

Add Arm64 movemask

pull/2933/head
James Jackson-South 11 months ago
parent
commit
1a63729cb9
  1. 22
      src/ImageSharp/Common/Helpers/Vector128Utilities.cs

22
src/ImageSharp/Common/Helpers/Vector128Utilities.cs

@@ -1319,9 +1319,29 @@ internal static class Vector128_
return Sse2.MoveMask(value);
}
// AdvSimd versions ported from Stack Overflow answer:
// https://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon
if (AdvSimd.Arm64.IsSupported)
{
// Shift values to align each MSB to its corresponding bit in the output
Vector128<sbyte> shift = Vector128.Create(-7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0);
// Mask to isolate MSBs
Vector128<byte> msbMask = Vector128.Create((byte)0x80);
Vector128<byte> masked = value & msbMask;
// Shift each MSB into the correct bit position
Vector128<byte> shifted = AdvSimd.ShiftLogical(masked.AsSByte(), shift).AsByte();
// Sum lanes: lower 8 go into bits 0–7, upper 8 go into bits 8–15
byte lo = AdvSimd.Arm64.AddAcross(shifted.GetLower()).ToScalar();
byte hi = AdvSimd.Arm64.AddAcross(shifted.GetUpper()).ToScalar();
return lo + (hi << 8);
}
if (AdvSimd.IsSupported)
{
// https://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon
Vector128<byte> powers = Vector128.Create(1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128);
Vector128<byte> msbMask = Vector128.Create((byte)0x80);
Vector128<byte> normalized = AdvSimd.CompareEqual(value & msbMask, msbMask); // 0xFF or 0x00

Loading…
Cancel
Save