|
|
|
@ -216,17 +216,18 @@ internal static class LossyUtils |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
private static int Vp8_Sse16x16_Neon(Span<byte> a, Span<byte> b) |
|
|
|
private static unsafe int Vp8_Sse16x16_Neon(Span<byte> a, Span<byte> b) |
|
|
|
{ |
|
|
|
Vector128<uint> sum = Vector128<uint>.Zero; |
|
|
|
ref byte aRef = ref MemoryMarshal.GetReference(a); |
|
|
|
ref byte bRef = ref MemoryMarshal.GetReference(b); |
|
|
|
for (int y = 0; y < 16; y++) |
|
|
|
fixed (byte* aRef = &MemoryMarshal.GetReference(a)) |
|
|
|
{ |
|
|
|
sum = AccumulateSSE16Neon( |
|
|
|
ref Unsafe.Add(ref aRef, y * WebpConstants.Bps), |
|
|
|
ref Unsafe.Add(ref bRef, y * WebpConstants.Bps), |
|
|
|
sum); |
|
|
|
fixed (byte* bRef = &MemoryMarshal.GetReference(b)) |
|
|
|
{ |
|
|
|
for (int y = 0; y < 16; y++) |
|
|
|
{ |
|
|
|
sum = AccumulateSSE16Neon(aRef + (y * WebpConstants.Bps), bRef + (y * WebpConstants.Bps), sum); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#if NET7_0_OR_GREATER
|
|
|
|
@ -237,17 +238,18 @@ internal static class LossyUtils |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
private static int Vp8_Sse16x8_Neon(Span<byte> a, Span<byte> b) |
|
|
|
private static unsafe int Vp8_Sse16x8_Neon(Span<byte> a, Span<byte> b) |
|
|
|
{ |
|
|
|
Vector128<uint> sum = Vector128<uint>.Zero; |
|
|
|
ref byte aRef = ref MemoryMarshal.GetReference(a); |
|
|
|
ref byte bRef = ref MemoryMarshal.GetReference(b); |
|
|
|
for (int y = 0; y < 8; y++) |
|
|
|
fixed (byte* aRef = &MemoryMarshal.GetReference(a)) |
|
|
|
{ |
|
|
|
sum = AccumulateSSE16Neon( |
|
|
|
ref Unsafe.Add(ref aRef, y * WebpConstants.Bps), |
|
|
|
ref Unsafe.Add(ref bRef, y * WebpConstants.Bps), |
|
|
|
sum); |
|
|
|
fixed (byte* bRef = &MemoryMarshal.GetReference(b)) |
|
|
|
{ |
|
|
|
for (int y = 0; y < 8; y++) |
|
|
|
{ |
|
|
|
sum = AccumulateSSE16Neon(aRef + (y * WebpConstants.Bps), bRef + (y * WebpConstants.Bps), sum); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#if NET7_0_OR_GREATER
|
|
|
|
@ -296,10 +298,10 @@ internal static class LossyUtils |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
private static Vector128<uint> AccumulateSSE16Neon(ref byte aRef, ref byte bRef, Vector128<uint> sum) |
|
|
|
private static unsafe Vector128<uint> AccumulateSSE16Neon(byte* a, byte* b, Vector128<uint> sum) |
|
|
|
{ |
|
|
|
Vector128<byte> a0 = Unsafe.As<byte, Vector128<byte>>(ref aRef); |
|
|
|
Vector128<byte> b0 = Unsafe.As<byte, Vector128<byte>>(ref bRef); |
|
|
|
Vector128<byte> a0 = AdvSimd.LoadVector128(a); |
|
|
|
Vector128<byte> b0 = AdvSimd.LoadVector128(b); |
|
|
|
|
|
|
|
Vector128<byte> absDiff = AdvSimd.AbsoluteDifference(a0, b0); |
|
|
|
Vector64<byte> absDiffLower = absDiff.GetLower(); |
|
|
|
|