From ae7306beb98367cdd42235605adfc3305490fc45 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 17 Feb 2023 14:24:28 +0100 Subject: [PATCH] Change arguments of AccumulateSSE16Neon to pointers for better code generation --- .../Formats/Webp/Lossy/LossyUtils.cs | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 13f5662e7..316c705e3 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -216,17 +216,18 @@ internal static class LossyUtils } [MethodImpl(InliningOptions.ShortMethod)] - private static int Vp8_Sse16x16_Neon(Span a, Span b) + private static unsafe int Vp8_Sse16x16_Neon(Span a, Span b) { Vector128 sum = Vector128.Zero; - ref byte aRef = ref MemoryMarshal.GetReference(a); - ref byte bRef = ref MemoryMarshal.GetReference(b); - for (int y = 0; y < 16; y++) + fixed (byte* aRef = &MemoryMarshal.GetReference(a)) { - sum = AccumulateSSE16Neon( - ref Unsafe.Add(ref aRef, y * WebpConstants.Bps), - ref Unsafe.Add(ref bRef, y * WebpConstants.Bps), - sum); + fixed (byte* bRef = &MemoryMarshal.GetReference(b)) + { + for (int y = 0; y < 16; y++) + { + sum = AccumulateSSE16Neon(aRef + (y * WebpConstants.Bps), bRef + (y * WebpConstants.Bps), sum); + } + } } #if NET7_0_OR_GREATER @@ -237,17 +238,18 @@ internal static class LossyUtils } [MethodImpl(InliningOptions.ShortMethod)] - private static int Vp8_Sse16x8_Neon(Span a, Span b) + private static unsafe int Vp8_Sse16x8_Neon(Span a, Span b) { Vector128 sum = Vector128.Zero; - ref byte aRef = ref MemoryMarshal.GetReference(a); - ref byte bRef = ref MemoryMarshal.GetReference(b); - for (int y = 0; y < 8; y++) + fixed (byte* aRef = &MemoryMarshal.GetReference(a)) { - sum = AccumulateSSE16Neon( - ref Unsafe.Add(ref aRef, y * WebpConstants.Bps), - ref Unsafe.Add(ref bRef, y * WebpConstants.Bps), - sum); + fixed (byte* bRef = &MemoryMarshal.GetReference(b)) + { + for (int y = 0; y < 8; y++) + { + sum = AccumulateSSE16Neon(aRef + (y * WebpConstants.Bps), bRef + (y * WebpConstants.Bps), sum); + } + } } #if NET7_0_OR_GREATER @@ -296,10 +298,10 @@ internal static class LossyUtils } [MethodImpl(InliningOptions.ShortMethod)] - private static Vector128 AccumulateSSE16Neon(ref byte aRef, ref byte bRef, Vector128 sum) + private static unsafe Vector128 AccumulateSSE16Neon(byte* a, byte* b, Vector128 sum) { - Vector128 a0 = Unsafe.As>(ref aRef); - Vector128 b0 = Unsafe.As>(ref bRef); + Vector128 a0 = AdvSimd.LoadVector128(a); + Vector128 b0 = AdvSimd.LoadVector128(b); Vector128 absDiff = AdvSimd.AbsoluteDifference(a0, b0); Vector64 absDiffLower = absDiff.GetLower();