From 1033297a37519b56729b7a5ba54259ba1fcb4de4 Mon Sep 17 00:00:00 2001
From: Sergio Pedri
Date: Tue, 19 Jan 2021 18:19:31 +0100
Subject: [PATCH] Add initial FMA resize kernel convolve implementation

---
Notes:
  ResizeKernel.ConvolveCore gains a hardware-accelerated path, compiled only
  when SUPPORTS_RUNTIME_INTRINSICS is defined and taken only when
  Fma.IsSupported is true:
    * the main loop consumes two weights and two Vector4 row items per
      iteration, broadcasting each weight across one 128-bit half of a
      Vector256<float> and accumulating with Fma.MultiplyAdd;
    * the two 128-bit halves of the accumulator are then summed with Sse.Add;
    * when Length is odd, the last item is folded in with a 128-bit
      Fma.MultiplyAdd.
  The fallback path is rewritten from an indexed for-loop into a loop that
  advances a float* over the weights and ref-reassigns rowStartRef.
  Both paths advance rowStartRef locally via ref reassignment.

 .../Transforms/Resize/ResizeKernel.cs         | 58 +++++++++++++++----
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs
index d94aeffe69..bff2c574a6 100644
--- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs
+++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs
@@ -4,6 +4,10 @@
 using System;
 using System.Numerics;
 using System.Runtime.CompilerServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 
 namespace SixLabors.ImageSharp.Processing.Processors.Transforms
 {
@@ -66,21 +70,55 @@ namespace SixLabors.ImageSharp.Processing.Processors.Transforms
         [MethodImpl(InliningOptions.ShortMethod)]
         public Vector4 ConvolveCore(ref Vector4 rowStartRef)
         {
-            ref float horizontalValues = ref Unsafe.AsRef<float>(this.bufferPtr);
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Fma.IsSupported)
+            {
+                float* bufferStart = this.bufferPtr;
+                float* bufferEnd = bufferStart + (this.Length & ~1);
+                Vector256<float> result256 = Vector256<float>.Zero;
 
-            // Destination color components
-            Vector4 result = Vector4.Zero;
+                while (bufferStart < bufferEnd)
+                {
+                    Vector256<float> rowItem256 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
+                    var bufferItem256 = Vector256.Create(Vector128.Create(bufferStart[0]), Vector128.Create(bufferStart[1]));
 
-            for (int i = 0; i < this.Length; i++)
-            {
-                float weight = Unsafe.Add(ref horizontalValues, i);
+                    result256 = Fma.MultiplyAdd(rowItem256, bufferItem256, result256);
+
+                    bufferStart += 2;
+                    rowStartRef = ref Unsafe.Add(ref rowStartRef, 2);
+                }
+
+                Vector128<float> result128 = Sse.Add(result256.GetLower(), result256.GetUpper());
+
+                if ((this.Length & 1) != 0)
+                {
+                    Vector128<float> rowItem128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
+                    var bufferItem128 = Vector128.Create(*bufferStart);
 
-                // Vector4 v = offsetedRowSpan[i];
-                Vector4 v = Unsafe.Add(ref rowStartRef, i);
-                result += v * weight;
+                    result128 = Fma.MultiplyAdd(rowItem128, bufferItem128, result128);
+                }
+
+                return *(Vector4*)&result128;
             }
+            else
+#endif
+            {
+                // Destination color components
+                Vector4 result = Vector4.Zero;
+                float* bufferStart = this.bufferPtr;
+                float* bufferEnd = this.bufferPtr + this.Length;
+
+                while (bufferStart < bufferEnd)
+                {
+                    // Vector4 v = offsetedRowSpan[i];
+                    result += rowStartRef * *bufferStart;
 
-            return result;
+                    rowStartRef = ref Unsafe.Add(ref rowStartRef, 1);
+                    bufferStart++;
+                }
+
+                return result;
+            }
         }
 
         /// <summary>