From c825eccd10f14eb733cdbe4c75656005afae5aed Mon Sep 17 00:00:00 2001
From: Sergio Pedri
Date: Tue, 19 Jan 2021 19:14:51 +0100
Subject: [PATCH] Improved loading of factors using permutation

Assembly for loading in the loop went from:

```asm
vmovss xmm2, [rax]
vbroadcastss xmm2, xmm2
vmovss xmm3, [rax+4]
vbroadcastss xmm3, xmm3
vinsertf128 ymm2, ymm2, xmm3, 1
```

To:

```asm
vmovsd xmm3, [rax]
vbroadcastsd ymm3, xmm3
vpermps ymm3, ymm1, ymm3
```
---
 .../Processing/Processors/Transforms/Resize/ResizeKernel.cs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs
index bff2c574a..02027f42d 100644
--- a/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs
+++ b/src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs
@@ -76,11 +76,12 @@ namespace SixLabors.ImageSharp.Processing.Processors.Transforms
 float* bufferStart = this.bufferPtr;
 float* bufferEnd = bufferStart + (this.Length & ~1);
 Vector256 result256 = Vector256.Zero;
+ var mask = Vector256.Create(0, 0, 0, 0, 1, 1, 1, 1);
 while (bufferStart < bufferEnd)
 {
     Vector256 rowItem256 = Unsafe.As>(ref rowStartRef);
-    var bufferItem256 = Vector256.Create(Vector128.Create(bufferStart[0]), Vector128.Create(bufferStart[1]));
+    Vector256 bufferItem256 = Avx2.PermuteVar8x32(Vector256.Create(*(double*)bufferStart).AsSingle(), mask);
     result256 = Fma.MultiplyAdd(rowItem256, bufferItem256, result256);