From 7eb6b238b30bec1781d96a90959c42cb5151f69a Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 19 Dec 2021 20:23:52 +0100 Subject: [PATCH] Add AVX2 version of GetResidualCost --- .../Formats/Webp/Lossy/Vp8Residual.cs | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs index 8da176e770..0c0f9b02aa 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs @@ -17,9 +17,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy internal class Vp8Residual { #if SUPPORTS_RUNTIME_INTRINSICS - private static readonly Vector128 Cst2 = Vector128.Create((byte)2); + private static readonly Vector256 Cst2 = Vector256.Create((byte)2); - private static readonly Vector128 Cst67 = Vector128.Create((byte)67); + private static readonly Vector256 Cst67 = Vector256.Create((byte)67); #endif private readonly byte[] scratch = new byte[32]; @@ -145,7 +145,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } #if SUPPORTS_RUNTIME_INTRINSICS - if (Sse2.IsSupported) + if (Avx2.IsSupported) { Span ctxs = this.scratch.AsSpan(0, 16); Span levels = this.scratch.AsSpan(16, 16); @@ -153,23 +153,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Precompute clamped levels and contexts, packed to 8b. ref short outputRef = ref MemoryMarshal.GetReference(this.Coeffs); - Vector128 c0 = Unsafe.As>(ref outputRef).AsInt16(); - Vector128 c1 = Unsafe.As>(ref Unsafe.Add(ref outputRef, 8)).AsInt16(); - Vector128 d0 = Sse2.Subtract(Vector128.Zero, c0); - Vector128 d1 = Sse2.Subtract(Vector128.Zero, c1); - Vector128 e0 = Sse2.Max(c0, d0); // abs(v), 16b - Vector128 e1 = Sse2.Max(c1, d1); - Vector128 f = Sse2.PackSignedSaturate(e0, e1); - Vector128 g = Sse2.Min(f.AsByte(), Cst2); - Vector128 h = Sse2.Min(f.AsByte(), Cst67); // clampLevel in [0..67] + Vector256 c0 = Unsafe.As>(ref outputRef).AsInt16(); + Vector256 d0 = Avx2.Subtract(Vector256.Zero, c0); + Vector256 e0 = Avx2.Max(c0, d0); // abs(v), 16b + Vector256 f = Avx2.PackSignedSaturate(e0, e0); + Vector256 g = Avx2.Min(f.AsByte(), Cst2); + Vector256 h = Avx2.Min(f.AsByte(), Cst67); // clampLevel in [0..67] ref byte ctxsRef = ref MemoryMarshal.GetReference(ctxs); ref byte levelsRef = ref MemoryMarshal.GetReference(levels); ref ushort absLevelsRef = ref MemoryMarshal.GetReference(absLevels); - Unsafe.As>(ref ctxsRef) = g; - Unsafe.As>(ref levelsRef) = h; - Unsafe.As>(ref absLevelsRef) = e0.AsUInt16(); - Unsafe.As>(ref Unsafe.Add(ref absLevelsRef, 8)) = e1.AsUInt16(); + Unsafe.As>(ref ctxsRef) = g.GetLower(); + Unsafe.As>(ref levelsRef) = h.GetLower(); + Unsafe.As>(ref absLevelsRef) = e0.AsUInt16(); int level; int flevel;