diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs index 4eeeedd376..8da176e770 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs @@ -3,6 +3,11 @@ using System; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp.Formats.Webp.Lossy { @@ -11,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// internal class Vp8Residual { +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 Cst2 = Vector128.Create((byte)2); + + private static readonly Vector128 Cst67 = Vector128.Create((byte)67); +#endif + + private readonly byte[] scratch = new byte[32]; + + private readonly ushort[] scratchUShort = new ushort[16]; + public int First { get; set; } public int Last { get; set; } @@ -129,27 +144,82 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return LossyUtils.Vp8BitCost(0, (byte)p0); } - int v; - for (; n < this.Last; ++n) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) { + Span ctxs = this.scratch.AsSpan(0, 16); + Span levels = this.scratch.AsSpan(16, 16); + Span absLevels = this.scratchUShort.AsSpan(); + + // Precompute clamped levels and contexts, packed to 8b. + ref short outputRef = ref MemoryMarshal.GetReference(this.Coeffs); + Vector128 c0 = Unsafe.As>(ref outputRef).AsInt16(); + Vector128 c1 = Unsafe.As>(ref Unsafe.Add(ref outputRef, 8)).AsInt16(); + Vector128 d0 = Sse2.Subtract(Vector128.Zero, c0); + Vector128 d1 = Sse2.Subtract(Vector128.Zero, c1); + Vector128 e0 = Sse2.Max(c0, d0); // abs(v), 16b + Vector128 e1 = Sse2.Max(c1, d1); + Vector128 f = Sse2.PackSignedSaturate(e0, e1); + Vector128 g = Sse2.Min(f.AsByte(), Cst2); + Vector128 h = Sse2.Min(f.AsByte(), Cst67); // clampLevel in [0..67] + + ref byte ctxsRef = ref MemoryMarshal.GetReference(ctxs); + ref byte levelsRef = ref MemoryMarshal.GetReference(levels); + ref ushort absLevelsRef = ref MemoryMarshal.GetReference(absLevels); + Unsafe.As>(ref ctxsRef) = g; + Unsafe.As>(ref levelsRef) = h; + Unsafe.As>(ref absLevelsRef) = e0.AsUInt16(); + Unsafe.As>(ref Unsafe.Add(ref absLevelsRef, 8)) = e1.AsUInt16(); + + int level; + int flevel; + for (; n < this.Last; ++n) + { + int ctx = ctxs[n]; + level = levels[n]; + flevel = absLevels[n]; + cost += WebpLookupTables.Vp8LevelFixedCosts[flevel] + t.Costs[level]; + t = costs[n + 1].Costs[ctx]; + } + + // Last coefficient is always non-zero. + level = levels[n]; + flevel = absLevels[n]; + cost += WebpLookupTables.Vp8LevelFixedCosts[flevel] + t.Costs[level]; + if (n < 15) + { + int b = WebpConstants.Vp8EncBands[n + 1]; + int ctx = ctxs[n]; + int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0]; + cost += LossyUtils.Vp8BitCost(0, (byte)lastP0); + } + + return cost; + } +#endif + { + int v; + for (; n < this.Last; ++n) + { + v = Math.Abs(this.Coeffs[n]); + int ctx = v >= 2 ? 2 : v; + cost += LevelCost(t.Costs, v); + t = costs[n + 1].Costs[ctx]; + } + + // Last coefficient is always non-zero v = Math.Abs(this.Coeffs[n]); - int ctx = v >= 2 ? 2 : v; cost += LevelCost(t.Costs, v); - t = costs[n + 1].Costs[ctx]; - } + if (n < 15) + { + int b = WebpConstants.Vp8EncBands[n + 1]; + int ctx = v == 1 ? 1 : 2; + int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0]; + cost += LossyUtils.Vp8BitCost(0, (byte)lastP0); + } - // Last coefficient is always non-zero - v = Math.Abs(this.Coeffs[n]); - cost += LevelCost(t.Costs, v); - if (n < 15) - { - int b = WebpConstants.Vp8EncBands[n + 1]; - int ctx = v == 1 ? 1 : 2; - int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0]; - cost += LossyUtils.Vp8BitCost(0, (byte)lastP0); + return cost; } - - return cost; } [MethodImpl(InliningOptions.ShortMethod)]