|
|
|
@ -3,6 +3,11 @@ |
|
|
|
|
|
|
|
using System; |
|
|
|
using System.Runtime.CompilerServices; |
|
|
|
using System.Runtime.InteropServices; |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
using System.Runtime.Intrinsics; |
|
|
|
using System.Runtime.Intrinsics.X86; |
|
|
|
#endif
|
|
|
|
|
|
|
|
namespace SixLabors.ImageSharp.Formats.Webp.Lossy |
|
|
|
{ |
|
|
|
@ -11,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy |
|
|
|
/// </summary>
|
|
|
|
internal class Vp8Residual |
|
|
|
{ |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
private static readonly Vector256<byte> Cst2 = Vector256.Create((byte)2); |
|
|
|
|
|
|
|
private static readonly Vector256<byte> Cst67 = Vector256.Create((byte)67); |
|
|
|
#endif
|
|
|
|
|
|
|
|
private readonly byte[] scratch = new byte[32]; |
|
|
|
|
|
|
|
private readonly ushort[] scratchUShort = new ushort[16]; |
|
|
|
|
|
|
|
public int First { get; set; } |
|
|
|
|
|
|
|
public int Last { get; set; } |
|
|
|
@ -37,14 +52,39 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy |
|
|
|
|
|
|
|
public void SetCoeffs(Span<short> coeffs) |
|
|
|
{ |
|
|
|
int n; |
|
|
|
this.Last = -1; |
|
|
|
for (n = 15; n >= 0; --n) |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
if (Sse2.IsSupported) |
|
|
|
{ |
|
|
|
ref short coeffsRef = ref MemoryMarshal.GetReference(coeffs); |
|
|
|
Vector128<byte> c0 = Unsafe.As<short, Vector128<byte>>(ref coeffsRef); |
|
|
|
Vector128<byte> c1 = Unsafe.As<short, Vector128<byte>>(ref Unsafe.Add(ref coeffsRef, 8)); |
|
|
|
|
|
|
|
// Use SSE2 to compare 16 values with a single instruction.
|
|
|
|
Vector128<sbyte> m0 = Sse2.PackSignedSaturate(c0.AsInt16(), c1.AsInt16()); |
|
|
|
Vector128<sbyte> m1 = Sse2.CompareEqual(m0, Vector128<sbyte>.Zero); |
|
|
|
|
|
|
|
// Get the comparison results as a bitmask into 16bits. Negate the mask to get
|
|
|
|
// the position of entries that are not equal to zero. We don't need to mask
|
|
|
|
// out least significant bits according to this.First, since coeffs[0] is 0
|
|
|
|
// if this.First > 0.
|
|
|
|
uint mask = 0x0000ffffu ^ (uint)Sse2.MoveMask(m1); |
|
|
|
|
|
|
|
// The position of the most significant non-zero bit indicates the position of
|
|
|
|
// the last non-zero value.
|
|
|
|
this.Last = mask != 0 ? Numerics.Log2(mask) : -1; |
|
|
|
} |
|
|
|
else |
|
|
|
#endif
|
|
|
|
{ |
|
|
|
if (coeffs[n] != 0) |
|
|
|
int n; |
|
|
|
this.Last = -1; |
|
|
|
for (n = 15; n >= 0; --n) |
|
|
|
{ |
|
|
|
this.Last = n; |
|
|
|
break; |
|
|
|
if (coeffs[n] != 0) |
|
|
|
{ |
|
|
|
this.Last = n; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
@ -129,27 +169,78 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy |
|
|
|
return LossyUtils.Vp8BitCost(0, (byte)p0); |
|
|
|
} |
|
|
|
|
|
|
|
int v; |
|
|
|
for (; n < this.Last; ++n) |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
if (Avx2.IsSupported) |
|
|
|
{ |
|
|
|
Span<byte> ctxs = this.scratch.AsSpan(0, 16); |
|
|
|
Span<byte> levels = this.scratch.AsSpan(16, 16); |
|
|
|
Span<ushort> absLevels = this.scratchUShort.AsSpan(); |
|
|
|
|
|
|
|
// Precompute clamped levels and contexts, packed to 8b.
|
|
|
|
ref short outputRef = ref MemoryMarshal.GetReference<short>(this.Coeffs); |
|
|
|
Vector256<short> c0 = Unsafe.As<short, Vector256<byte>>(ref outputRef).AsInt16(); |
|
|
|
Vector256<short> d0 = Avx2.Subtract(Vector256<short>.Zero, c0); |
|
|
|
Vector256<short> e0 = Avx2.Max(c0, d0); // abs(v), 16b
|
|
|
|
Vector256<sbyte> f = Avx2.PackSignedSaturate(e0, e0); |
|
|
|
Vector256<byte> g = Avx2.Min(f.AsByte(), Cst2); |
|
|
|
Vector256<byte> h = Avx2.Min(f.AsByte(), Cst67); // clampLevel in [0..67]
|
|
|
|
|
|
|
|
ref byte ctxsRef = ref MemoryMarshal.GetReference(ctxs); |
|
|
|
ref byte levelsRef = ref MemoryMarshal.GetReference(levels); |
|
|
|
ref ushort absLevelsRef = ref MemoryMarshal.GetReference(absLevels); |
|
|
|
Unsafe.As<byte, Vector128<byte>>(ref ctxsRef) = g.GetLower(); |
|
|
|
Unsafe.As<byte, Vector128<byte>>(ref levelsRef) = h.GetLower(); |
|
|
|
Unsafe.As<ushort, Vector256<ushort>>(ref absLevelsRef) = e0.AsUInt16(); |
|
|
|
|
|
|
|
int level; |
|
|
|
int flevel; |
|
|
|
for (; n < this.Last; ++n) |
|
|
|
{ |
|
|
|
int ctx = ctxs[n]; |
|
|
|
level = levels[n]; |
|
|
|
flevel = absLevels[n]; |
|
|
|
cost += WebpLookupTables.Vp8LevelFixedCosts[flevel] + t.Costs[level]; |
|
|
|
t = costs[n + 1].Costs[ctx]; |
|
|
|
} |
|
|
|
|
|
|
|
// Last coefficient is always non-zero.
|
|
|
|
level = levels[n]; |
|
|
|
flevel = absLevels[n]; |
|
|
|
cost += WebpLookupTables.Vp8LevelFixedCosts[flevel] + t.Costs[level]; |
|
|
|
if (n < 15) |
|
|
|
{ |
|
|
|
int b = WebpConstants.Vp8EncBands[n + 1]; |
|
|
|
int ctx = ctxs[n]; |
|
|
|
int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0]; |
|
|
|
cost += LossyUtils.Vp8BitCost(0, (byte)lastP0); |
|
|
|
} |
|
|
|
|
|
|
|
return cost; |
|
|
|
} |
|
|
|
#endif
|
|
|
|
{ |
|
|
|
int v; |
|
|
|
for (; n < this.Last; ++n) |
|
|
|
{ |
|
|
|
v = Math.Abs(this.Coeffs[n]); |
|
|
|
int ctx = v >= 2 ? 2 : v; |
|
|
|
cost += LevelCost(t.Costs, v); |
|
|
|
t = costs[n + 1].Costs[ctx]; |
|
|
|
} |
|
|
|
|
|
|
|
// Last coefficient is always non-zero
|
|
|
|
v = Math.Abs(this.Coeffs[n]); |
|
|
|
int ctx = v >= 2 ? 2 : v; |
|
|
|
cost += LevelCost(t.Costs, v); |
|
|
|
t = costs[n + 1].Costs[ctx]; |
|
|
|
} |
|
|
|
if (n < 15) |
|
|
|
{ |
|
|
|
int b = WebpConstants.Vp8EncBands[n + 1]; |
|
|
|
int ctx = v == 1 ? 1 : 2; |
|
|
|
int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0]; |
|
|
|
cost += LossyUtils.Vp8BitCost(0, (byte)lastP0); |
|
|
|
} |
|
|
|
|
|
|
|
// Last coefficient is always non-zero
|
|
|
|
v = Math.Abs(this.Coeffs[n]); |
|
|
|
cost += LevelCost(t.Costs, v); |
|
|
|
if (n < 15) |
|
|
|
{ |
|
|
|
int b = WebpConstants.Vp8EncBands[n + 1]; |
|
|
|
int ctx = v == 1 ? 1 : 2; |
|
|
|
int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0]; |
|
|
|
cost += LossyUtils.Vp8BitCost(0, (byte)lastP0); |
|
|
|
return cost; |
|
|
|
} |
|
|
|
|
|
|
|
return cost; |
|
|
|
} |
|
|
|
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)] |
|
|
|
|