Browse Source

Add SSE2 version of GetResidualCost

pull/1902/head
Brian Popow 4 years ago
parent
commit
8e05f5dba1
  1. 102
      src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs

102
src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs

@ -3,6 +3,11 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
@ -11,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// </summary>
internal class Vp8Residual
{
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<byte> Cst2 = Vector128.Create((byte)2);
private static readonly Vector128<byte> Cst67 = Vector128.Create((byte)67);
#endif
private readonly byte[] scratch = new byte[32];
private readonly ushort[] scratchUShort = new ushort[16];
public int First { get; set; }
public int Last { get; set; }
@ -129,27 +144,82 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return LossyUtils.Vp8BitCost(0, (byte)p0);
}
int v;
for (; n < this.Last; ++n)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
{
Span<byte> ctxs = this.scratch.AsSpan(0, 16);
Span<byte> levels = this.scratch.AsSpan(16, 16);
Span<ushort> absLevels = this.scratchUShort.AsSpan();
// Precompute clamped levels and contexts, packed to 8b.
ref short outputRef = ref MemoryMarshal.GetReference<short>(this.Coeffs);
Vector128<short> c0 = Unsafe.As<short, Vector128<byte>>(ref outputRef).AsInt16();
Vector128<short> c1 = Unsafe.As<short, Vector128<byte>>(ref Unsafe.Add(ref outputRef, 8)).AsInt16();
Vector128<short> d0 = Sse2.Subtract(Vector128<short>.Zero, c0);
Vector128<short> d1 = Sse2.Subtract(Vector128<short>.Zero, c1);
Vector128<short> e0 = Sse2.Max(c0, d0); // abs(v), 16b
Vector128<short> e1 = Sse2.Max(c1, d1);
Vector128<sbyte> f = Sse2.PackSignedSaturate(e0, e1);
Vector128<byte> g = Sse2.Min(f.AsByte(), Cst2);
Vector128<byte> h = Sse2.Min(f.AsByte(), Cst67); // clampLevel in [0..67]
ref byte ctxsRef = ref MemoryMarshal.GetReference(ctxs);
ref byte levelsRef = ref MemoryMarshal.GetReference(levels);
ref ushort absLevelsRef = ref MemoryMarshal.GetReference(absLevels);
Unsafe.As<byte, Vector128<byte>>(ref ctxsRef) = g;
Unsafe.As<byte, Vector128<byte>>(ref levelsRef) = h;
Unsafe.As<ushort, Vector128<ushort>>(ref absLevelsRef) = e0.AsUInt16();
Unsafe.As<ushort, Vector128<ushort>>(ref Unsafe.Add(ref absLevelsRef, 8)) = e1.AsUInt16();
int level;
int flevel;
for (; n < this.Last; ++n)
{
int ctx = ctxs[n];
level = levels[n];
flevel = absLevels[n];
cost += WebpLookupTables.Vp8LevelFixedCosts[flevel] + t.Costs[level];
t = costs[n + 1].Costs[ctx];
}
// Last coefficient is always non-zero.
level = levels[n];
flevel = absLevels[n];
cost += WebpLookupTables.Vp8LevelFixedCosts[flevel] + t.Costs[level];
if (n < 15)
{
int b = WebpConstants.Vp8EncBands[n + 1];
int ctx = ctxs[n];
int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0];
cost += LossyUtils.Vp8BitCost(0, (byte)lastP0);
}
return cost;
}
#endif
{
int v;
for (; n < this.Last; ++n)
{
v = Math.Abs(this.Coeffs[n]);
int ctx = v >= 2 ? 2 : v;
cost += LevelCost(t.Costs, v);
t = costs[n + 1].Costs[ctx];
}
// Last coefficient is always non-zero
v = Math.Abs(this.Coeffs[n]);
int ctx = v >= 2 ? 2 : v;
cost += LevelCost(t.Costs, v);
t = costs[n + 1].Costs[ctx];
}
if (n < 15)
{
int b = WebpConstants.Vp8EncBands[n + 1];
int ctx = v == 1 ? 1 : 2;
int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0];
cost += LossyUtils.Vp8BitCost(0, (byte)lastP0);
}
// Last coefficient is always non-zero
v = Math.Abs(this.Coeffs[n]);
cost += LevelCost(t.Costs, v);
if (n < 15)
{
int b = WebpConstants.Vp8EncBands[n + 1];
int ctx = v == 1 ? 1 : 2;
int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0];
cost += LossyUtils.Vp8BitCost(0, (byte)lastP0);
return cost;
}
return cost;
}
[MethodImpl(InliningOptions.ShortMethod)]

Loading…
Cancel
Save