Browse Source

Merge branch 'master' into af/UniformUnmanagedMemoryPoolMemoryAllocator-02

af/UniformUnmanagedMemoryPoolMemoryAllocator-02-MemoryGuards
Anton Firszov 5 years ago
parent
commit
251af068d6
  1. 10
      src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
  2. 24
      src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
  3. 10
      src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
  4. 41
      src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
  5. 9
      src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
  6. 7
      src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
  7. 123
      src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
  8. 6
      src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
  9. 194
      src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
  10. 35
      src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
  11. 57
      src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
  12. 9
      src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs
  13. 3
      src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
  14. 99
      src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
  15. 86
      src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
  16. 27
      src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
  17. 18
      src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
  18. 54
      src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
  19. 23
      src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs
  20. 18
      src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs
  21. 7
      src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs
  22. 30
      src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs
  23. 56
      src/ImageSharp/Formats/Webp/WebpCommonUtils.cs
  24. 267
      src/ImageSharp/Formats/Webp/WebpLookupTables.cs
  25. 30
      tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
  26. 6
      tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs

10
src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs

@ -142,10 +142,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public bool ReadBool() => this.ReadValue(1) is 1; public bool ReadBool() => this.ReadValue(1) is 1;
[MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits) public uint ReadValue(int nBits)
{ {
Guard.MustBeGreaterThan(nBits, 0, nameof(nBits)); DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits)); DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
uint v = 0; uint v = 0;
while (nBits-- > 0) while (nBits-- > 0)
@ -156,10 +157,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
return v; return v;
} }
[MethodImpl(InliningOptions.ShortMethod)]
public int ReadSignedValue(int nBits) public int ReadSignedValue(int nBits)
{ {
Guard.MustBeGreaterThan(nBits, 0, nameof(nBits)); DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits)); DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
int value = (int)this.ReadValue(nBits); int value = (int)this.ReadValue(nBits);
return this.ReadValue(1) != 0 ? -value : value; return this.ReadValue(1) != 0 ? -value : value;

24
src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs

@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// </summary> /// </summary>
private const int Wbits = 32; private const int Wbits = 32;
private readonly uint[] bitMask = private static readonly uint[] BitMask =
{ {
0, 0,
0x000001, 0x000003, 0x000007, 0x00000f, 0x000001, 0x000003, 0x000007, 0x00000f,
@ -125,19 +125,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// </summary> /// </summary>
/// <param name="nBits">The number of bits to read (should not exceed 16).</param> /// <param name="nBits">The number of bits to read (should not exceed 16).</param>
/// <returns>A ushort value.</returns> /// <returns>A ushort value.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits) public uint ReadValue(int nBits)
{ {
Guard.MustBeGreaterThan(nBits, 0, nameof(nBits)); DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
if (!this.Eos && nBits <= Vp8LMaxNumBitRead) if (!this.Eos && nBits <= Vp8LMaxNumBitRead)
{ {
ulong val = this.PrefetchBits() & this.bitMask[nBits]; ulong val = this.PrefetchBits() & BitMask[nBits];
this.bitPos += nBits; this.bitPos += nBits;
this.ShiftBytes(); this.ShiftBytes();
return (uint)val; return (uint)val;
} }
this.SetEndOfStream();
return 0; return 0;
} }
@ -169,6 +169,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// <summary> /// <summary>
/// Advances the read buffer by 4 bytes to make room for reading next 32 bits. /// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public void FillBitWindow() public void FillBitWindow()
{ {
if (this.bitPos >= Wbits) if (this.bitPos >= Wbits)
@ -181,7 +182,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// Returns true if there was an attempt at reading bit past the end of the buffer. /// Returns true if there was an attempt at reading bit past the end of the buffer.
/// </summary> /// </summary>
/// <returns>True, if end of buffer was reached.</returns> /// <returns>True, if end of buffer was reached.</returns>
public bool IsEndOfStream() => this.Eos || ((this.pos == this.len) && (this.bitPos > Lbits)); [MethodImpl(InliningOptions.ShortMethod)]
public bool IsEndOfStream() => this.Eos || (this.pos == this.len && this.bitPos > Lbits);
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private void DoFillBitWindow() => this.ShiftBytes(); private void DoFillBitWindow() => this.ShiftBytes();
@ -189,6 +191,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// <summary> /// <summary>
/// If not at EOS, reload up to Vp8LLbits byte-by-byte. /// If not at EOS, reload up to Vp8LLbits byte-by-byte.
/// </summary> /// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private void ShiftBytes() private void ShiftBytes()
{ {
System.Span<byte> dataSpan = this.Data.Memory.Span; System.Span<byte> dataSpan = this.Data.Memory.Span;
@ -199,17 +202,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
++this.pos; ++this.pos;
this.bitPos -= 8; this.bitPos -= 8;
} }
if (this.IsEndOfStream())
{
this.SetEndOfStream();
}
}
private void SetEndOfStream()
{
this.Eos = true;
this.bitPos = 0; // To avoid undefined behaviour with shifts.
} }
} }
} }

10
src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs

@ -49,6 +49,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
double bitCostBest = -1; double bitCostBest = -1;
int cacheBitsInitial = cacheBits; int cacheBitsInitial = cacheBits;
Vp8LHashChain hashChainBox = null; Vp8LHashChain hashChainBox = null;
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1) for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1)
{ {
int cacheBitsTmp = cacheBitsInitial; int cacheBitsTmp = cacheBitsInitial;
@ -81,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Keep the best backward references. // Keep the best backward references.
var histo = new Vp8LHistogram(worst, cacheBitsTmp); var histo = new Vp8LHistogram(worst, cacheBitsTmp);
double bitCost = histo.EstimateBits(); double bitCost = histo.EstimateBits(stats, bitsEntropy);
if (lz77TypeBest == 0 || bitCost < bitCostBest) if (lz77TypeBest == 0 || bitCost < bitCostBest)
{ {
@ -100,7 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox; Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox;
BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst); BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst);
var histo = new Vp8LHistogram(worst, cacheBits); var histo = new Vp8LHistogram(worst, cacheBits);
double bitCostTrace = histo.EstimateBits(); double bitCostTrace = histo.EstimateBits(stats, bitsEntropy);
if (bitCostTrace < bitCostBest) if (bitCostTrace < bitCostBest)
{ {
best = worst; best = worst;
@ -214,9 +216,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i <= cacheBitsMax; i++) for (int i = 0; i <= cacheBitsMax; i++)
{ {
double entropy = histos[i].EstimateBits(); double entropy = histos[i].EstimateBits(stats, bitsEntropy);
if (i == 0 || entropy < entropyMin) if (i == 0 || entropy < entropyMin)
{ {
entropyMin = entropy; entropyMin = entropy;

41
src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs

@ -152,10 +152,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static int HistogramCopyAndAnalyze(List<Vp8LHistogram> origHistograms, List<Vp8LHistogram> histograms, ushort[] histogramSymbols) private static int HistogramCopyAndAnalyze(List<Vp8LHistogram> origHistograms, List<Vp8LHistogram> histograms, ushort[] histogramSymbols)
{ {
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int clusterId = 0, i = 0; i < origHistograms.Count; i++) for (int clusterId = 0, i = 0; i < origHistograms.Count; i++)
{ {
Vp8LHistogram origHistogram = origHistograms[i]; Vp8LHistogram origHistogram = origHistograms[i];
origHistogram.UpdateHistogramCost(); origHistogram.UpdateHistogramCost(stats, bitsEntropy);
// Skip the histogram if it is completely empty, which can happen for tiles with no information (when they are skipped because of LZ77). // Skip the histogram if it is completely empty, which can happen for tiles with no information (when they are skipped because of LZ77).
if (!origHistogram.IsUsed[0] && !origHistogram.IsUsed[1] && !origHistogram.IsUsed[2] && !origHistogram.IsUsed[3] && !origHistogram.IsUsed[4]) if (!origHistogram.IsUsed[0] && !origHistogram.IsUsed[1] && !origHistogram.IsUsed[2] && !origHistogram.IsUsed[3] && !origHistogram.IsUsed[4])
@ -175,7 +177,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return numUsed; return numUsed;
} }
private static void HistogramCombineEntropyBin(List<Vp8LHistogram> histograms, ushort[] clusters, ushort[] clusterMappings, Vp8LHistogram curCombo, ushort[] binMap, int numBins, double combineCostFactor) private static void HistogramCombineEntropyBin(
List<Vp8LHistogram> histograms,
ushort[] clusters,
ushort[] clusterMappings,
Vp8LHistogram curCombo,
ushort[] binMap,
int numBins,
double combineCostFactor)
{ {
var binInfo = new HistogramBinInfo[BinSize]; var binInfo = new HistogramBinInfo[BinSize];
for (int idx = 0; idx < numBins; idx++) for (int idx = 0; idx < numBins; idx++)
@ -191,6 +200,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
var indicesToRemove = new List<int>(); var indicesToRemove = new List<int>();
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int idx = 0; idx < histograms.Count; idx++) for (int idx = 0; idx < histograms.Count; idx++)
{ {
if (histograms[idx] == null) if (histograms[idx] == null)
@ -209,7 +220,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Try to merge #idx into #first (both share the same binId) // Try to merge #idx into #first (both share the same binId)
double bitCost = histograms[idx].BitCost; double bitCost = histograms[idx].BitCost;
double bitCostThresh = -bitCost * combineCostFactor; double bitCostThresh = -bitCost * combineCostFactor;
double currCostDiff = histograms[first].AddEval(histograms[idx], bitCostThresh, curCombo); double currCostDiff = histograms[first].AddEval(histograms[idx], stats, bitsEntropy, bitCostThresh, curCombo);
if (currCostDiff < bitCostThresh) if (currCostDiff < bitCostThresh)
{ {
@ -308,6 +319,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int numUsed = histograms.Count(h => h != null); int numUsed = histograms.Count(h => h != null);
int outerIters = numUsed; int outerIters = numUsed;
int numTriesNoSuccess = outerIters / 2; int numTriesNoSuccess = outerIters / 2;
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
if (numUsed < minClusterSize) if (numUsed < minClusterSize)
{ {
@ -354,7 +367,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
idx2 = mappings[idx2]; idx2 = mappings[idx2];
// Calculate cost reduction on combination. // Calculate cost reduction on combination.
double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost); double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost, stats, bitsEntropy);
// Found a better pair? // Found a better pair?
if (currCost < 0) if (currCost < 0)
@ -428,7 +441,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (doEval) if (doEval)
{ {
// Re-evaluate the cost of an updated pair. // Re-evaluate the cost of an updated pair.
HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], 0.0d, p); HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], stats, bitsEntropy, 0.0d, p);
if (p.CostDiff >= 0.0d) if (p.CostDiff >= 0.0d)
{ {
histoPriorityList[j] = histoPriorityList[histoPriorityList.Count - 1]; histoPriorityList[j] = histoPriorityList[histoPriorityList.Count - 1];
@ -456,6 +469,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Priority list of histogram pairs. // Priority list of histogram pairs.
var histoPriorityList = new List<HistogramPair>(); var histoPriorityList = new List<HistogramPair>();
int maxSize = histoSize * histoSize; int maxSize = histoSize * histoSize;
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i < histoSize; i++) for (int i = 0; i < histoSize; i++)
{ {
@ -471,7 +486,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
continue; continue;
} }
HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d); HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d, stats, bitsEntropy);
} }
} }
@ -510,7 +525,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
continue; continue;
} }
HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d); HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d, stats, bitsEntropy);
} }
} }
} }
@ -519,6 +534,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
int inSize = input.Count; int inSize = input.Count;
int outSize = output.Count; int outSize = output.Count;
var stats = new Vp8LStreaks();
var bitsEntropy = new Vp8LBitEntropy();
if (outSize > 1) if (outSize > 1)
{ {
for (int i = 0; i < inSize; i++) for (int i = 0; i < inSize; i++)
@ -534,7 +551,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
double bestBits = double.MaxValue; double bestBits = double.MaxValue;
for (int k = 0; k < outSize; k++) for (int k = 0; k < outSize; k++)
{ {
double curBits = output[k].AddThresh(input[i], bestBits); double curBits = output[k].AddThresh(input[i], stats, bitsEntropy, bestBits);
if (k == 0 || curBits < bestBits) if (k == 0 || curBits < bestBits)
{ {
bestBits = curBits; bestBits = curBits;
@ -577,7 +594,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Create a pair from indices "idx1" and "idx2" provided its cost is inferior to "threshold", a negative entropy. /// Create a pair from indices "idx1" and "idx2" provided its cost is inferior to "threshold", a negative entropy.
/// </summary> /// </summary>
/// <returns>The cost of the pair, or 0 if it is superior to threshold.</returns> /// <returns>The cost of the pair, or 0 if it is superior to threshold.</returns>
private static double HistoPriorityListPush(List<HistogramPair> histoList, int maxSize, List<Vp8LHistogram> histograms, int idx1, int idx2, double threshold) private static double HistoPriorityListPush(List<HistogramPair> histoList, int maxSize, List<Vp8LHistogram> histograms, int idx1, int idx2, double threshold, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{ {
var pair = new HistogramPair(); var pair = new HistogramPair();
@ -598,7 +615,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LHistogram h1 = histograms[idx1]; Vp8LHistogram h1 = histograms[idx1];
Vp8LHistogram h2 = histograms[idx2]; Vp8LHistogram h2 = histograms[idx2];
HistoListUpdatePair(h1, h2, threshold, pair); HistoListUpdatePair(h1, h2, stats, bitsEntropy, threshold, pair);
// Do not even consider the pair if it does not improve the entropy. // Do not even consider the pair if it does not improve the entropy.
if (pair.CostDiff >= threshold) if (pair.CostDiff >= threshold)
@ -616,11 +633,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary> /// <summary>
/// Update the cost diff and combo of a pair of histograms. This needs to be called when the histograms have been merged with a third one. /// Update the cost diff and combo of a pair of histograms. This needs to be called when the histograms have been merged with a third one.
/// </summary> /// </summary>
private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, double threshold, HistogramPair pair) private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double threshold, HistogramPair pair)
{ {
double sumCost = h1.BitCost + h2.BitCost; double sumCost = h1.BitCost + h2.BitCost;
pair.CostCombo = 0.0d; pair.CostCombo = 0.0d;
h1.GetCombinedHistogramEntropy(h2, sumCost + threshold, costInitial: pair.CostCombo, out double cost); h1.GetCombinedHistogramEntropy(h2, stats, bitsEntropy, sumCost + threshold, costInitial: pair.CostCombo, out double cost);
pair.CostCombo = cost; pair.CostCombo = cost;
pair.CostDiff = pair.CostCombo - sumCost; pair.CostDiff = pair.CostCombo - sumCost;
} }

9
src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs

@ -49,14 +49,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
return -1; return -1;
} }
else if (t1.TotalCount < t2.TotalCount)
if (t1.TotalCount < t2.TotalCount)
{ {
return 1; return 1;
} }
else
{ return t1.Value < t2.Value ? -1 : 1;
return t1.Value < t2.Value ? -1 : 1;
}
} }
public IDeepCloneable DeepClone() => new HuffmanTree(this); public IDeepCloneable DeepClone() => new HuffmanTree(this);

7
src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs

@ -202,9 +202,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
// Build the Huffman tree. // Build the Huffman tree.
HuffmanTree[] treeCopy = tree.AsSpan().Slice(0, treeSize).ToArray(); #if NET5_0_OR_GREATER
Span<HuffmanTree> treeSlice = tree.AsSpan(0, treeSize);
treeSlice.Sort(HuffmanTree.Compare);
#else
HuffmanTree[] treeCopy = tree.AsSpan(0, treeSize).ToArray();
Array.Sort(treeCopy, HuffmanTree.Compare); Array.Sort(treeCopy, HuffmanTree.Compare);
treeCopy.AsSpan().CopyTo(tree); treeCopy.AsSpan().CopyTo(tree);
#endif
if (treeSize > 1) if (treeSize > 1)
{ {

123
src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

@ -27,6 +27,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const double Log2Reciprocal = 1.44269504088896338700465094007086; private const double Log2Reciprocal = 1.44269504088896338700465094007086;
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector256<byte> AddGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
private static readonly Vector128<byte> AddGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
private static readonly byte AddGreenToBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
private static readonly Vector256<byte> SubtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
private static readonly Vector128<byte> SubtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
private static readonly byte SubtractGreenFromBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
private static readonly Vector128<byte> TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector128<byte> TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
private static readonly Vector128<byte> TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
#endif
/// <summary> /// <summary>
/// Returns the exact index where array1 and array2 are different. For an index /// Returns the exact index where array1 and array2 are different. For an index
/// inferior or equal to bestLenMatch, the return value just has to be strictly /// inferior or equal to bestLenMatch, the return value just has to be strictly
@ -97,7 +121,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -106,7 +129,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* idx = p + i; uint* idx = p + i;
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte(); Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, mask); Vector256<byte> in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Add(input, in0g0g); Vector256<byte> output = Avx2.Add(input, in0g0g);
Avx.Store((byte*)idx, output); Avx.Store((byte*)idx, output);
} }
@ -119,7 +142,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else if (Ssse3.IsSupported) else if (Ssse3.IsSupported)
{ {
var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -128,7 +150,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* idx = p + i; uint* idx = p + i;
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte(); Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, mask); Vector128<byte> in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Add(input, in0g0g); Vector128<byte> output = Sse2.Add(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte()); Sse2.Store((byte*)idx, output.AsByte());
} }
@ -141,7 +163,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else if (Sse2.IsSupported) else if (Sse2.IsSupported)
{ {
byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -151,8 +172,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* idx = p + i; uint* idx = p + i;
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx); Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, mask); Vector128<ushort> b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g Vector128<ushort> c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte()); Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output); Sse2.Store((byte*)idx, output);
} }
@ -189,7 +210,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -198,7 +218,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* idx = p + i; uint* idx = p + i;
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte(); Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, mask); Vector256<byte> in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Subtract(input, in0g0g); Vector256<byte> output = Avx2.Subtract(input, in0g0g);
Avx.Store((byte*)idx, output); Avx.Store((byte*)idx, output);
} }
@ -211,7 +231,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else if (Ssse3.IsSupported) else if (Ssse3.IsSupported)
{ {
var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -220,7 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* idx = p + i; uint* idx = p + i;
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte(); Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, mask); Vector128<byte> in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Subtract(input, in0g0g); Vector128<byte> output = Sse2.Subtract(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte()); Sse2.Store((byte*)idx, output.AsByte());
} }
@ -233,7 +252,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else if (Sse2.IsSupported) else if (Sse2.IsSupported)
{ {
byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -243,8 +261,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* idx = p + i; uint* idx = p + i;
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx); Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, mask); Vector128<ushort> b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g Vector128<ushort> c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte()); Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output); Sse2.Store((byte*)idx, output);
} }
@ -394,9 +412,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
fixed (uint* src = data) fixed (uint* src = data)
{ {
int idx; int idx;
@ -404,15 +419,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* pos = src + idx; uint* pos = src + idx;
Vector128<uint> input = Sse2.LoadVector128(pos); Vector128<uint> input = Sse2.LoadVector128(pos);
Vector128<byte> a = Sse2.And(input.AsByte(), maskalphagreen); Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16); Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte()); Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte());
Vector128<byte> i = Sse2.And(h, maskredblue); Vector128<byte> i = Sse2.And(h, TransformColorRedBlueMask);
Vector128<byte> output = Sse2.Subtract(input.AsByte(), i); Vector128<byte> output = Sse2.Subtract(input.AsByte(), i);
Sse2.Store((byte*)pos, output); Sse2.Store((byte*)pos, output);
} }
@ -460,8 +475,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
fixed (uint* src = pixelData) fixed (uint* src = pixelData)
{ {
int idx; int idx;
@ -469,9 +482,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* pos = src + idx; uint* pos = src + idx;
Vector128<uint> input = Sse2.LoadVector128(pos); Vector128<uint> input = Sse2.LoadVector128(pos);
Vector128<byte> a = Sse2.And(input.AsByte(), maskalphagreen); Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte()); Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte());
Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
@ -551,6 +564,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int mask = tileWidth - 1; int mask = tileWidth - 1;
int tilesPerRow = SubSampleSize(width, transform.Bits); int tilesPerRow = SubSampleSize(width, transform.Bits);
int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow; int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow;
Span<short> scratch = stackalloc short[8];
while (y < yEnd) while (y < yEnd)
{ {
int predictorModeIdx = predictorModeIdxBase; int predictorModeIdx = predictorModeIdxBase;
@ -608,7 +622,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
PredictorAdd10(input + x, output + x - width, xEnd - x, output + x); PredictorAdd10(input + x, output + x - width, xEnd - x, output + x);
break; break;
case 11: case 11:
PredictorAdd11(input + x, output + x - width, xEnd - x, output + x); PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch);
break; break;
case 12: case 12:
PredictorAdd12(input + x, output + x - width, xEnd - x, output + x); PredictorAdd12(input + x, output + x - width, xEnd - x, output + x);
@ -704,7 +718,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Compute the combined Shannon's entropy for distribution {X} and {X+Y}. /// Compute the combined Shannon's entropy for distribution {X} and {X+Y}.
/// </summary> /// </summary>
/// <returns>Shannon entropy.</returns> /// <returns>Shannon entropy.</returns>
public static float CombinedShannonEntropy(int[] x, int[] y) public static float CombinedShannonEntropy(Span<int> x, Span<int> y)
{ {
double retVal = 0.0d; double retVal = 0.0d;
uint sumX = 0, sumXY = 0; uint sumX = 0, sumXY = 0;
@ -974,11 +988,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output) private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span<short> scratch)
{ {
for (int x = 0; x < numberOfPixels; x++) for (int x = 0; x < numberOfPixels; x++)
{ {
uint pred = Predictor11(output[x - 1], upper + x); uint pred = Predictor11(output[x - 1], upper + x, scratch);
output[x] = AddPixels(input[x], pred); output[x] = AddPixels(input[x], pred);
} }
} }
@ -1031,7 +1045,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]); public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]);
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor11(uint left, uint* top) => Select(top[0], left, top[-1]); public static uint Predictor11(uint left, uint* top, Span<short> scratch) => Select(top[0], left, top[-1], scratch);
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]); public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]);
@ -1148,11 +1162,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output) public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span<short> scratch)
{ {
for (int x = 0; x < numPixels; x++) for (int x = 0; x < numPixels; x++)
{ {
uint pred = Predictor11(input[x - 1], upper + x); uint pred = Predictor11(input[x - 1], upper + x, scratch);
output[x] = SubPixels(input[x], pred); output[x] = SubPixels(input[x], pred);
} }
} }
@ -1240,14 +1254,43 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static Vector128<int> MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff)); private static Vector128<int> MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
#endif #endif
private static uint Select(uint a, uint b, uint c) private static uint Select(uint a, uint b, uint c, Span<short> scratch)
{ {
int paMinusPb = #if SUPPORTS_RUNTIME_INTRINSICS
Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) + if (Sse2.IsSupported)
Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) + {
Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) + Span<short> output = scratch;
Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff)); fixed (short* p = output)
return paMinusPb <= 0 ? a : b; {
Vector128<byte> a0 = Sse2.ConvertScalarToVector128UInt32(a).AsByte();
Vector128<byte> b0 = Sse2.ConvertScalarToVector128UInt32(b).AsByte();
Vector128<byte> c0 = Sse2.ConvertScalarToVector128UInt32(c).AsByte();
Vector128<byte> ac0 = Sse2.SubtractSaturate(a0, c0);
Vector128<byte> ca0 = Sse2.SubtractSaturate(c0, a0);
Vector128<byte> bc0 = Sse2.SubtractSaturate(b0, c0);
Vector128<byte> cb0 = Sse2.SubtractSaturate(c0, b0);
Vector128<byte> ac = Sse2.Or(ac0, ca0);
Vector128<byte> bc = Sse2.Or(bc0, cb0);
Vector128<byte> pa = Sse2.UnpackLow(ac, Vector128<byte>.Zero); // |a - c|
Vector128<byte> pb = Sse2.UnpackLow(bc, Vector128<byte>.Zero); // |b - c|
Vector128<ushort> diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
Sse2.Store((ushort*)p, diff);
}
int paMinusPb = output[0] + output[1] + output[2] + output[3];
return (paMinusPb <= 0) ? a : b;
}
else
#endif
{
int paMinusPb =
Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
return paMinusPb <= 0 ? a : b;
}
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]

6
src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs

@ -15,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public uint BgraOrDistance { get; set; } public uint BgraOrDistance { get; set; }
public static PixOrCopy CreateCacheIdx(int idx) => public static PixOrCopy CreateCacheIdx(int idx) =>
new PixOrCopy() new()
{ {
Mode = PixOrCopyMode.CacheIdx, Mode = PixOrCopyMode.CacheIdx,
BgraOrDistance = (uint)idx, BgraOrDistance = (uint)idx,
@ -23,14 +23,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}; };
public static PixOrCopy CreateLiteral(uint bgra) => public static PixOrCopy CreateLiteral(uint bgra) =>
new PixOrCopy() new()
{ {
Mode = PixOrCopyMode.Literal, Mode = PixOrCopyMode.Literal,
BgraOrDistance = bgra, BgraOrDistance = bgra,
Len = 1 Len = 1
}; };
public static PixOrCopy CreateCopy(uint distance, ushort len) => new PixOrCopy() public static PixOrCopy CreateCopy(uint distance, ushort len) => new()
{ {
Mode = PixOrCopyMode.Copy, Mode = PixOrCopyMode.Copy,
BgraOrDistance = distance, BgraOrDistance = distance,

194
src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

@ -17,6 +17,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
internal static unsafe class PredictorEncoder internal static unsafe class PredictorEncoder
{ {
private static readonly sbyte[][] Offset =
{
new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 }
};
private const int GreenRedToBlueNumAxis = 8; private const int GreenRedToBlueNumAxis = 8;
private const int GreenRedToBlueMaxIters = 7; private const int GreenRedToBlueMaxIters = 7;
@ -29,6 +34,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const int PredLowEffort = 11; private const int PredLowEffort = 11;
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<byte> CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();
private static readonly Vector128<byte> CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
private static readonly Vector128<byte> CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
#endif
// This uses C#'s compiler optimization to refer to assembly's static data directly.
private static ReadOnlySpan<sbyte> DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 };
/// <summary> /// <summary>
/// Finds the best predictor for each tile, and converts the image to residuals /// Finds the best predictor for each tile, and converts the image to residuals
/// with respect to predictions. If nearLosslessQuality &lt; 100, applies /// with respect to predictions. If nearLosslessQuality &lt; 100, applies
@ -41,6 +65,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<uint> bgra, Span<uint> bgra,
Span<uint> bgraScratch, Span<uint> bgraScratch,
Span<uint> image, Span<uint> image,
int[][] histoArgb,
int[][] bestHisto,
bool nearLossless, bool nearLossless,
int nearLosslessQuality, int nearLosslessQuality,
WebpTransparentColorMode transparentColorMode, WebpTransparentColorMode transparentColorMode,
@ -50,6 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int tilesPerRow = LosslessUtils.SubSampleSize(width, bits); int tilesPerRow = LosslessUtils.SubSampleSize(width, bits);
int tilesPerCol = LosslessUtils.SubSampleSize(height, bits); int tilesPerCol = LosslessUtils.SubSampleSize(height, bits);
int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality); int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality);
Span<short> scratch = stackalloc short[8];
// TODO: Can we optimize this? // TODO: Can we optimize this?
int[][] histo = new int[4][]; int[][] histo = new int[4][];
@ -80,11 +107,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
histo, histo,
bgraScratch, bgraScratch,
bgra, bgra,
histoArgb,
bestHisto,
maxQuantization, maxQuantization,
transparentColorMode, transparentColorMode,
usedSubtractGreen, usedSubtractGreen,
nearLossless, nearLossless,
image); image,
scratch);
image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8)); image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8));
} }
@ -105,7 +135,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
lowEffort); lowEffort);
} }
public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span<uint> bgra, Span<uint> image) public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span<uint> bgra, Span<uint> image, Span<int> scratch)
{ {
int maxTileSize = 1 << bits; int maxTileSize = 1 << bits;
int tileXSize = LosslessUtils.SubSampleSize(width, bits); int tileXSize = LosslessUtils.SubSampleSize(width, bits);
@ -139,7 +169,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
height, height,
accumulatedRedHisto, accumulatedRedHisto,
accumulatedBlueHisto, accumulatedBlueHisto,
bgra); bgra,
scratch);
image[offset] = MultipliersToColorCode(prevX); image[offset] = MultipliersToColorCode(prevX);
CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, bgra); CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, bgra);
@ -188,11 +219,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int[][] accumulated, int[][] accumulated,
Span<uint> argbScratch, Span<uint> argbScratch,
Span<uint> argb, Span<uint> argb,
int[][] histoArgb,
int[][] bestHisto,
int maxQuantization, int maxQuantization,
WebpTransparentColorMode transparentColorMode, WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen, bool usedSubtractGreen,
bool nearLossless, bool nearLossless,
Span<uint> modes) Span<uint> modes,
Span<short> scratch)
{ {
const int numPredModes = 14; const int numPredModes = 14;
int startX = tileX << bits; int startX = tileX << bits;
@ -222,21 +256,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
float bestDiff = MaxDiffCost; float bestDiff = MaxDiffCost;
int bestMode = 0; int bestMode = 0;
uint[] residuals = new uint[1 << WebpConstants.MaxTransformBits]; uint[] residuals = new uint[1 << WebpConstants.MaxTransformBits];
int[][] histoArgb = new int[4][];
int[][] bestHisto = new int[4][];
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
histoArgb[i] = new int[256]; histoArgb[i].AsSpan().Clear();
bestHisto[i] = new int[256]; bestHisto[i].AsSpan().Clear();
} }
for (int mode = 0; mode < numPredModes; mode++) for (int mode = 0; mode < numPredModes; mode++)
{ {
for (int i = 0; i < 4; i++)
{
histoArgb[i].AsSpan().Fill(0);
}
if (startY > 0) if (startY > 0)
{ {
// Read the row above the tile which will become the first upper_row. // Read the row above the tile which will become the first upper_row.
@ -272,7 +299,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals); GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals, scratch);
for (int relativeX = 0; relativeX < maxX; ++relativeX) for (int relativeX = 0; relativeX < maxX; ++relativeX)
{ {
UpdateHisto(histoArgb, residuals[relativeX]); UpdateHisto(histoArgb, residuals[relativeX]);
@ -300,6 +327,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
bestDiff = curDiff; bestDiff = curDiff;
bestMode = mode; bestMode = mode;
} }
for (int i = 0; i < 4; i++)
{
histoArgb[i].AsSpan().Clear();
}
} }
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
@ -333,11 +365,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
WebpTransparentColorMode transparentColorMode, WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen, bool usedSubtractGreen,
bool nearLossless, bool nearLossless,
Span<uint> output) Span<uint> output,
Span<short> scratch)
{ {
if (transparentColorMode == WebpTransparentColorMode.Preserve) if (transparentColorMode == WebpTransparentColorMode.Preserve)
{ {
PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output); PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output, scratch);
} }
else else
{ {
@ -395,7 +428,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x); predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x);
break; break;
case 11: case 11:
predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x); predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x, scratch);
break; break;
case 12: case 12:
predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x); predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x);
@ -583,6 +616,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<byte> currentMaxDiffs = MemoryMarshal.Cast<uint, byte>(currentRow.Slice(width + 1)); Span<byte> currentMaxDiffs = MemoryMarshal.Cast<uint, byte>(currentRow.Slice(width + 1));
Span<byte> lowerMaxDiffs = currentMaxDiffs.Slice(width); Span<byte> lowerMaxDiffs = currentMaxDiffs.Slice(width);
Span<short> scratch = stackalloc short[8];
for (int y = 0; y < height; y++) for (int y = 0; y < height; y++)
{ {
Span<uint> tmp32 = upperRow; Span<uint> tmp32 = upperRow;
@ -593,7 +627,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (lowEffort) if (lowEffort)
{ {
PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width)); PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width), scratch);
} }
else else
{ {
@ -634,7 +668,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
transparentColorMode, transparentColorMode,
usedSubtractGreen, usedSubtractGreen,
nearLossless, nearLossless,
argb.Slice((y * width) + x)); argb.Slice((y * width) + x),
scratch);
x = xEnd; x = xEnd;
} }
@ -649,7 +684,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int numPixels, int numPixels,
Span<uint> currentSpan, Span<uint> currentSpan,
Span<uint> upperSpan, Span<uint> upperSpan,
Span<uint> outputSpan) Span<uint> outputSpan,
Span<short> scratch)
{ {
#pragma warning disable SA1503 // Braces should not be omitted #pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* current = currentSpan) fixed (uint* current = currentSpan)
@ -718,7 +754,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output); LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output);
break; break;
case 11: case 11:
LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output); LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output, scratch);
break; break;
case 12: case 12:
LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output); LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output);
@ -819,7 +855,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
private static Vp8LMultipliers GetBestColorTransformForTile(int tileX, int tileY, int bits, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int xSize, int ySize, int[] accumulatedRedHisto, int[] accumulatedBlueHisto, Span<uint> argb) private static Vp8LMultipliers GetBestColorTransformForTile(
int tileX,
int tileY,
int bits,
Vp8LMultipliers prevX,
Vp8LMultipliers prevY,
int quality,
int xSize,
int ySize,
int[] accumulatedRedHisto,
int[] accumulatedBlueHisto,
Span<uint> argb,
Span<int> scratch)
{ {
int maxTileSize = 1 << bits; int maxTileSize = 1 << bits;
int tileYOffset = tileY * maxTileSize; int tileYOffset = tileY * maxTileSize;
@ -832,18 +880,28 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
var bestTx = default(Vp8LMultipliers); var bestTx = default(Vp8LMultipliers);
GetBestGreenToRed(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx); GetBestGreenToRed(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx);
GetBestGreenRedToBlue(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx); GetBestGreenRedToBlue(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx);
return bestTx; return bestTx;
} }
private static void GetBestGreenToRed(Span<uint> argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedRedHisto, ref Vp8LMultipliers bestTx) private static void GetBestGreenToRed(
Span<uint> argb,
int stride,
Span<int> scratch,
int tileWidth,
int tileHeight,
Vp8LMultipliers prevX,
Vp8LMultipliers prevY,
int quality,
int[] accumulatedRedHisto,
ref Vp8LMultipliers bestTx)
{ {
int maxIters = 4 + ((7 * quality) >> 8); // in range [4..6] int maxIters = 4 + ((7 * quality) >> 8); // in range [4..6]
int greenToRedBest = 0; int greenToRedBest = 0;
double bestDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto); double bestDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto);
for (int iter = 0; iter < maxIters; iter++) for (int iter = 0; iter < maxIters; iter++)
{ {
// ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
@ -855,7 +913,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
for (int offset = -delta; offset <= delta; offset += 2 * delta) for (int offset = -delta; offset <= delta; offset += 2 * delta)
{ {
int greenToRedCur = offset + greenToRedBest; int greenToRedCur = offset + greenToRedBest;
double curDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto); double curDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto);
if (curDiff < bestDiff) if (curDiff < bestDiff)
{ {
bestDiff = curDiff; bestDiff = curDiff;
@ -867,24 +925,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
bestTx.GreenToRed = (byte)(greenToRedBest & 0xff); bestTx.GreenToRed = (byte)(greenToRedBest & 0xff);
} }
private static void GetBestGreenRedToBlue(Span<uint> argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx) private static void GetBestGreenRedToBlue(Span<uint> argb, int stride, Span<int> scratch, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx)
{ {
int iters = (quality < 25) ? 1 : (quality > 50) ? GreenRedToBlueMaxIters : 4; int iters = (quality < 25) ? 1 : (quality > 50) ? GreenRedToBlueMaxIters : 4;
int greenToBlueBest = 0; int greenToBlueBest = 0;
int redToBlueBest = 0; int redToBlueBest = 0;
sbyte[][] offset = { new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } };
sbyte[] deltaLut = { 16, 16, 8, 4, 2, 2, 2 };
// Initial value at origin: // Initial value at origin:
double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto); double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto);
for (int iter = 0; iter < iters; iter++) for (int iter = 0; iter < iters; iter++)
{ {
int delta = deltaLut[iter]; int delta = DeltaLut[iter];
for (int axis = 0; axis < GreenRedToBlueNumAxis; axis++) for (int axis = 0; axis < GreenRedToBlueNumAxis; axis++)
{ {
int greenToBlueCur = (offset[axis][0] * delta) + greenToBlueBest; int greenToBlueCur = (Offset[axis][0] * delta) + greenToBlueBest;
int redToBlueCur = (offset[axis][1] * delta) + redToBlueBest; int redToBlueCur = (Offset[axis][1] * delta) + redToBlueBest;
double curDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto); double curDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto);
if (curDiff < bestDiff) if (curDiff < bestDiff)
{ {
bestDiff = curDiff; bestDiff = curDiff;
@ -910,9 +966,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
bestTx.RedToBlue = (byte)(redToBlueBest & 0xff); bestTx.RedToBlue = (byte)(redToBlueBest & 0xff);
} }
private static double GetPredictionCostCrossColorRed(Span<uint> argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToRed, int[] accumulatedRedHisto) private static double GetPredictionCostCrossColorRed(
Span<uint> argb,
int stride,
Span<int> scratch,
int tileWidth,
int tileHeight,
Vp8LMultipliers prevX,
Vp8LMultipliers prevY,
int greenToRed,
int[] accumulatedRedHisto)
{ {
int[] histo = new int[256]; Span<int> histo = scratch.Slice(0, 256);
histo.Clear();
CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo); CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo);
double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo); double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo);
@ -937,9 +1003,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return curDiff; return curDiff;
} }
private static double GetPredictionCostCrossColorBlue(Span<uint> argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToBlue, int redToBlue, int[] accumulatedBlueHisto) private static double GetPredictionCostCrossColorBlue(
Span<uint> argb,
int stride,
Span<int> scratch,
int tileWidth,
int tileHeight,
Vp8LMultipliers prevX,
Vp8LMultipliers prevY,
int greenToBlue,
int redToBlue,
int[] accumulatedBlueHisto)
{ {
int[] histo = new int[256]; Span<int> histo = scratch.Slice(0, 256);
histo.Clear();
CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo); CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo); double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo);
@ -980,15 +1057,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return curDiff; return curDiff;
} }
private static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo) private static void CollectColorRedTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{ {
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported) if (Sse41.IsSupported)
{ {
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed)); var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
var maskgreen = Vector128.Create(0x00ff00);
var mask = Vector128.Create((short)0xff);
const int span = 8; const int span = 8;
Span<ushort> values = stackalloc ushort[span]; Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++) for (int y = 0; y < tileHeight; y++)
@ -1004,15 +1078,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* input1Idx = src + x + (span / 2); uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte(); Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte(); Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> g0 = Sse2.And(input0, maskgreen.AsByte()); // 0 0 | g 0 Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, maskgreen.AsByte()); Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0 Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16); Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r' Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, mask.AsByte()); // 0 r' Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
Sse2.Store(dst, d.AsUInt16()); Sse2.Store(dst, d.AsUInt16());
for (int i = 0; i < span; i++) for (int i = 0; i < span; i++)
{ {
@ -1036,7 +1110,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo) private static void CollectColorRedTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span<int> histo)
{ {
int pos = 0; int pos = 0;
while (tileHeight-- > 0) while (tileHeight-- > 0)
@ -1051,7 +1125,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
private static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo) private static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{ {
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported) if (Sse41.IsSupported)
@ -1060,12 +1134,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<ushort> values = stackalloc ushort[span]; Span<ushort> values = stackalloc ushort[span];
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue)); var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue)); var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
var maskgreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
var maskgreenblue = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
var maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
var shufflerLow = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
var shufflerHigh = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
for (int y = 0; y < tileHeight; y++) for (int y = 0; y < tileHeight; y++)
{ {
Span<uint> srcSpan = bgra.Slice(y * stride); Span<uint> srcSpan = bgra.Slice(y * stride);
@ -1079,18 +1147,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* input1Idx = src + x + (span / 2); uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte(); Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte(); Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> r0 = Ssse3.Shuffle(input0, shufflerLow); Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
Vector128<byte> r1 = Ssse3.Shuffle(input1, shufflerHigh); Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128<byte> r = Sse2.Or(r0, r1); Vector128<byte> r = Sse2.Or(r0, r1);
Vector128<byte> gb0 = Sse2.And(input0, maskgreenblue); Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
Vector128<byte> gb1 = Sse2.And(input1, maskgreenblue); Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector128<byte> g = Sse2.And(gb.AsByte(), maskgreen); Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr); Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte()); Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte()); Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, maskblue); Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
Sse2.Store(dst, e.AsUInt16()); Sse2.Store(dst, e.AsUInt16());
for (int i = 0; i < span; i++) for (int i = 0; i < span; i++)
{ {
@ -1114,7 +1182,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
private static void CollectColorBlueTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo) private static void CollectColorBlueTransformsNoneVectorized(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{ {
int pos = 0; int pos = 0;
while (tileHeight-- > 0) while (tileHeight-- > 0)
@ -1143,7 +1211,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static double PredictionCostCrossColor(int[] accumulated, int[] counts) private static double PredictionCostCrossColor(int[] accumulated, Span<int> counts)
{ {
// Favor low entropy, locally and globally. // Favor low entropy, locally and globally.
// Favor small absolute values for PredictionCostSpatial. // Favor small absolute values for PredictionCostSpatial.
@ -1152,7 +1220,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
private static float PredictionCostSpatial(int[] counts, int weight0, double expVal) private static float PredictionCostSpatial(Span<int> counts, int weight0, double expVal)
{ {
int significantSymbols = 256 >> 4; int significantSymbols = 256 >> 4;
double expDecayFactor = 0.6; double expDecayFactor = 0.6;

35
src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs

@ -19,6 +19,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
internal class Vp8LEncoder : IDisposable internal class Vp8LEncoder : IDisposable
{ {
/// <summary>
/// Scratch buffer to reduce allocations.
/// It is created once per encoder instance and handed to helpers as temporary working storage
/// (the color space transform receives the whole buffer, the Huffman code writer uses its first two entries),
/// so its contents must not be assumed to survive across those calls.
/// </summary>
private readonly int[] scratch = new int[256];
/// <summary>
/// Four preallocated buffers of 256 entries each, created once per encoder instance and passed on
/// as reusable working storage instead of being allocated per call.
/// NOTE(review): presumably one histogram per ARGB channel; confirm against the consumer.
/// </summary>
private readonly int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] };
/// <summary>
/// Four preallocated buffers of 256 entries each, passed on together with <see cref="histoArgb"/>.
/// NOTE(review): presumably holds the best histograms found so far; confirm against the consumer.
/// </summary>
private readonly int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] };
/// <summary> /// <summary>
/// The <see cref="MemoryAllocator"/> to use for buffer allocations. /// The <see cref="MemoryAllocator"/> to use for buffer allocations.
/// </summary> /// </summary>
@ -128,6 +137,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
/// <summary>
/// Gets the lookup that maps the index of a Huffman code to the argument passed to <c>Literal(...)</c>
/// when a literal is written: the k-th code encodes <c>Literal(Order[k])</c>.
/// </summary>
/// <remarks>
/// This uses C#'s compiler optimization to refer to assembly's static data directly:
/// a property returning a constant <c>new byte[] { ... }</c> as <see cref="ReadOnlySpan{T}"/> of byte
/// does not allocate an array on access. Keep the expression in exactly this form.
/// </remarks>
private static ReadOnlySpan<byte> Order => new byte[] { 1, 2, 0, 3 };
/// <summary> /// <summary>
/// Gets the memory for the image data as packed bgra values. /// Gets the memory for the image data as packed bgra values.
/// </summary> /// </summary>
@ -676,6 +688,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.EncodedData.GetSpan(), this.EncodedData.GetSpan(),
this.BgraScratch.GetSpan(), this.BgraScratch.GetSpan(),
this.TransformData.GetSpan(), this.TransformData.GetSpan(),
this.histoArgb,
this.bestHisto,
this.nearLossless, this.nearLossless,
nearLosslessStrength, nearLosslessStrength,
this.transparentColorMode, this.transparentColorMode,
@ -695,7 +709,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int transformWidth = LosslessUtils.SubSampleSize(width, colorTransformBits); int transformWidth = LosslessUtils.SubSampleSize(width, colorTransformBits);
int transformHeight = LosslessUtils.SubSampleSize(height, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(height, colorTransformBits);
PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan()); PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan(), this.scratch);
this.bitWriter.PutBits(WebpConstants.TransformPresent, 1); this.bitWriter.PutBits(WebpConstants.TransformPresent, 1);
this.bitWriter.PutBits((uint)Vp8LTransformType.CrossColorTransform, 2); this.bitWriter.PutBits((uint)Vp8LTransformType.CrossColorTransform, 2);
@ -737,7 +751,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
var histogramImage = new List<Vp8LHistogram>() var histogramImage = new List<Vp8LHistogram>()
{ {
new Vp8LHistogram(cacheBits) new(cacheBits)
}; };
// Build histogram image and symbols from backward references. // Build histogram image and symbols from backward references.
@ -781,7 +795,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private void StoreHuffmanCode(HuffmanTree[] huffTree, HuffmanTreeToken[] tokens, HuffmanTreeCode huffmanCode) private void StoreHuffmanCode(HuffmanTree[] huffTree, HuffmanTreeToken[] tokens, HuffmanTreeCode huffmanCode)
{ {
int count = 0; int count = 0;
int[] symbols = { 0, 0 }; Span<int> symbols = this.scratch.AsSpan(0, 2);
symbols.Clear();
int maxBits = 8; int maxBits = 8;
int maxSymbol = 1 << maxBits; int maxSymbol = 1 << maxBits;
@ -974,10 +989,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (v.IsLiteral()) if (v.IsLiteral())
{ {
byte[] order = { 1, 2, 0, 3 };
for (int k = 0; k < 4; k++) for (int k = 0; k < 4; k++)
{ {
int code = (int)v.Literal(order[k]); int code = (int)v.Literal(Order[k]);
this.bitWriter.WriteHuffmanCode(codes[k], code); this.bitWriter.WriteHuffmanCode(codes[k], code);
} }
} }
@ -1093,9 +1107,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
histo[(int)HistoIx.HistoBluePred * 256]++; histo[(int)HistoIx.HistoBluePred * 256]++;
histo[(int)HistoIx.HistoAlphaPred * 256]++; histo[(int)HistoIx.HistoAlphaPred * 256]++;
var bitEntropy = new Vp8LBitEntropy();
for (int j = 0; j < (int)HistoIx.HistoTotal; j++) for (int j = 0; j < (int)HistoIx.HistoTotal; j++)
{ {
var bitEntropy = new Vp8LBitEntropy(); bitEntropy.Init();
Span<uint> curHisto = histo.Slice(j * 256, 256); Span<uint> curHisto = histo.Slice(j * 256, 256);
bitEntropy.BitsEntropyUnrefined(curHisto, 256); bitEntropy.BitsEntropyUnrefined(curHisto, 256);
entropyComp[j] = bitEntropy.BitsEntropyRefine(); entropyComp[j] = bitEntropy.BitsEntropyRefine();
@ -1191,9 +1206,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return false; return false;
} }
#if NET5_0_OR_GREATER
var paletteSlice = palette.Slice(0, this.PaletteSize);
paletteSlice.Sort();
#else
uint[] paletteArray = palette.Slice(0, this.PaletteSize).ToArray(); uint[] paletteArray = palette.Slice(0, this.PaletteSize).ToArray();
Array.Sort(paletteArray); Array.Sort(paletteArray);
paletteArray.CopyTo(palette); paletteArray.CopyTo(palette);
#endif
if (PaletteHasNonMonotonousDeltas(palette, this.PaletteSize)) if (PaletteHasNonMonotonousDeltas(palette, this.PaletteSize))
{ {
@ -1448,7 +1468,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
return mid; return mid;
} }
else if (sorted[mid] < color)
if (sorted[mid] < color)
{ {
low = mid; low = mid;
} }

57
src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs

@ -157,29 +157,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Estimate how many bits the combined entropy of literals and distance approximately maps to. /// Estimate how many bits the combined entropy of literals and distance approximately maps to.
/// </summary> /// </summary>
/// <returns>Estimated bits.</returns> /// <returns>Estimated bits.</returns>
/// <param name="stats">Reusable streak statistics; it is cleared by <c>PopulationCost</c> before every use, so no state is carried between the individual cost computations.</param>
/// <param name="bitsEntropy">Reusable bit entropy accumulator; it is re-initialized by <c>PopulationCost</c> before every use.</param>
public double EstimateBits(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{
    // The trivial symbol reported by PopulationCost is not needed here, so all calls share one throw-away variable.
    uint notUsed = 0;

    // Sum of the population cost of each of the five histograms plus the extra cost
    // for the length part of the literal histogram and for the distance histogram.
    return
        PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0], stats, bitsEntropy)
        + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1], stats, bitsEntropy)
        + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2], stats, bitsEntropy)
        + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3], stats, bitsEntropy)
        + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy)
        + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes)
        + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
}
public void UpdateHistogramCost() public void UpdateHistogramCost(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{ {
uint alphaSym = 0, redSym = 0, blueSym = 0; uint alphaSym = 0, redSym = 0, blueSym = 0;
uint notUsed = 0; uint notUsed = 0;
double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3]);
double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4]) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes); double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3], stats, bitsEntropy);
double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
int numCodes = this.NumCodes(); int numCodes = this.NumCodes();
this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0]) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0], stats, bitsEntropy) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1]); this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1], stats, bitsEntropy);
this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2]); this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2], stats, bitsEntropy);
this.BitCost = this.LiteralCost + this.RedCost + this.BlueCost + alphaCost + distanceCost; this.BitCost = this.LiteralCost + this.RedCost + this.BlueCost + alphaCost + distanceCost;
if ((alphaSym | redSym | blueSym) == NonTrivialSym) if ((alphaSym | redSym | blueSym) == NonTrivialSym)
{ {
@ -198,11 +199,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Since the previous score passed is 'costThreshold', we only need to compare /// Since the previous score passed is 'costThreshold', we only need to compare
/// the partial cost against 'costThreshold + C(a) + C(b)' to possibly bail-out early. /// the partial cost against 'costThreshold + C(a) + C(b)' to possibly bail-out early.
/// </summary> /// </summary>
public double AddEval(Vp8LHistogram b, double costThreshold, Vp8LHistogram output) public double AddEval(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold, Vp8LHistogram output)
{ {
double sumCost = this.BitCost + b.BitCost; double sumCost = this.BitCost + b.BitCost;
costThreshold += sumCost; costThreshold += sumCost;
if (this.GetCombinedHistogramEntropy(b, costThreshold, costInitial: 0, out double cost)) if (this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial: 0, out double cost))
{ {
this.Add(b, output); this.Add(b, output);
output.BitCost = cost; output.BitCost = cost;
@ -212,10 +213,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return cost - sumCost; return cost - sumCost;
} }
/// <summary>
/// Computes the combined entropy cost of this histogram and <paramref name="b"/>, starting from
/// the negated bit cost of this histogram, without modifying either histogram.
/// </summary>
/// <param name="b">The histogram to combine with.</param>
/// <param name="stats">Reusable streak statistics, used as working storage by the entropy computation.</param>
/// <param name="bitsEntropy">Reusable bit entropy accumulator, used as working storage by the entropy computation.</param>
/// <param name="costThreshold">The cost above which the combined entropy computation stops early.</param>
/// <returns>
/// The cost accumulated by <c>GetCombinedHistogramEntropy</c>. When that computation bails out because
/// the threshold was exceeded, this is the partial cost reached at that point.
/// </returns>
public double AddThresh(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold)
{
    double costInitial = -this.BitCost;

    // The boolean result (threshold exceeded or not) is intentionally ignored; callers only need the cost.
    this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial, out double cost);
    return cost;
}
@ -239,12 +240,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
: NonTrivialSym; : NonTrivialSym;
} }
public bool GetCombinedHistogramEntropy(Vp8LHistogram b, double costThreshold, double costInitial, out double cost) public bool GetCombinedHistogramEntropy(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy, double costThreshold, double costInitial, out double cost)
{ {
bool trivialAtEnd = false; bool trivialAtEnd = false;
cost = costInitial; cost = costInitial;
cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false); cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false, stats, bitEntropy);
cost += ExtraCostCombined(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), b.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); cost += ExtraCostCombined(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), b.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
@ -267,25 +268,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd); cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold) if (cost > costThreshold)
{ {
return false; return false;
} }
cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd); cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold) if (cost > costThreshold)
{ {
return false; return false;
} }
cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd); cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold) if (cost > costThreshold)
{ {
return false; return false;
} }
cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false); cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false, stats, bitEntropy);
if (cost > costThreshold) if (cost > costThreshold)
{ {
return false; return false;
@ -415,9 +416,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
} }
private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd) private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy)
{ {
var stats = new Vp8LStreaks(); stats.Clear();
bitEntropy.Init();
if (trivialAtEnd) if (trivialAtEnd)
{ {
// This configuration is due to palettization that transforms an indexed // This configuration is due to palettization that transforms an indexed
@ -435,7 +437,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return stats.FinalHuffmanCost(); return stats.FinalHuffmanCost();
} }
var bitEntropy = new Vp8LBitEntropy();
if (isXUsed) if (isXUsed)
{ {
if (isYUsed) if (isYUsed)
@ -479,10 +480,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary> /// <summary>
/// Get the symbol entropy for the distribution 'population'. /// Get the symbol entropy for the distribution 'population'.
/// </summary> /// </summary>
private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed) private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy)
{ {
var bitEntropy = new Vp8LBitEntropy(); bitEntropy.Init();
var stats = new Vp8LStreaks(); stats.Clear();
bitEntropy.BitsEntropyUnrefined(population, length, stats); bitEntropy.BitsEntropyUnrefined(population, length, stats);
trivialSym = (bitEntropy.NoneZeros == 1) ? bitEntropy.NoneZeroCode : NonTrivialSym; trivialSym = (bitEntropy.NoneZeros == 1) ? bitEntropy.NoneZeroCode : NonTrivialSym;

9
src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs

@ -1,6 +1,8 @@
// Copyright (c) Six Labors. // Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
internal class Vp8LStreaks internal class Vp8LStreaks
@ -28,6 +30,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary> /// </summary>
public int[][] Streaks { get; } public int[][] Streaks { get; }
/// <summary>
/// Zeroes the counts and both streak tables, so that an existing instance can be reused
/// instead of allocating a new one.
/// </summary>
public void Clear()
{
    int[][] streakTables = this.Streaks;

    this.Counts.AsSpan().Clear();
    streakTables[0].AsSpan().Clear();
    streakTables[1].AsSpan().Clear();
}
public double FinalHuffmanCost() public double FinalHuffmanCost()
{ {
// The constants in this function are experimental and got rounded from // The constants in this function are experimental and got rounded from

3
src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs

@ -418,6 +418,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
var huffmanTables = new HuffmanCode[numHTreeGroups * tableSize]; var huffmanTables = new HuffmanCode[numHTreeGroups * tableSize];
var hTreeGroups = new HTreeGroup[numHTreeGroups]; var hTreeGroups = new HTreeGroup[numHTreeGroups];
Span<HuffmanCode> huffmanTable = huffmanTables.AsSpan(); Span<HuffmanCode> huffmanTable = huffmanTables.AsSpan();
int[] codeLengths = new int[maxAlphabetSize];
for (int i = 0; i < numHTreeGroupsMax; i++) for (int i = 0; i < numHTreeGroupsMax; i++)
{ {
hTreeGroups[i] = new HTreeGroup(HuffmanUtils.HuffmanPackedTableSize); hTreeGroups[i] = new HTreeGroup(HuffmanUtils.HuffmanPackedTableSize);
@ -425,7 +426,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int totalSize = 0; int totalSize = 0;
bool isTrivialLiteral = true; bool isTrivialLiteral = true;
int maxBits = 0; int maxBits = 0;
int[] codeLengths = new int[maxAlphabetSize]; codeLengths.AsSpan().Clear();
for (int j = 0; j < WebpConstants.HuffmanCodesPerMetaCode; j++) for (int j = 0; j < WebpConstants.HuffmanCodesPerMetaCode; j++)
{ {
int alphabetSize = WebpConstants.AlphabetSize[j]; int alphabetSize = WebpConstants.AlphabetSize[j];

99
src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

@ -58,14 +58,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8Disto16X16(Span<byte> a, Span<byte> b, Span<ushort> w) public static int Vp8Disto16X16(Span<byte> a, Span<byte> b, Span<ushort> w, Span<int> scratch)
{ {
int d = 0; int d = 0;
for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps) for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps)
{ {
for (int x = 0; x < 16; x += 4) for (int x = 0; x < 16; x += 4)
{ {
d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w); d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w, scratch);
} }
} }
@ -73,10 +73,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
/// <summary>
/// Computes the distortion between two blocks as the absolute difference of their
/// weighted transform sums, scaled down by 32.
/// </summary>
/// <param name="a">The first block.</param>
/// <param name="b">The second block.</param>
/// <param name="w">The weights handed to <c>TTransform</c>.</param>
/// <param name="scratch">
/// Working storage for <c>TTransform</c>, which uses (and clears) the first 16 entries.
/// The same buffer is safely shared by both calls because each call only returns its sum.
/// </param>
/// <returns>The distortion value.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static int Vp8Disto4X4(Span<byte> a, Span<byte> b, Span<ushort> w, Span<int> scratch)
{
    int sum1 = TTransform(a, w, scratch);
    int sum2 = TTransform(b, w, scratch);
    return Math.Abs(sum2 - sum1) >> 5;
}
@ -252,18 +252,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Forwards to TrueMotion with 4 as the last argument (presumably the block size; confirm against TrueMotion).
[MethodImpl(InliningOptions.ShortMethod)]
public static void TM4(Span<byte> dst, Span<byte> yuv, int offset) => TrueMotion(dst, yuv, offset, 4);
public static void VE4(Span<byte> dst, Span<byte> yuv, int offset) public static void VE4(Span<byte> dst, Span<byte> yuv, int offset, Span<byte> vals)
{ {
// vertical // vertical
int topOffset = offset - WebpConstants.Bps; int topOffset = offset - WebpConstants.Bps;
byte[] vals = vals[0] = Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]);
{ vals[1] = Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]);
Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]), vals[2] = Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]);
Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]), vals[3] = Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4]);
Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]),
Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4])
};
int endIdx = 4 * WebpConstants.Bps; int endIdx = 4 * WebpConstants.Bps;
for (int i = 0; i < endIdx; i += WebpConstants.Bps) for (int i = 0; i < endIdx; i += WebpConstants.Bps)
{ {
@ -504,9 +500,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// <summary> /// <summary>
/// Paragraph 14.3: Implementation of the Walsh-Hadamard transform inversion. /// Paragraph 14.3: Implementation of the Walsh-Hadamard transform inversion.
/// </summary> /// </summary>
public static void TransformWht(Span<short> input, Span<short> output) public static void TransformWht(Span<short> input, Span<short> output, Span<int> scratch)
{ {
int[] tmp = new int[16]; Span<int> tmp = scratch.Slice(0, 16);
tmp.Clear();
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
int iPlus4 = 4 + i; int iPlus4 = 4 + i;
@ -544,10 +541,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// Returns the weighted sum of the absolute value of transformed coefficients. /// Returns the weighted sum of the absolute value of transformed coefficients.
/// w[] contains a row-major 4 by 4 symmetric matrix. /// w[] contains a row-major 4 by 4 symmetric matrix.
/// </summary> /// </summary>
public static int TTransform(Span<byte> input, Span<ushort> w) public static int TTransform(Span<byte> input, Span<ushort> w, Span<int> scratch)
{ {
int sum = 0; int sum = 0;
int[] tmp = new int[16]; Span<int> tmp = scratch.Slice(0, 16);
tmp.Clear();
// horizontal pass. // horizontal pass.
int inputOffset = 0; int inputOffset = 0;
@ -591,15 +589,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return sum; return sum;
} }
/// <summary>
/// Applies <c>TransformOne</c> to two consecutive coefficient blocks: the first is taken from the
/// start of <paramref name="src"/> and written at the start of <paramref name="dst"/>, the second is
/// taken 16 coefficients further and written 4 bytes further.
/// </summary>
/// <param name="src">The input coefficients of both blocks.</param>
/// <param name="dst">The destination pixels.</param>
/// <param name="scratch">
/// Working storage for <c>TransformOne</c>, which uses (and clears) the first 16 entries on each call,
/// so the same buffer can be reused for both blocks.
/// </param>
public static void TransformTwo(Span<short> src, Span<byte> dst, Span<int> scratch)
{
    TransformOne(src, dst, scratch);
    TransformOne(src.Slice(16), dst.Slice(4), scratch);
}
public static void TransformOne(Span<short> src, Span<byte> dst) public static void TransformOne(Span<short> src, Span<byte> dst, Span<int> scratch)
{ {
Span<int> tmp = stackalloc int[4 * 4]; Span<int> tmp = scratch.Slice(0, 16);
tmp.Clear();
int tmpOffset = 0; int tmpOffset = 0;
for (int srcOffset = 0; srcOffset < 4; srcOffset++) for (int srcOffset = 0; srcOffset < 4; srcOffset++)
{ {
@ -671,10 +670,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Store2(dst, 3, a - d4, d1, c1); Store2(dst, 3, a - d4, d1, c1);
} }
/// <summary>
/// Applies <c>TransformTwo</c> twice: once for the coefficients at the start of <paramref name="src"/>,
/// written at the start of <paramref name="dst"/>, and once for the coefficients starting at index 32,
/// written <c>4 * WebpConstants.Bps</c> bytes further into <paramref name="dst"/>.
/// </summary>
/// <param name="src">The input coefficients.</param>
/// <param name="dst">The destination pixels.</param>
/// <param name="scratch">Working storage passed through to <c>TransformTwo</c>; it is reused for every block.</param>
public static void TransformUv(Span<short> src, Span<byte> dst, Span<int> scratch)
{
    TransformTwo(src.Slice(0 * 16), dst, scratch);
    TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps), scratch);
}
public static void TransformDcuv(Span<short> src, Span<byte> dst) public static void TransformDcuv(Span<short> src, Span<byte> dst)
@ -934,11 +933,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int p0 = p[offset - step]; int p0 = p[offset - step];
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1]; int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1(p1 - q1);
int a1 = WebpLookupTables.Sclip2[(a + 4) >> 3]; int a1 = WebpLookupTables.Sclip2((a + 4) >> 3);
int a2 = WebpLookupTables.Sclip2[(a + 3) >> 3]; int a2 = WebpLookupTables.Sclip2((a + 3) >> 3);
p[offset - step] = WebpLookupTables.Clip1[p0 + a2]; p[offset - step] = WebpLookupTables.Clip1(p0 + a2);
p[offset] = WebpLookupTables.Clip1[q0 - a1]; p[offset] = WebpLookupTables.Clip1(q0 - a1);
} }
private static void DoFilter4(Span<byte> p, int offset, int step) private static void DoFilter4(Span<byte> p, int offset, int step)
@ -950,13 +949,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
int a = 3 * (q0 - p0); int a = 3 * (q0 - p0);
int a1 = WebpLookupTables.Sclip2[(a + 4) >> 3]; int a1 = WebpLookupTables.Sclip2((a + 4) >> 3);
int a2 = WebpLookupTables.Sclip2[(a + 3) >> 3]; int a2 = WebpLookupTables.Sclip2((a + 3) >> 3);
int a3 = (a1 + 1) >> 1; int a3 = (a1 + 1) >> 1;
p[offsetMinus2Step] = WebpLookupTables.Clip1[p1 + a3]; p[offsetMinus2Step] = WebpLookupTables.Clip1(p1 + a3);
p[offset - step] = WebpLookupTables.Clip1[p0 + a2]; p[offset - step] = WebpLookupTables.Clip1(p0 + a2);
p[offset] = WebpLookupTables.Clip1[q0 - a1]; p[offset] = WebpLookupTables.Clip1(q0 - a1);
p[offset + step] = WebpLookupTables.Clip1[q1 - a3]; p[offset + step] = WebpLookupTables.Clip1(q1 - a3);
} }
private static void DoFilter6(Span<byte> p, int offset, int step) private static void DoFilter6(Span<byte> p, int offset, int step)
@ -971,18 +970,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
int q2 = p[offset + step2]; int q2 = p[offset + step2];
int a = WebpLookupTables.Sclip1[(3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1]]; int a = WebpLookupTables.Sclip1((3 * (q0 - p0)) + WebpLookupTables.Sclip1(p1 - q1));
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9] // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
int a1 = ((27 * a) + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 int a1 = ((27 * a) + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
int a2 = ((18 * a) + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 int a2 = ((18 * a) + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
int a3 = ((9 * a) + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 int a3 = ((9 * a) + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
p[offset - step3] = WebpLookupTables.Clip1[p2 + a3]; p[offset - step3] = WebpLookupTables.Clip1(p2 + a3);
p[offset - step2] = WebpLookupTables.Clip1[p1 + a2]; p[offset - step2] = WebpLookupTables.Clip1(p1 + a2);
p[offsetMinusStep] = WebpLookupTables.Clip1[p0 + a1]; p[offsetMinusStep] = WebpLookupTables.Clip1(p0 + a1);
p[offset] = WebpLookupTables.Clip1[q0 - a1]; p[offset] = WebpLookupTables.Clip1(q0 - a1);
p[offset + step] = WebpLookupTables.Clip1[q1 - a2]; p[offset + step] = WebpLookupTables.Clip1(q1 - a2);
p[offset + step2] = WebpLookupTables.Clip1[q2 - a3]; p[offset + step2] = WebpLookupTables.Clip1(q2 - a3);
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
@ -992,7 +991,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int p0 = p[offset - step]; int p0 = p[offset - step];
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
return (4 * WebpLookupTables.Abs0[p0 - q0]) + WebpLookupTables.Abs0[p1 - q1] <= t; return (4 * WebpLookupTables.Abs0(p0 - q0)) + WebpLookupTables.Abs0(p1 - q1) <= t;
} }
private static bool NeedsFilter2(Span<byte> p, int offset, int step, int t, int it) private static bool NeedsFilter2(Span<byte> p, int offset, int step, int t, int it)
@ -1007,14 +1006,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q1 = p[offset + step]; int q1 = p[offset + step];
int q2 = p[offset + step2]; int q2 = p[offset + step2];
int q3 = p[offset + step3]; int q3 = p[offset + step3];
if ((4 * WebpLookupTables.Abs0[p0 - q0]) + WebpLookupTables.Abs0[p1 - q1] > t) if ((4 * WebpLookupTables.Abs0(p0 - q0)) + WebpLookupTables.Abs0(p1 - q1) > t)
{ {
return false; return false;
} }
return WebpLookupTables.Abs0[p3 - p2] <= it && WebpLookupTables.Abs0[p2 - p1] <= it && return WebpLookupTables.Abs0(p3 - p2) <= it && WebpLookupTables.Abs0(p2 - p1) <= it &&
WebpLookupTables.Abs0[p1 - p0] <= it && WebpLookupTables.Abs0[q3 - q2] <= it && WebpLookupTables.Abs0(p1 - p0) <= it && WebpLookupTables.Abs0(q3 - q2) <= it &&
WebpLookupTables.Abs0[q2 - q1] <= it && WebpLookupTables.Abs0[q1 - q0] <= it; WebpLookupTables.Abs0(q2 - q1) <= it && WebpLookupTables.Abs0(q1 - q0) <= it;
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]
@ -1024,7 +1023,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int p0 = p[offset - step]; int p0 = p[offset - step];
int q0 = p[offset]; int q0 = p[offset];
int q1 = p[offset + step]; int q1 = p[offset + step];
return WebpLookupTables.Abs0[p1 - p0] > thresh || WebpLookupTables.Abs0[q1 - q0] > thresh; return WebpLookupTables.Abs0(p1 - p0) > thresh || WebpLookupTables.Abs0(q1 - q0) > thresh;
} }
[MethodImpl(InliningOptions.ShortMethod)] [MethodImpl(InliningOptions.ShortMethod)]

86
src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs

@ -31,7 +31,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int lambda = dqm.LambdaI16; int lambda = dqm.LambdaI16;
int tlambda = dqm.TLambda; int tlambda = dqm.TLambda;
Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
Span<int> scratch = it.Scratch3;
var rdTmp = new Vp8ModeScore(); var rdTmp = new Vp8ModeScore();
var res = new Vp8Residual();
Vp8ModeScore rdCur = rdTmp; Vp8ModeScore rdCur = rdTmp;
Vp8ModeScore rdBest = rd; Vp8ModeScore rdBest = rd;
int mode; int mode;
@ -39,7 +41,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rd.ModeI16 = -1; rd.ModeI16 = -1;
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{ {
// scratch buffer. // Scratch buffer.
Span<byte> tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc);
rdCur.ModeI16 = mode; rdCur.ModeI16 = mode;
@ -48,9 +50,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Measure RD-score. // Measure RD-score.
rdCur.D = LossyUtils.Vp8Sse16X16(src, tmpDst); rdCur.D = LossyUtils.Vp8Sse16X16(src, tmpDst);
rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY)) : 0; rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0;
rdCur.H = WebpConstants.Vp8FixedCostsI16[mode]; rdCur.H = WebpConstants.Vp8FixedCostsI16[mode];
rdCur.R = it.GetCostLuma16(rdCur, proba); rdCur.R = it.GetCostLuma16(rdCur, proba, res);
if (isFlat) if (isFlat)
{ {
@ -101,6 +103,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int tlambda = dqm.TLambda; int tlambda = dqm.TLambda;
Span<byte> src0 = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> src0 = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
Span<byte> bestBlocks = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> bestBlocks = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc);
Span<int> scratch = it.Scratch3;
int totalHeaderBits = 0; int totalHeaderBits = 0;
var rdBest = new Vp8ModeScore(); var rdBest = new Vp8ModeScore();
@ -113,31 +116,35 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rdBest.H = 211; // '211' is the value of VP8BitCost(0, 145) rdBest.H = 211; // '211' is the value of VP8BitCost(0, 145)
rdBest.SetRdScore(dqm.LambdaMode); rdBest.SetRdScore(dqm.LambdaMode);
it.StartI4(); it.StartI4();
var rdi4 = new Vp8ModeScore();
var rdTmp = new Vp8ModeScore();
var res = new Vp8Residual();
Span<short> tmpLevels = new short[16];
do do
{ {
int numBlocks = 1; int numBlocks = 1;
var rdi4 = new Vp8ModeScore(); rdi4.Clear();
int mode; int mode;
int bestMode = -1; int bestMode = -1;
Span<byte> src = src0.Slice(WebpLookupTables.Vp8Scan[it.I4]); Span<byte> src = src0.Slice(WebpLookupTables.Vp8Scan[it.I4]);
short[] modeCosts = it.GetCostModeI4(rd.ModesI4); short[] modeCosts = it.GetCostModeI4(rd.ModesI4);
Span<byte> bestBlock = bestBlocks.Slice(WebpLookupTables.Vp8Scan[it.I4]); Span<byte> bestBlock = bestBlocks.Slice(WebpLookupTables.Vp8Scan[it.I4]);
Span<byte> tmpDst = it.Scratch.AsSpan(); Span<byte> tmpDst = it.Scratch.AsSpan();
tmpDst.Fill(0); tmpDst.Clear();
rdi4.InitScore(); rdi4.InitScore();
it.MakeIntra4Preds(); it.MakeIntra4Preds();
for (mode = 0; mode < WebpConstants.NumBModes; ++mode) for (mode = 0; mode < WebpConstants.NumBModes; ++mode)
{ {
var rdTmp = new Vp8ModeScore(); rdTmp.Clear();
short[] tmpLevels = new short[16]; tmpLevels.Clear();
// Reconstruct. // Reconstruct.
rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode); rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode);
// Compute RD-score. // Compute RD-score.
rdTmp.D = LossyUtils.Vp8Sse4X4(src, tmpDst); rdTmp.D = LossyUtils.Vp8Sse4X4(src, tmpDst);
rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY)) : 0; rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0;
rdTmp.H = modeCosts[mode]; rdTmp.H = modeCosts[mode];
// Add flatness penalty, to avoid flat area to be mispredicted by a complex mode. // Add flatness penalty, to avoid flat area to be mispredicted by a complex mode.
@ -150,15 +157,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rdTmp.R = 0; rdTmp.R = 0;
} }
// early-out check. // Early-out check.
rdTmp.SetRdScore(lambda); rdTmp.SetRdScore(lambda);
if (bestMode >= 0 && rdTmp.Score >= rdi4.Score) if (bestMode >= 0 && rdTmp.Score >= rdi4.Score)
{ {
continue; continue;
} }
// finish computing score. // Finish computing score.
rdTmp.R += it.GetCostLuma4(tmpLevels, proba); rdTmp.R += it.GetCostLuma4(tmpLevels, proba, res);
rdTmp.SetRdScore(lambda); rdTmp.SetRdScore(lambda);
if (bestMode < 0 || rdTmp.Score < rdi4.Score) if (bestMode < 0 || rdTmp.Score < rdi4.Score)
@ -213,13 +220,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span<byte> dst0 = it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc); Span<byte> dst0 = it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc);
Span<byte> dst = dst0; Span<byte> dst = dst0;
var rdBest = new Vp8ModeScore(); var rdBest = new Vp8ModeScore();
var rdUv = new Vp8ModeScore();
var res = new Vp8Residual();
int mode; int mode;
rd.ModeUv = -1; rd.ModeUv = -1;
rdBest.InitScore(); rdBest.InitScore();
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{ {
var rdUv = new Vp8ModeScore(); rdUv.Clear();
// Reconstruct // Reconstruct
rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode); rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode);
@ -228,7 +237,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rdUv.D = LossyUtils.Vp8Sse16X8(src, tmpDst); rdUv.D = LossyUtils.Vp8Sse16X8(src, tmpDst);
rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas. rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas.
rdUv.H = WebpConstants.Vp8FixedCostsUv[mode]; rdUv.H = WebpConstants.Vp8FixedCostsUv[mode];
rdUv.R = it.GetCostUv(rdUv, proba); rdUv.R = it.GetCostUv(rdUv, proba, res);
if (mode > 0 && IsFlat(rdUv.UvLevels, numBlocks, WebpConstants.FlatnessLimitIUv)) if (mode > 0 && IsFlat(rdUv.UvLevels, numBlocks, WebpConstants.FlatnessLimitIUv))
{ {
rdUv.R += WebpConstants.FlatnessPenality * numBlocks; rdUv.R += WebpConstants.FlatnessPenality * numBlocks;
@ -271,16 +280,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
int nz = 0; int nz = 0;
int n; int n;
short[] dcTmp = new short[16]; Span<short> shortScratchSpan = it.Scratch2.AsSpan();
short[] tmp = new short[16 * 16]; Span<int> scratch = it.Scratch3.AsSpan(0, 16);
Span<short> tmpSpan = tmp.AsSpan(); shortScratchSpan.Clear();
scratch.Clear();
Span<short> dcTmp = shortScratchSpan.Slice(0, 16);
Span<short> tmp = shortScratchSpan.Slice(16, 16 * 16);
for (n = 0; n < 16; n += 2) for (n = 0; n < 16; n += 2)
{ {
Vp8Encoding.FTransform2(src.Slice(WebpLookupTables.Vp8Scan[n]), reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 16), tmpSpan.Slice((n + 1) * 16, 16)); Vp8Encoding.FTransform2(
src.Slice(WebpLookupTables.Vp8Scan[n]),
reference.Slice(WebpLookupTables.Vp8Scan[n]),
tmp.Slice(n * 16, 16),
tmp.Slice((n + 1) * 16, 16),
scratch);
} }
Vp8Encoding.FTransformWht(tmp, dcTmp); Vp8Encoding.FTransformWht(tmp, dcTmp, scratch);
nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24; nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24;
for (n = 0; n < 16; n += 2) for (n = 0; n < 16; n += 2)
@ -288,14 +305,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Zero-out the first coeff, so that: a) nz is correct below, and // Zero-out the first coeff, so that: a) nz is correct below, and
// b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
tmp[n * 16] = tmp[(n + 1) * 16] = 0; tmp[n * 16] = tmp[(n + 1) * 16] = 0;
nz |= Quantize2Blocks(tmpSpan.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n;
} }
// Transform back. // Transform back.
LossyUtils.TransformWht(dcTmp, tmpSpan); LossyUtils.TransformWht(dcTmp, tmp, scratch);
for (n = 0; n < 16; n += 2) for (n = 0; n < 16; n += 2)
{ {
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true); Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true, scratch);
} }
return nz; return nz;
@ -304,10 +321,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span<short> levels, Span<byte> src, Span<byte> yuvOut, int mode) public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span<short> levels, Span<byte> src, Span<byte> yuvOut, int mode)
{ {
Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); Span<byte> reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]);
short[] tmp = new short[16]; Span<short> tmp = it.Scratch2.AsSpan(0, 16);
Vp8Encoding.FTransform(src, reference, tmp); Span<int> scratch = it.Scratch3.AsSpan(0, 16);
tmp.Clear();
scratch.Clear();
Vp8Encoding.FTransform(src, reference, tmp, scratch);
int nz = QuantizeBlock(tmp, levels, dqm.Y1); int nz = QuantizeBlock(tmp, levels, dqm.Y1);
Vp8Encoding.ITransform(reference, tmp, yuvOut, false); Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch);
return nz; return nz;
} }
@ -318,27 +338,31 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc); Span<byte> src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc);
int nz = 0; int nz = 0;
int n; int n;
short[] tmp = new short[8 * 16]; Span<short> tmp = it.Scratch2.AsSpan(0, 8 * 16);
Span<int> scratch = it.Scratch3.AsSpan(0, 16);
tmp.Clear();
scratch.Clear();
for (n = 0; n < 8; n += 2) for (n = 0; n < 8; n += 2)
{ {
Vp8Encoding.FTransform2( Vp8Encoding.FTransform2(
src.Slice(WebpLookupTables.Vp8ScanUv[n]), src.Slice(WebpLookupTables.Vp8ScanUv[n]),
reference.Slice(WebpLookupTables.Vp8ScanUv[n]), reference.Slice(WebpLookupTables.Vp8ScanUv[n]),
tmp.AsSpan(n * 16, 16), tmp.Slice(n * 16, 16),
tmp.AsSpan((n + 1) * 16, 16)); tmp.Slice((n + 1) * 16, 16),
scratch);
} }
CorrectDcValues(it, dqm.Uv, tmp, rd); CorrectDcValues(it, dqm.Uv, tmp, rd);
for (n = 0; n < 8; n += 2) for (n = 0; n < 8; n += 2)
{ {
nz |= Quantize2Blocks(tmp.AsSpan(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n;
} }
for (n = 0; n < 8; n += 2) for (n = 0; n < 8; n += 2)
{ {
Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.AsSpan(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true); Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true, scratch);
} }
return nz << 16; return nz << 16;
@ -556,7 +580,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return (sign ? -v0 : v0) >> DSCALE; return (sign ? -v0 : v0) >> DSCALE;
} }
public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, short[] tmp, Vp8ModeScore rd) public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, Span<short> tmp, Vp8ModeScore rd)
{ {
#pragma warning disable SA1005 // Single line comments should begin with single space #pragma warning disable SA1005 // Single line comments should begin with single space
// | top[0] | top[1] // | top[0] | top[1]
@ -571,7 +595,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
Span<sbyte> top = it.TopDerr.AsSpan((it.X * 4) + ch, 2); Span<sbyte> top = it.TopDerr.AsSpan((it.X * 4) + ch, 2);
Span<sbyte> left = it.LeftDerr.AsSpan(ch, 2); Span<sbyte> left = it.LeftDerr.AsSpan(ch, 2);
Span<short> c = tmp.AsSpan(ch * 4 * 16, 4 * 16); Span<short> c = tmp.Slice(ch * 4 * 16, 4 * 16);
c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE)); c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE));
int err0 = QuantizeSingle(c, mtx); int err0 = QuantizeSingle(c, mtx);
c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE)); c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE));

27
src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs

@ -81,6 +81,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.I4Boundary = new byte[37]; this.I4Boundary = new byte[37];
this.BitCount = new long[4, 3]; this.BitCount = new long[4, 3];
this.Scratch = new byte[WebpConstants.Bps * 16]; this.Scratch = new byte[WebpConstants.Bps * 16];
this.Scratch2 = new short[17 * 16];
this.Scratch3 = new int[16];
// To match the C initial values of the reference implementation, initialize all with 204. // To match the C initial values of the reference implementation, initialize all with 204.
byte defaultInitVal = 204; byte defaultInitVal = 204;
@ -216,10 +218,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public int CountDown { get; set; } public int CountDown { get; set; }
/// <summary> /// <summary>
/// Gets the scratch buffer. /// Gets the byte scratch buffer.
/// </summary> /// </summary>
public byte[] Scratch { get; } public byte[] Scratch { get; }
/// <summary>
/// Gets the short scratch buffer.
/// </summary>
/// <remarks>
/// Allocated once in the constructor with 17 * 16 elements and shared by every caller that
/// receives this iterator. The contents are transient: the callers visible in this change
/// take a slice of the buffer and clear it before writing to it.
/// </remarks>
public short[] Scratch2 { get; }
/// <summary>
/// Gets the int scratch buffer.
/// </summary>
/// <remarks>
/// Allocated once in the constructor with 16 elements. The callers visible in this change
/// hand it to the transform and distortion helpers as their temporary work area, so its
/// contents are only meaningful for the duration of a single helper call.
/// </remarks>
public int[] Scratch3 { get; }
public Vp8MacroBlockInfo CurrentMacroBlockInfo => this.Mb[this.currentMbIdx]; public Vp8MacroBlockInfo CurrentMacroBlockInfo => this.Mb[this.currentMbIdx];
private Vp8MacroBlockInfo[] Mb { get; } private Vp8MacroBlockInfo[] Mb { get; }
@ -380,7 +392,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int bestMode = 0; int bestMode = 0;
this.MakeLuma16Preds(); this.MakeLuma16Preds();
for (mode = 0; mode < maxMode; ++mode) for (mode = 0; mode < maxMode; mode++)
{ {
var histo = new Vp8Histogram(); var histo = new Vp8Histogram();
histo.CollectHistogram(this.YuvIn.AsSpan(YOffEnc), this.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]), 0, 16); histo.CollectHistogram(this.YuvIn.AsSpan(YOffEnc), this.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]), 0, 16);
@ -499,9 +511,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.CurrentMacroBlockInfo.MacroBlockType = Vp8MacroBlockType.I4X4; this.CurrentMacroBlockInfo.MacroBlockType = Vp8MacroBlockType.I4X4;
} }
public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba) public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res)
{ {
var res = new Vp8Residual();
int r = 0; int r = 0;
// re-import the non-zero context. // re-import the non-zero context.
@ -539,11 +550,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return WebpLookupTables.Vp8FixedCostsI4[top, left]; return WebpLookupTables.Vp8FixedCostsI4[top, left];
} }
public int GetCostLuma4(short[] levels, Vp8EncProba proba) public int GetCostLuma4(Span<short> levels, Vp8EncProba proba, Vp8Residual res)
{ {
int x = this.I4 & 3; int x = this.I4 & 3;
int y = this.I4 >> 2; int y = this.I4 >> 2;
var res = new Vp8Residual();
int r = 0; int r = 0;
res.Init(0, 3, proba); res.Init(0, 3, proba);
@ -553,9 +563,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return r; return r;
} }
public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba) public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res)
{ {
var res = new Vp8Residual();
int r = 0; int r = 0;
// re-import the non-zero context. // re-import the non-zero context.
@ -741,7 +750,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Vp8Encoding.EncPredChroma8(this.YuvP, left, top); Vp8Encoding.EncPredChroma8(this.YuvP, left, top);
} }
public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx); public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx, this.Scratch.AsSpan(0, 4));
public void SwapOut() public void SwapOut()
{ {

18
src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs

@ -70,6 +70,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// </summary> /// </summary>
private int uvAlpha; private int uvAlpha;
/// <summary>
/// Scratch buffer to reduce allocations.
/// </summary>
private readonly int[] scratch = new int[16];
private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 }; private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 };
private const int NumMbSegments = 4; private const int NumMbSegments = 4;
@ -323,18 +328,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.StatLoop(width, height, yStride, uvStride); this.StatLoop(width, height, yStride, uvStride);
it.Init(); it.Init();
it.InitFilter(); it.InitFilter();
var info = new Vp8ModeScore();
var residual = new Vp8Residual();
do do
{ {
bool dontUseSkip = !this.Proba.UseSkipProba; bool dontUseSkip = !this.Proba.UseSkipProba;
info.Clear();
var info = new Vp8ModeScore();
it.Import(y, u, v, yStride, uvStride, width, height, false); it.Import(y, u, v, yStride, uvStride, width, height, false);
// Warning! order is important: first call VP8Decimate() and // Warning! order is important: first call VP8Decimate() and
// *then* decide how to code the skip decision if there's one. // *then* decide how to code the skip decision if there's one.
if (!this.Decimate(it, ref info, this.rdOptLevel) || dontUseSkip) if (!this.Decimate(it, ref info, this.rdOptLevel) || dontUseSkip)
{ {
this.CodeResiduals(it, info); this.CodeResiduals(it, info, residual);
} }
else else
{ {
@ -449,9 +455,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
it.Init(); it.Init();
this.SetLoopParams(stats.Q); this.SetLoopParams(stats.Q);
var info = new Vp8ModeScore();
do do
{ {
var info = new Vp8ModeScore(); info.Clear();
it.Import(y, u, v, yStride, uvStride, width, height, false); it.Import(y, u, v, yStride, uvStride, width, height, false);
if (this.Decimate(it, ref info, rdOpt)) if (this.Decimate(it, ref info, rdOpt))
{ {
@ -932,10 +939,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return isSkipped; return isSkipped;
} }
private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd) private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd, Vp8Residual residual)
{ {
int x, y, ch; int x, y, ch;
var residual = new Vp8Residual();
bool i16 = it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16; bool i16 = it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16;
int segment = it.CurrentMacroBlockInfo.Segment; int segment = it.CurrentMacroBlockInfo.Segment;

54
src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs

@ -68,22 +68,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
public static void ITransform(Span<byte> reference, Span<short> input, Span<byte> dst, bool doTwo) public static void ITransform(Span<byte> reference, Span<short> input, Span<byte> dst, bool doTwo, Span<int> scratch)
{ {
ITransformOne(reference, input, dst); ITransformOne(reference, input, dst, scratch);
if (doTwo) if (doTwo)
{ {
ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4)); ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4), scratch);
} }
} }
public static void ITransformOne(Span<byte> reference, Span<short> input, Span<byte> dst) public static void ITransformOne(Span<byte> reference, Span<short> input, Span<byte> dst, Span<int> scratch)
{ {
int i; int i;
#pragma warning disable SA1312 // Variable names should begin with lower-case letter Span<int> tmp = scratch.Slice(0, 16);
int[] C = new int[4 * 4]; tmp.Clear();
#pragma warning restore SA1312 // Variable names should begin with lower-case letter
Span<int> tmp = C.AsSpan();
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
{ {
// vertical pass. // vertical pass.
@ -99,7 +97,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
input = input.Slice(1); input = input.Slice(1);
} }
tmp = C.AsSpan(); tmp = scratch;
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
{ {
// horizontal pass. // horizontal pass.
@ -116,16 +114,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
public static void FTransform2(Span<byte> src, Span<byte> reference, Span<short> output, Span<short> output2) public static void FTransform2(Span<byte> src, Span<byte> reference, Span<short> output, Span<short> output2, Span<int> scratch)
{ {
FTransform(src, reference, output); FTransform(src, reference, output, scratch);
FTransform(src.Slice(4), reference.Slice(4), output2); FTransform(src.Slice(4), reference.Slice(4), output2, scratch);
} }
public static void FTransform(Span<byte> src, Span<byte> reference, Span<short> output) public static void FTransform(Span<byte> src, Span<byte> reference, Span<short> output, Span<int> scratch)
{ {
int i; int i;
int[] tmp = new int[16]; Span<int> tmp = scratch.Slice(0, 16);
tmp.Clear();
int srcIdx = 0; int srcIdx = 0;
int refIdx = 0; int refIdx = 0;
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
@ -160,9 +160,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
public static void FTransformWht(Span<short> input, Span<short> output) public static void FTransformWht(Span<short> input, Span<short> output, Span<int> scratch)
{ {
int[] tmp = new int[16]; Span<int> tmp = scratch.Slice(0, 16);
tmp.Clear();
int i; int i;
int inputIdx = 0; int inputIdx = 0;
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
@ -234,11 +236,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Left samples are top[-5 .. -2], top_left is top[-1], top are // Left samples are top[-5 .. -2], top_left is top[-1], top are
// located at top[0..3], and top right is top[4..7] // located at top[0..3], and top right is top[4..7]
public static void EncPredLuma4(Span<byte> dst, Span<byte> top, int topOffset) public static void EncPredLuma4(Span<byte> dst, Span<byte> top, int topOffset, Span<byte> vals)
{ {
Dc4(dst.Slice(I4DC4), top, topOffset); Dc4(dst.Slice(I4DC4), top, topOffset);
Tm4(dst.Slice(I4TM4), top, topOffset); Tm4(dst.Slice(I4TM4), top, topOffset);
Ve4(dst.Slice(I4VE4), top, topOffset); Ve4(dst.Slice(I4VE4), top, topOffset, vals);
He4(dst.Slice(I4HE4), top, topOffset); He4(dst.Slice(I4HE4), top, topOffset);
Rd4(dst.Slice(I4RD4), top, topOffset); Rd4(dst.Slice(I4RD4), top, topOffset);
Vr4(dst.Slice(I4VR4), top, topOffset); Vr4(dst.Slice(I4VR4), top, topOffset);
@ -395,20 +397,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
private static void Ve4(Span<byte> dst, Span<byte> top, int topOffset) private static void Ve4(Span<byte> dst, Span<byte> top, int topOffset, Span<byte> vals)
{ {
// vertical // vertical
byte[] vals = vals[0] = LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]);
{ vals[1] = LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]);
LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]), vals[2] = LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]);
LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]), vals[3] = LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4]);
LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]),
LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4])
};
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
vals.AsSpan().CopyTo(dst.Slice(i * WebpConstants.Bps)); vals.CopyTo(dst.Slice(i * WebpConstants.Bps));
} }
} }

23
src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs

@ -8,6 +8,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
internal class Vp8Histogram internal class Vp8Histogram
{ {
/// <summary>
/// Temporary work area (16 ints) used by <c>Vp8FTransform</c>, which clears it at the start of every call.
/// </summary>
private readonly int[] scratch = new int[16];
/// <summary>
/// Receives the 16 transform coefficients of the block currently processed by <c>CollectHistogram</c>;
/// cleared before each block is transformed.
/// </summary>
private readonly short[] output = new short[16];
/// <summary>
/// Per-bin counts of clipped coefficient magnitudes (MaxCoeffThresh + 1 bins), reset at the start of
/// each <c>CollectHistogram</c> call and handed to <c>SetHistogramData</c> at its end.
/// </summary>
private readonly int[] distribution = new int[MaxCoeffThresh + 1];
/// <summary> /// <summary>
/// Size of histogram used by CollectHistogram. /// Size of histogram used by CollectHistogram.
/// </summary> /// </summary>
@ -40,23 +46,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public void CollectHistogram(Span<byte> reference, Span<byte> pred, int startBlock, int endBlock) public void CollectHistogram(Span<byte> reference, Span<byte> pred, int startBlock, int endBlock)
{ {
int j; int j;
int[] distribution = new int[MaxCoeffThresh + 1]; this.distribution.AsSpan().Clear();
for (j = startBlock; j < endBlock; j++) for (j = startBlock; j < endBlock; j++)
{ {
short[] output = new short[16]; this.output.AsSpan().Clear();
this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output);
this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), output);
// Convert coefficients to bin. // Convert coefficients to bin.
for (int k = 0; k < 16; ++k) for (int k = 0; k < 16; ++k)
{ {
int v = Math.Abs(output[k]) >> 3; int v = Math.Abs(this.output[k]) >> 3;
int clippedValue = ClipMax(v, MaxCoeffThresh); int clippedValue = ClipMax(v, MaxCoeffThresh);
++distribution[clippedValue]; ++this.distribution[clippedValue];
} }
} }
this.SetHistogramData(distribution); this.SetHistogramData(this.distribution);
} }
public void Merge(Vp8Histogram other) public void Merge(Vp8Histogram other)
@ -97,7 +102,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
private void Vp8FTransform(Span<byte> src, Span<byte> reference, Span<short> output) private void Vp8FTransform(Span<byte> src, Span<byte> reference, Span<short> output)
{ {
int i; int i;
int[] tmp = new int[16]; Span<int> tmp = this.scratch;
tmp.Clear();
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
{ {
int d0 = src[0] - reference[0]; // 9bit dynamic range ([-255,255]) int d0 = src[0] - reference[0]; // 9bit dynamic range ([-255,255])

18
src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs

@ -1,6 +1,8 @@
// Copyright (c) Six Labors. // Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System;
namespace SixLabors.ImageSharp.Formats.Webp.Lossy namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
/// <summary> /// <summary>
@ -93,6 +95,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// </summary> /// </summary>
public int[,] Derr { get; } public int[,] Derr { get; }
/// <summary>
/// Zeroes the array-backed state of this score: the luma DC/AC level buffers, the UV level buffer,
/// the intra 4x4 modes and the <see cref="Derr"/> entries at [0..1, 0..2].
/// Only these five members are written; every other member keeps its current value.
/// </summary>
public void Clear()
{
    // The one-dimensional buffers can be wiped wholesale through a span.
    this.ModesI4.AsSpan().Clear();
    this.UvLevels.AsSpan().Clear();
    this.YAcLevels.AsSpan().Clear();
    this.YDcLevels.AsSpan().Clear();

    // Derr is two-dimensional, so its 2 x 3 cells are reset row by row.
    for (int row = 0; row < 2; row++)
    {
        this.Derr[row, 0] = 0;
        this.Derr[row, 1] = 0;
        this.Derr[row, 2] = 0;
    }
}
public void InitScore() public void InitScore()
{ {
this.D = 0; this.D = 0;

7
src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs

@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System; using System;
using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Webp.Lossy namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
@ -16,7 +17,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public int CoeffType { get; set; } public int CoeffType { get; set; }
public short[] Coeffs { get; set; } public short[] Coeffs { get; } = new short[16];
public Vp8BandProbas[] Prob { get; set; } public Vp8BandProbas[] Prob { get; set; }
@ -31,6 +32,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.Prob = prob.Coeffs[this.CoeffType]; this.Prob = prob.Coeffs[this.CoeffType];
this.Stats = prob.Stats[this.CoeffType]; this.Stats = prob.Stats[this.CoeffType];
this.Costs = prob.RemappedCosts[this.CoeffType]; this.Costs = prob.RemappedCosts[this.CoeffType];
this.Coeffs.AsSpan().Clear();
} }
public void SetCoeffs(Span<short> coeffs) public void SetCoeffs(Span<short> coeffs)
@ -46,7 +48,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
this.Coeffs = coeffs.Slice(0, 16).ToArray(); coeffs.Slice(0, 16).CopyTo(this.Coeffs);
} }
// Simulate block coding, but only record statistics. // Simulate block coding, but only record statistics.
@ -150,6 +152,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return cost; return cost;
} }
[MethodImpl(InliningOptions.ShortMethod)]
private static int LevelCost(Span<ushort> table, int level) private static int LevelCost(Span<ushort> table, int level)
=> WebpLookupTables.Vp8LevelFixedCosts[level] + table[level > WebpConstants.MaxVariableLevel ? WebpConstants.MaxVariableLevel : level]; => WebpLookupTables.Vp8LevelFixedCosts[level] + table[level > WebpConstants.MaxVariableLevel ? WebpConstants.MaxVariableLevel : level];

30
src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs

@ -34,6 +34,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// </summary> /// </summary>
private readonly Configuration configuration; private readonly Configuration configuration;
/// <summary>
/// Scratch buffer to reduce allocations.
/// </summary>
private readonly int[] scratch = new int[16];
/// <summary>
/// Another scratch buffer to reduce allocations.
/// </summary>
private readonly byte[] scratchBytes = new byte[4];
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="WebpLossyDecoder"/> class. /// Initializes a new instance of the <see cref="WebpLossyDecoder"/> class.
/// </summary> /// </summary>
@ -395,7 +405,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
LossyUtils.TM4(dst, yuv, offset); LossyUtils.TM4(dst, yuv, offset);
break; break;
case 2: case 2:
LossyUtils.VE4(dst, yuv, offset); LossyUtils.VE4(dst, yuv, offset, this.scratchBytes);
break; break;
case 3: case 3:
LossyUtils.HE4(dst, yuv, offset); LossyUtils.HE4(dst, yuv, offset);
@ -420,7 +430,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
break; break;
} }
this.DoTransform(bits, coeffs.AsSpan(n * 16), dst); this.DoTransform(bits, coeffs.AsSpan(n * 16), dst, this.scratch);
} }
} }
else else
@ -456,7 +466,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{ {
for (int n = 0; n < 16; ++n, bits <<= 2) for (int n = 0; n < 16; ++n, bits <<= 2)
{ {
this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n])); this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n]), this.scratch);
} }
} }
} }
@ -496,8 +506,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
break; break;
} }
this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst); this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst, this.scratch);
this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst); this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst, this.scratch);
// Stash away top samples for next block. // Stash away top samples for next block.
if (mby < dec.MbHeight - 1) if (mby < dec.MbHeight - 1)
@ -787,12 +797,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
private void DoTransform(uint bits, Span<short> src, Span<byte> dst) private void DoTransform(uint bits, Span<short> src, Span<byte> dst, Span<int> scratch)
{ {
switch (bits >> 30) switch (bits >> 30)
{ {
case 3: case 3:
LossyUtils.TransformOne(src, dst); LossyUtils.TransformOne(src, dst, scratch);
break; break;
case 2: case 2:
LossyUtils.TransformAc3(src, dst); LossyUtils.TransformAc3(src, dst);
@ -803,7 +813,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
} }
} }
private void DoUVTransform(uint bits, Span<short> src, Span<byte> dst) private void DoUVTransform(uint bits, Span<short> src, Span<byte> dst, Span<int> scratch)
{ {
// any non-zero coeff at all? // any non-zero coeff at all?
if ((bits & 0xff) > 0) if ((bits & 0xff) > 0)
@ -811,7 +821,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// any non-zero AC coefficient? // any non-zero AC coefficient?
if ((bits & 0xaa) > 0) if ((bits & 0xaa) > 0)
{ {
LossyUtils.TransformUv(src, dst); // note we don't use the AC3 variant for U/V. LossyUtils.TransformUv(src, dst, scratch); // note we don't use the AC3 variant for U/V.
} }
else else
{ {
@ -884,7 +894,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
if (nz > 1) if (nz > 1)
{ {
// More than just the DC -> perform the full transform. // More than just the DC -> perform the full transform.
LossyUtils.TransformWht(dc, dst); LossyUtils.TransformWht(dc, dst, this.scratch);
} }
else else
{ {

56
src/ImageSharp/Formats/Webp/WebpCommonUtils.cs

@ -16,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp
/// </summary> /// </summary>
internal static class WebpCommonUtils internal static class WebpCommonUtils
{ {
#if SUPPORTS_RUNTIME_INTRINSICS
// Constant vectors hoisted to static fields so they are created once instead of on every call.

// 256-bit mask that keeps the fourth byte of every 4-byte group and zeroes the other three;
// the AVX2 loop ANDs it with the raw row bytes.
private static readonly Vector256<byte> AlphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
// Every byte set to 0x80; the AVX2 loop compares the twice-packed masked values against it.
// NOTE(review): signed saturating packs turn any masked 32-bit value with the top bit set into 0x80,
// not only 255 << 24 - confirm that alpha values 128..254 are classified as intended.
private static readonly Vector256<byte> All0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
// 128-bit counterpart of AlphaMaskVector256; presumably read by the IsNoneOpaque64Bytes and
// IsNoneOpaque32Bytes helpers, which no longer receive it as an argument - verify against their bodies.
private static readonly Vector128<byte> AlphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
// 128-bit counterpart of All0x80Vector256 (same caveat as above).
private static readonly Vector128<byte> All0x80 = Vector128.Create((byte)0x80).AsByte();
#endif
/// <summary> /// <summary>
/// Checks if the pixel row is not opaque. /// Checks if the pixel row is not opaque.
/// </summary> /// </summary>
@ -27,11 +37,6 @@ namespace SixLabors.ImageSharp.Formats.Webp
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row); ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
var alphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector256<byte> all0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
int i = 0; int i = 0;
int length = (row.Length * 4) - 3; int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes) fixed (byte* src = rowBytes)
@ -42,14 +47,14 @@ namespace SixLabors.ImageSharp.Formats.Webp
Vector256<byte> a1 = Avx.LoadVector256(src + i + 32).AsByte(); Vector256<byte> a1 = Avx.LoadVector256(src + i + 32).AsByte();
Vector256<byte> a2 = Avx.LoadVector256(src + i + 64).AsByte(); Vector256<byte> a2 = Avx.LoadVector256(src + i + 64).AsByte();
Vector256<byte> a3 = Avx.LoadVector256(src + i + 96).AsByte(); Vector256<byte> a3 = Avx.LoadVector256(src + i + 96).AsByte();
Vector256<int> b0 = Avx2.And(a0, alphaMaskVector256).AsInt32(); Vector256<int> b0 = Avx2.And(a0, AlphaMaskVector256).AsInt32();
Vector256<int> b1 = Avx2.And(a1, alphaMaskVector256).AsInt32(); Vector256<int> b1 = Avx2.And(a1, AlphaMaskVector256).AsInt32();
Vector256<int> b2 = Avx2.And(a2, alphaMaskVector256).AsInt32(); Vector256<int> b2 = Avx2.And(a2, AlphaMaskVector256).AsInt32();
Vector256<int> b3 = Avx2.And(a3, alphaMaskVector256).AsInt32(); Vector256<int> b3 = Avx2.And(a3, AlphaMaskVector256).AsInt32();
Vector256<short> c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16(); Vector256<short> c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16();
Vector256<short> c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16(); Vector256<short> c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16();
Vector256<byte> d = Avx2.PackSignedSaturate(c0, c1).AsByte(); Vector256<byte> d = Avx2.PackSignedSaturate(c0, c1).AsByte();
Vector256<byte> bits = Avx2.CompareEqual(d, all0x80Vector256); Vector256<byte> bits = Avx2.CompareEqual(d, All0x80Vector256);
int mask = Avx2.MoveMask(bits); int mask = Avx2.MoveMask(bits);
if (mask != -1) if (mask != -1)
{ {
@ -59,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 64 <= length; i += 64) for (; i + 64 <= length; i += 64)
{ {
if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque64Bytes(src, i))
{ {
return true; return true;
} }
@ -67,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 32 <= length; i += 32) for (; i + 32 <= length; i += 32)
{ {
if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque32Bytes(src, i))
{ {
return true; return true;
} }
@ -85,16 +90,13 @@ namespace SixLabors.ImageSharp.Formats.Webp
else if (Sse2.IsSupported) else if (Sse2.IsSupported)
{ {
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row); ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
int i = 0; int i = 0;
int length = (row.Length * 4) - 3; int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes) fixed (byte* src = rowBytes)
{ {
for (; i + 64 <= length; i += 64) for (; i + 64 <= length; i += 64)
{ {
if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque64Bytes(src, i))
{ {
return true; return true;
} }
@ -102,7 +104,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 32 <= length; i += 32) for (; i + 32 <= length; i += 32)
{ {
if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque32Bytes(src, i))
{ {
return true; return true;
} }
@ -133,20 +135,20 @@ namespace SixLabors.ImageSharp.Formats.Webp
} }
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80) private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i)
{ {
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte(); Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte(); Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte();
Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte(); Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte();
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32(); Vector128<int> b0 = Sse2.And(a0, AlphaMask).AsInt32();
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32(); Vector128<int> b1 = Sse2.And(a1, AlphaMask).AsInt32();
Vector128<int> b2 = Sse2.And(a2, alphaMask).AsInt32(); Vector128<int> b2 = Sse2.And(a2, AlphaMask).AsInt32();
Vector128<int> b3 = Sse2.And(a3, alphaMask).AsInt32(); Vector128<int> b3 = Sse2.And(a3, AlphaMask).AsInt32();
Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16(); Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte(); Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80); Vector128<byte> bits = Sse2.CompareEqual(d, All0x80);
int mask = Sse2.MoveMask(bits); int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF) if (mask != 0xFFFF)
{ {
@ -156,15 +158,15 @@ namespace SixLabors.ImageSharp.Formats.Webp
return false; return false;
} }
private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80) private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i)
{ {
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte(); Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32(); Vector128<int> b0 = Sse2.And(a0, AlphaMask).AsInt32();
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32(); Vector128<int> b1 = Sse2.And(a1, AlphaMask).AsInt32();
Vector128<short> c = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128<short> c = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c, c).AsByte(); Vector128<byte> d = Sse2.PackSignedSaturate(c, c).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80); Vector128<byte> bits = Sse2.CompareEqual(d, All0x80);
int mask = Sse2.MoveMask(bits); int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF) if (mask != 0xFFFF)
{ {

267
src/ImageSharp/Formats/Webp/WebpLookupTables.cs

@ -2,21 +2,13 @@
// Licensed under the Apache License, Version 2.0. // Licensed under the Apache License, Version 2.0.
using System; using System;
using System.Collections.Generic; using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Webp namespace SixLabors.ImageSharp.Formats.Webp
{ {
#pragma warning disable SA1201 // Elements should appear in the correct order #pragma warning disable SA1201 // Elements should appear in the correct order
internal static class WebpLookupTables internal static class WebpLookupTables
{ {
public static readonly Dictionary<int, byte> Abs0;
public static readonly Dictionary<int, byte> Clip1;
public static readonly Dictionary<int, sbyte> Sclip1;
public static readonly Dictionary<int, sbyte> Sclip2;
public static readonly byte[,][] ModesProba = new byte[10, 10][]; public static readonly byte[,][] ModesProba = new byte[10, 10][];
public static readonly ushort[] GammaToLinearTab = new ushort[256]; public static readonly ushort[] GammaToLinearTab = new ushort[256];
@ -54,6 +46,18 @@ namespace SixLabors.ImageSharp.Formats.Webp
8 + (0 * WebpConstants.Bps), 12 + (0 * WebpConstants.Bps), 8 + (4 * WebpConstants.Bps), 12 + (4 * WebpConstants.Bps) // V 8 + (0 * WebpConstants.Bps), 12 + (0 * WebpConstants.Bps), 8 + (4 * WebpConstants.Bps), 12 + (4 * WebpConstants.Bps) // V
}; };
/// <summary>
/// Returns the absolute value of <paramref name="x"/> via a table lookup.
/// </summary>
/// <param name="x">The value to look up. The table covers the range [-255, 255].</param>
/// <returns>The entry of the Abs0 table at index <c>x + 255</c>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static byte Abs0(int x)
{
    // The table starts at x = -255, so shift the input to a zero-based index.
    int tableIndex = x + 255;
    return Abs0Table[tableIndex];
}
/// <summary>
/// Returns <paramref name="x"/> clipped to the range [-128, 127] via a table lookup.
/// </summary>
/// <param name="x">The value to look up. The table is indexed with an offset of 1020.</param>
/// <returns>The entry of the Sclip1 table at index <c>x + 1020</c>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static sbyte Sclip1(int x)
{
    // Shift the input by the table offset to obtain a zero-based index.
    int tableIndex = x + 1020;
    return Sclip1Table[tableIndex];
}
/// <summary>
/// Returns <paramref name="x"/> clipped to the range [-16, 15] via a table lookup.
/// </summary>
/// <param name="x">The value to look up. The table covers the range [-112, 112].</param>
/// <returns>The entry of the Sclip2 table at index <c>x + 112</c>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static sbyte Sclip2(int x)
{
    // The table starts at x = -112, so shift the input to a zero-based index.
    int tableIndex = x + 112;
    return Sclip2Table[tableIndex];
}
/// <summary>
/// Returns <paramref name="x"/> clipped to the range [0, 255] via a table lookup.
/// </summary>
/// <param name="x">The value to look up. The table covers the range [-255, 510].</param>
/// <returns>The entry of the Clip1 table at index <c>x + 255</c>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static byte Clip1(int x)
{
    // The table starts at x = -255, so shift the input to a zero-based index.
    int tableIndex = x + 255;
    return Clip1Table[tableIndex];
}
// fixed costs for coding levels, deduced from the coding tree. // fixed costs for coding levels, deduced from the coding tree.
// This is only the part that doesn't depend on the probability state. // This is only the part that doesn't depend on the probability state.
public static readonly short[] Vp8LevelFixedCosts = public static readonly short[] Vp8LevelFixedCosts =
@ -249,7 +253,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
0 0
}; };
public static readonly byte[] NewRange = // This uses C#'s compiler optimization to refer to assembly's static data directly.
public static ReadOnlySpan<byte> NewRange => new byte[]
{ {
// range = ((range + 1) << kVP8Log2Range[range]) - 1 // range = ((range + 1) << kVP8Log2Range[range]) - 1
127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239, 127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239,
@ -567,7 +572,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
}; };
// Paragraph 14.1 // Paragraph 14.1
public static readonly byte[] DcTable = // This uses C#'s compiler optimization to refer to assembly's static data directly.
public static ReadOnlySpan<byte> DcTable => new byte[]
{ {
4, 5, 6, 7, 8, 9, 10, 10, 4, 5, 6, 7, 8, 9, 10, 10,
11, 12, 13, 14, 15, 16, 17, 17, 11, 12, 13, 14, 15, 16, 17, 17,
@ -1042,7 +1048,8 @@ namespace SixLabors.ImageSharp.Formats.Webp
(17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7), (17, 7),
}; };
public static readonly byte[] PrefixEncodeExtraBitsValue = // This uses C#'s compiler optimization to refer to assembly's static data directly.
public static ReadOnlySpan<byte> PrefixEncodeExtraBitsValue => new byte[]
{ {
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3,
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
@ -1233,33 +1240,223 @@ namespace SixLabors.ImageSharp.Formats.Webp
LinearToGammaTab[v] = (int)((255.0d * Math.Pow(scale * v, 1.0d / WebpConstants.Gamma)) + .5); LinearToGammaTab[v] = (int)((255.0d * Math.Pow(scale * v, 1.0d / WebpConstants.Gamma)) + .5);
} }
Abs0 = new Dictionary<int, byte>(); InitializeModesProbabilities();
for (int i = -255; i <= 255; i++) InitializeFixedCostsI4();
{ }
Abs0[i] = (byte)((i < 0) ? -i : i);
}
Clip1 = new Dictionary<int, byte>(); // This uses C#'s compiler optimization to refer to assembly's static data directly.
for (int i = -255; i <= 255 + 255; i++) private static ReadOnlySpan<byte> Abs0Table => new byte[]
{ {
Clip1[i] = (byte)(i < 0 ? 0 : i > 255 ? 255 : i); 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, 0xef,
} 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8, 0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde,
0xdd, 0xdc, 0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0, 0xcf, 0xce, 0xcd,
0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4, 0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc,
0xbb, 0xba, 0xb9, 0xb8, 0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac, 0xab,
0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0, 0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a,
0x99, 0x98, 0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89,
0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c, 0x7b, 0x7a, 0x79, 0x78,
0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70, 0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67,
0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58, 0x57, 0x56,
0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45,
0x44, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23,
0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12,
0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21,
0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32,
0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43,
0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54,
0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65,
0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9,
0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb,
0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc,
0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed,
0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
0xff
};
Sclip1 = new Dictionary<int, sbyte>(); // This uses C#'s compiler optimization to refer to assembly's static data directly.
for (int i = -1020; i <= 1020; i++) private static ReadOnlySpan<byte> Clip1Table => new byte[]
{ {
Sclip1[i] = (sbyte)(i < -128 ? -128 : i > 127 ? 127 : i); 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
} 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21,
0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32,
0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43,
0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54,
0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65,
0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9,
0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb,
0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc,
0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed,
0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff
};
Sclip2 = new Dictionary<int, sbyte>(); // This uses C#'s compiler optimization to refer to assembly's static data directly.
for (int i = -112; i <= 112; i++) private static ReadOnlySpan<sbyte> Sclip1Table => new sbyte[]
{ {
Sclip2[i] = (sbyte)(i < -16 ? -16 : i > 15 ? 15 : i); -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
} -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -127, -126, -125, -124, -123, -122, -121, -120,
-119, -118, -117, -116, -115, -114, -113, -112, -111, -110, -109, -108, -107, -106, -105, -104, -103,
-102, -101, -100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89, -88, -87, -86, -85, -84, -83,
-82, -81, -80, -79, -78, -77, -76, -75, -74, -73, -72, -71, -70, -69, -68, -67, -66, -65, -64, -63, -62,
-61, -60, -59, -58, -57, -56, -55, -54, -53, -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41,
-40, -39, -38, -37, -36, -35, -34, -33, -32, -31, -30, -29, -28, -27, -26, -25, -24, -23, -22, -21, -20,
-19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127
};
InitializeModesProbabilities(); // This uses C#'s compiler optimization to refer to assembly's static data directly.
InitializeFixedCostsI4(); private static ReadOnlySpan<sbyte> Sclip2Table => new sbyte[]
} {
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -15, -14, -13, -12, -11, -10, -9, -8,
-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15
};
private static void InitializeModesProbabilities() private static void InitializeModesProbabilities()
{ {

30
tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs

@ -132,6 +132,30 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
Assert.Equal(expectedOutput, pixelData); Assert.Equal(expectedOutput, pixelData);
} }
// Calls LosslessUtils.Predictor11 with a fixed left pixel and a two-pixel top row
// and checks the result against a known-good value.
private static void RunPredictor11Test()
{
    // arrange
    uint left = 4294839812;
    uint expectedResult = 4294839812;
    uint[] topData = { 4278258949, 4278258949 };
    short[] scratch = new short[8];

    // act
    unsafe
    {
        fixed (uint* topStart = topData)
        {
            // Point at the second element, leaving one valid pixel before the pointer.
            uint* top = topStart + 1;
            uint actual = LosslessUtils.Predictor11(left, top, scratch);

            // assert
            Assert.Equal(expectedResult, actual);
        }
    }
}
// Runs the Predictor11 check directly in the current process.
[Fact]
public void Predictor11_Works()
{
    RunPredictor11Test();
}
[Fact] [Fact]
public void SubtractGreen_Works() => RunSubtractGreenTest(); public void SubtractGreen_Works() => RunSubtractGreenTest();
@ -145,6 +169,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
public void TransformColorInverse_Works() => RunTransformColorInverseTest(); public void TransformColorInverse_Works() => RunTransformColorInverseTest();
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// Runs the Predictor11 check through the feature test runner with <c>HwIntrinsics.AllowAll</c>.
/// </summary>
[Fact]
public void Predictor11_WithHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.AllowAll);
}
/// <summary>
/// Runs the Predictor11 check through the feature test runner with <c>HwIntrinsics.DisableSSE2</c>.
/// </summary>
[Fact]
public void Predictor11_WithoutSSE2_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2);
}
[Fact] [Fact]
public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll); public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);

6
tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs

@ -90,9 +90,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits); int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits);
int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits);
uint[] transformData = new uint[transformWidth * transformHeight]; uint[] transformData = new uint[transformWidth * transformHeight];
int[] scratch = new int[256];
// act // act
PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData); PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData, scratch);
// assert // assert
Assert.Equal(expectedData, transformData); Assert.Equal(expectedData, transformData);
@ -119,9 +120,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits); int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits);
int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits);
uint[] transformData = new uint[transformWidth * transformHeight]; uint[] transformData = new uint[transformWidth * transformHeight];
int[] scratch = new int[256];
// act // act
PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData); PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData, scratch);
// assert // assert
Assert.Equal(expectedData, transformData); Assert.Equal(expectedData, transformData);

Loading…
Cancel
Save