diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
index abf44127a9..d6ceca5bf5 100644
--- a/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
+++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8BitReader.cs
@@ -142,10 +142,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
[MethodImpl(InliningOptions.ShortMethod)]
public bool ReadBool() => this.ReadValue(1) is 1;
+ [MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits)
{
- Guard.MustBeGreaterThan(nBits, 0, nameof(nBits));
- Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
+ DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
+ DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
uint v = 0;
while (nBits-- > 0)
@@ -156,10 +157,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
return v;
}
+ [MethodImpl(InliningOptions.ShortMethod)]
public int ReadSignedValue(int nBits)
{
- Guard.MustBeGreaterThan(nBits, 0, nameof(nBits));
- Guard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
+ DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
+ DebugGuard.MustBeLessThanOrEqualTo(nBits, 32, nameof(nBits));
int value = (int)this.ReadValue(nBits);
return this.ReadValue(1) != 0 ? -value : value;
diff --git a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
index 601336fa4b..4df2feba81 100644
--- a/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
+++ b/src/ImageSharp/Formats/Webp/BitReader/Vp8LBitReader.cs
@@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
///
private const int Wbits = 32;
- private readonly uint[] bitMask =
+ private static readonly uint[] BitMask =
{
0,
0x000001, 0x000003, 0x000007, 0x00000f,
@@ -125,19 +125,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
///
/// The number of bits to read (should not exceed 16).
/// A ushort value.
+ [MethodImpl(InliningOptions.ShortMethod)]
public uint ReadValue(int nBits)
{
- Guard.MustBeGreaterThan(nBits, 0, nameof(nBits));
+ DebugGuard.MustBeGreaterThan(nBits, 0, nameof(nBits));
if (!this.Eos && nBits <= Vp8LMaxNumBitRead)
{
- ulong val = this.PrefetchBits() & this.bitMask[nBits];
+ ulong val = this.PrefetchBits() & BitMask[nBits];
this.bitPos += nBits;
this.ShiftBytes();
return (uint)val;
}
- this.SetEndOfStream();
return 0;
}
@@ -169,6 +169,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
///
/// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
///
+ [MethodImpl(InliningOptions.ShortMethod)]
public void FillBitWindow()
{
if (this.bitPos >= Wbits)
@@ -181,7 +182,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
/// Returns true if there was an attempt at reading bit past the end of the buffer.
///
/// True, if end of buffer was reached.
- public bool IsEndOfStream() => this.Eos || ((this.pos == this.len) && (this.bitPos > Lbits));
+ [MethodImpl(InliningOptions.ShortMethod)]
+ public bool IsEndOfStream() => this.Eos || (this.pos == this.len && this.bitPos > Lbits);
[MethodImpl(InliningOptions.ShortMethod)]
private void DoFillBitWindow() => this.ShiftBytes();
@@ -189,6 +191,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
///
/// If not at EOS, reload up to Vp8LLbits byte-by-byte.
///
+ [MethodImpl(InliningOptions.ShortMethod)]
private void ShiftBytes()
{
System.Span dataSpan = this.Data.Memory.Span;
@@ -199,17 +202,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.BitReader
++this.pos;
this.bitPos -= 8;
}
-
- if (this.IsEndOfStream())
- {
- this.SetEndOfStream();
- }
- }
-
- private void SetEndOfStream()
- {
- this.Eos = true;
- this.bitPos = 0; // To avoid undefined behaviour with shifts.
}
}
}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
index 70c4efb990..dc546f8ac2 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs
@@ -49,6 +49,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
double bitCostBest = -1;
int cacheBitsInitial = cacheBits;
Vp8LHashChain hashChainBox = null;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1)
{
int cacheBitsTmp = cacheBitsInitial;
@@ -81,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Keep the best backward references.
var histo = new Vp8LHistogram(worst, cacheBitsTmp);
- double bitCost = histo.EstimateBits();
+ double bitCost = histo.EstimateBits(stats, bitsEntropy);
if (lz77TypeBest == 0 || bitCost < bitCostBest)
{
@@ -100,7 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox;
BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst);
var histo = new Vp8LHistogram(worst, cacheBits);
- double bitCostTrace = histo.EstimateBits();
+ double bitCostTrace = histo.EstimateBits(stats, bitsEntropy);
if (bitCostTrace < bitCostBest)
{
best = worst;
@@ -214,9 +216,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i <= cacheBitsMax; i++)
{
- double entropy = histos[i].EstimateBits();
+ double entropy = histos[i].EstimateBits(stats, bitsEntropy);
if (i == 0 || entropy < entropyMin)
{
entropyMin = entropy;
diff --git a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
index f2d4fb189f..5d407d73c1 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs
@@ -152,10 +152,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static int HistogramCopyAndAnalyze(List origHistograms, List histograms, ushort[] histogramSymbols)
{
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int clusterId = 0, i = 0; i < origHistograms.Count; i++)
{
Vp8LHistogram origHistogram = origHistograms[i];
- origHistogram.UpdateHistogramCost();
+ origHistogram.UpdateHistogramCost(stats, bitsEntropy);
// Skip the histogram if it is completely empty, which can happen for tiles with no information (when they are skipped because of LZ77).
if (!origHistogram.IsUsed[0] && !origHistogram.IsUsed[1] && !origHistogram.IsUsed[2] && !origHistogram.IsUsed[3] && !origHistogram.IsUsed[4])
@@ -175,7 +177,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return numUsed;
}
- private static void HistogramCombineEntropyBin(List histograms, ushort[] clusters, ushort[] clusterMappings, Vp8LHistogram curCombo, ushort[] binMap, int numBins, double combineCostFactor)
+ private static void HistogramCombineEntropyBin(
+ List histograms,
+ ushort[] clusters,
+ ushort[] clusterMappings,
+ Vp8LHistogram curCombo,
+ ushort[] binMap,
+ int numBins,
+ double combineCostFactor)
{
var binInfo = new HistogramBinInfo[BinSize];
for (int idx = 0; idx < numBins; idx++)
@@ -191,6 +200,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
var indicesToRemove = new List();
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int idx = 0; idx < histograms.Count; idx++)
{
if (histograms[idx] == null)
@@ -209,7 +220,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Try to merge #idx into #first (both share the same binId)
double bitCost = histograms[idx].BitCost;
double bitCostThresh = -bitCost * combineCostFactor;
- double currCostDiff = histograms[first].AddEval(histograms[idx], bitCostThresh, curCombo);
+ double currCostDiff = histograms[first].AddEval(histograms[idx], stats, bitsEntropy, bitCostThresh, curCombo);
if (currCostDiff < bitCostThresh)
{
@@ -308,6 +319,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int numUsed = histograms.Count(h => h != null);
int outerIters = numUsed;
int numTriesNoSuccess = outerIters / 2;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
if (numUsed < minClusterSize)
{
@@ -354,7 +367,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
idx2 = mappings[idx2];
// Calculate cost reduction on combination.
- double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost);
+ double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost, stats, bitsEntropy);
// Found a better pair?
if (currCost < 0)
@@ -428,7 +441,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (doEval)
{
// Re-evaluate the cost of an updated pair.
- HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], 0.0d, p);
+ HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], stats, bitsEntropy, 0.0d, p);
if (p.CostDiff >= 0.0d)
{
histoPriorityList[j] = histoPriorityList[histoPriorityList.Count - 1];
@@ -456,6 +469,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Priority list of histogram pairs.
var histoPriorityList = new List();
int maxSize = histoSize * histoSize;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
for (int i = 0; i < histoSize; i++)
{
@@ -471,7 +486,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
continue;
}
- HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d);
+ HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d, stats, bitsEntropy);
}
}
@@ -510,7 +525,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
continue;
}
- HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d);
+ HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d, stats, bitsEntropy);
}
}
}
@@ -519,6 +534,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
int inSize = input.Count;
int outSize = output.Count;
+ var stats = new Vp8LStreaks();
+ var bitsEntropy = new Vp8LBitEntropy();
if (outSize > 1)
{
for (int i = 0; i < inSize; i++)
@@ -534,7 +551,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
double bestBits = double.MaxValue;
for (int k = 0; k < outSize; k++)
{
- double curBits = output[k].AddThresh(input[i], bestBits);
+ double curBits = output[k].AddThresh(input[i], stats, bitsEntropy, bestBits);
if (k == 0 || curBits < bestBits)
{
bestBits = curBits;
@@ -577,7 +594,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Create a pair from indices "idx1" and "idx2" provided its cost is inferior to "threshold", a negative entropy.
///
/// The cost of the pair, or 0 if it superior to threshold.
- private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold)
+ private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{
var pair = new HistogramPair();
@@ -598,7 +615,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Vp8LHistogram h1 = histograms[idx1];
Vp8LHistogram h2 = histograms[idx2];
- HistoListUpdatePair(h1, h2, threshold, pair);
+ HistoListUpdatePair(h1, h2, stats, bitsEntropy, threshold, pair);
// Do not even consider the pair if it does not improve the entropy.
if (pair.CostDiff >= threshold)
@@ -616,11 +633,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
/// Update the cost diff and combo of a pair of histograms. This needs to be called when the the histograms have been merged with a third one.
///
- private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, double threshold, HistogramPair pair)
+ private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double threshold, HistogramPair pair)
{
double sumCost = h1.BitCost + h2.BitCost;
pair.CostCombo = 0.0d;
- h1.GetCombinedHistogramEntropy(h2, sumCost + threshold, costInitial: pair.CostCombo, out double cost);
+ h1.GetCombinedHistogramEntropy(h2, stats, bitsEntropy, sumCost + threshold, costInitial: pair.CostCombo, out double cost);
pair.CostCombo = cost;
pair.CostDiff = pair.CostCombo - sumCost;
}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
index cd8be9aac3..0376311ed9 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs
@@ -49,14 +49,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
return -1;
}
- else if (t1.TotalCount < t2.TotalCount)
+
+ if (t1.TotalCount < t2.TotalCount)
{
return 1;
}
- else
- {
- return t1.Value < t2.Value ? -1 : 1;
- }
+
+ return t1.Value < t2.Value ? -1 : 1;
}
public IDeepCloneable DeepClone() => new HuffmanTree(this);
diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
index f2321d6813..3c81f1a22c 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs
@@ -202,9 +202,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
// Build the Huffman tree.
- HuffmanTree[] treeCopy = tree.AsSpan().Slice(0, treeSize).ToArray();
+#if NET5_0_OR_GREATER
+ Span treeSlice = tree.AsSpan(0, treeSize);
+ treeSlice.Sort(HuffmanTree.Compare);
+#else
+ HuffmanTree[] treeCopy = tree.AsSpan(0, treeSize).ToArray();
Array.Sort(treeCopy, HuffmanTree.Compare);
treeCopy.AsSpan().CopyTo(tree);
+#endif
if (treeSize > 1)
{
diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
index b7f94415be..8231464070 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
@@ -27,6 +27,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const double Log2Reciprocal = 1.44269504088896338700465094007086;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static readonly Vector256 AddGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
+
+ private static readonly Vector128 AddGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
+
+ private static readonly byte AddGreenToBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
+
+ private static readonly Vector256 SubtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
+
+ private static readonly Vector128 SubtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
+
+ private static readonly byte SubtractGreenFromBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
+
+ private static readonly Vector128 TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+ private static readonly Vector128 TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
+
+ private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
+
+ private static readonly Vector128 TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+ private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
+#endif
+
///
/// Returns the exact index where array1 and array2 are different. For an index
/// inferior or equal to bestLenMatch, the return value just has to be strictly
@@ -97,7 +121,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
- var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -106,7 +129,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
uint* idx = p + i;
Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte();
- Vector256 in0g0g = Avx2.Shuffle(input, mask);
+ Vector256 in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
Vector256 output = Avx2.Add(input, in0g0g);
Avx.Store((byte*)idx, output);
}
@@ -119,7 +142,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
else if (Ssse3.IsSupported)
{
- var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -128,7 +150,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
uint* idx = p + i;
Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte();
- Vector128 in0g0g = Ssse3.Shuffle(input, mask);
+ Vector128 in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
Vector128 output = Sse2.Add(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
@@ -141,7 +163,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
else if (Sse2.IsSupported)
{
- byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -151,8 +172,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* idx = p + i;
Vector128 input = Sse2.LoadVector128((ushort*)idx);
Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
- Vector128 b = Sse2.ShuffleLow(a, mask);
- Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g
+ Vector128 b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
Vector128 output = Sse2.Add(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
@@ -189,7 +210,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
- var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -198,7 +218,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
uint* idx = p + i;
Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte();
- Vector256 in0g0g = Avx2.Shuffle(input, mask);
+ Vector256 in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
Vector256 output = Avx2.Subtract(input, in0g0g);
Avx.Store((byte*)idx, output);
}
@@ -211,7 +231,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
else if (Ssse3.IsSupported)
{
- var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -220,7 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
uint* idx = p + i;
Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte();
- Vector128 in0g0g = Ssse3.Shuffle(input, mask);
+ Vector128 in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
Vector128 output = Sse2.Subtract(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte());
}
@@ -233,7 +252,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
else if (Sse2.IsSupported)
{
- byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length;
fixed (uint* p = pixelData)
{
@@ -243,8 +261,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* idx = p + i;
Vector128 input = Sse2.LoadVector128((ushort*)idx);
Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
- Vector128 b = Sse2.ShuffleLow(a, mask);
- Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g
+ Vector128 b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output);
}
@@ -394,9 +412,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
- var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
- var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
- byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
fixed (uint* src = data)
{
int idx;
@@ -404,15 +419,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
uint* pos = src + idx;
Vector128 input = Sse2.LoadVector128(pos);
- Vector128 a = Sse2.And(input.AsByte(), maskalphagreen);
- Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask);
- Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask);
+ Vector128 a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
+ Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector128 g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
Vector128 h = Sse2.Add(g.AsByte(), d.AsByte());
- Vector128 i = Sse2.And(h, maskredblue);
+ Vector128 i = Sse2.And(h, TransformColorRedBlueMask);
Vector128 output = Sse2.Subtract(input.AsByte(), i);
Sse2.Store((byte*)pos, output);
}
@@ -460,8 +475,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
- var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
- byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
fixed (uint* src = pixelData)
{
int idx;
@@ -469,9 +482,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
uint* pos = src + idx;
Vector128 input = Sse2.LoadVector128(pos);
- Vector128 a = Sse2.And(input.AsByte(), maskalphagreen);
- Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask);
- Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask);
+ Vector128 a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
+ Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
+ Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128 e = Sse2.Add(input.AsByte(), d.AsByte());
Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);
@@ -551,6 +564,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int mask = tileWidth - 1;
int tilesPerRow = SubSampleSize(width, transform.Bits);
int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow;
+ Span scratch = stackalloc short[8];
while (y < yEnd)
{
int predictorModeIdx = predictorModeIdxBase;
@@ -608,7 +622,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
PredictorAdd10(input + x, output + x - width, xEnd - x, output + x);
break;
case 11:
- PredictorAdd11(input + x, output + x - width, xEnd - x, output + x);
+ PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch);
break;
case 12:
PredictorAdd12(input + x, output + x - width, xEnd - x, output + x);
@@ -704,7 +718,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Compute the combined Shanon's entropy for distribution {X} and {X+Y}.
///
/// Shanon entropy.
- public static float CombinedShannonEntropy(int[] x, int[] y)
+ public static float CombinedShannonEntropy(Span x, Span y)
{
double retVal = 0.0d;
uint sumX = 0, sumXY = 0;
@@ -974,11 +988,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
[MethodImpl(InliningOptions.ShortMethod)]
- private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output)
+ private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span scratch)
{
for (int x = 0; x < numberOfPixels; x++)
{
- uint pred = Predictor11(output[x - 1], upper + x);
+ uint pred = Predictor11(output[x - 1], upper + x, scratch);
output[x] = AddPixels(input[x], pred);
}
}
@@ -1031,7 +1045,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]);
[MethodImpl(InliningOptions.ShortMethod)]
- public static uint Predictor11(uint left, uint* top) => Select(top[0], left, top[-1]);
+ public static uint Predictor11(uint left, uint* top, Span scratch) => Select(top[0], left, top[-1], scratch);
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]);
@@ -1148,11 +1162,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
[MethodImpl(InliningOptions.ShortMethod)]
- public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output)
+ public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span scratch)
{
for (int x = 0; x < numPixels; x++)
{
- uint pred = Predictor11(input[x - 1], upper + x);
+ uint pred = Predictor11(input[x - 1], upper + x, scratch);
output[x] = SubPixels(input[x], pred);
}
}
@@ -1240,14 +1254,54 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static Vector128 MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
#endif
- private static uint Select(uint a, uint b, uint c)
+ /// <summary>
+ /// Selects either <paramref name="a"/> or <paramref name="b"/> by comparing, summed over the four 8-bit channels,
+ /// their absolute differences to <paramref name="c"/>.
+ /// </summary>
+ /// <param name="a">First candidate pixel; returned when the accumulated difference is less than or equal to zero.</param>
+ /// <param name="b">Second candidate pixel; returned otherwise.</param>
+ /// <param name="c">Reference pixel both candidates are compared against.</param>
+ /// <param name="scratch">Caller-provided buffer of at least 8 elements; the SSE2 path overwrites it.</param>
+ /// <returns>Either <paramref name="a"/> or <paramref name="b"/>.</returns>
+ private static uint Select(uint a, uint b, uint c, Span<short> scratch)
{
- int paMinusPb =
- Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
- Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
- Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
- Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
- return paMinusPb <= 0 ? a : b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported)
+ {
+ // The store below writes 8 ushorts through a raw pointer, so the buffer must hold at least that many.
+ DebugGuard.MustBeGreaterThanOrEqualTo(scratch.Length, 8, nameof(scratch));
+ fixed (short* p = scratch)
+ {
+ Vector128<byte> a0 = Sse2.ConvertScalarToVector128UInt32(a).AsByte();
+ Vector128<byte> b0 = Sse2.ConvertScalarToVector128UInt32(b).AsByte();
+ Vector128<byte> c0 = Sse2.ConvertScalarToVector128UInt32(c).AsByte();
+ Vector128<byte> ac0 = Sse2.SubtractSaturate(a0, c0);
+ Vector128<byte> ca0 = Sse2.SubtractSaturate(c0, a0);
+ Vector128<byte> bc0 = Sse2.SubtractSaturate(b0, c0);
+ Vector128<byte> cb0 = Sse2.SubtractSaturate(c0, b0);
+ Vector128<byte> ac = Sse2.Or(ac0, ca0);
+ Vector128<byte> bc = Sse2.Or(bc0, cb0);
+ Vector128<byte> pa = Sse2.UnpackLow(ac, Vector128<byte>.Zero); // |a - c|
+ Vector128<byte> pb = Sse2.UnpackLow(bc, Vector128<byte>.Zero); // |b - c|
+ Vector128<ushort> diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
+ Sse2.Store((ushort*)p, diff);
+ }
+
+ // Only the low four lanes carry channel data; read as short so negative differences keep their sign.
+ int paMinusPb = scratch[0] + scratch[1] + scratch[2] + scratch[3];
+
+ return (paMinusPb <= 0) ? a : b;
+ }
+ else
+#endif
+ {
+ int paMinusPb =
+ Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
+ Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
+ Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
+ Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
+ return paMinusPb <= 0 ? a : b;
+ }
}
[MethodImpl(InliningOptions.ShortMethod)]
diff --git a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
index 2d71a7af64..6cd109121d 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs
@@ -15,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
public uint BgraOrDistance { get; set; }
public static PixOrCopy CreateCacheIdx(int idx) =>
- new PixOrCopy()
+ new()
{
Mode = PixOrCopyMode.CacheIdx,
BgraOrDistance = (uint)idx,
@@ -23,14 +23,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
};
public static PixOrCopy CreateLiteral(uint bgra) =>
- new PixOrCopy()
+ new()
{
Mode = PixOrCopyMode.Literal,
BgraOrDistance = bgra,
Len = 1
};
- public static PixOrCopy CreateCopy(uint distance, ushort len) => new PixOrCopy()
+ public static PixOrCopy CreateCopy(uint distance, ushort len) => new()
{
Mode = PixOrCopyMode.Copy,
BgraOrDistance = distance,
diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
index 671e9a043e..99504dd488 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
@@ -17,6 +17,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
internal static unsafe class PredictorEncoder
{
+ private static readonly sbyte[][] Offset =
+ {
+ new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 }
+ };
+
private const int GreenRedToBlueNumAxis = 8;
private const int GreenRedToBlueMaxIters = 7;
@@ -29,6 +34,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const int PredLowEffort = 11;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static readonly Vector128 CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();
+
+ private static readonly Vector128 CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
+
+ private static readonly Vector128 CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+ private static readonly Vector128 CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
+
+ private static readonly Vector128 CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
+
+ private static readonly Vector128 CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
+
+ private static readonly Vector128 CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
+#endif
+
+ // Returning a ReadOnlySpan over a constant array lets the C# compiler reference the assembly's static data directly instead of allocating an array.
+ private static ReadOnlySpan DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 };
+
///
/// Finds the best predictor for each tile, and converts the image to residuals
/// with respect to predictions. If nearLosslessQuality < 100, applies
@@ -41,6 +65,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span bgra,
Span bgraScratch,
Span image,
+ int[][] histoArgb,
+ int[][] bestHisto,
bool nearLossless,
int nearLosslessQuality,
WebpTransparentColorMode transparentColorMode,
@@ -50,6 +76,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int tilesPerRow = LosslessUtils.SubSampleSize(width, bits);
int tilesPerCol = LosslessUtils.SubSampleSize(height, bits);
int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality);
+ Span scratch = stackalloc short[8];
// TODO: Can we optimize this?
int[][] histo = new int[4][];
@@ -80,11 +107,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
histo,
bgraScratch,
bgra,
+ histoArgb,
+ bestHisto,
maxQuantization,
transparentColorMode,
usedSubtractGreen,
nearLossless,
- image);
+ image,
+ scratch);
image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8));
}
@@ -105,7 +135,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
lowEffort);
}
- public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span bgra, Span image)
+ public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span bgra, Span image, Span scratch)
{
int maxTileSize = 1 << bits;
int tileXSize = LosslessUtils.SubSampleSize(width, bits);
@@ -139,7 +169,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
height,
accumulatedRedHisto,
accumulatedBlueHisto,
- bgra);
+ bgra,
+ scratch);
image[offset] = MultipliersToColorCode(prevX);
CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, bgra);
@@ -188,11 +219,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int[][] accumulated,
Span argbScratch,
Span argb,
+ int[][] histoArgb,
+ int[][] bestHisto,
int maxQuantization,
WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen,
bool nearLossless,
- Span modes)
+ Span modes,
+ Span scratch)
{
const int numPredModes = 14;
int startX = tileX << bits;
@@ -222,21 +256,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
float bestDiff = MaxDiffCost;
int bestMode = 0;
uint[] residuals = new uint[1 << WebpConstants.MaxTransformBits];
- int[][] histoArgb = new int[4][];
- int[][] bestHisto = new int[4][];
for (int i = 0; i < 4; i++)
{
- histoArgb[i] = new int[256];
- bestHisto[i] = new int[256];
+ histoArgb[i].AsSpan().Clear();
+ bestHisto[i].AsSpan().Clear();
}
for (int mode = 0; mode < numPredModes; mode++)
{
- for (int i = 0; i < 4; i++)
- {
- histoArgb[i].AsSpan().Fill(0);
- }
-
if (startY > 0)
{
// Read the row above the tile which will become the first upper_row.
@@ -272,7 +299,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
- GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals);
+ GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals, scratch);
for (int relativeX = 0; relativeX < maxX; ++relativeX)
{
UpdateHisto(histoArgb, residuals[relativeX]);
@@ -300,6 +327,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
bestDiff = curDiff;
bestMode = mode;
}
+
+ for (int i = 0; i < 4; i++)
+ {
+ histoArgb[i].AsSpan().Clear();
+ }
}
for (int i = 0; i < 4; i++)
@@ -333,11 +365,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen,
bool nearLossless,
- Span output)
+ Span output,
+ Span scratch)
{
if (transparentColorMode == WebpTransparentColorMode.Preserve)
{
- PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output);
+ PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output, scratch);
}
else
{
@@ -395,7 +428,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x);
break;
case 11:
- predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x);
+ predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x, scratch);
break;
case 12:
predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x);
@@ -583,6 +616,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span currentMaxDiffs = MemoryMarshal.Cast(currentRow.Slice(width + 1));
Span lowerMaxDiffs = currentMaxDiffs.Slice(width);
+ Span scratch = stackalloc short[8];
for (int y = 0; y < height; y++)
{
Span tmp32 = upperRow;
@@ -593,7 +627,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (lowEffort)
{
- PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width));
+ PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width), scratch);
}
else
{
@@ -634,7 +668,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
transparentColorMode,
usedSubtractGreen,
nearLossless,
- argb.Slice((y * width) + x));
+ argb.Slice((y * width) + x),
+ scratch);
x = xEnd;
}
@@ -649,7 +684,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int numPixels,
Span currentSpan,
Span upperSpan,
- Span outputSpan)
+ Span outputSpan,
+ Span scratch)
{
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* current = currentSpan)
@@ -718,7 +754,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output);
break;
case 11:
- LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output);
+ LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output, scratch);
break;
case 12:
LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output);
@@ -819,7 +855,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
- private static Vp8LMultipliers GetBestColorTransformForTile(int tileX, int tileY, int bits, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int xSize, int ySize, int[] accumulatedRedHisto, int[] accumulatedBlueHisto, Span argb)
+ private static Vp8LMultipliers GetBestColorTransformForTile(
+ int tileX,
+ int tileY,
+ int bits,
+ Vp8LMultipliers prevX,
+ Vp8LMultipliers prevY,
+ int quality,
+ int xSize,
+ int ySize,
+ int[] accumulatedRedHisto,
+ int[] accumulatedBlueHisto,
+ Span argb,
+ Span scratch)
{
int maxTileSize = 1 << bits;
int tileYOffset = tileY * maxTileSize;
@@ -832,18 +880,28 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
var bestTx = default(Vp8LMultipliers);
- GetBestGreenToRed(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx);
+ GetBestGreenToRed(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx);
- GetBestGreenRedToBlue(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx);
+ GetBestGreenRedToBlue(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx);
return bestTx;
}
- private static void GetBestGreenToRed(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedRedHisto, ref Vp8LMultipliers bestTx)
+ private static void GetBestGreenToRed(
+ Span argb,
+ int stride,
+ Span scratch,
+ int tileWidth,
+ int tileHeight,
+ Vp8LMultipliers prevX,
+ Vp8LMultipliers prevY,
+ int quality,
+ int[] accumulatedRedHisto,
+ ref Vp8LMultipliers bestTx)
{
int maxIters = 4 + ((7 * quality) >> 8); // in range [4..6]
int greenToRedBest = 0;
- double bestDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto);
+ double bestDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto);
for (int iter = 0; iter < maxIters; iter++)
{
// ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
@@ -855,7 +913,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
for (int offset = -delta; offset <= delta; offset += 2 * delta)
{
int greenToRedCur = offset + greenToRedBest;
- double curDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto);
+ double curDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto);
if (curDiff < bestDiff)
{
bestDiff = curDiff;
@@ -867,24 +925,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
bestTx.GreenToRed = (byte)(greenToRedBest & 0xff);
}
- private static void GetBestGreenRedToBlue(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx)
+ private static void GetBestGreenRedToBlue(Span argb, int stride, Span scratch, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx)
{
int iters = (quality < 25) ? 1 : (quality > 50) ? GreenRedToBlueMaxIters : 4;
int greenToBlueBest = 0;
int redToBlueBest = 0;
- sbyte[][] offset = { new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } };
- sbyte[] deltaLut = { 16, 16, 8, 4, 2, 2, 2 };
// Initial value at origin:
- double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto);
+ double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto);
for (int iter = 0; iter < iters; iter++)
{
- int delta = deltaLut[iter];
+ int delta = DeltaLut[iter];
for (int axis = 0; axis < GreenRedToBlueNumAxis; axis++)
{
- int greenToBlueCur = (offset[axis][0] * delta) + greenToBlueBest;
- int redToBlueCur = (offset[axis][1] * delta) + redToBlueBest;
- double curDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto);
+ int greenToBlueCur = (Offset[axis][0] * delta) + greenToBlueBest;
+ int redToBlueCur = (Offset[axis][1] * delta) + redToBlueBest;
+ double curDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto);
if (curDiff < bestDiff)
{
bestDiff = curDiff;
@@ -910,9 +966,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
bestTx.RedToBlue = (byte)(redToBlueBest & 0xff);
}
- private static double GetPredictionCostCrossColorRed(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToRed, int[] accumulatedRedHisto)
+ private static double GetPredictionCostCrossColorRed(
+ Span argb,
+ int stride,
+ Span scratch,
+ int tileWidth,
+ int tileHeight,
+ Vp8LMultipliers prevX,
+ Vp8LMultipliers prevY,
+ int greenToRed,
+ int[] accumulatedRedHisto)
{
- int[] histo = new int[256];
+ Span histo = scratch.Slice(0, 256);
+ histo.Clear();
CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo);
double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo);
@@ -937,9 +1003,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return curDiff;
}
- private static double GetPredictionCostCrossColorBlue(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToBlue, int redToBlue, int[] accumulatedBlueHisto)
+ private static double GetPredictionCostCrossColorBlue(
+ Span argb,
+ int stride,
+ Span scratch,
+ int tileWidth,
+ int tileHeight,
+ Vp8LMultipliers prevX,
+ Vp8LMultipliers prevY,
+ int greenToBlue,
+ int redToBlue,
+ int[] accumulatedBlueHisto)
{
- int[] histo = new int[256];
+ Span histo = scratch.Slice(0, 256);
+ histo.Clear();
CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo);
double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo);
@@ -980,15 +1057,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return curDiff;
}
- private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo)
+ private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
{
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
- var maskgreen = Vector128.Create(0x00ff00);
- var mask = Vector128.Create((short)0xff);
-
const int span = 8;
Span values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++)
@@ -1004,15 +1078,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* input1Idx = src + x + (span / 2);
Vector128 input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128 input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
- Vector128 g0 = Sse2.And(input0, maskgreen.AsByte()); // 0 0 | g 0
- Vector128 g1 = Sse2.And(input1, maskgreen.AsByte());
+ Vector128 g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
+ Vector128 g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128 g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128 a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128 a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128 a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128 c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
- Vector128 d = Sse2.And(c, mask.AsByte()); // 0 r'
+ Vector128 d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
Sse2.Store(dst, d.AsUInt16());
for (int i = 0; i < span; i++)
{
@@ -1036,7 +1110,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
- private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo)
+ private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo)
{
int pos = 0;
while (tileHeight-- > 0)
@@ -1051,7 +1125,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
- private static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo)
+ private static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
@@ -1060,12 +1134,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span values = stackalloc ushort[span];
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
- var maskgreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
- var maskgreenblue = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
- var maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
- var shufflerLow = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
- var shufflerHigh = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
-
for (int y = 0; y < tileHeight; y++)
{
Span srcSpan = bgra.Slice(y * stride);
@@ -1079,18 +1147,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* input1Idx = src + x + (span / 2);
Vector128 input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128 input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
- Vector128 r0 = Ssse3.Shuffle(input0, shufflerLow);
- Vector128 r1 = Ssse3.Shuffle(input1, shufflerHigh);
+ Vector128 r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
+ Vector128 r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128 r = Sse2.Or(r0, r1);
- Vector128 gb0 = Sse2.And(input0, maskgreenblue);
- Vector128 gb1 = Sse2.And(input1, maskgreenblue);
+ Vector128 gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
+ Vector128 gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128 gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
- Vector128 g = Sse2.And(gb.AsByte(), maskgreen);
+ Vector128 g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128 a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128 c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128 d = Sse2.Subtract(c, a.AsByte());
- Vector128 e = Sse2.And(d, maskblue);
+ Vector128 e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
Sse2.Store(dst, e.AsUInt16());
for (int i = 0; i < span; i++)
{
@@ -1114,7 +1182,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
- private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo)
+ private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo)
{
int pos = 0;
while (tileHeight-- > 0)
@@ -1143,7 +1211,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
[MethodImpl(InliningOptions.ShortMethod)]
- private static double PredictionCostCrossColor(int[] accumulated, int[] counts)
+ private static double PredictionCostCrossColor(int[] accumulated, Span counts)
{
// Favor low entropy, locally and globally.
// Favor small absolute values for PredictionCostSpatial.
@@ -1152,7 +1220,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
[MethodImpl(InliningOptions.ShortMethod)]
- private static float PredictionCostSpatial(int[] counts, int weight0, double expVal)
+ private static float PredictionCostSpatial(Span counts, int weight0, double expVal)
{
int significantSymbols = 256 >> 4;
double expDecayFactor = 0.6;
diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
index 5e5443a2ba..4faa716495 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs
@@ -19,6 +19,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
///
internal class Vp8LEncoder : IDisposable
{
+ /// <summary>
+ /// Scratch buffer to reduce allocations.
+ /// </summary>
+ private readonly int[] scratch = new int[256];
+
+ private readonly int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] };
+
+ private readonly int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] };
+
/// <summary>
/// The <see cref="MemoryAllocator"/> to use for buffer allocations.
/// </summary>
@@ -128,6 +137,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
+ // The C# compiler embeds this constant array in the assembly's static data, so the getter returns a span over it without allocating.
+ private static ReadOnlySpan Order => new byte[] { 1, 2, 0, 3 };
+
/// <summary>
/// Gets the memory for the image data as packed bgra values.
/// </summary>
@@ -676,6 +688,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
this.EncodedData.GetSpan(),
this.BgraScratch.GetSpan(),
this.TransformData.GetSpan(),
+ this.histoArgb,
+ this.bestHisto,
this.nearLossless,
nearLosslessStrength,
this.transparentColorMode,
@@ -695,7 +709,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int transformWidth = LosslessUtils.SubSampleSize(width, colorTransformBits);
int transformHeight = LosslessUtils.SubSampleSize(height, colorTransformBits);
- PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan());
+ PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan(), this.scratch);
this.bitWriter.PutBits(WebpConstants.TransformPresent, 1);
this.bitWriter.PutBits((uint)Vp8LTransformType.CrossColorTransform, 2);
@@ -737,7 +751,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
var histogramImage = new List()
{
- new Vp8LHistogram(cacheBits)
+ new(cacheBits)
};
// Build histogram image and symbols from backward references.
@@ -781,7 +795,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private void StoreHuffmanCode(HuffmanTree[] huffTree, HuffmanTreeToken[] tokens, HuffmanTreeCode huffmanCode)
{
int count = 0;
- int[] symbols = { 0, 0 };
+ Span symbols = this.scratch.AsSpan(0, 2);
+ symbols.Clear();
int maxBits = 8;
int maxSymbol = 1 << maxBits;
@@ -974,10 +989,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (v.IsLiteral())
{
- byte[] order = { 1, 2, 0, 3 };
for (int k = 0; k < 4; k++)
{
- int code = (int)v.Literal(order[k]);
+ int code = (int)v.Literal(Order[k]);
this.bitWriter.WriteHuffmanCode(codes[k], code);
}
}
@@ -1093,9 +1107,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
histo[(int)HistoIx.HistoBluePred * 256]++;
histo[(int)HistoIx.HistoAlphaPred * 256]++;
+ var bitEntropy = new Vp8LBitEntropy();
for (int j = 0; j < (int)HistoIx.HistoTotal; j++)
{
- var bitEntropy = new Vp8LBitEntropy();
+ bitEntropy.Init();
Span curHisto = histo.Slice(j * 256, 256);
bitEntropy.BitsEntropyUnrefined(curHisto, 256);
entropyComp[j] = bitEntropy.BitsEntropyRefine();
@@ -1191,9 +1206,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return false;
}
+#if NET5_0_OR_GREATER
+ var paletteSlice = palette.Slice(0, this.PaletteSize);
+ paletteSlice.Sort();
+#else
uint[] paletteArray = palette.Slice(0, this.PaletteSize).ToArray();
Array.Sort(paletteArray);
paletteArray.CopyTo(palette);
+#endif
if (PaletteHasNonMonotonousDeltas(palette, this.PaletteSize))
{
@@ -1448,7 +1468,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
return mid;
}
- else if (sorted[mid] < color)
+
+ if (sorted[mid] < color)
{
low = mid;
}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
index 42260e2b25..8b02015687 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
@@ -157,29 +157,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Estimate how many bits the combined entropy of literals and distance approximately maps to.
/// </summary>
/// <returns>Estimated bits.</returns>
- public double EstimateBits()
+ public double EstimateBits(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{
uint notUsed = 0;
return
- PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0])
- + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1])
- + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2])
- + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3])
- + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4])
+ PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0], stats, bitsEntropy)
+ + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1], stats, bitsEntropy)
+ + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2], stats, bitsEntropy)
+ + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3], stats, bitsEntropy)
+ + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy)
+ ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes)
+ ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
}
- public void UpdateHistogramCost()
+ public void UpdateHistogramCost(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy)
{
uint alphaSym = 0, redSym = 0, blueSym = 0;
uint notUsed = 0;
- double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3]);
- double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4]) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
+
+ double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3], stats, bitsEntropy);
+ double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes);
int numCodes = this.NumCodes();
- this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0]) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
- this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1]);
- this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2]);
+ this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0], stats, bitsEntropy) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
+ this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1], stats, bitsEntropy);
+ this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2], stats, bitsEntropy);
this.BitCost = this.LiteralCost + this.RedCost + this.BlueCost + alphaCost + distanceCost;
if ((alphaSym | redSym | blueSym) == NonTrivialSym)
{
@@ -198,11 +199,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// Since the previous score passed is 'costThreshold', we only need to compare
/// the partial cost against 'costThreshold + C(a) + C(b)' to possibly bail-out early.
/// </summary>
- public double AddEval(Vp8LHistogram b, double costThreshold, Vp8LHistogram output)
+ public double AddEval(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold, Vp8LHistogram output)
{
double sumCost = this.BitCost + b.BitCost;
costThreshold += sumCost;
- if (this.GetCombinedHistogramEntropy(b, costThreshold, costInitial: 0, out double cost))
+ if (this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial: 0, out double cost))
{
this.Add(b, output);
output.BitCost = cost;
@@ -212,10 +213,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return cost - sumCost;
}
- public double AddThresh(Vp8LHistogram b, double costThreshold)
+ public double AddThresh(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold)
{
double costInitial = -this.BitCost;
- this.GetCombinedHistogramEntropy(b, costThreshold, costInitial, out double cost);
+ this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial, out double cost);
return cost;
}
@@ -239,12 +240,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
: NonTrivialSym;
}
- public bool GetCombinedHistogramEntropy(Vp8LHistogram b, double costThreshold, double costInitial, out double cost)
+ public bool GetCombinedHistogramEntropy(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy, double costThreshold, double costInitial, out double cost)
{
bool trivialAtEnd = false;
cost = costInitial;
- cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false);
+ cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false, stats, bitEntropy);
cost += ExtraCostCombined(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), b.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes);
@@ -267,25 +268,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
- cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd);
+ cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold)
{
return false;
}
- cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd);
+ cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold)
{
return false;
}
- cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd);
+ cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd, stats, bitEntropy);
if (cost > costThreshold)
{
return false;
}
- cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false);
+ cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false, stats, bitEntropy);
if (cost > costThreshold)
{
return false;
@@ -415,9 +416,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
- private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd)
+ private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy)
{
- var stats = new Vp8LStreaks();
+ stats.Clear();
+ bitEntropy.Init();
if (trivialAtEnd)
{
// This configuration is due to palettization that transforms an indexed
@@ -435,7 +437,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return stats.FinalHuffmanCost();
}
- var bitEntropy = new Vp8LBitEntropy();
if (isXUsed)
{
if (isYUsed)
@@ -479,10 +480,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// <summary>
/// Get the symbol entropy for the distribution 'population'.
/// </summary>
- private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed)
+ private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy)
{
- var bitEntropy = new Vp8LBitEntropy();
- var stats = new Vp8LStreaks();
+ bitEntropy.Init();
+ stats.Clear();
bitEntropy.BitsEntropyUnrefined(population, length, stats);
trivialSym = (bitEntropy.NoneZeros == 1) ? bitEntropy.NoneZeroCode : NonTrivialSym;
diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs
index 27ddcfd434..df9f064426 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs
@@ -1,6 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
+using System;
+
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal class Vp8LStreaks
@@ -28,6 +30,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
/// </summary>
public int[][] Streaks { get; }
+ public void Clear()
+ {
+ this.Counts.AsSpan().Clear();
+ this.Streaks[0].AsSpan().Clear();
+ this.Streaks[1].AsSpan().Clear();
+ }
+
public double FinalHuffmanCost()
{
// The constants in this function are experimental and got rounded from
diff --git a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
index 3e06a56423..cafccd0982 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs
@@ -418,6 +418,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
var huffmanTables = new HuffmanCode[numHTreeGroups * tableSize];
var hTreeGroups = new HTreeGroup[numHTreeGroups];
Span huffmanTable = huffmanTables.AsSpan();
+ int[] codeLengths = new int[maxAlphabetSize];
for (int i = 0; i < numHTreeGroupsMax; i++)
{
hTreeGroups[i] = new HTreeGroup(HuffmanUtils.HuffmanPackedTableSize);
@@ -425,7 +426,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int totalSize = 0;
bool isTrivialLiteral = true;
int maxBits = 0;
- int[] codeLengths = new int[maxAlphabetSize];
+ codeLengths.AsSpan().Clear();
for (int j = 0; j < WebpConstants.HuffmanCodesPerMetaCode; j++)
{
int alphabetSize = WebpConstants.AlphabetSize[j];
diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
index 1584237b0c..d5db3dffa5 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
@@ -58,14 +58,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
[MethodImpl(InliningOptions.ShortMethod)]
- public static int Vp8Disto16X16(Span a, Span b, Span w)
+ public static int Vp8Disto16X16(Span a, Span b, Span w, Span scratch)
{
int d = 0;
for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps)
{
for (int x = 0; x < 16; x += 4)
{
- d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w);
+ d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w, scratch);
}
}
@@ -73,10 +73,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
[MethodImpl(InliningOptions.ShortMethod)]
- public static int Vp8Disto4X4(Span a, Span b, Span w)
+ public static int Vp8Disto4X4(Span a, Span b, Span w, Span scratch)
{
- int sum1 = TTransform(a, w);
- int sum2 = TTransform(b, w);
+ int sum1 = TTransform(a, w, scratch);
+ int sum2 = TTransform(b, w, scratch);
return Math.Abs(sum2 - sum1) >> 5;
}
@@ -252,18 +252,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
[MethodImpl(InliningOptions.ShortMethod)]
public static void TM4(Span dst, Span yuv, int offset) => TrueMotion(dst, yuv, offset, 4);
- public static void VE4(Span dst, Span yuv, int offset)
+ public static void VE4(Span dst, Span yuv, int offset, Span vals)
{
// vertical
int topOffset = offset - WebpConstants.Bps;
- byte[] vals =
- {
- Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]),
- Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]),
- Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]),
- Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4])
- };
-
+ vals[0] = Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]);
+ vals[1] = Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]);
+ vals[2] = Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]);
+ vals[3] = Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4]);
int endIdx = 4 * WebpConstants.Bps;
for (int i = 0; i < endIdx; i += WebpConstants.Bps)
{
@@ -504,9 +500,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// <summary>
/// Paragraph 14.3: Implementation of the Walsh-Hadamard transform inversion.
/// </summary>
- public static void TransformWht(Span input, Span output)
+ public static void TransformWht(Span input, Span output, Span scratch)
{
- int[] tmp = new int[16];
+ Span tmp = scratch.Slice(0, 16);
+ tmp.Clear();
for (int i = 0; i < 4; i++)
{
int iPlus4 = 4 + i;
@@ -544,10 +541,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// Returns the weighted sum of the absolute value of transformed coefficients.
/// w[] contains a row-major 4 by 4 symmetric matrix.
///
- public static int TTransform(Span input, Span w)
+ public static int TTransform(Span input, Span w, Span scratch)
{
int sum = 0;
- int[] tmp = new int[16];
+ Span tmp = scratch.Slice(0, 16);
+ tmp.Clear();
// horizontal pass.
int inputOffset = 0;
@@ -591,15 +589,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return sum;
}
- public static void TransformTwo(Span src, Span dst)
+ public static void TransformTwo(Span src, Span dst, Span scratch)
{
- TransformOne(src, dst);
- TransformOne(src.Slice(16), dst.Slice(4));
+ TransformOne(src, dst, scratch);
+ TransformOne(src.Slice(16), dst.Slice(4), scratch);
}
- public static void TransformOne(Span src, Span dst)
+ public static void TransformOne(Span src, Span dst, Span scratch)
{
- Span tmp = stackalloc int[4 * 4];
+ Span tmp = scratch.Slice(0, 16);
+ tmp.Clear();
int tmpOffset = 0;
for (int srcOffset = 0; srcOffset < 4; srcOffset++)
{
@@ -671,10 +670,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Store2(dst, 3, a - d4, d1, c1);
}
- public static void TransformUv(Span src, Span dst)
+ public static void TransformUv(Span src, Span dst, Span scratch)
{
- TransformTwo(src.Slice(0 * 16), dst);
- TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps));
+ TransformTwo(src.Slice(0 * 16), dst, scratch);
+ TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps), scratch);
}
public static void TransformDcuv(Span src, Span dst)
@@ -934,11 +933,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int p0 = p[offset - step];
int q0 = p[offset];
int q1 = p[offset + step];
- int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1];
- int a1 = WebpLookupTables.Sclip2[(a + 4) >> 3];
- int a2 = WebpLookupTables.Sclip2[(a + 3) >> 3];
- p[offset - step] = WebpLookupTables.Clip1[p0 + a2];
- p[offset] = WebpLookupTables.Clip1[q0 - a1];
+ int a = (3 * (q0 - p0)) + WebpLookupTables.Sclip1(p1 - q1);
+ int a1 = WebpLookupTables.Sclip2((a + 4) >> 3);
+ int a2 = WebpLookupTables.Sclip2((a + 3) >> 3);
+ p[offset - step] = WebpLookupTables.Clip1(p0 + a2);
+ p[offset] = WebpLookupTables.Clip1(q0 - a1);
}
private static void DoFilter4(Span p, int offset, int step)
@@ -950,13 +949,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q0 = p[offset];
int q1 = p[offset + step];
int a = 3 * (q0 - p0);
- int a1 = WebpLookupTables.Sclip2[(a + 4) >> 3];
- int a2 = WebpLookupTables.Sclip2[(a + 3) >> 3];
+ int a1 = WebpLookupTables.Sclip2((a + 4) >> 3);
+ int a2 = WebpLookupTables.Sclip2((a + 3) >> 3);
int a3 = (a1 + 1) >> 1;
- p[offsetMinus2Step] = WebpLookupTables.Clip1[p1 + a3];
- p[offset - step] = WebpLookupTables.Clip1[p0 + a2];
- p[offset] = WebpLookupTables.Clip1[q0 - a1];
- p[offset + step] = WebpLookupTables.Clip1[q1 - a3];
+ p[offsetMinus2Step] = WebpLookupTables.Clip1(p1 + a3);
+ p[offset - step] = WebpLookupTables.Clip1(p0 + a2);
+ p[offset] = WebpLookupTables.Clip1(q0 - a1);
+ p[offset + step] = WebpLookupTables.Clip1(q1 - a3);
}
private static void DoFilter6(Span p, int offset, int step)
@@ -971,18 +970,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q0 = p[offset];
int q1 = p[offset + step];
int q2 = p[offset + step2];
- int a = WebpLookupTables.Sclip1[(3 * (q0 - p0)) + WebpLookupTables.Sclip1[p1 - q1]];
+ int a = WebpLookupTables.Sclip1((3 * (q0 - p0)) + WebpLookupTables.Sclip1(p1 - q1));
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
int a1 = ((27 * a) + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
int a2 = ((18 * a) + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
int a3 = ((9 * a) + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
- p[offset - step3] = WebpLookupTables.Clip1[p2 + a3];
- p[offset - step2] = WebpLookupTables.Clip1[p1 + a2];
- p[offsetMinusStep] = WebpLookupTables.Clip1[p0 + a1];
- p[offset] = WebpLookupTables.Clip1[q0 - a1];
- p[offset + step] = WebpLookupTables.Clip1[q1 - a2];
- p[offset + step2] = WebpLookupTables.Clip1[q2 - a3];
+ p[offset - step3] = WebpLookupTables.Clip1(p2 + a3);
+ p[offset - step2] = WebpLookupTables.Clip1(p1 + a2);
+ p[offsetMinusStep] = WebpLookupTables.Clip1(p0 + a1);
+ p[offset] = WebpLookupTables.Clip1(q0 - a1);
+ p[offset + step] = WebpLookupTables.Clip1(q1 - a2);
+ p[offset + step2] = WebpLookupTables.Clip1(q2 - a3);
}
[MethodImpl(InliningOptions.ShortMethod)]
@@ -992,7 +991,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int p0 = p[offset - step];
int q0 = p[offset];
int q1 = p[offset + step];
- return (4 * WebpLookupTables.Abs0[p0 - q0]) + WebpLookupTables.Abs0[p1 - q1] <= t;
+ return (4 * WebpLookupTables.Abs0(p0 - q0)) + WebpLookupTables.Abs0(p1 - q1) <= t;
}
private static bool NeedsFilter2(Span p, int offset, int step, int t, int it)
@@ -1007,14 +1006,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int q1 = p[offset + step];
int q2 = p[offset + step2];
int q3 = p[offset + step3];
- if ((4 * WebpLookupTables.Abs0[p0 - q0]) + WebpLookupTables.Abs0[p1 - q1] > t)
+ if ((4 * WebpLookupTables.Abs0(p0 - q0)) + WebpLookupTables.Abs0(p1 - q1) > t)
{
return false;
}
- return WebpLookupTables.Abs0[p3 - p2] <= it && WebpLookupTables.Abs0[p2 - p1] <= it &&
- WebpLookupTables.Abs0[p1 - p0] <= it && WebpLookupTables.Abs0[q3 - q2] <= it &&
- WebpLookupTables.Abs0[q2 - q1] <= it && WebpLookupTables.Abs0[q1 - q0] <= it;
+ return WebpLookupTables.Abs0(p3 - p2) <= it && WebpLookupTables.Abs0(p2 - p1) <= it &&
+ WebpLookupTables.Abs0(p1 - p0) <= it && WebpLookupTables.Abs0(q3 - q2) <= it &&
+ WebpLookupTables.Abs0(q2 - q1) <= it && WebpLookupTables.Abs0(q1 - q0) <= it;
}
[MethodImpl(InliningOptions.ShortMethod)]
@@ -1024,7 +1023,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int p0 = p[offset - step];
int q0 = p[offset];
int q1 = p[offset + step];
- return WebpLookupTables.Abs0[p1 - p0] > thresh || WebpLookupTables.Abs0[q1 - q0] > thresh;
+ return WebpLookupTables.Abs0(p1 - p0) > thresh || WebpLookupTables.Abs0(q1 - q0) > thresh;
}
[MethodImpl(InliningOptions.ShortMethod)]
diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
index 2ed4381660..18d7494f0f 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs
@@ -31,7 +31,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int lambda = dqm.LambdaI16;
int tlambda = dqm.TLambda;
Span src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
+ Span scratch = it.Scratch3;
var rdTmp = new Vp8ModeScore();
+ var res = new Vp8Residual();
Vp8ModeScore rdCur = rdTmp;
Vp8ModeScore rdBest = rd;
int mode;
@@ -39,7 +41,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rd.ModeI16 = -1;
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{
- // scratch buffer.
+ // Scratch buffer.
Span tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc);
rdCur.ModeI16 = mode;
@@ -48,9 +50,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Measure RD-score.
rdCur.D = LossyUtils.Vp8Sse16X16(src, tmpDst);
- rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY)) : 0;
+ rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0;
rdCur.H = WebpConstants.Vp8FixedCostsI16[mode];
- rdCur.R = it.GetCostLuma16(rdCur, proba);
+ rdCur.R = it.GetCostLuma16(rdCur, proba, res);
if (isFlat)
{
@@ -101,6 +103,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int tlambda = dqm.TLambda;
Span src0 = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
Span bestBlocks = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc);
+ Span scratch = it.Scratch3;
int totalHeaderBits = 0;
var rdBest = new Vp8ModeScore();
@@ -113,31 +116,35 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rdBest.H = 211; // '211' is the value of VP8BitCost(0, 145)
rdBest.SetRdScore(dqm.LambdaMode);
it.StartI4();
+ var rdi4 = new Vp8ModeScore();
+ var rdTmp = new Vp8ModeScore();
+ var res = new Vp8Residual();
+ Span tmpLevels = new short[16];
do
{
int numBlocks = 1;
- var rdi4 = new Vp8ModeScore();
+ rdi4.Clear();
int mode;
int bestMode = -1;
Span src = src0.Slice(WebpLookupTables.Vp8Scan[it.I4]);
short[] modeCosts = it.GetCostModeI4(rd.ModesI4);
Span bestBlock = bestBlocks.Slice(WebpLookupTables.Vp8Scan[it.I4]);
Span tmpDst = it.Scratch.AsSpan();
- tmpDst.Fill(0);
+ tmpDst.Clear();
rdi4.InitScore();
it.MakeIntra4Preds();
for (mode = 0; mode < WebpConstants.NumBModes; ++mode)
{
- var rdTmp = new Vp8ModeScore();
- short[] tmpLevels = new short[16];
+ rdTmp.Clear();
+ tmpLevels.Clear();
// Reconstruct.
rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode);
// Compute RD-score.
rdTmp.D = LossyUtils.Vp8Sse4X4(src, tmpDst);
- rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY)) : 0;
+ rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0;
rdTmp.H = modeCosts[mode];
// Add flatness penalty, to avoid flat area to be mispredicted by a complex mode.
@@ -150,15 +157,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rdTmp.R = 0;
}
- // early-out check.
+ // Early-out check.
rdTmp.SetRdScore(lambda);
if (bestMode >= 0 && rdTmp.Score >= rdi4.Score)
{
continue;
}
- // finish computing score.
- rdTmp.R += it.GetCostLuma4(tmpLevels, proba);
+ // Finish computing score.
+ rdTmp.R += it.GetCostLuma4(tmpLevels, proba, res);
rdTmp.SetRdScore(lambda);
if (bestMode < 0 || rdTmp.Score < rdi4.Score)
@@ -213,13 +220,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span dst0 = it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc);
Span dst = dst0;
var rdBest = new Vp8ModeScore();
+ var rdUv = new Vp8ModeScore();
+ var res = new Vp8Residual();
int mode;
rd.ModeUv = -1;
rdBest.InitScore();
for (mode = 0; mode < WebpConstants.NumPredModes; ++mode)
{
- var rdUv = new Vp8ModeScore();
+ rdUv.Clear();
// Reconstruct
rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode);
@@ -228,7 +237,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
rdUv.D = LossyUtils.Vp8Sse16X8(src, tmpDst);
rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas.
rdUv.H = WebpConstants.Vp8FixedCostsUv[mode];
- rdUv.R = it.GetCostUv(rdUv, proba);
+ rdUv.R = it.GetCostUv(rdUv, proba, res);
if (mode > 0 && IsFlat(rdUv.UvLevels, numBlocks, WebpConstants.FlatnessLimitIUv))
{
rdUv.R += WebpConstants.FlatnessPenality * numBlocks;
@@ -271,16 +280,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc);
int nz = 0;
int n;
- short[] dcTmp = new short[16];
- short[] tmp = new short[16 * 16];
- Span tmpSpan = tmp.AsSpan();
+ Span shortScratchSpan = it.Scratch2.AsSpan();
+ Span scratch = it.Scratch3.AsSpan(0, 16);
+ shortScratchSpan.Clear();
+ scratch.Clear();
+ Span dcTmp = shortScratchSpan.Slice(0, 16);
+ Span tmp = shortScratchSpan.Slice(16, 16 * 16);
for (n = 0; n < 16; n += 2)
{
- Vp8Encoding.FTransform2(src.Slice(WebpLookupTables.Vp8Scan[n]), reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 16), tmpSpan.Slice((n + 1) * 16, 16));
+ Vp8Encoding.FTransform2(
+ src.Slice(WebpLookupTables.Vp8Scan[n]),
+ reference.Slice(WebpLookupTables.Vp8Scan[n]),
+ tmp.Slice(n * 16, 16),
+ tmp.Slice((n + 1) * 16, 16),
+ scratch);
}
- Vp8Encoding.FTransformWht(tmp, dcTmp);
+ Vp8Encoding.FTransformWht(tmp, dcTmp, scratch);
nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24;
for (n = 0; n < 16; n += 2)
@@ -288,14 +305,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Zero-out the first coeff, so that: a) nz is correct below, and
// b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
tmp[n * 16] = tmp[(n + 1) * 16] = 0;
- nz |= Quantize2Blocks(tmpSpan.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n;
+ nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n;
}
// Transform back.
- LossyUtils.TransformWht(dcTmp, tmpSpan);
+ LossyUtils.TransformWht(dcTmp, tmp, scratch);
for (n = 0; n < 16; n += 2)
{
- Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true);
+ Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true, scratch);
}
return nz;
@@ -304,10 +321,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span levels, Span src, Span yuvOut, int mode)
{
Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]);
- short[] tmp = new short[16];
- Vp8Encoding.FTransform(src, reference, tmp);
+ Span tmp = it.Scratch2.AsSpan(0, 16);
+ Span scratch = it.Scratch3.AsSpan(0, 16);
+ tmp.Clear();
+ scratch.Clear();
+ Vp8Encoding.FTransform(src, reference, tmp, scratch);
int nz = QuantizeBlock(tmp, levels, dqm.Y1);
- Vp8Encoding.ITransform(reference, tmp, yuvOut, false);
+ Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch);
return nz;
}
@@ -318,27 +338,31 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Span src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc);
int nz = 0;
int n;
- short[] tmp = new short[8 * 16];
+ Span tmp = it.Scratch2.AsSpan(0, 8 * 16);
+ Span scratch = it.Scratch3.AsSpan(0, 16);
+ tmp.Clear();
+ scratch.Clear();
for (n = 0; n < 8; n += 2)
{
Vp8Encoding.FTransform2(
src.Slice(WebpLookupTables.Vp8ScanUv[n]),
reference.Slice(WebpLookupTables.Vp8ScanUv[n]),
- tmp.AsSpan(n * 16, 16),
- tmp.AsSpan((n + 1) * 16, 16));
+ tmp.Slice(n * 16, 16),
+ tmp.Slice((n + 1) * 16, 16),
+ scratch);
}
CorrectDcValues(it, dqm.Uv, tmp, rd);
for (n = 0; n < 8; n += 2)
{
- nz |= Quantize2Blocks(tmp.AsSpan(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n;
+ nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n;
}
for (n = 0; n < 8; n += 2)
{
- Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.AsSpan(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true);
+ Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true, scratch);
}
return nz << 16;
@@ -556,7 +580,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return (sign ? -v0 : v0) >> DSCALE;
}
- public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, short[] tmp, Vp8ModeScore rd)
+ public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, Span tmp, Vp8ModeScore rd)
{
#pragma warning disable SA1005 // Single line comments should begin with single space
// | top[0] | top[1]
@@ -571,7 +595,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
Span top = it.TopDerr.AsSpan((it.X * 4) + ch, 2);
Span left = it.LeftDerr.AsSpan(ch, 2);
- Span c = tmp.AsSpan(ch * 4 * 16, 4 * 16);
+ Span c = tmp.Slice(ch * 4 * 16, 4 * 16);
c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE));
int err0 = QuantizeSingle(c, mtx);
c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE));
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
index ca3f8481e2..79fd8d8543 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
@@ -81,6 +81,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.I4Boundary = new byte[37];
this.BitCount = new long[4, 3];
this.Scratch = new byte[WebpConstants.Bps * 16];
+ this.Scratch2 = new short[17 * 16];
+ this.Scratch3 = new int[16];
// To match the C initial values of the reference implementation, initialize all with 204.
byte defaultInitVal = 204;
@@ -216,10 +218,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public int CountDown { get; set; }
/// <summary>
- /// Gets the scratch buffer.
+ /// Gets the byte scratch buffer.
/// </summary>
public byte[] Scratch { get; }
+ /// <summary>
+ /// Gets the short scratch buffer.
+ /// </summary>
+ public short[] Scratch2 { get; }
+
+ /// <summary>
+ /// Gets the int scratch buffer.
+ /// </summary>
+ public int[] Scratch3 { get; }
+
public Vp8MacroBlockInfo CurrentMacroBlockInfo => this.Mb[this.currentMbIdx];
private Vp8MacroBlockInfo[] Mb { get; }
@@ -380,7 +392,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int bestMode = 0;
this.MakeLuma16Preds();
- for (mode = 0; mode < maxMode; ++mode)
+ for (mode = 0; mode < maxMode; mode++)
{
var histo = new Vp8Histogram();
histo.CollectHistogram(this.YuvIn.AsSpan(YOffEnc), this.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]), 0, 16);
@@ -499,9 +511,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.CurrentMacroBlockInfo.MacroBlockType = Vp8MacroBlockType.I4X4;
}
- public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba)
+ public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res)
{
- var res = new Vp8Residual();
int r = 0;
// re-import the non-zero context.
@@ -539,11 +550,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return WebpLookupTables.Vp8FixedCostsI4[top, left];
}
- public int GetCostLuma4(short[] levels, Vp8EncProba proba)
+ public int GetCostLuma4(Span levels, Vp8EncProba proba, Vp8Residual res)
{
int x = this.I4 & 3;
int y = this.I4 >> 2;
- var res = new Vp8Residual();
int r = 0;
res.Init(0, 3, proba);
@@ -553,9 +563,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return r;
}
- public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba)
+ public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res)
{
- var res = new Vp8Residual();
int r = 0;
// re-import the non-zero context.
@@ -741,7 +750,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
Vp8Encoding.EncPredChroma8(this.YuvP, left, top);
}
- public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx);
+ public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx, this.Scratch.AsSpan(0, 4));
public void SwapOut()
{
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
index d41da790b3..728574682f 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs
@@ -70,6 +70,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
///
private int uvAlpha;
+ /// <summary>
+ /// Scratch buffer to reduce allocations.
+ /// </summary>
+ private readonly int[] scratch = new int[16];
+
private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 };
private const int NumMbSegments = 4;
@@ -323,18 +328,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.StatLoop(width, height, yStride, uvStride);
it.Init();
it.InitFilter();
+ var info = new Vp8ModeScore();
+ var residual = new Vp8Residual();
do
{
bool dontUseSkip = !this.Proba.UseSkipProba;
-
- var info = new Vp8ModeScore();
+ info.Clear();
it.Import(y, u, v, yStride, uvStride, width, height, false);
// Warning! order is important: first call VP8Decimate() and
// *then* decide how to code the skip decision if there's one.
if (!this.Decimate(it, ref info, this.rdOptLevel) || dontUseSkip)
{
- this.CodeResiduals(it, info);
+ this.CodeResiduals(it, info, residual);
}
else
{
@@ -449,9 +455,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
it.Init();
this.SetLoopParams(stats.Q);
+ var info = new Vp8ModeScore();
do
{
- var info = new Vp8ModeScore();
+ info.Clear();
it.Import(y, u, v, yStride, uvStride, width, height, false);
if (this.Decimate(it, ref info, rdOpt))
{
@@ -932,10 +939,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return isSkipped;
}
- private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd)
+ private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd, Vp8Residual residual)
{
int x, y, ch;
- var residual = new Vp8Residual();
bool i16 = it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16;
int segment = it.CurrentMacroBlockInfo.Segment;
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
index f8b4853e2a..0567a0f27d 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs
@@ -68,22 +68,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
}
- public static void ITransform(Span reference, Span input, Span dst, bool doTwo)
+ public static void ITransform(Span reference, Span input, Span dst, bool doTwo, Span scratch)
{
- ITransformOne(reference, input, dst);
+ ITransformOne(reference, input, dst, scratch);
if (doTwo)
{
- ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4));
+ ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4), scratch);
}
}
- public static void ITransformOne(Span reference, Span input, Span dst)
+ public static void ITransformOne(Span reference, Span input, Span dst, Span scratch)
{
int i;
-#pragma warning disable SA1312 // Variable names should begin with lower-case letter
- int[] C = new int[4 * 4];
-#pragma warning restore SA1312 // Variable names should begin with lower-case letter
- Span tmp = C.AsSpan();
+ Span tmp = scratch.Slice(0, 16);
+ tmp.Clear();
for (i = 0; i < 4; i++)
{
// vertical pass.
@@ -99,7 +97,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
input = input.Slice(1);
}
- tmp = C.AsSpan();
+ tmp = scratch;
for (i = 0; i < 4; i++)
{
// horizontal pass.
@@ -116,16 +114,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
}
- public static void FTransform2(Span src, Span reference, Span output, Span output2)
+ public static void FTransform2(Span src, Span reference, Span output, Span output2, Span scratch)
{
- FTransform(src, reference, output);
- FTransform(src.Slice(4), reference.Slice(4), output2);
+ FTransform(src, reference, output, scratch);
+ FTransform(src.Slice(4), reference.Slice(4), output2, scratch);
}
- public static void FTransform(Span src, Span reference, Span output)
+ public static void FTransform(Span src, Span reference, Span output, Span scratch)
{
int i;
- int[] tmp = new int[16];
+ Span tmp = scratch.Slice(0, 16);
+ tmp.Clear();
+
int srcIdx = 0;
int refIdx = 0;
for (i = 0; i < 4; i++)
@@ -160,9 +160,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
}
- public static void FTransformWht(Span input, Span output)
+ public static void FTransformWht(Span input, Span output, Span scratch)
{
- int[] tmp = new int[16];
+ Span tmp = scratch.Slice(0, 16);
+ tmp.Clear();
+
int i;
int inputIdx = 0;
for (i = 0; i < 4; i++)
@@ -234,11 +236,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// Left samples are top[-5 .. -2], top_left is top[-1], top are
// located at top[0..3], and top right is top[4..7]
- public static void EncPredLuma4(Span dst, Span top, int topOffset)
+ public static void EncPredLuma4(Span dst, Span top, int topOffset, Span vals)
{
Dc4(dst.Slice(I4DC4), top, topOffset);
Tm4(dst.Slice(I4TM4), top, topOffset);
- Ve4(dst.Slice(I4VE4), top, topOffset);
+ Ve4(dst.Slice(I4VE4), top, topOffset, vals);
He4(dst.Slice(I4HE4), top, topOffset);
Rd4(dst.Slice(I4RD4), top, topOffset);
Vr4(dst.Slice(I4VR4), top, topOffset);
@@ -395,20 +397,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
}
- private static void Ve4(Span dst, Span top, int topOffset)
+ private static void Ve4(Span dst, Span top, int topOffset, Span vals)
{
// vertical
- byte[] vals =
- {
- LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]),
- LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]),
- LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]),
- LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4])
- };
-
+ vals[0] = LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]);
+ vals[1] = LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]);
+ vals[2] = LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]);
+ vals[3] = LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4]);
for (int i = 0; i < 4; i++)
{
- vals.AsSpan().CopyTo(dst.Slice(i * WebpConstants.Bps));
+ vals.CopyTo(dst.Slice(i * WebpConstants.Bps));
}
}
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs
index 5d048514ea..7192fa2d05 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs
@@ -8,6 +8,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
internal class Vp8Histogram
{
+ private readonly int[] scratch = new int[16];
+
+ private readonly short[] output = new short[16];
+
+ private readonly int[] distribution = new int[MaxCoeffThresh + 1];
+
/// <summary>
/// Size of histogram used by CollectHistogram.
/// </summary>
@@ -40,23 +46,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public void CollectHistogram(Span reference, Span pred, int startBlock, int endBlock)
{
int j;
- int[] distribution = new int[MaxCoeffThresh + 1];
+ this.distribution.AsSpan().Clear();
for (j = startBlock; j < endBlock; j++)
{
- short[] output = new short[16];
-
- this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), output);
+ this.output.AsSpan().Clear();
+ this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output);
// Convert coefficients to bin.
for (int k = 0; k < 16; ++k)
{
- int v = Math.Abs(output[k]) >> 3;
+ int v = Math.Abs(this.output[k]) >> 3;
int clippedValue = ClipMax(v, MaxCoeffThresh);
- ++distribution[clippedValue];
+ ++this.distribution[clippedValue];
}
}
- this.SetHistogramData(distribution);
+ this.SetHistogramData(this.distribution);
}
public void Merge(Vp8Histogram other)
@@ -97,7 +102,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
private void Vp8FTransform(Span src, Span reference, Span output)
{
int i;
- int[] tmp = new int[16];
+ Span tmp = this.scratch;
+ tmp.Clear();
+
for (i = 0; i < 4; i++)
{
int d0 = src[0] - reference[0]; // 9bit dynamic range ([-255,255])
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs
index 7182f60210..1c92a9d2d9 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs
@@ -1,6 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
+using System;
+
namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
///
@@ -93,6 +95,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
///
public int[,] Derr { get; }
+ public void Clear()
+ {
+ this.YDcLevels.AsSpan().Clear();
+ this.YAcLevels.AsSpan().Clear();
+ this.UvLevels.AsSpan().Clear();
+ this.ModesI4.AsSpan().Clear();
+
+ for (int i = 0; i < 2; i++)
+ {
+ for (int j = 0; j < 3; j++)
+ {
+ this.Derr[i, j] = 0;
+ }
+ }
+ }
+
public void InitScore()
{
this.D = 0;
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs
index 93d76e2836..4eeeedd376 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs
@@ -2,6 +2,7 @@
// Licensed under the Apache License, Version 2.0.
using System;
+using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
@@ -16,7 +17,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
public int CoeffType { get; set; }
- public short[] Coeffs { get; set; }
+ public short[] Coeffs { get; } = new short[16];
public Vp8BandProbas[] Prob { get; set; }
@@ -31,6 +32,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
this.Prob = prob.Coeffs[this.CoeffType];
this.Stats = prob.Stats[this.CoeffType];
this.Costs = prob.RemappedCosts[this.CoeffType];
+ this.Coeffs.AsSpan().Clear();
}
public void SetCoeffs(Span coeffs)
@@ -46,7 +48,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
}
- this.Coeffs = coeffs.Slice(0, 16).ToArray();
+ coeffs.Slice(0, 16).CopyTo(this.Coeffs);
}
// Simulate block coding, but only record statistics.
@@ -150,6 +152,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return cost;
}
+ [MethodImpl(InliningOptions.ShortMethod)]
private static int LevelCost(Span table, int level)
=> WebpLookupTables.Vp8LevelFixedCosts[level] + table[level > WebpConstants.MaxVariableLevel ? WebpConstants.MaxVariableLevel : level];
diff --git a/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs b/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs
index d6f5f7103e..17863f0c4b 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs
@@ -34,6 +34,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
///
private readonly Configuration configuration;
+ /// <summary>
+ /// Scratch buffer to reduce allocations.
+ /// </summary>
+ private readonly int[] scratch = new int[16];
+
+ /// <summary>
+ /// Another scratch buffer to reduce allocations.
+ /// </summary>
+ private readonly byte[] scratchBytes = new byte[4];
+
/// <summary>
/// Initializes a new instance of the <see cref="WebpLossyDecoder"/> class.
/// </summary>
@@ -395,7 +405,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
LossyUtils.TM4(dst, yuv, offset);
break;
case 2:
- LossyUtils.VE4(dst, yuv, offset);
+ LossyUtils.VE4(dst, yuv, offset, this.scratchBytes);
break;
case 3:
LossyUtils.HE4(dst, yuv, offset);
@@ -420,7 +430,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
break;
}
- this.DoTransform(bits, coeffs.AsSpan(n * 16), dst);
+ this.DoTransform(bits, coeffs.AsSpan(n * 16), dst, this.scratch);
}
}
else
@@ -456,7 +466,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
for (int n = 0; n < 16; ++n, bits <<= 2)
{
- this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n]));
+ this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n]), this.scratch);
}
}
}
@@ -496,8 +506,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
break;
}
- this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst);
- this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst);
+ this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst, this.scratch);
+ this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst, this.scratch);
// Stash away top samples for next block.
if (mby < dec.MbHeight - 1)
@@ -787,12 +797,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
}
- private void DoTransform(uint bits, Span src, Span dst)
+ private void DoTransform(uint bits, Span src, Span dst, Span scratch)
{
switch (bits >> 30)
{
case 3:
- LossyUtils.TransformOne(src, dst);
+ LossyUtils.TransformOne(src, dst, scratch);
break;
case 2:
LossyUtils.TransformAc3(src, dst);
@@ -803,7 +813,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
}
}
- private void DoUVTransform(uint bits, Span src, Span dst)
+ private void DoUVTransform(uint bits, Span src, Span dst, Span scratch)
{
// any non-zero coeff at all?
if ((bits & 0xff) > 0)
@@ -811,7 +821,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
// any non-zero AC coefficient?
if ((bits & 0xaa) > 0)
{
- LossyUtils.TransformUv(src, dst); // note we don't use the AC3 variant for U/V.
+ LossyUtils.TransformUv(src, dst, scratch); // note we don't use the AC3 variant for U/V.
}
else
{
@@ -884,7 +894,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
if (nz > 1)
{
// More than just the DC -> perform the full transform.
- LossyUtils.TransformWht(dc, dst);
+ LossyUtils.TransformWht(dc, dst, this.scratch);
}
else
{
diff --git a/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs b/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs
index d6e8d0a068..4251af7428 100644
--- a/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs
+++ b/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs
@@ -16,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp
/// </summary>
internal static class WebpCommonUtils
{
+#if SUPPORTS_RUNTIME_INTRINSICS
+ private static readonly Vector256<byte> AlphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
+
+ private static readonly Vector256<byte> All0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
+
+ private static readonly Vector128<byte> AlphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
+
+ private static readonly Vector128<byte> All0x80 = Vector128.Create((byte)0x80).AsByte();
+#endif
+
/// <summary>
/// Checks if the pixel row is not opaque.
/// </summary>
@@ -27,11 +37,6 @@ namespace SixLabors.ImageSharp.Formats.Webp
if (Avx2.IsSupported)
{
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
- var alphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
- Vector256<byte> all0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
- var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
- Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
-
int i = 0;
int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes)
@@ -42,14 +47,14 @@ namespace SixLabors.ImageSharp.Formats.Webp
Vector256<byte> a1 = Avx.LoadVector256(src + i + 32).AsByte();
Vector256<byte> a2 = Avx.LoadVector256(src + i + 64).AsByte();
Vector256<byte> a3 = Avx.LoadVector256(src + i + 96).AsByte();
- Vector256<int> b0 = Avx2.And(a0, alphaMaskVector256).AsInt32();
- Vector256<int> b1 = Avx2.And(a1, alphaMaskVector256).AsInt32();
- Vector256<int> b2 = Avx2.And(a2, alphaMaskVector256).AsInt32();
- Vector256<int> b3 = Avx2.And(a3, alphaMaskVector256).AsInt32();
+ Vector256<int> b0 = Avx2.And(a0, AlphaMaskVector256).AsInt32();
+ Vector256<int> b1 = Avx2.And(a1, AlphaMaskVector256).AsInt32();
+ Vector256<int> b2 = Avx2.And(a2, AlphaMaskVector256).AsInt32();
+ Vector256<int> b3 = Avx2.And(a3, AlphaMaskVector256).AsInt32();
Vector256<short> c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16();
Vector256<short> c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16();
Vector256<byte> d = Avx2.PackSignedSaturate(c0, c1).AsByte();
- Vector256<byte> bits = Avx2.CompareEqual(d, all0x80Vector256);
+ Vector256<byte> bits = Avx2.CompareEqual(d, All0x80Vector256);
int mask = Avx2.MoveMask(bits);
if (mask != -1)
{
@@ -59,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 64 <= length; i += 64)
{
- if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80))
+ if (IsNoneOpaque64Bytes(src, i))
{
return true;
}
@@ -67,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 32 <= length; i += 32)
{
- if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80))
+ if (IsNoneOpaque32Bytes(src, i))
{
return true;
}
@@ -85,16 +90,13 @@ namespace SixLabors.ImageSharp.Formats.Webp
else if (Sse2.IsSupported)
{
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
- var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
- Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
-
int i = 0;
int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes)
{
for (; i + 64 <= length; i += 64)
{
- if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80))
+ if (IsNoneOpaque64Bytes(src, i))
{
return true;
}
@@ -102,7 +104,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 32 <= length; i += 32)
{
- if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80))
+ if (IsNoneOpaque32Bytes(src, i))
{
return true;
}
@@ -133,20 +135,20 @@ namespace SixLabors.ImageSharp.Formats.Webp
}
#if SUPPORTS_RUNTIME_INTRINSICS
- private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80)
+ private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i)
{
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte();
Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte();
- Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32();
- Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32();
- Vector128<int> b2 = Sse2.And(a2, alphaMask).AsInt32();
- Vector128<int> b3 = Sse2.And(a3, alphaMask).AsInt32();
+ Vector128<int> b0 = Sse2.And(a0, AlphaMask).AsInt32();
+ Vector128<int> b1 = Sse2.And(a1, AlphaMask).AsInt32();
+ Vector128<int> b2 = Sse2.And(a2, AlphaMask).AsInt32();
+ Vector128<int> b3 = Sse2.And(a3, AlphaMask).AsInt32();
Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte();
- Vector128<byte> bits = Sse2.CompareEqual(d, all0x80);
+ Vector128<byte> bits = Sse2.CompareEqual(d, All0x80);
int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF)
{
@@ -156,15 +158,15 @@ namespace SixLabors.ImageSharp.Formats.Webp
return false;
}
- private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i, Vector128 alphaMask, Vector128