From 70c99d3d02369d4584d18e64393e239a5f86e30b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 13:17:32 +0100 Subject: [PATCH 1/8] Reduce allocations --- .../Webp/Lossless/BackwardReferenceEncoder.cs | 10 +- .../Formats/Webp/Lossless/HistogramEncoder.cs | 41 ++++-- .../Formats/Webp/Lossless/HuffmanTree.cs | 9 +- .../Formats/Webp/Lossless/LosslessUtils.cs | 2 +- .../Formats/Webp/Lossless/PixOrCopy.cs | 6 +- .../Formats/Webp/Lossless/PredictorEncoder.cs | 123 +++++++++++++----- .../Formats/Webp/Lossless/Vp8LEncoder.cs | 29 ++++- .../Formats/Webp/Lossless/Vp8LHistogram.cs | 57 ++++---- .../Formats/Webp/Lossless/Vp8LStreaks.cs | 9 ++ .../Webp/Lossless/WebpLosslessDecoder.cs | 3 +- .../Formats/Webp/Lossy/LossyUtils.cs | 51 ++++---- src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs | 86 +++++++----- .../Formats/Webp/Lossy/Vp8EncIterator.cs | 27 ++-- .../Formats/Webp/Lossy/Vp8Encoder.cs | 18 ++- .../Formats/Webp/Lossy/Vp8Encoding.cs | 54 ++++---- .../Formats/Webp/Lossy/Vp8Histogram.cs | 23 ++-- .../Formats/Webp/Lossy/Vp8ModeScore.cs | 18 +++ .../Formats/Webp/Lossy/Vp8Residual.cs | 5 +- .../Formats/Webp/Lossy/WebpLossyDecoder.cs | 30 +++-- .../Formats/WebP/PredictorEncoderTests.cs | 6 +- 20 files changed, 390 insertions(+), 217 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs index 70c4efb99..dc546f8ac 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/BackwardReferenceEncoder.cs @@ -49,6 +49,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless double bitCostBest = -1; int cacheBitsInitial = cacheBits; Vp8LHashChain hashChainBox = null; + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int lz77Type = 1; lz77TypesToTry > 0; lz77TypesToTry &= ~lz77Type, lz77Type <<= 1) { int cacheBitsTmp = cacheBitsInitial; @@ -81,7 +83,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless // Keep the best backward references. var histo = new Vp8LHistogram(worst, cacheBitsTmp); - double bitCost = histo.EstimateBits(); + double bitCost = histo.EstimateBits(stats, bitsEntropy); if (lz77TypeBest == 0 || bitCost < bitCostBest) { @@ -100,7 +102,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vp8LHashChain hashChainTmp = lz77TypeBest == (int)Vp8LLz77Type.Lz77Standard ? hashChain : hashChainBox; BackwardReferencesTraceBackwards(width, height, bgra, cacheBits, hashChainTmp, best, worst); var histo = new Vp8LHistogram(worst, cacheBits); - double bitCostTrace = histo.EstimateBits(); + double bitCostTrace = histo.EstimateBits(stats, bitsEntropy); if (bitCostTrace < bitCostBest) { best = worst; @@ -214,9 +216,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int i = 0; i <= cacheBitsMax; i++) { - double entropy = histos[i].EstimateBits(); + double entropy = histos[i].EstimateBits(stats, bitsEntropy); if (i == 0 || entropy < entropyMin) { entropyMin = entropy; diff --git a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs index f2d4fb189..5d407d73c 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs @@ -152,10 +152,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static int HistogramCopyAndAnalyze(List origHistograms, List histograms, ushort[] histogramSymbols) { + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int clusterId = 0, i = 0; i < origHistograms.Count; i++) { Vp8LHistogram origHistogram = origHistograms[i]; - origHistogram.UpdateHistogramCost(); + origHistogram.UpdateHistogramCost(stats, bitsEntropy); // Skip the histogram if it is completely empty, which can happen for tiles with no information (when they are skipped because of LZ77). if (!origHistogram.IsUsed[0] && !origHistogram.IsUsed[1] && !origHistogram.IsUsed[2] && !origHistogram.IsUsed[3] && !origHistogram.IsUsed[4]) @@ -175,7 +177,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return numUsed; } - private static void HistogramCombineEntropyBin(List histograms, ushort[] clusters, ushort[] clusterMappings, Vp8LHistogram curCombo, ushort[] binMap, int numBins, double combineCostFactor) + private static void HistogramCombineEntropyBin( + List histograms, + ushort[] clusters, + ushort[] clusterMappings, + Vp8LHistogram curCombo, + ushort[] binMap, + int numBins, + double combineCostFactor) { var binInfo = new HistogramBinInfo[BinSize]; for (int idx = 0; idx < numBins; idx++) @@ -191,6 +200,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } var indicesToRemove = new List(); + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int idx = 0; idx < histograms.Count; idx++) { if (histograms[idx] == null) @@ -209,7 +220,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless // Try to merge #idx into #first (both share the same binId) double bitCost = histograms[idx].BitCost; double bitCostThresh = -bitCost * combineCostFactor; - double currCostDiff = histograms[first].AddEval(histograms[idx], bitCostThresh, curCombo); + double currCostDiff = histograms[first].AddEval(histograms[idx], stats, bitsEntropy, bitCostThresh, curCombo); if (currCostDiff < bitCostThresh) { @@ -308,6 +319,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int numUsed = histograms.Count(h => h != null); int outerIters = numUsed; int numTriesNoSuccess = outerIters / 2; + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); if (numUsed < minClusterSize) { @@ -354,7 +367,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless idx2 = mappings[idx2]; // Calculate cost reduction on combination. - double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost); + double currCost = HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, idx2, bestCost, stats, bitsEntropy); // Found a better pair? if (currCost < 0) @@ -428,7 +441,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if (doEval) { // Re-evaluate the cost of an updated pair. - HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], 0.0d, p); + HistoListUpdatePair(histograms[p.Idx1], histograms[p.Idx2], stats, bitsEntropy, 0.0d, p); if (p.CostDiff >= 0.0d) { histoPriorityList[j] = histoPriorityList[histoPriorityList.Count - 1]; @@ -456,6 +469,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless // Priority list of histogram pairs. var histoPriorityList = new List(); int maxSize = histoSize * histoSize; + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); for (int i = 0; i < histoSize; i++) { @@ -471,7 +486,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless continue; } - HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d); + HistoPriorityListPush(histoPriorityList, maxSize, histograms, i, j, 0.0d, stats, bitsEntropy); } } @@ -510,7 +525,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless continue; } - HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d); + HistoPriorityListPush(histoPriorityList, maxSize, histograms, idx1, i, 0.0d, stats, bitsEntropy); } } } @@ -519,6 +534,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { int inSize = input.Count; int outSize = output.Count; + var stats = new Vp8LStreaks(); + var bitsEntropy = new Vp8LBitEntropy(); if (outSize > 1) { for (int i = 0; i < inSize; i++) @@ -534,7 +551,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless double bestBits = double.MaxValue; for (int k = 0; k < outSize; k++) { - double curBits = output[k].AddThresh(input[i], bestBits); + double curBits = output[k].AddThresh(input[i], stats, bitsEntropy, bestBits); if (k == 0 || curBits < bestBits) { bestBits = curBits; @@ -577,7 +594,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// Create a pair from indices "idx1" and "idx2" provided its cost is inferior to "threshold", a negative entropy. /// /// The cost of the pair, or 0 if it superior to threshold. - private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold) + private static double HistoPriorityListPush(List histoList, int maxSize, List histograms, int idx1, int idx2, double threshold, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy) { var pair = new HistogramPair(); @@ -598,7 +615,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Vp8LHistogram h1 = histograms[idx1]; Vp8LHistogram h2 = histograms[idx2]; - HistoListUpdatePair(h1, h2, threshold, pair); + HistoListUpdatePair(h1, h2, stats, bitsEntropy, threshold, pair); // Do not even consider the pair if it does not improve the entropy. if (pair.CostDiff >= threshold) @@ -616,11 +633,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// Update the cost diff and combo of a pair of histograms. This needs to be called when the the histograms have been merged with a third one. /// - private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, double threshold, HistogramPair pair) + private static void HistoListUpdatePair(Vp8LHistogram h1, Vp8LHistogram h2, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double threshold, HistogramPair pair) { double sumCost = h1.BitCost + h2.BitCost; pair.CostCombo = 0.0d; - h1.GetCombinedHistogramEntropy(h2, sumCost + threshold, costInitial: pair.CostCombo, out double cost); + h1.GetCombinedHistogramEntropy(h2, stats, bitsEntropy, sumCost + threshold, costInitial: pair.CostCombo, out double cost); pair.CostCombo = cost; pair.CostDiff = pair.CostCombo - sumCost; } diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs index cd8be9aac..0376311ed 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanTree.cs @@ -49,14 +49,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { return -1; } - else if (t1.TotalCount < t2.TotalCount) + + if (t1.TotalCount < t2.TotalCount) { return 1; } - else - { - return t1.Value < t2.Value ? -1 : 1; - } + + return t1.Value < t2.Value ? -1 : 1; } public IDeepCloneable DeepClone() => new HuffmanTree(this); diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index b7f94415b..06204ae91 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -704,7 +704,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// Compute the combined Shanon's entropy for distribution {X} and {X+Y}. /// /// Shanon entropy. - public static float CombinedShannonEntropy(int[] x, int[] y) + public static float CombinedShannonEntropy(Span x, Span y) { double retVal = 0.0d; uint sumX = 0, sumXY = 0; diff --git a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs index 2d71a7af6..6cd109121 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PixOrCopy.cs @@ -15,7 +15,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless public uint BgraOrDistance { get; set; } public static PixOrCopy CreateCacheIdx(int idx) => - new PixOrCopy() + new() { Mode = PixOrCopyMode.CacheIdx, BgraOrDistance = (uint)idx, @@ -23,14 +23,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless }; public static PixOrCopy CreateLiteral(uint bgra) => - new PixOrCopy() + new() { Mode = PixOrCopyMode.Literal, BgraOrDistance = bgra, Len = 1 }; - public static PixOrCopy CreateCopy(uint distance, ushort len) => new PixOrCopy() + public static PixOrCopy CreateCopy(uint distance, ushort len) => new() { Mode = PixOrCopyMode.Copy, BgraOrDistance = distance, diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index 671e9a043..713fc7919 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -17,6 +17,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// internal static unsafe class PredictorEncoder { + private static readonly sbyte[] DeltaLut = { 16, 16, 8, 4, 2, 2, 2 }; + + private static readonly sbyte[][] Offset = + { + new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } + }; + private const int GreenRedToBlueNumAxis = 8; private const int GreenRedToBlueMaxIters = 7; @@ -41,6 +48,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Span bgra, Span bgraScratch, Span image, + int[][] histoArgb, + int[][] bestHisto, bool nearLossless, int nearLosslessQuality, WebpTransparentColorMode transparentColorMode, @@ -80,6 +89,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless histo, bgraScratch, bgra, + histoArgb, + bestHisto, maxQuantization, transparentColorMode, usedSubtractGreen, @@ -105,7 +116,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless lowEffort); } - public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span bgra, Span image) + public static void ColorSpaceTransform(int width, int height, int bits, int quality, Span bgra, Span image, Span scratch) { int maxTileSize = 1 << bits; int tileXSize = LosslessUtils.SubSampleSize(width, bits); @@ -139,7 +150,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless height, accumulatedRedHisto, accumulatedBlueHisto, - bgra); + bgra, + scratch); image[offset] = MultipliersToColorCode(prevX); CopyTileWithColorTransform(width, height, tileXOffset, tileYOffset, maxTileSize, prevX, bgra); @@ -188,6 +200,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int[][] accumulated, Span argbScratch, Span argb, + int[][] histoArgb, + int[][] bestHisto, int maxQuantization, WebpTransparentColorMode transparentColorMode, bool usedSubtractGreen, @@ -222,21 +236,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless float bestDiff = MaxDiffCost; int bestMode = 0; uint[] residuals = new uint[1 << WebpConstants.MaxTransformBits]; - int[][] histoArgb = new int[4][]; - int[][] bestHisto = new int[4][]; for (int i = 0; i < 4; i++) { - histoArgb[i] = new int[256]; - bestHisto[i] = new int[256]; + histoArgb[i].AsSpan().Clear(); + bestHisto[i].AsSpan().Clear(); } for (int mode = 0; mode < numPredModes; mode++) { - for (int i = 0; i < 4; i++) - { - histoArgb[i].AsSpan().Fill(0); - } - if (startY > 0) { // Read the row above the tile which will become the first upper_row. @@ -300,6 +307,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless bestDiff = curDiff; bestMode = mode; } + + for (int i = 0; i < 4; i++) + { + histoArgb[i].AsSpan().Clear(); + } } for (int i = 0; i < 4; i++) @@ -819,7 +831,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } - private static Vp8LMultipliers GetBestColorTransformForTile(int tileX, int tileY, int bits, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int xSize, int ySize, int[] accumulatedRedHisto, int[] accumulatedBlueHisto, Span argb) + private static Vp8LMultipliers GetBestColorTransformForTile( + int tileX, + int tileY, + int bits, + Vp8LMultipliers prevX, + Vp8LMultipliers prevY, + int quality, + int xSize, + int ySize, + int[] accumulatedRedHisto, + int[] accumulatedBlueHisto, + Span argb, + Span scratch) { int maxTileSize = 1 << bits; int tileYOffset = tileY * maxTileSize; @@ -832,18 +856,28 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless var bestTx = default(Vp8LMultipliers); - GetBestGreenToRed(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx); + GetBestGreenToRed(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedRedHisto, ref bestTx); - GetBestGreenRedToBlue(tileArgb, xSize, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx); + GetBestGreenRedToBlue(tileArgb, xSize, scratch, tileWidth, tileHeight, prevX, prevY, quality, accumulatedBlueHisto, ref bestTx); return bestTx; } - private static void GetBestGreenToRed(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedRedHisto, ref Vp8LMultipliers bestTx) + private static void GetBestGreenToRed( + Span argb, + int stride, + Span scratch, + int tileWidth, + int tileHeight, + Vp8LMultipliers prevX, + Vp8LMultipliers prevY, + int quality, + int[] accumulatedRedHisto, + ref Vp8LMultipliers bestTx) { int maxIters = 4 + ((7 * quality) >> 8); // in range [4..6] int greenToRedBest = 0; - double bestDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto); + double bestDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedBest, accumulatedRedHisto); for (int iter = 0; iter < maxIters; iter++) { // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to @@ -855,7 +889,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless for (int offset = -delta; offset <= delta; offset += 2 * delta) { int greenToRedCur = offset + greenToRedBest; - double curDiff = GetPredictionCostCrossColorRed(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto); + double curDiff = GetPredictionCostCrossColorRed(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToRedCur, accumulatedRedHisto); if (curDiff < bestDiff) { bestDiff = curDiff; @@ -867,24 +901,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless bestTx.GreenToRed = (byte)(greenToRedBest & 0xff); } - private static void GetBestGreenRedToBlue(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx) + private static void GetBestGreenRedToBlue(Span argb, int stride, Span scratch, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int quality, int[] accumulatedBlueHisto, ref Vp8LMultipliers bestTx) { int iters = (quality < 25) ? 1 : (quality > 50) ? GreenRedToBlueMaxIters : 4; int greenToBlueBest = 0; int redToBlueBest = 0; - sbyte[][] offset = { new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } }; - sbyte[] deltaLut = { 16, 16, 8, 4, 2, 2, 2 }; // Initial value at origin: - double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto); + double bestDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueBest, redToBlueBest, accumulatedBlueHisto); for (int iter = 0; iter < iters; iter++) { - int delta = deltaLut[iter]; + int delta = DeltaLut[iter]; for (int axis = 0; axis < GreenRedToBlueNumAxis; axis++) { - int greenToBlueCur = (offset[axis][0] * delta) + greenToBlueBest; - int redToBlueCur = (offset[axis][1] * delta) + redToBlueBest; - double curDiff = GetPredictionCostCrossColorBlue(argb, stride, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto); + int greenToBlueCur = (Offset[axis][0] * delta) + greenToBlueBest; + int redToBlueCur = (Offset[axis][1] * delta) + redToBlueBest; + double curDiff = GetPredictionCostCrossColorBlue(argb, stride, scratch, tileWidth, tileHeight, prevX, prevY, greenToBlueCur, redToBlueCur, accumulatedBlueHisto); if (curDiff < bestDiff) { bestDiff = curDiff; @@ -910,9 +942,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless bestTx.RedToBlue = (byte)(redToBlueBest & 0xff); } - private static double GetPredictionCostCrossColorRed(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToRed, int[] accumulatedRedHisto) + private static double GetPredictionCostCrossColorRed( + Span argb, + int stride, + Span scratch, + int tileWidth, + int tileHeight, + Vp8LMultipliers prevX, + Vp8LMultipliers prevY, + int greenToRed, + int[] accumulatedRedHisto) { - int[] histo = new int[256]; + Span histo = scratch.Slice(0, 256); + histo.Clear(); CollectColorRedTransforms(argb, stride, tileWidth, tileHeight, greenToRed, histo); double curDiff = PredictionCostCrossColor(accumulatedRedHisto, histo); @@ -937,9 +979,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return curDiff; } - private static double GetPredictionCostCrossColorBlue(Span argb, int stride, int tileWidth, int tileHeight, Vp8LMultipliers prevX, Vp8LMultipliers prevY, int greenToBlue, int redToBlue, int[] accumulatedBlueHisto) + private static double GetPredictionCostCrossColorBlue( + Span argb, + int stride, + Span scratch, + int tileWidth, + int tileHeight, + Vp8LMultipliers prevX, + Vp8LMultipliers prevY, + int greenToBlue, + int redToBlue, + int[] accumulatedBlueHisto) { - int[] histo = new int[256]; + Span histo = scratch.Slice(0, 256); + histo.Clear(); CollectColorBlueTransforms(argb, stride, tileWidth, tileHeight, greenToBlue, redToBlue, histo); double curDiff = PredictionCostCrossColor(accumulatedBlueHisto, histo); @@ -980,7 +1033,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return curDiff; } - private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo) + private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse41.IsSupported) @@ -1036,7 +1089,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } - private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, int[] histo) + private static void CollectColorRedTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo) { int pos = 0; while (tileHeight-- > 0) @@ -1051,7 +1104,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } - private static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo) + private static void CollectColorBlueTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo) { #if SUPPORTS_RUNTIME_INTRINSICS if (Sse41.IsSupported) @@ -1114,7 +1167,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } - private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, int[] histo) + private static void CollectColorBlueTransformsNoneVectorized(Span bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span histo) { int pos = 0; while (tileHeight-- > 0) @@ -1143,7 +1196,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } [MethodImpl(InliningOptions.ShortMethod)] - private static double PredictionCostCrossColor(int[] accumulated, int[] counts) + private static double PredictionCostCrossColor(int[] accumulated, Span counts) { // Favor low entropy, locally and globally. // Favor small absolute values for PredictionCostSpatial. @@ -1152,7 +1205,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } [MethodImpl(InliningOptions.ShortMethod)] - private static float PredictionCostSpatial(int[] counts, int weight0, double expVal) + private static float PredictionCostSpatial(Span counts, int weight0, double expVal) { int significantSymbols = 256 >> 4; double expDecayFactor = 0.6; diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 693585637..818488696 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -19,6 +19,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// internal class Vp8LEncoder : IDisposable { + /// + /// Scratch buffer to reduce allocations. + /// + private readonly int[] scratch = new int[256]; + + private int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] }; + + private int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] }; + /// /// The to use for buffer allocations. /// @@ -76,6 +85,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private const int PaletteInvSize = 1 << PaletteInvSizeBits; + private static readonly byte[] Order = { 1, 2, 0, 3 }; + /// /// Initializes a new instance of the class. /// @@ -675,6 +686,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless this.EncodedData.GetSpan(), this.BgraScratch.GetSpan(), this.TransformData.GetSpan(), + this.histoArgb, + this.bestHisto, this.nearLossless, nearLosslessStrength, this.transparentColorMode, @@ -694,7 +707,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int transformWidth = LosslessUtils.SubSampleSize(width, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(height, colorTransformBits); - PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan()); + PredictorEncoder.ColorSpaceTransform(width, height, colorTransformBits, this.quality, this.EncodedData.GetSpan(), this.TransformData.GetSpan(), this.scratch); this.bitWriter.PutBits(WebpConstants.TransformPresent, 1); this.bitWriter.PutBits((uint)Vp8LTransformType.CrossColorTransform, 2); @@ -736,7 +749,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless var histogramImage = new List() { - new Vp8LHistogram(cacheBits) + new(cacheBits) }; // Build histogram image and symbols from backward references. @@ -780,7 +793,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private void StoreHuffmanCode(HuffmanTree[] huffTree, HuffmanTreeToken[] tokens, HuffmanTreeCode huffmanCode) { int count = 0; - int[] symbols = { 0, 0 }; + Span symbols = this.scratch.AsSpan(0, 2); + symbols.Clear(); int maxBits = 8; int maxSymbol = 1 << maxBits; @@ -973,10 +987,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if (v.IsLiteral()) { - byte[] order = { 1, 2, 0, 3 }; for (int k = 0; k < 4; k++) { - int code = (int)v.Literal(order[k]); + int code = (int)v.Literal(Order[k]); this.bitWriter.WriteHuffmanCode(codes[k], code); } } @@ -1092,9 +1105,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless histo[(int)HistoIx.HistoBluePred * 256]++; histo[(int)HistoIx.HistoAlphaPred * 256]++; + var bitEntropy = new Vp8LBitEntropy(); for (int j = 0; j < (int)HistoIx.HistoTotal; j++) { - var bitEntropy = new Vp8LBitEntropy(); + bitEntropy.Init(); Span curHisto = histo.Slice(j * 256, 256); bitEntropy.BitsEntropyUnrefined(curHisto, 256); entropyComp[j] = bitEntropy.BitsEntropyRefine(); @@ -1447,7 +1461,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { return mid; } - else if (sorted[mid] < color) + + if (sorted[mid] < color) { low = mid; } diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs index 42260e2b2..8b0201568 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs @@ -157,29 +157,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// Estimate how many bits the combined entropy of literals and distance approximately maps to. /// /// Estimated bits. - public double EstimateBits() + public double EstimateBits(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy) { uint notUsed = 0; return - PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0]) - + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1]) - + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2]) - + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3]) - + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4]) + PopulationCost(this.Literal, this.NumCodes(), ref notUsed, ref this.IsUsed[0], stats, bitsEntropy) + + PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[1], stats, bitsEntropy) + + PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[2], stats, bitsEntropy) + + PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref notUsed, ref this.IsUsed[3], stats, bitsEntropy) + + PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes); } - public void UpdateHistogramCost() + public void UpdateHistogramCost(Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy) { uint alphaSym = 0, redSym = 0, blueSym = 0; uint notUsed = 0; - double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3]); - double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4]) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes); + + double alphaCost = PopulationCost(this.Alpha, WebpConstants.NumLiteralCodes, ref alphaSym, ref this.IsUsed[3], stats, bitsEntropy); + double distanceCost = PopulationCost(this.Distance, WebpConstants.NumDistanceCodes, ref notUsed, ref this.IsUsed[4], stats, bitsEntropy) + ExtraCost(this.Distance, WebpConstants.NumDistanceCodes); int numCodes = this.NumCodes(); - this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0]) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); - this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1]); - this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2]); + this.LiteralCost = PopulationCost(this.Literal, numCodes, ref notUsed, ref this.IsUsed[0], stats, bitsEntropy) + ExtraCost(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); + this.RedCost = PopulationCost(this.Red, WebpConstants.NumLiteralCodes, ref redSym, ref this.IsUsed[1], stats, bitsEntropy); + this.BlueCost = PopulationCost(this.Blue, WebpConstants.NumLiteralCodes, ref blueSym, ref this.IsUsed[2], stats, bitsEntropy); this.BitCost = this.LiteralCost + this.RedCost + this.BlueCost + alphaCost + distanceCost; if ((alphaSym | redSym | blueSym) == NonTrivialSym) { @@ -198,11 +199,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// Since the previous score passed is 'costThreshold', we only need to compare /// the partial cost against 'costThreshold + C(a) + C(b)' to possibly bail-out early. /// - public double AddEval(Vp8LHistogram b, double costThreshold, Vp8LHistogram output) + public double AddEval(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold, Vp8LHistogram output) { double sumCost = this.BitCost + b.BitCost; costThreshold += sumCost; - if (this.GetCombinedHistogramEntropy(b, costThreshold, costInitial: 0, out double cost)) + if (this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial: 0, out double cost)) { this.Add(b, output); output.BitCost = cost; @@ -212,10 +213,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return cost - sumCost; } - public double AddThresh(Vp8LHistogram b, double costThreshold) + public double AddThresh(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitsEntropy, double costThreshold) { double costInitial = -this.BitCost; - this.GetCombinedHistogramEntropy(b, costThreshold, costInitial, out double cost); + this.GetCombinedHistogramEntropy(b, stats, bitsEntropy, costThreshold, costInitial, out double cost); return cost; } @@ -239,12 +240,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless : NonTrivialSym; } - public bool GetCombinedHistogramEntropy(Vp8LHistogram b, double costThreshold, double costInitial, out double cost) + public bool GetCombinedHistogramEntropy(Vp8LHistogram b, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy, double costThreshold, double costInitial, out double cost) { bool trivialAtEnd = false; cost = costInitial; - cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false); + cost += GetCombinedEntropy(this.Literal, b.Literal, this.NumCodes(), this.IsUsed[0], b.IsUsed[0], false, stats, bitEntropy); cost += ExtraCostCombined(this.Literal.AsSpan(WebpConstants.NumLiteralCodes), b.Literal.AsSpan(WebpConstants.NumLiteralCodes), WebpConstants.NumLengthCodes); @@ -267,25 +268,25 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } - cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd); + cost += GetCombinedEntropy(this.Red, b.Red, WebpConstants.NumLiteralCodes, this.IsUsed[1], b.IsUsed[1], trivialAtEnd, stats, bitEntropy); if (cost > costThreshold) { return false; } - cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd); + cost += GetCombinedEntropy(this.Blue, b.Blue, WebpConstants.NumLiteralCodes, this.IsUsed[2], b.IsUsed[2], trivialAtEnd, stats, bitEntropy); if (cost > costThreshold) { return false; } - cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd); + cost += GetCombinedEntropy(this.Alpha, b.Alpha, WebpConstants.NumLiteralCodes, this.IsUsed[3], b.IsUsed[3], trivialAtEnd, stats, bitEntropy); if (cost > costThreshold) { return false; } - cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false); + cost += GetCombinedEntropy(this.Distance, b.Distance, WebpConstants.NumDistanceCodes, this.IsUsed[4], b.IsUsed[4], false, stats, bitEntropy); if (cost > costThreshold) { return false; @@ -415,9 +416,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } - private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd) + private static double GetCombinedEntropy(uint[] x, uint[] y, int length, bool isXUsed, bool isYUsed, bool trivialAtEnd, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy) { - var stats = new Vp8LStreaks(); + stats.Clear(); + bitEntropy.Init(); if (trivialAtEnd) { // This configuration is due to palettization that transforms an indexed @@ -435,7 +437,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return stats.FinalHuffmanCost(); } - var bitEntropy = new Vp8LBitEntropy(); if (isXUsed) { if (isYUsed) @@ -479,10 +480,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// /// Get the symbol entropy for the distribution 'population'. /// - private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed) + private static double PopulationCost(uint[] population, int length, ref uint trivialSym, ref bool isUsed, Vp8LStreaks stats, Vp8LBitEntropy bitEntropy) { - var bitEntropy = new Vp8LBitEntropy(); - var stats = new Vp8LStreaks(); + bitEntropy.Init(); + stats.Clear(); bitEntropy.BitsEntropyUnrefined(population, length, stats); trivialSym = (bitEntropy.NoneZeros == 1) ? bitEntropy.NoneZeroCode : NonTrivialSym; diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs index 27ddcfd43..df9f06442 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LStreaks.cs @@ -1,6 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; + namespace SixLabors.ImageSharp.Formats.Webp.Lossless { internal class Vp8LStreaks @@ -28,6 +30,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// public int[][] Streaks { get; } + public void Clear() + { + this.Counts.AsSpan().Clear(); + this.Streaks[0].AsSpan().Clear(); + this.Streaks[1].AsSpan().Clear(); + } + public double FinalHuffmanCost() { // The constants in this function are experimental and got rounded from diff --git a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs index 768365e44..4f7a4eb3d 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/WebpLosslessDecoder.cs @@ -418,6 +418,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless var huffmanTables = new HuffmanCode[numHTreeGroups * tableSize]; var hTreeGroups = new HTreeGroup[numHTreeGroups]; Span huffmanTable = huffmanTables.AsSpan(); + int[] codeLengths = new int[maxAlphabetSize]; for (int i = 0; i < numHTreeGroupsMax; i++) { hTreeGroups[i] = new HTreeGroup(HuffmanUtils.HuffmanPackedTableSize); @@ -425,7 +426,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless int totalSize = 0; bool isTrivialLiteral = true; int maxBits = 0; - int[] codeLengths = new int[maxAlphabetSize]; + codeLengths.AsSpan().Clear(); for (int j = 0; j < WebpConstants.HuffmanCodesPerMetaCode; j++) { int alphabetSize = WebpConstants.AlphabetSize[j]; diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs index 1584237b0..d31857d53 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs @@ -58,14 +58,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Disto16X16(Span a, Span b, Span w) + public static int Vp8Disto16X16(Span a, Span b, Span w, Span scratch) { int d = 0; for (int y = 0; y < 16 * WebpConstants.Bps; y += 4 * WebpConstants.Bps) { for (int x = 0; x < 16; x += 4) { - d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w); + d += Vp8Disto4X4(a.Slice(x + y), b.Slice(x + y), w, scratch); } } @@ -73,10 +73,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } [MethodImpl(InliningOptions.ShortMethod)] - public static int Vp8Disto4X4(Span a, Span b, Span w) + public static int Vp8Disto4X4(Span a, Span b, Span w, Span scratch) { - int sum1 = TTransform(a, w); - int sum2 = TTransform(b, w); + int sum1 = TTransform(a, w, scratch); + int sum2 = TTransform(b, w, scratch); return Math.Abs(sum2 - sum1) >> 5; } @@ -252,18 +252,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy [MethodImpl(InliningOptions.ShortMethod)] public static void TM4(Span dst, Span yuv, int offset) => TrueMotion(dst, yuv, offset, 4); - public static void VE4(Span dst, Span yuv, int offset) + public static void VE4(Span dst, Span yuv, int offset, Span vals) { // vertical int topOffset = offset - WebpConstants.Bps; - byte[] vals = - { - Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]), - Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]), - Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]), - Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4]) - }; - + vals[0] = Avg3(yuv[topOffset - 1], yuv[topOffset], yuv[topOffset + 1]); + vals[1] = Avg3(yuv[topOffset], yuv[topOffset + 1], yuv[topOffset + 2]); + vals[2] = Avg3(yuv[topOffset + 1], yuv[topOffset + 2], yuv[topOffset + 3]); + vals[3] = Avg3(yuv[topOffset + 2], yuv[topOffset + 3], yuv[topOffset + 4]); int endIdx = 4 * WebpConstants.Bps; for (int i = 0; i < endIdx; i += WebpConstants.Bps) { @@ -504,9 +500,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// /// Paragraph 14.3: Implementation of the Walsh-Hadamard transform inversion. /// - public static void TransformWht(Span input, Span output) + public static void TransformWht(Span input, Span output, Span scratch) { - int[] tmp = new int[16]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); for (int i = 0; i < 4; i++) { int iPlus4 = 4 + i; @@ -544,10 +541,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// Returns the weighted sum of the absolute value of transformed coefficients. /// w[] contains a row-major 4 by 4 symmetric matrix. /// - public static int TTransform(Span input, Span w) + public static int TTransform(Span input, Span w, Span scratch) { int sum = 0; - int[] tmp = new int[16]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); // horizontal pass. int inputOffset = 0; @@ -591,15 +589,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return sum; } - public static void TransformTwo(Span src, Span dst) + public static void TransformTwo(Span src, Span dst, Span scratch) { - TransformOne(src, dst); - TransformOne(src.Slice(16), dst.Slice(4)); + TransformOne(src, dst, scratch); + TransformOne(src.Slice(16), dst.Slice(4), scratch); } - public static void TransformOne(Span src, Span dst) + public static void TransformOne(Span src, Span dst, Span scratch) { - Span tmp = stackalloc int[4 * 4]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); int tmpOffset = 0; for (int srcOffset = 0; srcOffset < 4; srcOffset++) { @@ -671,10 +670,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Store2(dst, 3, a - d4, d1, c1); } - public static void TransformUv(Span src, Span dst) + public static void TransformUv(Span src, Span dst, Span scratch) { - TransformTwo(src.Slice(0 * 16), dst); - TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps)); + TransformTwo(src.Slice(0 * 16), dst, scratch); + TransformTwo(src.Slice(2 * 16), dst.Slice(4 * WebpConstants.Bps), scratch); } public static void TransformDcuv(Span src, Span dst) diff --git a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs index 2ed438166..18d7494f0 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/QuantEnc.cs @@ -31,7 +31,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int lambda = dqm.LambdaI16; int tlambda = dqm.TLambda; Span src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); + Span scratch = it.Scratch3; var rdTmp = new Vp8ModeScore(); + var res = new Vp8Residual(); Vp8ModeScore rdCur = rdTmp; Vp8ModeScore rdBest = rd; int mode; @@ -39,7 +41,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy rd.ModeI16 = -1; for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { - // scratch buffer. + // Scratch buffer. Span tmpDst = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc); rdCur.ModeI16 = mode; @@ -48,9 +50,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Measure RD-score. rdCur.D = LossyUtils.Vp8Sse16X16(src, tmpDst); - rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY)) : 0; + rdCur.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto16X16(src, tmpDst, WeightY, scratch)) : 0; rdCur.H = WebpConstants.Vp8FixedCostsI16[mode]; - rdCur.R = it.GetCostLuma16(rdCur, proba); + rdCur.R = it.GetCostLuma16(rdCur, proba, res); if (isFlat) { @@ -101,6 +103,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int tlambda = dqm.TLambda; Span src0 = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); Span bestBlocks = it.YuvOut2.AsSpan(Vp8EncIterator.YOffEnc); + Span scratch = it.Scratch3; int totalHeaderBits = 0; var rdBest = new Vp8ModeScore(); @@ -113,31 +116,35 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy rdBest.H = 211; // '211' is the value of VP8BitCost(0, 145) rdBest.SetRdScore(dqm.LambdaMode); it.StartI4(); + var rdi4 = new Vp8ModeScore(); + var rdTmp = new Vp8ModeScore(); + var res = new Vp8Residual(); + Span tmpLevels = new short[16]; do { int numBlocks = 1; - var rdi4 = new Vp8ModeScore(); + rdi4.Clear(); int mode; int bestMode = -1; Span src = src0.Slice(WebpLookupTables.Vp8Scan[it.I4]); short[] modeCosts = it.GetCostModeI4(rd.ModesI4); Span bestBlock = bestBlocks.Slice(WebpLookupTables.Vp8Scan[it.I4]); Span tmpDst = it.Scratch.AsSpan(); - tmpDst.Fill(0); + tmpDst.Clear(); rdi4.InitScore(); it.MakeIntra4Preds(); for (mode = 0; mode < WebpConstants.NumBModes; ++mode) { - var rdTmp = new Vp8ModeScore(); - short[] tmpLevels = new short[16]; + rdTmp.Clear(); + tmpLevels.Clear(); // Reconstruct. rdTmp.Nz = (uint)ReconstructIntra4(it, dqm, tmpLevels, src, tmpDst, mode); // Compute RD-score. rdTmp.D = LossyUtils.Vp8Sse4X4(src, tmpDst); - rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY)) : 0; + rdTmp.SD = tlambda != 0 ? Mult8B(tlambda, LossyUtils.Vp8Disto4X4(src, tmpDst, WeightY, scratch)) : 0; rdTmp.H = modeCosts[mode]; // Add flatness penalty, to avoid flat area to be mispredicted by a complex mode. @@ -150,15 +157,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy rdTmp.R = 0; } - // early-out check. + // Early-out check. rdTmp.SetRdScore(lambda); if (bestMode >= 0 && rdTmp.Score >= rdi4.Score) { continue; } - // finish computing score. - rdTmp.R += it.GetCostLuma4(tmpLevels, proba); + // Finish computing score. + rdTmp.R += it.GetCostLuma4(tmpLevels, proba, res); rdTmp.SetRdScore(lambda); if (bestMode < 0 || rdTmp.Score < rdi4.Score) @@ -213,13 +220,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Span dst0 = it.YuvOut.AsSpan(Vp8EncIterator.UOffEnc); Span dst = dst0; var rdBest = new Vp8ModeScore(); + var rdUv = new Vp8ModeScore(); + var res = new Vp8Residual(); int mode; rd.ModeUv = -1; rdBest.InitScore(); for (mode = 0; mode < WebpConstants.NumPredModes; ++mode) { - var rdUv = new Vp8ModeScore(); + rdUv.Clear(); // Reconstruct rdUv.Nz = (uint)ReconstructUv(it, dqm, rdUv, tmpDst, mode); @@ -228,7 +237,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy rdUv.D = LossyUtils.Vp8Sse16X8(src, tmpDst); rdUv.SD = 0; // not calling TDisto here: it tends to flatten areas. rdUv.H = WebpConstants.Vp8FixedCostsUv[mode]; - rdUv.R = it.GetCostUv(rdUv, proba); + rdUv.R = it.GetCostUv(rdUv, proba, res); if (mode > 0 && IsFlat(rdUv.UvLevels, numBlocks, WebpConstants.FlatnessLimitIUv)) { rdUv.R += WebpConstants.FlatnessPenality * numBlocks; @@ -271,16 +280,24 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Span src = it.YuvIn.AsSpan(Vp8EncIterator.YOffEnc); int nz = 0; int n; - short[] dcTmp = new short[16]; - short[] tmp = new short[16 * 16]; - Span tmpSpan = tmp.AsSpan(); + Span shortScratchSpan = it.Scratch2.AsSpan(); + Span scratch = it.Scratch3.AsSpan(0, 16); + shortScratchSpan.Clear(); + scratch.Clear(); + Span dcTmp = shortScratchSpan.Slice(0, 16); + Span tmp = shortScratchSpan.Slice(16, 16 * 16); for (n = 0; n < 16; n += 2) { - Vp8Encoding.FTransform2(src.Slice(WebpLookupTables.Vp8Scan[n]), reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 16), tmpSpan.Slice((n + 1) * 16, 16)); + Vp8Encoding.FTransform2( + src.Slice(WebpLookupTables.Vp8Scan[n]), + reference.Slice(WebpLookupTables.Vp8Scan[n]), + tmp.Slice(n * 16, 16), + tmp.Slice((n + 1) * 16, 16), + scratch); } - Vp8Encoding.FTransformWht(tmp, dcTmp); + Vp8Encoding.FTransformWht(tmp, dcTmp, scratch); nz |= QuantizeBlock(dcTmp, rd.YDcLevels, dqm.Y2) << 24; for (n = 0; n < 16; n += 2) @@ -288,14 +305,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Zero-out the first coeff, so that: a) nz is correct below, and // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. tmp[n * 16] = tmp[(n + 1) * 16] = 0; - nz |= Quantize2Blocks(tmpSpan.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.YAcLevels.AsSpan(n * 16, 32), dqm.Y1) << n; } // Transform back. - LossyUtils.TransformWht(dcTmp, tmpSpan); + LossyUtils.TransformWht(dcTmp, tmp, scratch); for (n = 0; n < 16; n += 2) { - Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmpSpan.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true); + Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8Scan[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8Scan[n]), true, scratch); } return nz; @@ -304,10 +321,13 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public static int ReconstructIntra4(Vp8EncIterator it, Vp8SegmentInfo dqm, Span levels, Span src, Span yuvOut, int mode) { Span reference = it.YuvP.AsSpan(Vp8Encoding.Vp8I4ModeOffsets[mode]); - short[] tmp = new short[16]; - Vp8Encoding.FTransform(src, reference, tmp); + Span tmp = it.Scratch2.AsSpan(0, 16); + Span scratch = it.Scratch3.AsSpan(0, 16); + tmp.Clear(); + scratch.Clear(); + Vp8Encoding.FTransform(src, reference, tmp, scratch); int nz = QuantizeBlock(tmp, levels, dqm.Y1); - Vp8Encoding.ITransform(reference, tmp, yuvOut, false); + Vp8Encoding.ITransform(reference, tmp, yuvOut, false, scratch); return nz; } @@ -318,27 +338,31 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Span src = it.YuvIn.AsSpan(Vp8EncIterator.UOffEnc); int nz = 0; int n; - short[] tmp = new short[8 * 16]; + Span tmp = it.Scratch2.AsSpan(0, 8 * 16); + Span scratch = it.Scratch3.AsSpan(0, 16); + tmp.Clear(); + scratch.Clear(); for (n = 0; n < 8; n += 2) { Vp8Encoding.FTransform2( src.Slice(WebpLookupTables.Vp8ScanUv[n]), reference.Slice(WebpLookupTables.Vp8ScanUv[n]), - tmp.AsSpan(n * 16, 16), - tmp.AsSpan((n + 1) * 16, 16)); + tmp.Slice(n * 16, 16), + tmp.Slice((n + 1) * 16, 16), + scratch); } CorrectDcValues(it, dqm.Uv, tmp, rd); for (n = 0; n < 8; n += 2) { - nz |= Quantize2Blocks(tmp.AsSpan(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; + nz |= Quantize2Blocks(tmp.Slice(n * 16, 32), rd.UvLevels.AsSpan(n * 16, 32), dqm.Uv) << n; } for (n = 0; n < 8; n += 2) { - Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.AsSpan(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true); + Vp8Encoding.ITransform(reference.Slice(WebpLookupTables.Vp8ScanUv[n]), tmp.Slice(n * 16, 32), yuvOut.Slice(WebpLookupTables.Vp8ScanUv[n]), true, scratch); } return nz << 16; @@ -556,7 +580,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return (sign ? -v0 : v0) >> DSCALE; } - public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, short[] tmp, Vp8ModeScore rd) + public static void CorrectDcValues(Vp8EncIterator it, Vp8Matrix mtx, Span tmp, Vp8ModeScore rd) { #pragma warning disable SA1005 // Single line comments should begin with single space // | top[0] | top[1] @@ -571,7 +595,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { Span top = it.TopDerr.AsSpan((it.X * 4) + ch, 2); Span left = it.LeftDerr.AsSpan(ch, 2); - Span c = tmp.AsSpan(ch * 4 * 16, 4 * 16); + Span c = tmp.Slice(ch * 4 * 16, 4 * 16); c[0] += (short)(((C1 * top[0]) + (C2 * left[0])) >> (DSHIFT - DSCALE)); int err0 = QuantizeSingle(c, mtx); c[1 * 16] += (short)(((C1 * top[1]) + (C2 * err0)) >> (DSHIFT - DSCALE)); diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs index ca3f8481e..79fd8d854 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs @@ -81,6 +81,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.I4Boundary = new byte[37]; this.BitCount = new long[4, 3]; this.Scratch = new byte[WebpConstants.Bps * 16]; + this.Scratch2 = new short[17 * 16]; + this.Scratch3 = new int[16]; // To match the C initial values of the reference implementation, initialize all with 204. byte defaultInitVal = 204; @@ -216,10 +218,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public int CountDown { get; set; } /// - /// Gets the scratch buffer. + /// Gets the byte scratch buffer. /// public byte[] Scratch { get; } + /// + /// Gets the short scratch buffer. + /// + public short[] Scratch2 { get; } + + /// + /// Gets the int scratch buffer. + /// + public int[] Scratch3 { get; } + public Vp8MacroBlockInfo CurrentMacroBlockInfo => this.Mb[this.currentMbIdx]; private Vp8MacroBlockInfo[] Mb { get; } @@ -380,7 +392,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int bestMode = 0; this.MakeLuma16Preds(); - for (mode = 0; mode < maxMode; ++mode) + for (mode = 0; mode < maxMode; mode++) { var histo = new Vp8Histogram(); histo.CollectHistogram(this.YuvIn.AsSpan(YOffEnc), this.YuvP.AsSpan(Vp8Encoding.Vp8I16ModeOffsets[mode]), 0, 16); @@ -499,9 +511,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.CurrentMacroBlockInfo.MacroBlockType = Vp8MacroBlockType.I4X4; } - public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba) + public int GetCostLuma16(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res) { - var res = new Vp8Residual(); int r = 0; // re-import the non-zero context. @@ -539,11 +550,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return WebpLookupTables.Vp8FixedCostsI4[top, left]; } - public int GetCostLuma4(short[] levels, Vp8EncProba proba) + public int GetCostLuma4(Span levels, Vp8EncProba proba, Vp8Residual res) { int x = this.I4 & 3; int y = this.I4 >> 2; - var res = new Vp8Residual(); int r = 0; res.Init(0, 3, proba); @@ -553,9 +563,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return r; } - public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba) + public int GetCostUv(Vp8ModeScore rd, Vp8EncProba proba, Vp8Residual res) { - var res = new Vp8Residual(); int r = 0; // re-import the non-zero context. @@ -741,7 +750,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy Vp8Encoding.EncPredChroma8(this.YuvP, left, top); } - public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx); + public void MakeIntra4Preds() => Vp8Encoding.EncPredLuma4(this.YuvP, this.I4Boundary, this.I4BoundaryIdx, this.Scratch.AsSpan(0, 4)); public void SwapOut() { diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 37808d56c..1a9d3a6e3 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -70,6 +70,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// private int uvAlpha; + /// + /// Scratch buffer to reduce allocations. + /// + private readonly int[] scratch = new int[16]; + private readonly byte[] averageBytesPerMb = { 50, 24, 16, 9, 7, 5, 3, 2 }; private const int NumMbSegments = 4; @@ -321,18 +326,19 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.StatLoop(width, height, yStride, uvStride); it.Init(); it.InitFilter(); + var info = new Vp8ModeScore(); + var residual = new Vp8Residual(); do { bool dontUseSkip = !this.Proba.UseSkipProba; - - var info = new Vp8ModeScore(); + info.Clear(); it.Import(y, u, v, yStride, uvStride, width, height, false); // Warning! order is important: first call VP8Decimate() and // *then* decide how to code the skip decision if there's one. if (!this.Decimate(it, ref info, this.rdOptLevel) || dontUseSkip) { - this.CodeResiduals(it, info); + this.CodeResiduals(it, info, residual); } else { @@ -447,9 +453,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy it.Init(); this.SetLoopParams(stats.Q); + var info = new Vp8ModeScore(); do { - var info = new Vp8ModeScore(); + info.Clear(); it.Import(y, u, v, yStride, uvStride, width, height, false); if (this.Decimate(it, ref info, rdOpt)) { @@ -930,10 +937,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return isSkipped; } - private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd) + private void CodeResiduals(Vp8EncIterator it, Vp8ModeScore rd, Vp8Residual residual) { int x, y, ch; - var residual = new Vp8Residual(); bool i16 = it.CurrentMacroBlockInfo.MacroBlockType == Vp8MacroBlockType.I16X16; int segment = it.CurrentMacroBlockInfo.Segment; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs index f8b4853e2..0567a0f27 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoding.cs @@ -68,22 +68,20 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } } - public static void ITransform(Span reference, Span input, Span dst, bool doTwo) + public static void ITransform(Span reference, Span input, Span dst, bool doTwo, Span scratch) { - ITransformOne(reference, input, dst); + ITransformOne(reference, input, dst, scratch); if (doTwo) { - ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4)); + ITransformOne(reference.Slice(4), input.Slice(16), dst.Slice(4), scratch); } } - public static void ITransformOne(Span reference, Span input, Span dst) + public static void ITransformOne(Span reference, Span input, Span dst, Span scratch) { int i; -#pragma warning disable SA1312 // Variable names should begin with lower-case letter - int[] C = new int[4 * 4]; -#pragma warning restore SA1312 // Variable names should begin with lower-case letter - Span tmp = C.AsSpan(); + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); for (i = 0; i < 4; i++) { // vertical pass. @@ -99,7 +97,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy input = input.Slice(1); } - tmp = C.AsSpan(); + tmp = scratch; for (i = 0; i < 4; i++) { // horizontal pass. @@ -116,16 +114,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } } - public static void FTransform2(Span src, Span reference, Span output, Span output2) + public static void FTransform2(Span src, Span reference, Span output, Span output2, Span scratch) { - FTransform(src, reference, output); - FTransform(src.Slice(4), reference.Slice(4), output2); + FTransform(src, reference, output, scratch); + FTransform(src.Slice(4), reference.Slice(4), output2, scratch); } - public static void FTransform(Span src, Span reference, Span output) + public static void FTransform(Span src, Span reference, Span output, Span scratch) { int i; - int[] tmp = new int[16]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); + int srcIdx = 0; int refIdx = 0; for (i = 0; i < 4; i++) @@ -160,9 +160,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } } - public static void FTransformWht(Span input, Span output) + public static void FTransformWht(Span input, Span output, Span scratch) { - int[] tmp = new int[16]; + Span tmp = scratch.Slice(0, 16); + tmp.Clear(); + int i; int inputIdx = 0; for (i = 0; i < 4; i++) @@ -234,11 +236,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // Left samples are top[-5 .. -2], top_left is top[-1], top are // located at top[0..3], and top right is top[4..7] - public static void EncPredLuma4(Span dst, Span top, int topOffset) + public static void EncPredLuma4(Span dst, Span top, int topOffset, Span vals) { Dc4(dst.Slice(I4DC4), top, topOffset); Tm4(dst.Slice(I4TM4), top, topOffset); - Ve4(dst.Slice(I4VE4), top, topOffset); + Ve4(dst.Slice(I4VE4), top, topOffset, vals); He4(dst.Slice(I4HE4), top, topOffset); Rd4(dst.Slice(I4RD4), top, topOffset); Vr4(dst.Slice(I4VR4), top, topOffset); @@ -395,20 +397,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } } - private static void Ve4(Span dst, Span top, int topOffset) + private static void Ve4(Span dst, Span top, int topOffset, Span vals) { // vertical - byte[] vals = - { - LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]), - LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]), - LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]), - LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4]) - }; - + vals[0] = LossyUtils.Avg3(top[topOffset - 1], top[topOffset], top[topOffset + 1]); + vals[1] = LossyUtils.Avg3(top[topOffset], top[topOffset + 1], top[topOffset + 2]); + vals[2] = LossyUtils.Avg3(top[topOffset + 1], top[topOffset + 2], top[topOffset + 3]); + vals[3] = LossyUtils.Avg3(top[topOffset + 2], top[topOffset + 3], top[topOffset + 4]); for (int i = 0; i < 4; i++) { - vals.AsSpan().CopyTo(dst.Slice(i * WebpConstants.Bps)); + vals.CopyTo(dst.Slice(i * WebpConstants.Bps)); } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs index 5d048514e..7192fa2d0 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs @@ -8,6 +8,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { internal class Vp8Histogram { + private readonly int[] scratch = new int[16]; + + private readonly short[] output = new short[16]; + + private readonly int[] distribution = new int[MaxCoeffThresh + 1]; + /// /// Size of histogram used by CollectHistogram. /// @@ -40,23 +46,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public void CollectHistogram(Span reference, Span pred, int startBlock, int endBlock) { int j; - int[] distribution = new int[MaxCoeffThresh + 1]; + this.distribution.AsSpan().Clear(); for (j = startBlock; j < endBlock; j++) { - short[] output = new short[16]; - - this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), output); + this.output.AsSpan().Clear(); + this.Vp8FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output); // Convert coefficients to bin. for (int k = 0; k < 16; ++k) { - int v = Math.Abs(output[k]) >> 3; + int v = Math.Abs(this.output[k]) >> 3; int clippedValue = ClipMax(v, MaxCoeffThresh); - ++distribution[clippedValue]; + ++this.distribution[clippedValue]; } } - this.SetHistogramData(distribution); + this.SetHistogramData(this.distribution); } public void Merge(Vp8Histogram other) @@ -97,7 +102,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy private void Vp8FTransform(Span src, Span reference, Span output) { int i; - int[] tmp = new int[16]; + Span tmp = this.scratch; + tmp.Clear(); + for (i = 0; i < 4; i++) { int d0 = src[0] - reference[0]; // 9bit dynamic range ([-255,255]) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs index 7182f6021..1c92a9d2d 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8ModeScore.cs @@ -1,6 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; + namespace SixLabors.ImageSharp.Formats.Webp.Lossy { /// @@ -93,6 +95,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// public int[,] Derr { get; } + public void Clear() + { + this.YDcLevels.AsSpan().Clear(); + this.YAcLevels.AsSpan().Clear(); + this.UvLevels.AsSpan().Clear(); + this.ModesI4.AsSpan().Clear(); + + for (int i = 0; i < 2; i++) + { + for (int j = 0; j < 3; j++) + { + this.Derr[i, j] = 0; + } + } + } + public void InitScore() { this.D = 0; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs index 93d76e283..2962ebbab 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs @@ -16,7 +16,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy public int CoeffType { get; set; } - public short[] Coeffs { get; set; } + public short[] Coeffs { get; } = new short[16]; public Vp8BandProbas[] Prob { get; set; } @@ -31,6 +31,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.Prob = prob.Coeffs[this.CoeffType]; this.Stats = prob.Stats[this.CoeffType]; this.Costs = prob.RemappedCosts[this.CoeffType]; + this.Coeffs.AsSpan().Clear(); } public void SetCoeffs(Span coeffs) @@ -46,7 +47,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } } - this.Coeffs = coeffs.Slice(0, 16).ToArray(); + coeffs.Slice(0, 16).CopyTo(this.Coeffs); } // Simulate block coding, but only record statistics. diff --git a/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs b/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs index ebb0b0aa4..4f283f9f5 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/WebpLossyDecoder.cs @@ -34,6 +34,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// private readonly Configuration configuration; + /// + /// Scratch buffer to reduce allocations. + /// + private readonly int[] scratch = new int[16]; + + /// + /// Another scratch buffer to reduce allocations. + /// + private readonly byte[] scratchBytes = new byte[4]; + /// /// Initializes a new instance of the class. /// @@ -395,7 +405,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy LossyUtils.TM4(dst, yuv, offset); break; case 2: - LossyUtils.VE4(dst, yuv, offset); + LossyUtils.VE4(dst, yuv, offset, this.scratchBytes); break; case 3: LossyUtils.HE4(dst, yuv, offset); @@ -420,7 +430,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy break; } - this.DoTransform(bits, coeffs.AsSpan(n * 16), dst); + this.DoTransform(bits, coeffs.AsSpan(n * 16), dst, this.scratch); } } else @@ -456,7 +466,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy { for (int n = 0; n < 16; ++n, bits <<= 2) { - this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n])); + this.DoTransform(bits, coeffs.AsSpan(n * 16), yDst.Slice(WebpConstants.Scan[n]), this.scratch); } } } @@ -496,8 +506,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy break; } - this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst); - this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst); + this.DoUVTransform(bitsUv, coeffs.AsSpan(16 * 16), uDst, this.scratch); + this.DoUVTransform(bitsUv >> 8, coeffs.AsSpan(20 * 16), vDst, this.scratch); // Stash away top samples for next block. if (mby < dec.MbHeight - 1) @@ -787,12 +797,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } } - private void DoTransform(uint bits, Span src, Span dst) + private void DoTransform(uint bits, Span src, Span dst, Span scratch) { switch (bits >> 30) { case 3: - LossyUtils.TransformOne(src, dst); + LossyUtils.TransformOne(src, dst, scratch); break; case 2: LossyUtils.TransformAc3(src, dst); @@ -803,7 +813,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy } } - private void DoUVTransform(uint bits, Span src, Span dst) + private void DoUVTransform(uint bits, Span src, Span dst, Span scratch) { // any non-zero coeff at all? if ((bits & 0xff) > 0) @@ -811,7 +821,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy // any non-zero AC coefficient? if ((bits & 0xaa) > 0) { - LossyUtils.TransformUv(src, dst); // note we don't use the AC3 variant for U/V. + LossyUtils.TransformUv(src, dst, scratch); // note we don't use the AC3 variant for U/V. } else { @@ -884,7 +894,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy if (nz > 1) { // More than just the DC -> perform the full transform. - LossyUtils.TransformWht(dc, dst); + LossyUtils.TransformWht(dc, dst, this.scratch); } else { diff --git a/tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs b/tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs index b48020198..d78f7e2f2 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/PredictorEncoderTests.cs @@ -90,9 +90,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits); uint[] transformData = new uint[transformWidth * transformHeight]; + int[] scratch = new int[256]; // act - PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData); + PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData, scratch); // assert Assert.Equal(expectedData, transformData); @@ -119,9 +120,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp int transformWidth = LosslessUtils.SubSampleSize(image.Width, colorTransformBits); int transformHeight = LosslessUtils.SubSampleSize(image.Height, colorTransformBits); uint[] transformData = new uint[transformWidth * transformHeight]; + int[] scratch = new int[256]; // act - PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData); + PredictorEncoder.ColorSpaceTransform(image.Width, image.Height, colorTransformBits, 75, bgra, transformData, scratch); // assert Assert.Equal(expectedData, transformData); From ed8d2afcb07d7f56e48f1b59351d229389aaea3a Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 13:26:31 +0100 Subject: [PATCH 2/8] Use Span version of Sort() to reduce allocations --- src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs | 5 +++++ src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs index f2321d681..6320983ba 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs @@ -202,9 +202,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } // Build the Huffman tree. +#if NET5_0_OR_GREATER + Span treeSlice = tree.AsSpan().Slice(0, treeSize); + treeSlice.Sort(HuffmanTree.Compare); +#else HuffmanTree[] treeCopy = tree.AsSpan().Slice(0, treeSize).ToArray(); Array.Sort(treeCopy, HuffmanTree.Compare); treeCopy.AsSpan().CopyTo(tree); +#endif if (treeSize > 1) { diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 818488696..29dbde8b0 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -1204,9 +1204,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return false; } +#if NET5_0_OR_GREATER + var paletteSlice = palette.Slice(0, this.PaletteSize); + paletteSlice.Sort(); +#else uint[] paletteArray = palette.Slice(0, this.PaletteSize).ToArray(); Array.Sort(paletteArray); paletteArray.CopyTo(palette); +#endif if (PaletteHasNonMonotonousDeltas(palette, this.PaletteSize)) { From 15a10126d29f5e6b9c42544bc0cb4388cf32bdfe Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 14:10:21 +0100 Subject: [PATCH 3/8] Define sse and avx masks as static readonly --- .../Formats/Webp/Lossless/LosslessUtils.cs | 65 +++++++++++-------- .../Formats/Webp/Lossless/PredictorEncoder.cs | 43 +++++++----- .../Formats/Webp/WebpCommonUtils.cs | 56 ++++++++-------- 3 files changed, 93 insertions(+), 71 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 06204ae91..c195eb0fe 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -27,6 +27,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private const double Log2Reciprocal = 1.44269504088896338700465094007086; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector256 AddGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + + private static readonly Vector128 AddGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + + private static readonly byte AddGreenToBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); + + private static readonly Vector256 SubtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); + + private static readonly Vector128 SubtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); + + private static readonly byte SubtractGreenFromBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); + + private static readonly Vector128 TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + private static readonly Vector128 TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + + private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); + + private static readonly Vector128 TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); +#endif + /// /// Returns the exact index where array1 and array2 are different. For an index /// inferior or equal to bestLenMatch, the return value just has to be strictly @@ -97,7 +121,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) { - var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -106,7 +129,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { uint* idx = p + i; Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte(); - Vector256 in0g0g = Avx2.Shuffle(input, mask); + Vector256 in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2); Vector256 output = Avx2.Add(input, in0g0g); Avx.Store((byte*)idx, output); } @@ -119,7 +142,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } else if (Ssse3.IsSupported) { - var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -128,7 +150,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { uint* idx = p + i; Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte(); - Vector128 in0g0g = Ssse3.Shuffle(input, mask); + Vector128 in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3); Vector128 output = Sse2.Add(input, in0g0g); Sse2.Store((byte*)idx, output.AsByte()); } @@ -141,7 +163,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } else if (Sse2.IsSupported) { - byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -151,8 +172,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless uint* idx = p + i; Vector128 input = Sse2.LoadVector128((ushort*)idx); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mask); - Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g Vector128 output = Sse2.Add(input.AsByte(), c.AsByte()); Sse2.Store((byte*)idx, output); } @@ -189,7 +210,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) { - var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -198,7 +218,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { uint* idx = p + i; Vector256 input = Avx.LoadVector256((ushort*)idx).AsByte(); - Vector256 in0g0g = Avx2.Shuffle(input, mask); + Vector256 in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2); Vector256 output = Avx2.Subtract(input, in0g0g); Avx.Store((byte*)idx, output); } @@ -211,7 +231,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } else if (Ssse3.IsSupported) { - var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -220,7 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { uint* idx = p + i; Vector128 input = Sse2.LoadVector128((ushort*)idx).AsByte(); - Vector128 in0g0g = Ssse3.Shuffle(input, mask); + Vector128 in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3); Vector128 output = Sse2.Subtract(input, in0g0g); Sse2.Store((byte*)idx, output.AsByte()); } @@ -233,7 +252,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } else if (Sse2.IsSupported) { - byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); int numPixels = pixelData.Length; fixed (uint* p = pixelData) { @@ -243,8 +261,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless uint* idx = p + i; Vector128 input = Sse2.LoadVector128((ushort*)idx); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g - Vector128 b = Sse2.ShuffleLow(a, mask); - Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g + Vector128 b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte()); Sse2.Store((byte*)idx, output); } @@ -394,9 +412,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); - var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); - byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); fixed (uint* src = data) { int idx; @@ -404,15 +419,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { uint* pos = src + idx; Vector128 input = Sse2.LoadVector128(pos); - Vector128 a = Sse2.And(input.AsByte(), maskalphagreen); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); + Vector128 a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); Vector128 f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); Vector128 g = Sse2.ShiftRightLogical(f.AsInt32(), 16); Vector128 h = Sse2.Add(g.AsByte(), d.AsByte()); - Vector128 i = Sse2.And(h, maskredblue); + Vector128 i = Sse2.And(h, TransformColorRedBlueMask); Vector128 output = Sse2.Subtract(input.AsByte(), i); Sse2.Store((byte*)pos, output); } @@ -460,8 +475,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); - var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); fixed (uint* src = pixelData) { int idx; @@ -469,9 +482,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless { uint* pos = src + idx; Vector128 input = Sse2.LoadVector128(pos); - Vector128 a = Sse2.And(input.AsByte(), maskalphagreen); - Vector128 b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); - Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); + Vector128 a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask); + Vector128 b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask); + Vector128 c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask); Vector128 d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128 e = Sse2.Add(input.AsByte(), d.AsByte()); Vector128 f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index 713fc7919..abb727447 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -36,6 +36,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private const int PredLowEffort = 11; +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector128 CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte(); + + private static readonly Vector128 CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte(); + + private static readonly Vector128 CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + private static readonly Vector128 CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); + + private static readonly Vector128 CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + + private static readonly Vector128 CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255); + + private static readonly Vector128 CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); +#endif + /// /// Finds the best predictor for each tile, and converts the image to residuals /// with respect to predictions. If nearLosslessQuality < 100, applies @@ -1039,9 +1055,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if (Sse41.IsSupported) { var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed)); - var maskgreen = Vector128.Create(0x00ff00); - var mask = Vector128.Create((short)0xff); - const int span = 8; Span values = stackalloc ushort[span]; for (int y = 0; y < tileHeight; y++) @@ -1057,15 +1070,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless uint* input1Idx = src + x + (span / 2); Vector128 input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte(); Vector128 input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte(); - Vector128 g0 = Sse2.And(input0, maskgreen.AsByte()); // 0 0 | g 0 - Vector128 g1 = Sse2.And(input1, maskgreen.AsByte()); + Vector128 g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0 + Vector128 g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask); Vector128 g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0 Vector128 a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r Vector128 a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16); Vector128 a = Sse41.PackUnsignedSaturate(a0, a1); // x r Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr Vector128 c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r' - Vector128 d = Sse2.And(c, mask.AsByte()); // 0 r' + Vector128 d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r' Sse2.Store(dst, d.AsUInt16()); for (int i = 0; i < span; i++) { @@ -1113,12 +1126,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless Span values = stackalloc ushort[span]; var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue)); var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue)); - var maskgreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - var maskgreenblue = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); - var maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); - var shufflerLow = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255); - var shufflerHigh = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); - for (int y = 0; y < tileHeight; y++) { Span srcSpan = bgra.Slice(y * stride); @@ -1132,18 +1139,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless uint* input1Idx = src + x + (span / 2); Vector128 input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte(); Vector128 input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte(); - Vector128 r0 = Ssse3.Shuffle(input0, shufflerLow); - Vector128 r1 = Ssse3.Shuffle(input1, shufflerHigh); + Vector128 r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask); + Vector128 r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask); Vector128 r = Sse2.Or(r0, r1); - Vector128 gb0 = Sse2.And(input0, maskgreenblue); - Vector128 gb1 = Sse2.And(input1, maskgreenblue); + Vector128 gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask); + Vector128 gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask); Vector128 gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); - Vector128 g = Sse2.And(gb.AsByte(), maskgreen); + Vector128 g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask); Vector128 a = Sse2.MultiplyHigh(r.AsInt16(), multsr); Vector128 b = Sse2.MultiplyHigh(g.AsInt16(), multsg); Vector128 c = Sse2.Subtract(gb.AsByte(), b.AsByte()); Vector128 d = Sse2.Subtract(c, a.AsByte()); - Vector128 e = Sse2.And(d, maskblue); + Vector128 e = Sse2.And(d, CollectColorBlueTransformsBlueMask); Sse2.Store(dst, e.AsUInt16()); for (int i = 0; i < span; i++) { diff --git a/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs b/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs index d6e8d0a06..4251af742 100644 --- a/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs +++ b/src/ImageSharp/Formats/Webp/WebpCommonUtils.cs @@ -16,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp /// internal static class WebpCommonUtils { +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector256 AlphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); + + private static readonly Vector256 All0x80Vector256 = Vector256.Create((byte)0x80).AsByte(); + + private static readonly Vector128 AlphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); + + private static readonly Vector128 All0x80 = Vector128.Create((byte)0x80).AsByte(); +#endif + /// /// Checks if the pixel row is not opaque. /// @@ -27,11 +37,6 @@ namespace SixLabors.ImageSharp.Formats.Webp if (Avx2.IsSupported) { ReadOnlySpan rowBytes = MemoryMarshal.AsBytes(row); - var alphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); - Vector256 all0x80Vector256 = Vector256.Create((byte)0x80).AsByte(); - var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); - Vector128 all0x80 = Vector128.Create((byte)0x80).AsByte(); - int i = 0; int length = (row.Length * 4) - 3; fixed (byte* src = rowBytes) @@ -42,14 +47,14 @@ namespace SixLabors.ImageSharp.Formats.Webp Vector256 a1 = Avx.LoadVector256(src + i + 32).AsByte(); Vector256 a2 = Avx.LoadVector256(src + i + 64).AsByte(); Vector256 a3 = Avx.LoadVector256(src + i + 96).AsByte(); - Vector256 b0 = Avx2.And(a0, alphaMaskVector256).AsInt32(); - Vector256 b1 = Avx2.And(a1, alphaMaskVector256).AsInt32(); - Vector256 b2 = Avx2.And(a2, alphaMaskVector256).AsInt32(); - Vector256 b3 = Avx2.And(a3, alphaMaskVector256).AsInt32(); + Vector256 b0 = Avx2.And(a0, AlphaMaskVector256).AsInt32(); + Vector256 b1 = Avx2.And(a1, AlphaMaskVector256).AsInt32(); + Vector256 b2 = Avx2.And(a2, AlphaMaskVector256).AsInt32(); + Vector256 b3 = Avx2.And(a3, AlphaMaskVector256).AsInt32(); Vector256 c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16(); Vector256 c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16(); Vector256 d = Avx2.PackSignedSaturate(c0, c1).AsByte(); - Vector256 bits = Avx2.CompareEqual(d, all0x80Vector256); + Vector256 bits = Avx2.CompareEqual(d, All0x80Vector256); int mask = Avx2.MoveMask(bits); if (mask != -1) { @@ -59,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Webp for (; i + 64 <= length; i += 64) { - if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) + if (IsNoneOpaque64Bytes(src, i)) { return true; } @@ -67,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Webp for (; i + 32 <= length; i += 32) { - if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) + if (IsNoneOpaque32Bytes(src, i)) { return true; } @@ -85,16 +90,13 @@ namespace SixLabors.ImageSharp.Formats.Webp else if (Sse2.IsSupported) { ReadOnlySpan rowBytes = MemoryMarshal.AsBytes(row); - var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); - Vector128 all0x80 = Vector128.Create((byte)0x80).AsByte(); - int i = 0; int length = (row.Length * 4) - 3; fixed (byte* src = rowBytes) { for (; i + 64 <= length; i += 64) { - if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) + if (IsNoneOpaque64Bytes(src, i)) { return true; } @@ -102,7 +104,7 @@ namespace SixLabors.ImageSharp.Formats.Webp for (; i + 32 <= length; i += 32) { - if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) + if (IsNoneOpaque32Bytes(src, i)) { return true; } @@ -133,20 +135,20 @@ namespace SixLabors.ImageSharp.Formats.Webp } #if SUPPORTS_RUNTIME_INTRINSICS - private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i, Vector128 alphaMask, Vector128 all0x80) + private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i) { Vector128 a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128 a1 = Sse2.LoadVector128(src + i + 16).AsByte(); Vector128 a2 = Sse2.LoadVector128(src + i + 32).AsByte(); Vector128 a3 = Sse2.LoadVector128(src + i + 48).AsByte(); - Vector128 b0 = Sse2.And(a0, alphaMask).AsInt32(); - Vector128 b1 = Sse2.And(a1, alphaMask).AsInt32(); - Vector128 b2 = Sse2.And(a2, alphaMask).AsInt32(); - Vector128 b3 = Sse2.And(a3, alphaMask).AsInt32(); + Vector128 b0 = Sse2.And(a0, AlphaMask).AsInt32(); + Vector128 b1 = Sse2.And(a1, AlphaMask).AsInt32(); + Vector128 b2 = Sse2.And(a2, AlphaMask).AsInt32(); + Vector128 b3 = Sse2.And(a3, AlphaMask).AsInt32(); Vector128 c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128 c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16(); Vector128 d = Sse2.PackSignedSaturate(c0, c1).AsByte(); - Vector128 bits = Sse2.CompareEqual(d, all0x80); + Vector128 bits = Sse2.CompareEqual(d, All0x80); int mask = Sse2.MoveMask(bits); if (mask != 0xFFFF) { @@ -156,15 +158,15 @@ namespace SixLabors.ImageSharp.Formats.Webp return false; } - private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i, Vector128 alphaMask, Vector128 all0x80) + private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i) { Vector128 a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128 a1 = Sse2.LoadVector128(src + i + 16).AsByte(); - Vector128 b0 = Sse2.And(a0, alphaMask).AsInt32(); - Vector128 b1 = Sse2.And(a1, alphaMask).AsInt32(); + Vector128 b0 = Sse2.And(a0, AlphaMask).AsInt32(); + Vector128 b1 = Sse2.And(a1, AlphaMask).AsInt32(); Vector128 c = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128 d = Sse2.PackSignedSaturate(c, c).AsByte(); - Vector128 bits = Sse2.CompareEqual(d, all0x80); + Vector128 bits = Sse2.CompareEqual(d, All0x80); int mask = Sse2.MoveMask(bits); if (mask != 0xFFFF) { From e51f5008c3a53f203d0d9f21957146f95a6bf17b Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sun, 31 Oct 2021 16:51:37 +0100 Subject: [PATCH 4/8] Add AggressiveInlining to LevelCosts --- src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs index 2962ebbab..4eeeedd37 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs @@ -2,6 +2,7 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Webp.Lossy { @@ -151,6 +152,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy return cost; } + [MethodImpl(InliningOptions.ShortMethod)] private static int LevelCost(Span table, int level) => WebpLookupTables.Vp8LevelFixedCosts[level] + table[level > WebpConstants.MaxVariableLevel ? WebpConstants.MaxVariableLevel : level]; From 67fd2d0427290e6a76eec0e49fb133986efbf3b6 Mon Sep 17 00:00:00 2001 From: Brian Popow <38701097+brianpopow@users.noreply.github.com> Date: Mon, 1 Nov 2021 13:07:39 +0100 Subject: [PATCH 5/8] Use ReadOnlySpan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Günther Foidl --- src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs | 3 ++- src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index abb727447..c6dc6b8b2 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -17,7 +17,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// internal static unsafe class PredictorEncoder { - private static readonly sbyte[] DeltaLut = { 16, 16, 8, 4, 2, 2, 2 }; + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 }; private static readonly sbyte[][] Offset = { diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index c46e7193f..1a9036ec9 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -85,7 +85,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private const int PaletteInvSize = 1 << PaletteInvSizeBits; - private static readonly byte[] Order = { 1, 2, 0, 3 }; + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan Order => new byte[] { 1, 2, 0, 3 }; /// /// Initializes a new instance of the class. From 86f4903c827635170e43cae57730bea4b951d6c7 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 1 Nov 2021 13:35:39 +0100 Subject: [PATCH 6/8] Fix build errors --- src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs | 6 +++--- src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index c6dc6b8b2..89c930561 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -17,9 +17,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// internal static unsafe class PredictorEncoder { - // This uses C#'s compiler optimization to refer to assembly's static data directly. - private static ReadOnlySpan DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 }; - private static readonly sbyte[][] Offset = { new sbyte[] { 0, -1 }, new sbyte[] { 0, 1 }, new sbyte[] { -1, 0 }, new sbyte[] { 1, 0 }, new sbyte[] { -1, -1 }, new sbyte[] { -1, 1 }, new sbyte[] { 1, -1 }, new sbyte[] { 1, 1 } @@ -53,6 +50,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static readonly Vector128 CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14); #endif + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan DeltaLut => new sbyte[] { 16, 16, 8, 4, 2, 2, 2 }; + /// /// Finds the best predictor for each tile, and converts the image to residuals /// with respect to predictions. If nearLosslessQuality < 100, applies diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 1a9036ec9..6a0a3184e 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -85,9 +85,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private const int PaletteInvSize = 1 << PaletteInvSizeBits; - // This uses C#'s compiler optimization to refer to assembly's static data directly. - private static ReadOnlySpan Order => new byte[] { 1, 2, 0, 3 }; - /// /// Initializes a new instance of the class. /// @@ -140,6 +137,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } } + // This uses C#'s compiler optimization to refer to assembly's static data directly. + private static ReadOnlySpan Order => new byte[] { 1, 2, 0, 3 }; + /// /// Gets the memory for the image data as packed bgra values. /// From 853b1173697c0f56084eea21fd7d04f40764fa96 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Mon, 1 Nov 2021 19:46:24 +0100 Subject: [PATCH 7/8] Make histo and best histo array readonly --- src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs index 6a0a3184e..da815a479 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LEncoder.cs @@ -24,9 +24,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless /// private readonly int[] scratch = new int[256]; - private int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] }; + private readonly int[][] histoArgb = { new int[256], new int[256], new int[256], new int[256] }; - private int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] }; + private readonly int[][] bestHisto = { new int[256], new int[256], new int[256], new int[256] }; /// /// The to use for buffer allocations. From e97c364b373ffcc8bf11295ee9597bff3af7b927 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 5 Nov 2021 12:40:26 +0100 Subject: [PATCH 8/8] Use AsSpan() parameters to slice --- src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs index 6320983ba..3c81f1a22 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs @@ -203,10 +203,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless // Build the Huffman tree. #if NET5_0_OR_GREATER - Span treeSlice = tree.AsSpan().Slice(0, treeSize); + Span treeSlice = tree.AsSpan(0, treeSize); treeSlice.Sort(HuffmanTree.Compare); #else - HuffmanTree[] treeCopy = tree.AsSpan().Slice(0, treeSize).ToArray(); + HuffmanTree[] treeCopy = tree.AsSpan(0, treeSize).ToArray(); Array.Sort(treeCopy, HuffmanTree.Compare); treeCopy.AsSpan().CopyTo(tree); #endif