From 6885d97c8834f3cf29b89380a5cd4cc3d05a9d79 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 9 Nov 2021 19:59:36 -0800 Subject: [PATCH 1/5] hoist some calculations out --- .../Quantization/WuQuantizer{TPixel}.cs | 26 +++++++++++++++++++ .../Codecs/EncodeIndexedPng.cs | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs index bf4a5ca41..9b8263163 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs @@ -418,18 +418,44 @@ namespace SixLabors.ImageSharp.Processing.Processors.Quantization for (int r = 1; r < IndexCount; r++) { volumeSpan.Clear(); +#if ALTERNATE + int ind1_r = (r << ((IndexBits * 2) + IndexAlphaBits)) + + (r << (IndexBits + IndexAlphaBits + 1)) + + (r << (IndexBits * 2)) + + (r << (IndexBits + 1)) + + r; +#endif for (int g = 1; g < IndexCount; g++) { areaSpan.Clear(); +#if ALTERNATE + int ind1_g = ind1_r + + (g << (IndexBits + IndexAlphaBits)) + + (g << IndexBits) + + g; + + int r_g = r + g; +#endif + for (int b = 1; b < IndexCount; b++) { +#if ALTERNATE + int ind1_b = ind1_g + + ((r_g + b) << IndexAlphaBits) + + b; +#endif + Moment line = default; for (int a = 1; a < IndexAlphaCount; a++) { +#if ALTERNATE + int ind1 = ind1_b + a; +#else int ind1 = GetPaletteIndex(r, g, b, a); +#endif line += momentSpan[ind1]; areaSpan[a] += line; diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs b/tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs index de8d41202..b33922262 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs @@ -85,7 +85,7 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs public void PngCoreWuNoDither() { using var memoryStream = new MemoryStream(); - var options = new PngEncoder { Quantizer = new WuQuantizer(new QuantizerOptions { Dither = null }) }; + var options = new PngEncoder { Quantizer = new WuQuantizer(new QuantizerOptions { Dither = null }), ColorType = PngColorType.Palette }; this.bmpCore.SaveAsPng(memoryStream, options); } } From c0d5dfbac59d1ce6937f3be61ccb0841f59e702d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 10 Nov 2021 23:55:01 -0800 Subject: [PATCH 2/5] Better variable names --- .../Quantization/WuQuantizer{TPixel}.cs | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs index 9b8263163..2824f53ee 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs @@ -417,45 +417,40 @@ namespace SixLabors.ImageSharp.Processing.Processors.Quantization for (int r = 1; r < IndexCount; r++) { - volumeSpan.Clear(); -#if ALTERNATE - int ind1_r = (r << ((IndexBits * 2) + IndexAlphaBits)) + + // Currently, RyuJIT hoists the invariants of multi-level nested loop only to the + // immediate outer loop. See https://github.com/dotnet/runtime/issues/61420 + // To ensure the calculation doesn't happen repeatedly, hoist some of the calculations + // in the form of ind1* manually. + int ind1R = (r << ((IndexBits * 2) + IndexAlphaBits)) + (r << (IndexBits + IndexAlphaBits + 1)) + (r << (IndexBits * 2)) + (r << (IndexBits + 1)) + r; -#endif + + volumeSpan.Clear(); for (int g = 1; g < IndexCount; g++) { - areaSpan.Clear(); - -#if ALTERNATE - int ind1_g = ind1_r + + int ind1G = ind1R + (g << (IndexBits + IndexAlphaBits)) + (g << IndexBits) + g; - int r_g = r + g; -#endif + + areaSpan.Clear(); for (int b = 1; b < IndexCount; b++) { -#if ALTERNATE - int ind1_b = ind1_g + + int ind1B = ind1G + ((r_g + b) << IndexAlphaBits) + b; -#endif Moment line = default; for (int a = 1; a < IndexAlphaCount; a++) { -#if ALTERNATE - int ind1 = ind1_b + a; -#else - int ind1 = GetPaletteIndex(r, g, b, a); -#endif + int ind1 = ind1B + a; + line += momentSpan[ind1]; areaSpan[a] += line; From 57357b076a939011b3a80f5f83700e90e0899c9a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 12 Nov 2021 07:41:08 -0800 Subject: [PATCH 3/5] Optimize Mark --- .../Quantization/WuQuantizer{TPixel}.cs | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs index 2824f53ee..4218810d7 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs @@ -649,13 +649,35 @@ namespace SixLabors.ImageSharp.Processing.Processors.Quantization for (int r = cube.RMin + 1; r <= cube.RMax; r++) { + // Currently, RyuJIT hoists the invariants of multi-level nested loop only to the + // immediate outer loop. See https://github.com/dotnet/runtime/issues/61420 + // To ensure the calculation doesn't happen repeatedly, hoist some of the calculations + // in the form of ind1* manually. + int ind1R = (r << ((IndexBits * 2) + IndexAlphaBits)) + + (r << (IndexBits + IndexAlphaBits + 1)) + + (r << (IndexBits * 2)) + + (r << (IndexBits + 1)) + + r; + for (int g = cube.GMin + 1; g <= cube.GMax; g++) { + int ind1G = ind1R + + (g << (IndexBits + IndexAlphaBits)) + + (g << IndexBits) + + g; + int r_g = r + g; + for (int b = cube.BMin + 1; b <= cube.BMax; b++) { + int ind1B = ind1G + + ((r_g + b) << IndexAlphaBits) + + b; + for (int a = cube.AMin + 1; a <= cube.AMax; a++) { - tagSpan[GetPaletteIndex(r, g, b, a)] = label; + int index = ind1B + a; + + tagSpan[index] = label; } } } From bdd728e4d331aa89124d113051e4483d040d6ca2 Mon Sep 17 00:00:00 2001 From: Berkan Diler Date: Sun, 14 Nov 2021 10:32:59 +0100 Subject: [PATCH 4/5] Revert a de-optimization from #1734 and add a comment --- src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index d9d42e061..abe59516f 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -288,8 +288,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg /// The number of components to write. private void WriteDefineHuffmanTables(int componentCount) { + // This uses a C#'s compiler optimization that refers to the static data segment of the assembly, + // and doesn't incur any allocation at all. // Table identifiers. - ReadOnlySpan headers = stackalloc byte[] + ReadOnlySpan headers = new byte[] { 0x00, 0x10, From c22919d55eb02ee1a553e0dd3d8bb7b67b701d21 Mon Sep 17 00:00:00 2001 From: Berkan Diler Date: Sun, 14 Nov 2021 10:58:46 +0100 Subject: [PATCH 5/5] Replace Span.Fill(default) calls with Span.Clear() Span.Clear() is more optimized than Span.Fill(default) --- .../Tiff/Compression/Compressors/TiffLzwEncoder.cs | 4 ++-- .../Compression/Decompressors/T6TiffCompression.cs | 12 ++++++++++-- src/ImageSharp/Formats/Webp/Lossless/CostModel.cs | 2 +- .../Formats/Webp/Lossless/HistogramEncoder.cs | 2 +- src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs | 2 +- .../Formats/Webp/Lossless/Vp8LHistogram.cs | 10 +++++----- src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs | 8 ++++---- src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs | 2 +- 8 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs b/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs index baeabdbb2..d4d1d1cb6 100644 --- a/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs +++ b/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs @@ -256,8 +256,8 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Compressors private void ResetTables() { - this.children.GetSpan().Fill(0); - this.siblings.GetSpan().Fill(0); + this.children.GetSpan().Clear(); + this.siblings.GetSpan().Clear(); this.bitsPerCode = MinBits; this.maxCode = MaxValue(this.bitsPerCode); this.nextValidCode = EoiCode + 1; diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs index e86418741..972f4d8ff 100644 --- a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs +++ b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/T6TiffCompression.cs @@ -64,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors uint bitsWritten = 0; for (int y = 0; y < height; y++) { - scanLine.Fill(0); + scanLine.Clear(); Decode2DScanline(bitReader, this.isWhiteZero, referenceScanLine, scanLine); bitsWritten = this.WriteScanLine(buffer, scanLine, bitsWritten); @@ -116,7 +116,15 @@ namespace SixLabors.ImageSharp.Formats.Tiff.Compression.Decompressors { // If a TIFF reader encounters EOFB before the expected number of lines has been extracted, // it is appropriate to assume that the missing rows consist entirely of white pixels. - scanline.Fill(whiteIsZero ? (byte)0 : (byte)255); + if (whiteIsZero) + { + scanline.Clear(); + } + else + { + scanline.Fill((byte)255); + } + break; } diff --git a/src/ImageSharp/Formats/Webp/Lossless/CostModel.cs b/src/ImageSharp/Formats/Webp/Lossless/CostModel.cs index 7f4d0307b..bdaf30dc9 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/CostModel.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/CostModel.cs @@ -87,7 +87,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless if (nonzeros <= 1) { - output.AsSpan(0, numSymbols).Fill(0); + output.AsSpan(0, numSymbols).Clear(); } else { diff --git a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs index 5d407d73c..b52f8eb5d 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HistogramEncoder.cs @@ -287,7 +287,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless // Create a mapping from a cluster id to its minimal version. int clusterMax = 0; - clusterMappingsTmp.AsSpan().Fill(0); + clusterMappingsTmp.AsSpan().Clear(); // Re-map the ids. for (int i = 0; i < symbols.Length; i++) diff --git a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs index 3c81f1a22..5db01ca1c 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/HuffmanUtils.cs @@ -28,7 +28,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless public static void CreateHuffmanTree(uint[] histogram, int treeDepthLimit, bool[] bufRle, HuffmanTree[] huffTree, HuffmanTreeCode huffCode) { int numSymbols = huffCode.NumSymbols; - bufRle.AsSpan().Fill(false); + bufRle.AsSpan().Clear(); OptimizeHuffmanForRle(numSymbols, bufRle, histogram); GenerateOptimalTree(huffTree, histogram, numSymbols, treeDepthLimit, huffCode.CodeLengths); diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs index 8b0201568..bdb53f5c6 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs @@ -320,7 +320,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } else { - output.Literal.AsSpan(0, literalSize).Fill(0); + output.Literal.AsSpan(0, literalSize).Clear(); } } @@ -343,7 +343,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } else { - output.Red.AsSpan(0, size).Fill(0); + output.Red.AsSpan(0, size).Clear(); } } @@ -366,7 +366,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } else { - output.Blue.AsSpan(0, size).Fill(0); + output.Blue.AsSpan(0, size).Clear(); } } @@ -389,7 +389,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } else { - output.Alpha.AsSpan(0, size).Fill(0); + output.Alpha.AsSpan(0, size).Clear(); } } @@ -412,7 +412,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless } else { - output.Distance.AsSpan(0, size).Fill(0); + output.Distance.AsSpan(0, size).Clear(); } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs index 6279aef65..fcd61f2c0 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs @@ -911,7 +911,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy this.LeftNz[8] = 0; - this.LeftDerr.AsSpan().Fill(0); + this.LeftDerr.AsSpan().Clear(); } private void InitTop() @@ -919,14 +919,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int topSize = this.mbw * 16; this.YTop.AsSpan(0, topSize).Fill(127); this.UvTop.AsSpan().Fill(127); - this.Nz.AsSpan().Fill(0); + this.Nz.AsSpan().Clear(); int predsW = (4 * this.mbw) + 1; int predsH = (4 * this.mbh) + 1; int predsSize = predsW * predsH; - this.Preds.AsSpan(predsSize + this.predsWidth, this.mbw).Fill(0); + this.Preds.AsSpan(predsSize + this.predsWidth, this.mbw).Clear(); - this.TopDerr.AsSpan().Fill(0); + this.TopDerr.AsSpan().Clear(); } private int Bit(uint nz, int n) => (nz & (1 << n)) != 0 ? 1 : 0; diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs index 8a4115d21..37e09d080 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Encoder.cs @@ -546,7 +546,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy int predsW = (4 * this.Mbw) + 1; int predsH = (4 * this.Mbh) + 1; int predsSize = predsW * predsH; - this.Preds.AsSpan(predsSize + this.PredsWidth - 4, 4).Fill(0); + this.Preds.AsSpan(predsSize + this.PredsWidth - 4, 4).Clear(); this.Nz[0] = 0; // constant }