Browse Source

Merge pull request #1673 from br3aker/jpeg-encoder-optimization

Yet another jpeg encoder optimization
pull/1674/head
James Jackson-South 5 years ago
committed by GitHub
parent
commit
89ab2ba69a
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs
  2. 155
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs
  3. 14
      src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanSpec.cs
  4. 28
      tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs
  5. 241
      tests/ImageSharp.Tests/Formats/Jpg/HuffmanScanEncoderTests.cs

8
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanLut.cs

@ -5,8 +5,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
/// <summary>
/// A compiled look-up table representation of a huffmanSpec.
/// Each value maps to a uint32 of which the 8 most significant bits hold the
/// codeword size in bits and the 24 least significant bits hold the codeword.
/// Each value maps to an int32 of which the 24 most significant bits hold the
/// codeword and the 8 least significant bits hold the codeword size in bits.
/// The maximum codeword size is 16 bits.
/// </summary>
internal readonly struct HuffmanLut
@ -51,10 +51,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
for (int i = 0; i < spec.Count.Length; i++)
{
int bits = (i + 1) << 24;
int len = i + 1;
for (int j = 0; j < spec.Count[i]; j++)
{
this.Values[spec.Values[k]] = bits | code;
this.Values[spec.Values[k]] = len | (code << 8);
code++;
k++;
}

155
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanScanEncoder.cs

@ -3,6 +3,10 @@
using System.IO;
using System.Runtime.CompilerServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using System.Threading;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
@ -11,6 +15,12 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
internal class HuffmanScanEncoder
{
/// <summary>
/// Compiled huffman tree to encode given values.
/// </summary>
/// <remarks>Yields codewords by index consisting of [run length | bitsize].</remarks>
private HuffmanLut[] huffmanTables;
/// <summary>
/// Number of bytes cached before being written to target stream via Stream.Write(byte[], offset, count).
/// </summary>
@ -64,6 +74,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
public void Encode444<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
var unzig = ZigZag.CreateUnzigTable();
// ReSharper disable once InconsistentNaming
@ -122,6 +134,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
public void Encode420<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
var unzig = ZigZag.CreateUnzigTable();
// ReSharper disable once InconsistentNaming
@ -187,6 +201,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
public void EncodeGrayscale<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
this.huffmanTables = HuffmanLut.TheHuffmanLut;
var unzig = ZigZag.CreateUnzigTable();
// ReSharper disable once InconsistentNaming
@ -243,16 +259,16 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
Block8x8F.Quantize(ref refTemp1, ref refTemp2, ref quant, ref unZig);
int dc = (int)refTemp2[0];
// Emit the DC delta.
this.EmitHuffRLE((2 * (int)index) + 0, 0, dc - prevDC);
int dc = (int)refTemp2[0];
this.EmitDirectCurrentTerm(this.huffmanTables[2 * (int)index].Values, dc - prevDC);
// Emit the AC components.
int h = (2 * (int)index) + 1;
int runLength = 0;
int[] acHuffTable = this.huffmanTables[(2 * (int)index) + 1].Values;
for (int zig = 1; zig < Block8x8F.Size; zig++)
int runLength = 0;
int lastValuableIndex = GetLastValuableElementIndex(ref refTemp2);
for (int zig = 1; zig <= lastValuableIndex; zig++)
{
int ac = (int)refTemp2[zig];
@ -264,18 +280,21 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
while (runLength > 15)
{
this.EmitHuff(h, 0xf0);
this.EmitHuff(acHuffTable, 0xf0);
runLength -= 16;
}
this.EmitHuffRLE(h, runLength, ac);
this.EmitHuffRLE(acHuffTable, runLength, ac);
runLength = 0;
}
}
if (runLength > 0)
// if mcu block contains trailing zeros - we must write end of block (EOB) value indicating that current block is over
// this can be done for any number of trailing zeros, even when all 63 ac values are zero
// (Block8x8F.Size - 1) == 63 - last index of the mcu elements
if (lastValuableIndex != Block8x8F.Size - 1)
{
this.EmitHuff(h, 0x00);
this.EmitHuff(acHuffTable, 0x00);
}
return dc;
@ -306,6 +325,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
byte b = (byte)(bits >> 24);
this.emitBuffer[this.emitLen++] = b;
// Adding stuff byte
// This is because by JPEG standard scan data can contain JPEG markers (indicated by the 0xFF byte, followed by a non-zero byte)
// Considering this every 0xFF byte must be followed by 0x00 padding byte to signal that this is not a marker
if (b == byte.MaxValue)
{
this.emitBuffer[this.emitLen++] = byte.MinValue;
@ -334,23 +357,43 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// <summary>
/// Emits the given value with the given Huffman encoder.
/// </summary>
/// <param name="index">The index of the Huffman encoder</param>
/// <param name="table">Compiled Huffman spec values.</param>
/// <param name="value">The value to encode.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuff(int index, int value)
private void EmitHuff(int[] table, int value)
{
int x = HuffmanLut.TheHuffmanLut[index].Values[value];
this.Emit(x & ((1 << 24) - 1), x >> 24);
int x = table[value];
this.Emit(x >> 8, x & 0xff);
}
/// <summary>
/// Emits a DC term: first the Huffman code for the bit length of the value,
/// then, when that bit length is non-zero, the low bits of the value itself.
/// </summary>
/// <param name="table">Compiled Huffman spec values.</param>
/// <param name="value">The value to encode.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitDirectCurrentTerm(int[] table, int value)
{
    bool isNegative = value < 0;

    // The bit length is computed from the magnitude; for negative input the emitted bits come from (value - 1).
    int magnitude = isNegative ? -value : value;
    int payload = isNegative ? value - 1 : value;

    int bitCount = GetHuffmanEncodingLength((uint)magnitude);
    this.EmitHuff(table, bitCount);

    // A zero bit length means there are no extra bits to write after the Huffman code.
    if (bitCount <= 0)
    {
        return;
    }

    int payloadMask = (1 << bitCount) - 1;
    this.Emit(payload & payloadMask, bitCount);
}
/// <summary>
/// Emits a run of runLength copies of value encoded with the given Huffman encoder.
/// </summary>
/// <param name="index">The index of the Huffman encoder</param>
/// <param name="table">Compiled Huffman spec values.</param>
/// <param name="runLength">The number of copies to encode.</param>
/// <param name="value">The value to encode.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuffRLE(int index, int runLength, int value)
private void EmitHuffRLE(int[] table, int runLength, int value)
{
int a = value;
int b = value;
@ -362,11 +405,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
int bt = GetHuffmanEncodingLength((uint)a);
this.EmitHuff(index, (runLength << 4) | bt);
if (bt > 0)
{
this.Emit(b & ((1 << bt) - 1), bt);
}
this.EmitHuff(table, (runLength << 4) | bt);
this.Emit(b & ((1 << bt) - 1), bt);
}
/// <summary>
@ -380,11 +420,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
if (padBitsCount != 0)
{
this.Emit((1 << padBitsCount) - 1, padBitsCount);
}
// flush remaining bytes
if (this.emitLen != 0)
{
this.target.Write(this.emitBuffer, 0, this.emitLen);
}
}
@ -393,11 +428,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
/// Calculates how many minimum bits needed to store given value for Huffman jpeg encoding.
/// </summary>
/// <remarks>
/// This method returns 0 for input value 0. This is done specifically for huffman encoding.
/// This is an internal operation supposed to be used only in <see cref="HuffmanScanEncoder"/> class for jpeg encoding.
/// </remarks>
/// <param name="value">The value.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetHuffmanEncodingLength(uint value)
[MethodImpl(InliningOptions.ShortMethod)]
internal static int GetHuffmanEncodingLength(uint value)
{
DebugGuard.IsTrue(value <= (1 << 16), "Huffman encoder is supposed to encode a value of 16bit size max");
#if SUPPORTS_BITOPERATIONS
@ -423,5 +458,67 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
return Numerics.Log2(value << 1);
#endif
}
/// <summary>
/// Returns index of the last non-zero element in given mcu block.
/// Elements are tested after truncation to <see cref="int"/>, on every code path.
/// If all values of the mcu block are zero, this method might return different results depending on the runtime and hardware support.
/// This is jpeg mcu specific code, mcu[0] stores a dc value which will be encoded outside of the loop.
/// This method is guaranteed to return either -1 or 0 if all elements are zero.
/// </summary>
/// <remarks>
/// This is an internal operation supposed to be used only in <see cref="HuffmanScanEncoder"/> class for jpeg encoding.
/// </remarks>
/// <param name="mcu">Mcu block.</param>
/// <returns>Index of the last non-zero element.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
internal static int GetLastValuableElementIndex(ref Block8x8F mcu)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        // MoveMask over 32 bytes yields all 32 bits set when every lane compared equal to zero.
        const int equalityMask = unchecked((int)0b1111_1111_1111_1111_1111_1111_1111_1111);

        Vector256<int> zero8 = Vector256<int>.Zero;

        ref Vector256<float> mcuStride = ref mcu.V0;

        // Walk the eight 8-element strides from the end of the block towards the start.
        for (int i = 7; i >= 0; i--)
        {
            // Use the truncating conversion so that the stride-level test agrees with the
            // per-element `(int)` cast used below and in the scalar fallback. The rounding
            // conversion could flag a stride as non-zero while every element of it truncates to zero.
            Vector256<int> strideValues = Avx.ConvertToVector256Int32WithTruncation(Unsafe.Add(ref mcuStride, i));
            int areEqual = Avx2.MoveMask(Avx2.CompareEqual(strideValues, zero8).AsByte());

            // we do not know for sure if this stride contain all non-zero elements or if it has some trailing zeros
            if (areEqual != equalityMask)
            {
                // last index in the stride, we go from the end to the start of the stride
                int startIndex = i * 8;
                int index = startIndex + 7;
                ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref mcu);
                while (index >= startIndex && (int)Unsafe.Add(ref elemRef, index) == 0)
                {
                    index--;
                }

                return index;
            }
        }

        // this implementation will return -1 if all ac components are zero and dc are zero
        return -1;
    }
    else
#endif
    {
        int index = Block8x8F.Size - 1;
        ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref mcu);

        // Element 0 is never inspected here: the scan stops as soon as index reaches 0.
        while (index > 0 && (int)Unsafe.Add(ref elemRef, index) == 0)
        {
            index--;
        }

        // this implementation will return 0 if all ac components and dc are zero
        return index;
    }
}
}
}

14
src/ImageSharp/Formats/Jpeg/Components/Encoder/HuffmanSpec.cs

@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
@ -24,9 +24,11 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
0, 0, 0
},
new byte[]
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
}),
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
}),
// Luminance AC.
new HuffmanSpec(
new byte[]
{
@ -60,6 +62,8 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa
}),
// Chrominance DC.
new HuffmanSpec(
new byte[]
{
@ -132,4 +136,4 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
this.Values = values;
}
}
}
}

28
tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs

@ -114,21 +114,21 @@ namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg
BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19042
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT [AttachedDebugger]
[Host] : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT
DefaultJob : .NET Core 3.1.13 (CoreCLR 4.700.21.11102, CoreFX 4.700.21.11602), X64 RyuJIT
| Method | Quality | Mean | Error | StdDev | Ratio | RatioSD |
|---------------------------- |-------- |---------:|---------:|---------:|------:|--------:|
| 'System.Drawing Jpeg 4:2:0' | 75 | 30.60 ms | 0.496 ms | 0.464 ms | 1.00 | 0.00 |
| 'ImageSharp Jpeg 4:2:0' | 75 | 29.86 ms | 0.350 ms | 0.311 ms | 0.98 | 0.02 |
| 'ImageSharp Jpeg 4:4:4' | 75 | 45.36 ms | 0.899 ms | 1.036 ms | 1.48 | 0.05 |
| | | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 90 | 34.05 ms | 0.669 ms | 0.687 ms | 1.00 | 0.00 |
| 'ImageSharp Jpeg 4:2:0' | 90 | 37.26 ms | 0.706 ms | 0.660 ms | 1.10 | 0.03 |
| 'ImageSharp Jpeg 4:4:4' | 90 | 52.54 ms | 0.579 ms | 0.514 ms | 1.55 | 0.04 |
| | | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 100 | 39.36 ms | 0.267 ms | 0.237 ms | 1.00 | 0.00 |
| 'ImageSharp Jpeg 4:2:0' | 100 | 42.44 ms | 0.410 ms | 0.383 ms | 1.08 | 0.01 |
| 'ImageSharp Jpeg 4:4:4' | 100 | 70.88 ms | 0.508 ms | 0.450 ms | 1.80 | 0.02 |
| Method | Quality | Mean | Error | StdDev | Ratio |
|---------------------------- |-------- |---------:|---------:|---------:|------:|
| 'System.Drawing Jpeg 4:2:0' | 75 | 29.41 ms | 0.108 ms | 0.096 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 75 | 26.30 ms | 0.131 ms | 0.109 ms | 0.89 |
| 'ImageSharp Jpeg 4:4:4' | 75 | 36.70 ms | 0.303 ms | 0.269 ms | 1.25 |
| | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 90 | 32.67 ms | 0.226 ms | 0.211 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 90 | 33.56 ms | 0.237 ms | 0.222 ms | 1.03 |
| 'ImageSharp Jpeg 4:4:4' | 90 | 44.82 ms | 0.250 ms | 0.234 ms | 1.37 |
| | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 100 | 39.06 ms | 0.233 ms | 0.218 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 100 | 40.23 ms | 0.225 ms | 0.277 ms | 1.03 |
| 'ImageSharp Jpeg 4:4:4' | 100 | 63.35 ms | 0.486 ms | 0.431 ms | 1.62 |
*/

241
tests/ImageSharp.Tests/Formats/Jpg/HuffmanScanEncoderTests.cs

@ -0,0 +1,241 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using SixLabors.ImageSharp.Formats.Jpeg.Components;
using SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;
using Xunit.Abstractions;
// ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Tests.Formats.Jpg
{
[Trait("Format", "Jpg")]
public class HuffmanScanEncoderTests
{
/// <summary>
/// Initializes a new instance of the <see cref="HuffmanScanEncoderTests"/> class.
/// </summary>
/// <param name="output">Test output helper stored in <see cref="Output"/>.</param>
public HuffmanScanEncoderTests(ITestOutputHelper output) => this.Output = output;

/// <summary>
/// Gets the test output helper supplied to the constructor.
/// </summary>
private ITestOutputHelper Output { get; }
/// <summary>
/// Reference implementation used to cross-check <c>HuffmanScanEncoder.GetHuffmanEncodingLength</c>.
/// Performs a halving search with shifts of 16, 8, 4 and 2, then adds one more bit if anything is left.
/// </summary>
/// <param name="number">The value to measure.</param>
/// <returns>The computed bit count; 0 for input 0.</returns>
private static int GetHuffmanEncodingLength_Reference(uint number)
{
    int bits = 0;

    // Thresholds are 32767, 127, 7 and 1 for shifts 16, 8, 4 and 2 respectively.
    for (int shift = 16; shift >= 2; shift >>= 1)
    {
        uint threshold = (1u << (shift - 1)) - 1;
        if (number > threshold)
        {
            number >>= shift;
            bits += shift;
        }
    }

    return number > 0 ? bits + 1 : bits;
}
/// <summary>
/// Verifies that an input of zero yields an encoding length of zero.
/// </summary>
[Fact]
public void GetHuffmanEncodingLength_Zero()
{
    const uint input = 0;

    int actual = HuffmanScanEncoder.GetHuffmanEncodingLength(input);

    Assert.Equal(0, actual);
}
/// <summary>
/// Compares the encoder's bit-length calculation against the reference implementation
/// for 1000 pseudo-random values below 2^16.
/// </summary>
/// <param name="seed">Seed for the pseudo-random generator, keeping the run reproducible.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetHuffmanEncodingLength_Random(int seed)
{
    const int exclusiveUpperBound = 1 << 16;
    const int iterations = 1000;

    var rng = new Random(seed);
    int remaining = iterations;
    while (remaining-- > 0)
    {
        uint number = (uint)rng.Next(0, exclusiveUpperBound);

        int actual = HuffmanScanEncoder.GetHuffmanEncodingLength(number);

        Assert.Equal(GetHuffmanEncodingLength_Reference(number), actual);
    }
}
/// <summary>
/// Verifies the result for a block in which every element is zero.
/// The result may differ between hardware paths, so a range is asserted instead of a single value.
/// </summary>
[Fact]
public void GetLastValuableElementIndex_AllZero()
{
    static void RunTest()
    {
        Block8x8F data = default;

        int actual = HuffmanScanEncoder.GetLastValuableElementIndex(ref data);

        // The documented contract for an all-zero block is "either -1 or 0".
        // Asserting both bounds rejects any other negative value, which an upper-bound-only check would accept.
        Assert.InRange(actual, -1, 0);
    }

    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
/// <summary>
/// Verifies that a block with every element set to a non-zero value reports the last index of the block.
/// </summary>
[Fact]
public void GetLastValuableElementIndex_AllNonZero()
{
    static void RunTest()
    {
        int lastIndex = Block8x8F.Size - 1;

        Block8x8F data = default;
        for (int position = lastIndex; position >= 0; position--)
        {
            data[position] = 10;
        }

        int actual = HuffmanScanEncoder.GetLastValuableElementIndex(ref data);

        Assert.Equal(lastIndex, actual);
    }

    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
/// <summary>
/// Verifies that a block with exactly one non-zero element, at a random index in [1, Size), reports that index.
/// </summary>
/// <param name="seed">Seed for the pseudo-random generator, keeping the run reproducible.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetLastValuableElementIndex_RandomFilledSingle(int seed)
{
    static void RunTest(string seedSerialized)
    {
        int seed = FeatureTestRunner.Deserialize<int>(seedSerialized);
        var rng = new Random(seed);
        for (int i = 0; i < 1000; i++)
        {
            Block8x8F data = default;

            int setIndex = rng.Next(1, Block8x8F.Size);

            // Lower bound of 1: a parameterless Next() may return 0, which would leave the block all-zero.
            data[setIndex] = rng.Next(1, int.MaxValue);

            int expected = setIndex;

            int actual = HuffmanScanEncoder.GetLastValuableElementIndex(ref data);

            Assert.Equal(expected, actual);
        }
    }

    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        seed,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
/// <summary>
/// Verifies that a block filled with a non-zero value from index 0 up to a random last index reports that last index.
/// </summary>
/// <param name="seed">Seed for the pseudo-random generator, keeping the run reproducible.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetLastValuableElementIndex_RandomFilledPartially(int seed)
{
    static void RunTest(string seedSerialized)
    {
        int seed = FeatureTestRunner.Deserialize<int>(seedSerialized);
        var rng = new Random(seed);
        for (int i = 0; i < 1000; i++)
        {
            Block8x8F data = default;

            int lastIndex = rng.Next(1, Block8x8F.Size);

            // Lower bound of 1: a parameterless Next() may return 0, which would leave the block all-zero.
            int fillValue = rng.Next(1, int.MaxValue);
            for (int dataIndex = 0; dataIndex <= lastIndex; dataIndex++)
            {
                data[dataIndex] = fillValue;
            }

            int expected = lastIndex;

            int actual = HuffmanScanEncoder.GetLastValuableElementIndex(ref data);

            Assert.Equal(expected, actual);
        }
    }

    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        seed,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
/// <summary>
/// Verifies that a block made of two non-zero chunks, possibly separated by a run of zeros,
/// reports the last index of the second chunk.
/// </summary>
/// <param name="seed">Seed for the pseudo-random generator, keeping the run reproducible.</param>
[Theory]
[InlineData(1)]
[InlineData(2)]
public void GetLastValuableElementIndex_RandomFilledFragmented(int seed)
{
    static void RunTest(string seedSerialized)
    {
        int seed = FeatureTestRunner.Deserialize<int>(seedSerialized);
        var rng = new Random(seed);
        for (int i = 0; i < 1000; i++)
        {
            Block8x8F data = default;

            // Lower bound of 1: a parameterless Next() may return 0, which would leave the block all-zero.
            int fillValue = rng.Next(1, int.MaxValue);

            // first filled chunk
            int lastIndex1 = rng.Next(1, Block8x8F.Size / 2);
            for (int dataIndex = 0; dataIndex <= lastIndex1; dataIndex++)
            {
                data[dataIndex] = fillValue;
            }

            // second filled chunk, there might be a spot with zero(s) between first and second chunk
            // The chunk starts at a random index after the first chunk instead of at 0,
            // otherwise the gap would always be overwritten and could never exist.
            int lastIndex2 = rng.Next(lastIndex1 + 1, Block8x8F.Size);
            int firstIndex2 = rng.Next(lastIndex1 + 1, lastIndex2 + 1);
            for (int dataIndex = firstIndex2; dataIndex <= lastIndex2; dataIndex++)
            {
                data[dataIndex] = fillValue;
            }

            int expected = lastIndex2;

            int actual = HuffmanScanEncoder.GetLastValuableElementIndex(ref data);

            Assert.Equal(expected, actual);
        }
    }

    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunTest,
        seed,
        HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2);
}
}
}
Loading…
Cancel
Save