📷 A modern, cross-platform, 2D Graphics library for .NET
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

689 lines
28 KiB

// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.IO;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Encoder
{
internal class HuffmanScanEncoder
{
/// <summary>
/// Maximum number of bytes encoded jpeg 8x8 block can occupy.
/// It's highly unlikely for block to occupy this much space - it's a theoretical limit.
/// </summary>
/// <remarks>
/// Where 16 is maximum huffman code binary length according to itu
/// specs. 10 is maximum value binary length, value comes from discrete
/// cosine tranform with value range: [-1024..1023]. Block stores
/// 8x8 = 64 values thus multiplication by 64. Then divided by 8 to get
/// the number of bytes. This value is then multiplied by
/// <see cref="MaxBytesPerBlockMultiplier"/> for performance reasons.
/// </remarks>
private const int MaxBytesPerBlock = (16 + 10) * 64 / 8 * MaxBytesPerBlockMultiplier;
/// <summary>
/// Multiplier used within cache buffers size calculation.
/// </summary>
/// <remarks>
/// <para>
/// Theoretically, <see cref="MaxBytesPerBlock"/> bytes buffer can fit
/// exactly one minimal coding unit. In reality, coding blocks occupy much
/// less space than the theoretical maximum - this can be exploited.
/// If temporal buffer size is multiplied by at least 2, second half of
/// the resulting buffer will be used as an overflow 'guard' if next
/// block would occupy maximum number of bytes. While first half may fit
/// many blocks before needing to flush.
/// </para>
/// <para>
/// This is subject to change. This can be equal to 1 but recomended
/// value is 2 or even greater - futher benchmarking needed.
/// </para>
/// </remarks>
private const int MaxBytesPerBlockMultiplier = 2;
/// <summary>
/// <see cref="streamWriteBuffer"/> size multiplier.
/// </summary>
/// <remarks>
/// Jpeg specification requiers to insert 'stuff' bytes after each
/// 0xff byte value. Worst case scenarion is when all bytes are 0xff.
/// While it's highly unlikely (if not impossible) to get such
/// combination, it's theoretically possible so buffer size must be guarded.
/// </remarks>
private const int OutputBufferLengthMultiplier = 2;
/// <summary>
/// Compiled huffman tree to encode given values.
/// </summary>
/// <remarks>Yields codewords by index consisting of [run length | bitsize].</remarks>
private HuffmanLut[] huffmanTables;
/// <summary>
/// Emitted bits 'micro buffer' before being transferred to the <see cref="emitBuffer"/>.
/// </summary>
private uint accumulatedBits;
/// <summary>
/// Buffer for temporal storage of huffman rle encoding bit data.
/// </summary>
/// <remarks>
/// Encoding bits are assembled to 4 byte unsigned integers and then copied to this buffer.
/// This process does NOT include inserting stuff bytes.
/// </remarks>
private readonly uint[] emitBuffer;
/// <summary>
/// Buffer for temporal storage which is then written to the output stream.
/// </summary>
/// <remarks>
/// Encoding bits from <see cref="emitBuffer"/> are copied to this byte buffer including stuff bytes.
/// </remarks>
private readonly byte[] streamWriteBuffer;
/// <summary>
/// Number of jagged bits stored in <see cref="accumulatedBits"/>
/// </summary>
private int bitCount;
private int emitWriteIndex;
private Block8x8 tempBlock;
/// <summary>
/// The output stream. All attempted writes after the first error become no-ops.
/// </summary>
private readonly Stream target;
/// <summary>
/// Initializes a new instance of the <see cref="HuffmanScanEncoder"/> class.
/// </summary>
/// <param name="blocksPerCodingUnit">Amount of encoded 8x8 blocks per single jpeg macroblock.</param>
/// <param name="outputStream">Output stream for saving encoded data.</param>
public HuffmanScanEncoder(int blocksPerCodingUnit, Stream outputStream)
{
int emitBufferByteLength = MaxBytesPerBlock * blocksPerCodingUnit;
this.emitBuffer = new uint[emitBufferByteLength / sizeof(uint)];
this.emitWriteIndex = this.emitBuffer.Length;
this.streamWriteBuffer = new byte[emitBufferByteLength * OutputBufferLengthMultiplier];
this.target = outputStream;
}
/// <summary>
/// Gets a value indicating whether <see cref="emitBuffer"/> is full
/// and must be flushed using <see cref="FlushToStream()"/>
/// before encoding next 8x8 coding block.
/// </summary>
private bool IsStreamFlushNeeded
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get => this.emitWriteIndex < (uint)this.emitBuffer.Length / 2;
}
/// <summary>
/// Encodes the image with no subsampling.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the callee.</param>
/// <param name="chrominanceQuantTable">Chrominance quantization table provided by the callee.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
public void Encode444<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
FastFloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
RowOctet<TPixel> currentRows = default;
var pixelConverter = new YCbCrForwardConverter444<TPixel>(frame);
for (int y = 0; y < pixels.Height; y += 8)
{
cancellationToken.ThrowIfCancellationRequested();
currentRows.Update(pixelBuffer, y);
for (int x = 0; x < pixels.Width; x += 8)
{
pixelConverter.Convert(x, y, ref currentRows);
prevDCY = this.WriteBlock(
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
ref luminanceQuantTable);
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
ref chrominanceQuantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushRemainingBytes();
}
/// <summary>
/// Encodes the image with subsampling. The Cb and Cr components are each subsampled
/// at a factor of 2 both horizontally and vertically.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the callee.</param>
/// <param name="chrominanceQuantTable">Chrominance quantization table provided by the callee.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
public void Encode420<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, ref Block8x8F chrominanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
FastFloatingPointDCT.AdjustToFDCT(ref chrominanceQuantTable);
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCY = 0, prevDCCb = 0, prevDCCr = 0;
ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
RowOctet<TPixel> currentRows = default;
var pixelConverter = new YCbCrForwardConverter420<TPixel>(frame);
for (int y = 0; y < pixels.Height; y += 16)
{
cancellationToken.ThrowIfCancellationRequested();
for (int x = 0; x < pixels.Width; x += 16)
{
for (int i = 0; i < 2; i++)
{
int yOff = i * 8;
currentRows.Update(pixelBuffer, y + yOff);
pixelConverter.Convert(x, y, ref currentRows, i);
prevDCY = this.WriteBlock(
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YLeft,
ref luminanceQuantTable);
prevDCY = this.WriteBlock(
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.YRight,
ref luminanceQuantTable);
}
prevDCCb = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCb,
ref pixelConverter.Cb,
ref chrominanceQuantTable);
prevDCCr = this.WriteBlock(
QuantIndex.Chrominance,
prevDCCr,
ref pixelConverter.Cr,
ref chrominanceQuantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushRemainingBytes();
}
/// <summary>
/// Encodes the image with no chroma, just luminance.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="luminanceQuantTable">Luminance quantization table provided by the callee.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
public void EncodeGrayscale<TPixel>(Image<TPixel> pixels, ref Block8x8F luminanceQuantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
FastFloatingPointDCT.AdjustToFDCT(ref luminanceQuantTable);
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCY = 0;
var pixelConverter = LuminanceForwardConverter<TPixel>.Create();
ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
RowOctet<TPixel> currentRows = default;
for (int y = 0; y < pixels.Height; y += 8)
{
cancellationToken.ThrowIfCancellationRequested();
currentRows.Update(pixelBuffer, y);
for (int x = 0; x < pixels.Width; x += 8)
{
pixelConverter.Convert(frame, x, y, ref currentRows);
prevDCY = this.WriteBlock(
QuantIndex.Luminance,
prevDCY,
ref pixelConverter.Y,
ref luminanceQuantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushRemainingBytes();
}
/// <summary>
/// Encodes the image with no subsampling and keeps the pixel data as Rgb24.
/// </summary>
/// <typeparam name="TPixel">The pixel format.</typeparam>
/// <param name="pixels">The pixel accessor providing access to the image pixels.</param>
/// <param name="quantTable">Quantization table provided by the callee.</param>
/// <param name="cancellationToken">The token to monitor for cancellation.</param>
public void EncodeRgb<TPixel>(Image<TPixel> pixels, ref Block8x8F quantTable, CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel<TPixel>
{
FastFloatingPointDCT.AdjustToFDCT(ref quantTable);
this.huffmanTables = HuffmanLut.TheHuffmanLut;
// ReSharper disable once InconsistentNaming
int prevDCR = 0, prevDCG = 0, prevDCB = 0;
ImageFrame<TPixel> frame = pixels.Frames.RootFrame;
Buffer2D<TPixel> pixelBuffer = frame.PixelBuffer;
RowOctet<TPixel> currentRows = default;
var pixelConverter = new RgbForwardConverter<TPixel>(frame);
for (int y = 0; y < pixels.Height; y += 8)
{
cancellationToken.ThrowIfCancellationRequested();
currentRows.Update(pixelBuffer, y);
for (int x = 0; x < pixels.Width; x += 8)
{
pixelConverter.Convert(x, y, ref currentRows);
prevDCR = this.WriteBlock(
QuantIndex.Luminance,
prevDCR,
ref pixelConverter.R,
ref quantTable);
prevDCG = this.WriteBlock(
QuantIndex.Luminance,
prevDCG,
ref pixelConverter.G,
ref quantTable);
prevDCB = this.WriteBlock(
QuantIndex.Luminance,
prevDCB,
ref pixelConverter.B,
ref quantTable);
if (this.IsStreamFlushNeeded)
{
this.FlushToStream();
}
}
}
this.FlushRemainingBytes();
}
/// <summary>
/// Writes a block of pixel data using the given quantization table,
/// returning the post-quantized DC value of the DCT-transformed block.
/// The block is in natural (not zig-zag) order.
/// </summary>
/// <param name="index">The quantization table index.</param>
/// <param name="prevDC">The previous DC value.</param>
/// <param name="block">Source block.</param>
/// <param name="quant">Quantization table.</param>
/// <returns>The <see cref="int"/>.</returns>
private int WriteBlock(
QuantIndex index,
int prevDC,
ref Block8x8F block,
ref Block8x8F quant)
{
ref Block8x8 spectralBlock = ref this.tempBlock;
// Shifting level from 0..255 to -128..127
block.AddInPlace(-128f);
// Discrete cosine transform
FastFloatingPointDCT.TransformFDCT(ref block);
// Quantization
Block8x8F.Quantize(ref block, ref spectralBlock, ref quant);
// Emit the DC delta.
int dc = spectralBlock[0];
this.EmitHuffRLE(this.huffmanTables[2 * (int)index].Values, 0, dc - prevDC);
// Emit the AC components.
int[] acHuffTable = this.huffmanTables[(2 * (int)index) + 1].Values;
nint lastValuableIndex = spectralBlock.GetLastNonZeroIndex();
int runLength = 0;
ref short blockRef = ref Unsafe.As<Block8x8, short>(ref spectralBlock);
for (nint zig = 1; zig <= lastValuableIndex; zig++)
{
const int zeroRun1 = 1 << 4;
const int zeroRun16 = 16 << 4;
int ac = Unsafe.Add(ref blockRef, zig);
if (ac == 0)
{
runLength += zeroRun1;
}
else
{
while (runLength >= zeroRun16)
{
this.EmitHuff(acHuffTable, 0xf0);
runLength -= zeroRun16;
}
this.EmitHuffRLE(acHuffTable, runLength, ac);
runLength = 0;
}
}
// if mcu block contains trailing zeros - we must write end of block (EOB) value indicating that current block is over
// this can be done for any number of trailing zeros, even when all 63 ac values are zero
// (Block8x8F.Size - 1) == 63 - last index of the mcu elements
if (lastValuableIndex != Block8x8F.Size - 1)
{
this.EmitHuff(acHuffTable, 0x00);
}
return dc;
}
/// <summary>
/// Emits the most significant count of bits to the buffer.
/// </summary>
/// <remarks>
/// <para>
/// Supports up to 32 count of bits but, generally speaking, jpeg
/// standard assures that there won't be more than 16 bits per single
/// value.
/// </para>
/// <para>
/// Emitting algorithm uses 3 intermediate buffers for caching before
/// writing to the stream:
/// <list type="number">
/// <item>
/// <term>uint32</term>
/// <description>
/// Bit buffer. Encoded spectral values can occupy up to 16 bits, bits
/// are assembled to whole bytes via this intermediate buffer.
/// </description>
/// </item>
/// <item>
/// <term>uint32[]</term>
/// <description>
/// Assembled bytes from uint32 buffer are saved into this buffer.
/// uint32 buffer values are saved using indices from the last to the first.
/// As bytes are saved to the memory as 4-byte packages endianness matters:
/// Jpeg stream is big-endian, indexing buffer bytes from the last index to the
/// first eliminates all operations to extract separate bytes. This only works for
/// little-endian machines (there are no known examples of big-endian users atm).
/// For big-endians this approach is slower due to the separate byte extraction.
/// </description>
/// </item>
/// <item>
/// <term>byte[]</term>
/// <description>
/// Byte buffer used only during <see cref="FlushToStream(int)"/> method.
/// </description>
/// </item>
/// </list>
/// </para>
/// </remarks>
/// <param name="bits">Bits to emit, must be shifted to the left.</param>
/// <param name="count">Bits count stored in the bits parameter.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void Emit(uint bits, int count)
{
this.accumulatedBits |= bits >> this.bitCount;
count += this.bitCount;
if (count >= 32)
{
this.emitBuffer[--this.emitWriteIndex] = this.accumulatedBits;
this.accumulatedBits = bits << (32 - this.bitCount);
count -= 32;
}
this.bitCount = count;
}
/// <summary>
/// Emits the given value with the given Huffman table.
/// </summary>
/// <param name="table">Huffman table.</param>
/// <param name="value">Value to encode.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuff(int[] table, int value)
{
int x = table[value];
this.Emit((uint)x & 0xffff_ff00u, x & 0xff);
}
/// <summary>
/// Emits given value via huffman rle encoding.
/// </summary>
/// <param name="table">Huffman table.</param>
/// <param name="runLength">The number of preceding zeroes, preshifted by 4 to the left.</param>
/// <param name="value">Value to encode.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private void EmitHuffRLE(int[] table, int runLength, int value)
{
DebugGuard.IsTrue((runLength & 0xf) == 0, $"{nameof(runLength)} parameter must be shifted to the left by 4 bits");
int a = value;
int b = value;
if (a < 0)
{
a = -value;
b = value - 1;
}
int valueLen = GetHuffmanEncodingLength((uint)a);
// Huffman prefix code
int huffPackage = table[runLength | valueLen];
int prefixLen = huffPackage & 0xff;
uint prefix = (uint)huffPackage & 0xffff_0000u;
// Actual encoded value
uint encodedValue = (uint)b << (32 - valueLen);
// Doing two binary shifts to get rid of leading 1's in negative value case
this.Emit(prefix | (encodedValue >> prefixLen), prefixLen + valueLen);
}
/// <summary>
/// Calculates how many minimum bits needed to store given value for Huffman jpeg encoding.
/// </summary>
/// <remarks>
/// This is an internal operation supposed to be used only in <see cref="HuffmanScanEncoder"/> class for jpeg encoding.
/// </remarks>
/// <param name="value">The value.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static int GetHuffmanEncodingLength(uint value)
{
DebugGuard.IsTrue(value <= (1 << 16), "Huffman encoder is supposed to encode a value of 16bit size max");
#if SUPPORTS_BITOPERATIONS
// This should have been implemented as (BitOperations.Log2(value) + 1) as in non-intrinsic implementation
// But internal log2 is implemented like this: (31 - (int)Lzcnt.LeadingZeroCount(value))
// BitOperations.Log2 implementation also checks if input value is zero for the convention 0->0
// Lzcnt would return 32 for input value of 0 - no need to check that with branching
// Fallback code if Lzcnt is not supported still use if-check
// But most modern CPUs support this instruction so this should not be a problem
return 32 - BitOperations.LeadingZeroCount(value);
#else
// Ideally:
// if 0 - return 0 in this case
// else - return log2(value) + 1
//
// Hack based on input value constraint:
// We know that input values are guaranteed to be maximum 16 bit large for huffman encoding
// We can safely shift input value for one bit -> log2(value << 1)
// Because of the 16 bit value constraint it won't overflow
// With that input value change we no longer need to add 1 before returning
// And this eliminates need to check if input value is zero - it is a standard convention which Log2SoftwareFallback adheres to
return Numerics.Log2(value << 1);
#endif
}
/// <summary>
/// General method for flushing cached spectral data bytes to
/// the ouput stream respecting stuff bytes.
/// </summary>
/// <remarks>
/// Bytes cached via <see cref="Emit"/> are stored in 4-bytes blocks
/// which makes this method endianness dependent.
/// </remarks>
[MethodImpl(InliningOptions.ShortMethod)]
private void FlushToStream(int endIndex)
{
Span<byte> emitBytes = MemoryMarshal.AsBytes(this.emitBuffer.AsSpan());
int writeIdx = 0;
int startIndex = emitBytes.Length - 1;
// Some platforms may fail to eliminate this if-else branching
// Even if it happens - buffer is flushed in big packs,
// branching overhead shouldn't be noticeable
if (BitConverter.IsLittleEndian)
{
// For little endian case bytes are ordered and can be
// safely written to the stream with stuff bytes
// First byte is cached on the most significant index
// so we are going from the end of the array to its beginning:
// ... [ double word #1 ] [ double word #0 ]
// ... [idx3|idx2|idx1|idx0] [idx3|idx2|idx1|idx0]
for (int i = startIndex; i >= endIndex; i--)
{
byte value = emitBytes[i];
this.streamWriteBuffer[writeIdx++] = value;
// Inserting stuff byte
if (value == 0xff)
{
this.streamWriteBuffer[writeIdx++] = 0x00;
}
}
}
else
{
// For big endian case bytes are ordered in 4-byte packs
// which are ordered like bytes in the little endian case by in 4-byte packs:
// ... [ double word #1 ] [ double word #0 ]
// ... [idx0|idx1|idx2|idx3] [idx0|idx1|idx2|idx3]
// So we must write each 4-bytes in 'natural order'
for (int i = startIndex; i >= endIndex; i -= 4)
{
// This loop is caused by the nature of underlying byte buffer
// implementation and indeed causes performace by somewhat 5%
// compared to little endian scenario
// Even with this performance drop this cached buffer implementation
// is faster than individually writing bytes using binary shifts and binary and(s)
for (int j = i - 3; j <= i; j++)
{
byte value = emitBytes[j];
this.streamWriteBuffer[writeIdx++] = value;
// Inserting stuff byte
if (value == 0xff)
{
this.streamWriteBuffer[writeIdx++] = 0x00;
}
}
}
}
this.target.Write(this.streamWriteBuffer, 0, writeIdx);
}
/// <summary>
/// Flushes spectral data bytes after encoding all channel blocks
/// in a single jpeg macroblock using <see cref="WriteBlock"/>.
/// </summary>
/// <remarks>
/// This must be called only if <see cref="IsStreamFlushNeeded"/> is true
/// only during the macroblocks encoding routine.
/// </remarks>
private void FlushToStream()
{
this.FlushToStream(this.emitWriteIndex * 4);
this.emitWriteIndex = this.emitBuffer.Length;
}
/// <summary>
/// Flushes final cached bits to the stream padding 1's to
/// complement full bytes.
/// </summary>
/// <remarks>
/// This must be called only once at the end of the encoding routine.
/// <see cref="IsStreamFlushNeeded"/> check is not needed.
/// </remarks>
[MethodImpl(InliningOptions.ShortMethod)]
private void FlushRemainingBytes()
{
// Padding all 4 bytes with 1's while not corrupting initial bits stored in accumulatedBits
// And writing only valuable count of bytes count we want to write to the output stream
int valuableBytesCount = (int)Numerics.DivideCeil((uint)this.bitCount, 8);
uint packedBytes = this.accumulatedBits | (uint.MaxValue >> this.bitCount);
this.emitBuffer[--this.emitWriteIndex] = packedBytes;
// Flush cached bytes to the output stream with padding bits
this.FlushToStream((this.emitWriteIndex * 4) - 4 + valuableBytesCount);
}
}
}