diff --git a/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs b/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
index db7d18a41a..93876c0719 100644
--- a/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
+++ b/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
@@ -254,7 +254,6 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Tiff.Utils
         {
             this.children.GetSpan().Fill(0);
             this.siblings.GetSpan().Fill(0);
-
             this.bitsPerCode = MinBits;
             this.maxCode = MaxValue(this.bitsPerCode);
             this.nextValidCode = EoiCode + 1;
diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwString.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwString.cs
new file mode 100644
index 0000000000..ebe7319413
--- /dev/null
+++ b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwString.cs
@@ -0,0 +1,98 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+
+namespace SixLabors.ImageSharp.Formats.Experimental.Tiff.Compression.Decompressors
+{
+    /// <summary>
+    /// Represents a decoded lzw string as a linked chain of bytes: every instance stores its last byte
+    /// and a reference to the string it extends.
+    /// </summary>
+    public class LzwString
+    {
+        private static readonly LzwString Empty = new LzwString(0, 0, 0, null);
+
+        private readonly LzwString previous;
+        private readonly byte value;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="LzwString"/> class holding a single byte.
+        /// </summary>
+        /// <param name="code">The code word.</param>
+        public LzwString(byte code)
+            : this(code, code, 1, null)
+        {
+        }
+
+        private LzwString(byte value, byte firstChar, int length, LzwString previous)
+        {
+            this.value = value;
+            this.FirstChar = firstChar;
+            this.Length = length;
+            this.previous = previous;
+        }
+
+        /// <summary>
+        /// Gets the number of bytes in this string.
+        /// </summary>
+        public int Length { get; }
+
+        /// <summary>
+        /// Gets the first byte of this string.
+        /// </summary>
+        public byte FirstChar { get; }
+
+        /// <summary>
+        /// Creates a new string consisting of this string followed by one more byte.
+        /// </summary>
+        /// <param name="other">The byte to append.</param>
+        /// <returns>The extended lzw string, this instance is not modified.</returns>
+        public LzwString Concatenate(byte other)
+        {
+            if (this == Empty)
+            {
+                return new LzwString(other);
+            }
+
+            return new LzwString(other, this.FirstChar, this.Length + 1, this);
+        }
+
+        /// <summary>
+        /// Writes the bytes of this string to the buffer, starting at the given position.
+        /// </summary>
+        /// <param name="buffer">The buffer to write to.</param>
+        /// <param name="offset">The position in the buffer of the first byte to write.</param>
+        /// <returns>The number of bytes written.</returns>
+        public int WriteTo(Span<byte> buffer, int offset)
+        {
+            if (this.Length == 0)
+            {
+                return 0;
+            }
+
+            // The string occupies buffer[offset] up to buffer[offset + Length - 1], so the offset has to take part
+            // in the bounds check. Checking up front reports an overrun as an image format error for every length.
+            if (offset < 0 || this.Length > buffer.Length - offset)
+            {
+                TiffThrowHelper.ThrowImageFormatException("Error reading lzw compressed stream. Either pixel buffer to write to is to small or code length is invalid!");
+            }
+
+            if (this.Length == 1)
+            {
+                buffer[offset] = this.value;
+                return 1;
+            }
+
+            // Each node only stores its last byte: walk the chain backwards and fill the range from its end.
+            LzwString e = this;
+            for (int i = this.Length - 1; i >= 0; i--)
+            {
+                buffer[offset + i] = e.value;
+                e = e.previous;
+            }
+
+            return this.Length;
+        }
+    }
+}
diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwTiffCompression.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwTiffCompression.cs
index 98aecd1732..f0439fb7e4 100644
--- a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwTiffCompression.cs
+++ b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwTiffCompression.cs
@@ -30,8 +30,8 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Tiff.Compression.Decompresso
         /// <inheritdoc/>
         protected override void Decompress(BufferedReadStream stream, int byteCount, Span<byte> buffer)
         {
-            var decoder = new TiffLzwDecoder(stream, this.Allocator);
-            decoder.DecodePixels(buffer.Length, 8, buffer);
+            var decoder = new TiffLzwDecoder(stream);
+            decoder.DecodePixels(buffer);
 
             if (this.Predictor == TiffPredictor.Horizontal)
             {
diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/TiffLzwDecoder.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/TiffLzwDecoder.cs
index d8150bea77..2f7ff0ee36 100644
--- a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/TiffLzwDecoder.cs
+++
b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/TiffLzwDecoder.cs
@@ -2,193 +2,269 @@
 // Licensed under the Apache License, Version 2.0.
 
 using System;
-using System.Buffers;
 using System.IO;
-using SixLabors.ImageSharp.Formats.Gif;
-using SixLabors.ImageSharp.Memory;
 
 namespace SixLabors.ImageSharp.Formats.Experimental.Tiff.Compression.Decompressors
 {
+    /*
+       This implementation is based on a port of a java tiff decoder by Harald Kuhr: https://github.com/haraldk/TwelveMonkeys
+
+       Original licence:
+
+       BSD 3-Clause License
+
+     * Copyright (c) 2015, Harald Kuhr
+     * All rights reserved.
+     *
+     * Redistribution and use in source and binary forms, with or without
+     * modification, are permitted provided that the following conditions are met:
+     *
+     * * Redistributions of source code must retain the above copyright notice, this
+     *   list of conditions and the following disclaimer.
+     *
+     * * Redistributions in binary form must reproduce the above copyright notice,
+     *   this list of conditions and the following disclaimer in the documentation
+     *   and/or other materials provided with the distribution.
+     *
+     * * Neither the name of the copyright holder nor the names of its
+     *   contributors may be used to endorse or promote products derived from
+     *   this software without specific prior written permission.
+     *
+     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+     * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+     * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+     * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+     * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+     * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+     */
+
     /// <summary>
-    /// Decompresses and decodes data using the dynamic LZW algorithms.
+    /// Decompresses and decodes data using the dynamic LZW algorithms, see TIFF spec Section 13.
     /// </summary>
-    /// <remarks>
-    /// This code is based on the <see cref="LzwDecoder"/> used for GIF decoding. There is potential
-    /// for a shared implementation. Differences between the GIF and TIFF implementations of the LZW
-    /// encoding are: (i) The GIF implementation includes an initial 'data size' byte, whilst this is
-    /// always 8 for TIFF. (ii) The GIF implementation writes a number of sub-blocks with an initial
-    /// byte indicating the length of the sub-block. In TIFF the data is written as a single block
-    /// with no length indicator (this can be determined from the 'StripByteCounts' entry).
-    /// </remarks>
     internal sealed class TiffLzwDecoder
     {
         /// <summary>
-        /// The max decoder pixel stack size.
+        /// The stream to decode.
         /// </summary>
-        private const int MaxStackSize = 4096;
+        private readonly Stream stream;
 
         /// <summary>
-        /// The null code.
+        /// As soon as we use entry 4094 of the table (maxTableSize - 2), the lzw compressor writes out a (12-bit) ClearCode.
+        /// At this point, the compressor reinitializes the string table and then writes out 9-bit codes again.
         /// </summary>
-        private const int NullCode = -1;
+        private const int ClearCode = 256;
 
         /// <summary>
-        /// The stream to decode.
+        /// End of Information.
         /// </summary>
-        private readonly Stream stream;
+        private const int EoiCode = 257;
 
         /// <summary>
-        /// The memory allocator.
+        /// Minimum code length of 9 bits.
         /// </summary>
-        private readonly MemoryAllocator allocator;
+        private const int MinBits = 9;
+
+        /// <summary>
+        /// Maximum code length of 12 bits.
+        /// </summary>
+        private const int MaxBits = 12;
+
+        /// <summary>
+        /// Maximum table size of 4096.
+        /// </summary>
+        private const int TableSize = 1 << MaxBits;
+
+        private readonly LzwString[] table;
+
+        private int tableLength;
+        private int bitsPerCode;
+        private int oldCode = ClearCode;
+        private int maxCode;
+        private int bitMask;
+        private int maxString;
+        private bool eofReached;
+        private int nextData;
+        private int nextBits;
 
         /// <summary>
         /// Initializes a new instance of the <see cref="TiffLzwDecoder"/> class
         /// and sets the stream, where the compressed data should be read from.
         /// </summary>
         /// <param name="stream">The stream to read from.</param>
-        /// <param name="allocator">The memory allocator.</param>
         /// <exception cref="System.ArgumentNullException"><paramref name="stream"/> is null.</exception>
-        public TiffLzwDecoder(Stream stream, MemoryAllocator allocator)
+        public TiffLzwDecoder(Stream stream)
         {
             Guard.NotNull(stream, nameof(stream));
 
             this.stream = stream;
-            this.allocator = allocator;
+            this.table = new LzwString[TableSize];
+            for (int i = 0; i < 256; i++)
+            {
+                this.table[i] = new LzwString((byte)i);
+            }
+
+            this.Init();
+        }
+
+        /// <summary>
+        /// Resets the table length, code length, bit mask and maximum code to their initial values. Table entries are not cleared.
+        /// </summary>
+        private void Init()
+        {
+            // Table length is 256 + 2, because of special clear code and end of information code.
+            this.tableLength = 258;
+            this.bitsPerCode = MinBits;
+            this.bitMask = BitmaskFor(this.bitsPerCode);
+            this.maxCode = this.MaxCode();
+            this.maxString = 1;
         }
 
         /// <summary>
         /// Decodes and decompresses all pixel indices from the stream.
         /// </summary>
-        /// <param name="length">The length of the compressed data.</param>
-        /// <param name="dataSize">Size of the data.</param>
         /// <param name="pixels">The pixel array to decode to.</param>
-        public void DecodePixels(int length, int dataSize, Span<byte> pixels)
+        public void DecodePixels(Span<byte> pixels)
         {
-            Guard.MustBeLessThan(dataSize, int.MaxValue, nameof(dataSize));
-
-            // Initialize buffers
-            using IMemoryOwner<int> prefixMemory = this.allocator.Allocate<int>(MaxStackSize, AllocationOptions.Clean);
-            using IMemoryOwner<int> suffixMemory = this.allocator.Allocate<int>(MaxStackSize, AllocationOptions.Clean);
-            using IMemoryOwner<int> pixelStackMemory = this.allocator.Allocate<int>(MaxStackSize + 1, AllocationOptions.Clean);
-
-            Span<int> prefix = prefixMemory.GetSpan();
-            Span<int> suffix = suffixMemory.GetSpan();
-            Span<int> pixelStack = pixelStackMemory.GetSpan();
-
-            // Calculate the clear code. The value of the clear code is 2 ^ dataSize
-            int clearCode = 1 << dataSize;
-
-            int codeSize = dataSize + 1;
-
-            // Calculate the end code
-            int endCode = clearCode + 1;
-
-            // Calculate the available code.
-            int availableCode = clearCode + 2;
-
-            // Jillzhangs Code see: http://giflib.codeplex.com/
-            // Adapted from John Cristy's ImageMagick.
+            // Adapted from the pseudo-code example found in the TIFF 6.0 Specification, 1992.
+            // See Section 13: "LZW Compression"/"LZW Decoding", page 61+
             int code;
-            int oldCode = NullCode;
-            int codeMask = (1 << codeSize) - 1;
-
-            int inputByte = 0;
-            int bits = 0;
-
-            int top = 0;
-            int xyz = 0;
-
-            int first = 0;
+            int offset = 0;
 
-            for (code = 0; code < clearCode; code++)
+            while ((code = this.GetNextCode()) != EoiCode)
             {
-                prefix[code] = 0;
-                suffix[code] = (byte)code;
-            }
-
-            // Decoding process
-            while (xyz < length)
-            {
-                if (top == 0)
+                if (code == ClearCode)
                 {
-                    // Get the next code
-                    int data = inputByte & ((1 << bits) - 1);
+                    this.Init();
+                    code = this.GetNextCode();
 
-                    while (bits < codeSize)
-                    {
-                        inputByte = this.stream.ReadByte();
-                        data = (data << 8) | inputByte;
-                        bits += 8;
-                    }
-
-                    data >>= bits - codeSize;
-                    bits -= codeSize;
-                    code = data & codeMask;
-
-                    // Interpret the code
-                    if (code > availableCode || code == endCode)
+                    if (code == EoiCode)
                     {
                         break;
                     }
 
-                    if (code == clearCode)
+                    // Init() only resets the table length, entries at or above it are leftovers from before
+                    // the clear code and must not be decoded. Only the single byte entries are valid here.
+                    if (!this.IsInTable(code) || this.table[code] == null)
                     {
-                        // Reset the decoder
-                        codeSize = dataSize + 1;
-                        codeMask = (1 << codeSize) - 1;
-                        availableCode = clearCode + 2;
-                        oldCode = NullCode;
-                        continue;
+                        TiffThrowHelper.ThrowImageFormatException($"Corrupted TIFF LZW: code {code} (table size: {this.tableLength})");
                     }
 
-                    if (oldCode == NullCode)
+                    offset += this.table[code].WriteTo(pixels, offset);
+                }
+                else
+                {
+                    if (this.table[this.oldCode] == null)
                     {
-                        pixelStack[top++] = suffix[code];
-                        oldCode = code;
-                        first = code;
-                        continue;
+                        TiffThrowHelper.ThrowImageFormatException($"Corrupted TIFF LZW: code {this.oldCode} (table size: {this.tableLength})");
                     }
 
-                    int inCode = code;
-                    if (code == availableCode)
+                    if (this.IsInTable(code))
                     {
-                        pixelStack[top++] = (byte)first;
+                        offset += this.table[code].WriteTo(pixels, offset);
 
-                        code = oldCode;
+                        this.AddStringToTable(this.table[this.oldCode].Concatenate(this.table[code].FirstChar));
                     }
-
-                    while (code > clearCode)
+                    else
                     {
-                        pixelStack[top++] = suffix[code];
-                        code = prefix[code];
+                        // The code is not in the table yet: it is decoded as the previous string extended by its own first byte.
+                        LzwString outString = this.table[this.oldCode].Concatenate(this.table[this.oldCode].FirstChar);
+
+                        offset += outString.WriteTo(pixels, offset);
+                        this.AddStringToTable(outString);
                     }
+                }
 
-                    first = suffix[code];
+                this.oldCode = code;
 
-                    pixelStack[top++] = suffix[code];
+                if (offset >= pixels.Length)
+                {
+                    break;
+                }
+            }
+        }
 
-                    if (availableCode < MaxStackSize)
-                    {
-                        prefix[availableCode] = oldCode;
-                        suffix[availableCode] = first;
-                        availableCode++;
-                        if (availableCode > codeMask - 1 && availableCode < MaxStackSize)
-                        {
-                            codeSize++;
-                            codeMask = (1 << codeSize) - 1;
-                        }
-                    }
+        /// <summary>
+        /// Appends a string to the code table and widens the code length once the table length exceeds the maximum code.
+        /// </summary>
+        /// <param name="lzwString">The string to add.</param>
+        private void AddStringToTable(LzwString lzwString)
+        {
+            // The last valid index is table.Length - 1, so a full table has to be rejected before it is written to.
+            if (this.tableLength >= this.table.Length)
+            {
+                TiffThrowHelper.ThrowImageFormatException($"TIFF LZW with more than {MaxBits} bits per code encountered (table overflow)");
+            }
+
+            this.table[this.tableLength++] = lzwString;
+
+            if (this.tableLength > this.maxCode)
+            {
+                this.bitsPerCode++;
 
-                    oldCode = inCode;
+                if (this.bitsPerCode > MaxBits)
+                {
+                    // Continue reading MaxBits (12 bit) length codes.
+                    this.bitsPerCode = MaxBits;
                 }
 
-                // Pop a pixel off the pixel stack.
-                top--;
+                this.bitMask = BitmaskFor(this.bitsPerCode);
+                this.maxCode = this.MaxCode();
+            }
+
+            if (lzwString.Length > this.maxString)
+            {
+                this.maxString = lzwString.Length;
+            }
+        }
+
+        /// <summary>
+        /// Reads the next code with the current code length from the stream, most significant bit first.
+        /// </summary>
+        /// <returns>The next code, or <see cref="EoiCode"/> when the end of the stream is reached.</returns>
+        private int GetNextCode()
+        {
+            if (this.eofReached)
+            {
+                return EoiCode;
+            }
 
-                // Clear missing pixels
-                pixels[xyz++] = (byte)pixelStack[top];
+            int read = this.stream.ReadByte();
+            if (read < 0)
+            {
+                this.eofReached = true;
+                return EoiCode;
             }
+
+            this.nextData = (this.nextData << 8) | read;
+            this.nextBits += 8;
+
+            if (this.nextBits < this.bitsPerCode)
+            {
+                read = this.stream.ReadByte();
+                if (read < 0)
+                {
+                    this.eofReached = true;
+                    return EoiCode;
+                }
+
+                this.nextData = (this.nextData << 8) | read;
+                this.nextBits += 8;
+            }
+
+            var code = (this.nextData >> (this.nextBits - this.bitsPerCode)) & this.bitMask;
+            this.nextBits -= this.bitsPerCode;
+
+            return code;
         }
+
+        private bool IsInTable(int code) => code < this.tableLength;
+
+        private int MaxCode() => this.bitMask - 1;
+
+        private static int BitmaskFor(int bits) => (1 << bits) - 1;
     }
 }
diff --git a/tests/ImageSharp.Tests/Formats/Tiff/Compression/LzwTiffCompressionTests.cs b/tests/ImageSharp.Tests/Formats/Tiff/Compression/LzwTiffCompressionTests.cs
index 94835962da..410ead84d2 100644
--- a/tests/ImageSharp.Tests/Formats/Tiff/Compression/LzwTiffCompressionTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Tiff/Compression/LzwTiffCompressionTests.cs
@@ -30,6 +30,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tiff.Compression
         [Theory]
         [InlineData(new byte[] { })]
         [InlineData(new byte[] { 42 })] // One byte
+        [InlineData(new byte[] { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 })]
         [InlineData(new byte[] { 42, 16, 128, 53, 96, 218, 7, 64, 3, 4, 97 })] // Random bytes
         [InlineData(new byte[] { 1, 2, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 3, 4 })] // Repeated bytes
         [InlineData(new byte[] { 1, 2, 42, 53, 42, 53, 42, 53, 42, 53, 42, 53, 3, 4 })] // Repeated sequence