diff --git a/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs b/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
index db7d18a41a..93876c0719 100644
--- a/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
+++ b/src/ImageSharp/Formats/Tiff/Compression/Compressors/TiffLzwEncoder.cs
@@ -254,7 +254,6 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Tiff.Utils
{
this.children.GetSpan().Fill(0);
this.siblings.GetSpan().Fill(0);
-
this.bitsPerCode = MinBits;
this.maxCode = MaxValue(this.bitsPerCode);
this.nextValidCode = EoiCode + 1;
diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwString.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwString.cs
new file mode 100644
index 0000000000..ebe7319413
--- /dev/null
+++ b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwString.cs
@@ -0,0 +1,95 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+
+namespace SixLabors.ImageSharp.Formats.Experimental.Tiff.Compression.Decompressors
+{
+    /// <summary>
+    /// Represents a lzw string: the bytes of one code word, kept as a chain of single-byte nodes linked to their prefix.
+    /// </summary>
+    public class LzwString
+    {
+        private static readonly LzwString Empty = new LzwString(0, 0, 0, null);
+
+        private readonly LzwString previous;
+        private readonly byte value;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="LzwString"/> class holding a single byte.
+        /// </summary>
+        /// <param name="code">The code word.</param>
+        public LzwString(byte code)
+            : this(code, code, 1, null)
+        {
+        }
+
+        private LzwString(byte value, byte firstChar, int length, LzwString previous)
+        {
+            this.value = value;
+            this.FirstChar = firstChar;
+            this.Length = length;
+            this.previous = previous;
+        }
+
+        /// <summary>
+        /// Gets the code length, i.e. the number of bytes <see cref="WriteTo"/> emits for this string.
+        /// </summary>
+        public int Length { get; }
+
+        /// <summary>
+        /// Gets the first character of the codeword.
+        /// </summary>
+        public byte FirstChar { get; }
+
+        /// <summary>
+        /// Creates a new string made of this string followed by one more byte; this instance is not modified.
+        /// </summary>
+        /// <param name="other">The byte to append.</param>
+        /// <returns>A concatenated lzw string.</returns>
+        public LzwString Concatenate(byte other)
+        {
+            if (this == Empty)
+            {
+                return new LzwString(other);
+            }
+
+            return new LzwString(other, this.FirstChar, this.Length + 1, this);
+        }
+
+        /// <summary>
+        /// Writes the bytes of this string to the buffer at a given position; a multi-byte string that does not fit raises an image format error.
+        /// </summary>
+        /// <param name="buffer">The buffer to write to.</param>
+        /// <param name="offset">The position to write to.</param>
+        /// <returns>The number of bytes written.</returns>
+        public int WriteTo(Span buffer, int offset)
+        {
+            if (this.Length == 0)
+            {
+                return 0;
+            }
+
+            if (this.Length == 1)
+            {
+                buffer[offset] = this.value;
+                return 1;
+            }
+
+            LzwString e = this;
+            var endIdx = this.Length - 1;
+            if (offset + endIdx >= buffer.Length) // the last byte is written at offset + endIdx
+            {
+                TiffThrowHelper.ThrowImageFormatException("Error reading lzw compressed stream. Either pixel buffer to write to is to small or code length is invalid!");
+            }
+
+            for (int i = endIdx; i >= 0; i--)
+            {
+                buffer[offset + i] = e.value;
+                e = e.previous; // each node holds the last byte of its string, so fill back to front
+            }
+
+            return this.Length;
+        }
+    }
+}
diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwTiffCompression.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwTiffCompression.cs
index 98aecd1732..f0439fb7e4 100644
--- a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwTiffCompression.cs
+++ b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/LzwTiffCompression.cs
@@ -30,8 +30,8 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Tiff.Compression.Decompresso
///
protected override void Decompress(BufferedReadStream stream, int byteCount, Span buffer)
{
- var decoder = new TiffLzwDecoder(stream, this.Allocator);
- decoder.DecodePixels(buffer.Length, 8, buffer);
+ var decoder = new TiffLzwDecoder(stream);
+ decoder.DecodePixels(buffer);
if (this.Predictor == TiffPredictor.Horizontal)
{
diff --git a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/TiffLzwDecoder.cs b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/TiffLzwDecoder.cs
index d8150bea77..2f7ff0ee36 100644
--- a/src/ImageSharp/Formats/Tiff/Compression/Decompressors/TiffLzwDecoder.cs
+++ b/src/ImageSharp/Formats/Tiff/Compression/Decompressors/TiffLzwDecoder.cs
@@ -2,193 +2,254 @@
// Licensed under the Apache License, Version 2.0.
using System;
-using System.Buffers;
using System.IO;
-using SixLabors.ImageSharp.Formats.Gif;
-using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Experimental.Tiff.Compression.Decompressors
{
+ /*
+ This implementation is based on a port of a java tiff decoder by Harald Kuhr: https://github.com/haraldk/TwelveMonkeys
+
+ Original licence:
+
+ BSD 3-Clause License
+
+ * Copyright (c) 2015, Harald Kuhr
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ ** Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
///
- /// Decompresses and decodes data using the dynamic LZW algorithms.
+ /// Decompresses and decodes data using the dynamic LZW algorithms, see TIFF spec Section 13.
///
- ///
- /// This code is based on the used for GIF decoding. There is potential
- /// for a shared implementation. Differences between the GIF and TIFF implementations of the LZW
- /// encoding are: (i) The GIF implementation includes an initial 'data size' byte, whilst this is
- /// always 8 for TIFF. (ii) The GIF implementation writes a number of sub-blocks with an initial
- /// byte indicating the length of the sub-block. In TIFF the data is written as a single block
- /// with no length indicator (this can be determined from the 'StripByteCounts' entry).
- ///
internal sealed class TiffLzwDecoder
{
///
- /// The max decoder pixel stack size.
+ /// The stream to decode.
///
- private const int MaxStackSize = 4096;
+ private readonly Stream stream;
///
- /// The null code.
+ /// As soon as we use entry 4094 of the table (maxTableSize - 2), the lzw compressor writes out a (12-bit) ClearCode.
+ /// At this point, the compressor reinitializes the string table and then writes out 9-bit codes again.
///
- private const int NullCode = -1;
+ private const int ClearCode = 256;
///
- /// The stream to decode.
+ /// End of Information.
///
- private readonly Stream stream;
+ private const int EoiCode = 257;
///
- /// The memory allocator.
+ /// Minimum code length of 9 bits.
///
- private readonly MemoryAllocator allocator;
+ private const int MinBits = 9;
+
+ ///
+ /// Maximum code length of 12 bits.
+ ///
+ private const int MaxBits = 12;
+
+ ///
+ /// Maximum table size of 4096.
+ ///
+ private const int TableSize = 1 << MaxBits;
+
+ private readonly LzwString[] table;
+
+ private int tableLength;
+ private int bitsPerCode;
+ private int oldCode = ClearCode;
+ private int maxCode;
+ private int bitMask;
+ private int maxString;
+ private bool eofReached;
+ private int nextData;
+ private int nextBits;
///
/// Initializes a new instance of the class
/// and sets the stream, where the compressed data should be read from.
///
/// The stream to read from.
- /// The memory allocator.
/// is null.
- public TiffLzwDecoder(Stream stream, MemoryAllocator allocator)
+ public TiffLzwDecoder(Stream stream)
{
Guard.NotNull(stream, nameof(stream));
this.stream = stream;
- this.allocator = allocator;
+ // Entries 0-255 each hold the single byte equal to their index. They are created once here;
+ // Init() does not touch the table entries, so they stay valid across resets.
+ this.table = new LzwString[TableSize];
+ for (int i = 0; i < 256; i++)
+ {
+ this.table[i] = new LzwString((byte)i);
+ }
+
+ // Start with the initial table length and the minimum code length.
+ this.Init();
+ }
+
+ /// <summary>
+ /// Resets the decoding state: table length 258, code length of MinBits and the matching bit mask and maximum code.
+ /// Table entries themselves are not cleared; only the table length is reset.
+ /// </summary>
+ private void Init()
+ {
+ // Table length is 256 + 2, because of special clear code and end of information code.
+ this.tableLength = 258;
+ this.bitsPerCode = MinBits;
+ this.bitMask = BitmaskFor(this.bitsPerCode);
+ // MaxCode() reads bitMask, so it has to be called after the mask is updated.
+ this.maxCode = this.MaxCode();
+ this.maxString = 1;
}
///
/// Decodes and decompresses all pixel indices from the stream.
///
- /// The length of the compressed data.
- /// Size of the data.
/// The pixel array to decode to.
- public void DecodePixels(int length, int dataSize, Span pixels)
+ public void DecodePixels(Span pixels)
{
- Guard.MustBeLessThan(dataSize, int.MaxValue, nameof(dataSize));
-
- // Initialize buffers
- using IMemoryOwner prefixMemory = this.allocator.Allocate(MaxStackSize, AllocationOptions.Clean);
- using IMemoryOwner suffixMemory = this.allocator.Allocate(MaxStackSize, AllocationOptions.Clean);
- using IMemoryOwner pixelStackMemory = this.allocator.Allocate(MaxStackSize + 1, AllocationOptions.Clean);
-
- Span prefix = prefixMemory.GetSpan();
- Span suffix = suffixMemory.GetSpan();
- Span pixelStack = pixelStackMemory.GetSpan();
-
- // Calculate the clear code. The value of the clear code is 2 ^ dataSize
- int clearCode = 1 << dataSize;
-
- int codeSize = dataSize + 1;
-
- // Calculate the end code
- int endCode = clearCode + 1;
-
- // Calculate the available code.
- int availableCode = clearCode + 2;
-
- // Jillzhangs Code see: http://giflib.codeplex.com/
- // Adapted from John Cristy's ImageMagick.
+ // Adapted from the pseudo-code example found in the TIFF 6.0 Specification, 1992.
+ // See Section 13: "LZW Compression"/"LZW Decoding", page 61+
int code;
- int oldCode = NullCode;
- int codeMask = (1 << codeSize) - 1;
-
- int inputByte = 0;
- int bits = 0;
-
- int top = 0;
- int xyz = 0;
-
- int first = 0;
+ // Number of bytes written to pixels so far; also the position of the next write.
+ int offset = 0;
- for (code = 0; code < clearCode; code++)
+ // GetNextCode() also returns EoiCode when the stream runs out, so this loop ends on either condition.
+ while ((code = this.GetNextCode()) != EoiCode)
{
- prefix[code] = 0;
- suffix[code] = (byte)code;
- }
-
- // Decoding process
- while (xyz < length)
- {
- if (top == 0)
+ // A clear code resets the decoder state; the code that follows is written out directly
+ // and no new table entry is added for it.
+ if (code == ClearCode)
{
- // Get the next code
- int data = inputByte & ((1 << bits) - 1);
+ this.Init();
+ code = this.GetNextCode();
- while (bits < codeSize)
- {
- inputByte = this.stream.ReadByte();
- data = (data << 8) | inputByte;
- bits += 8;
- }
-
- data >>= bits - codeSize;
- bits -= codeSize;
- code = data & codeMask;
-
- // Interpret the code
- if (code > availableCode || code == endCode)
+ if (code == EoiCode)
{
break;
}
- if (code == clearCode)
+ // NOTE(review): only an empty slot is rejected here. Init() does not clear old entries, so a code
+ // >= tableLength left over from before the reset would pass this check - confirm whether IsInTable(code) should be tested as well.
+ if (this.table[code] == null)
{
- // Reset the decoder
- codeSize = dataSize + 1;
- codeMask = (1 << codeSize) - 1;
- availableCode = clearCode + 2;
- oldCode = NullCode;
- continue;
+ TiffThrowHelper.ThrowImageFormatException($"Corrupted TIFF LZW: code {code} (table size: {this.tableLength})");
}
- if (oldCode == NullCode)
+ offset += this.table[code].WriteTo(pixels, offset);
+ }
+ else
+ {
+ // oldCode starts out as ClearCode, whose table slot is never filled, so a stream that
+ // does not begin with a clear code is rejected here.
+ if (this.table[this.oldCode] == null)
{
- pixelStack[top++] = suffix[code];
- oldCode = code;
- first = code;
- continue;
+ TiffThrowHelper.ThrowImageFormatException($"Corrupted TIFF LZW: code {this.oldCode} (table size: {this.tableLength})");
}
- int inCode = code;
- if (code == availableCode)
+ // Known code: write its string, then add (previous string + first byte of this string) to the table.
+ if (this.IsInTable(code))
{
- pixelStack[top++] = (byte)first;
+ offset += this.table[code].WriteTo(pixels, offset);
- code = oldCode;
+ this.AddStringToTable(this.table[this.oldCode].Concatenate(this.table[code].FirstChar));
}
-
- while (code > clearCode)
+ else
{
- pixelStack[top++] = suffix[code];
- code = prefix[code];
+ // Code not in the table yet: the string is the previous string followed by its own first byte.
+ LzwString outString = this.table[this.oldCode].Concatenate(this.table[this.oldCode].FirstChar);
+
+ offset += outString.WriteTo(pixels, offset);
+ this.AddStringToTable(outString);
}
+ }
- first = suffix[code];
+ this.oldCode = code;
- pixelStack[top++] = suffix[code];
+ // Stop once the destination is full, even if the stream holds more codes.
+ if (offset >= pixels.Length)
+ {
+ break;
+ }
+ }
+ }
- if (availableCode < MaxStackSize)
- {
- prefix[availableCode] = oldCode;
- suffix[availableCode] = first;
- availableCode++;
- if (availableCode > codeMask - 1 && availableCode < MaxStackSize)
- {
- codeSize++;
- codeMask = (1 << codeSize) - 1;
- }
- }
+ /// <summary>
+ /// Stores a string in the next free table slot and moves on to the next code length once the
+ /// table length exceeds the largest code of the current length (the code length never exceeds MaxBits).
+ /// An image format error is raised when the table has no free slot left.
+ /// </summary>
+ /// <param name="lzwString">The string to add to the table.</param>
+ private void AddStringToTable(LzwString lzwString)
+ {
+ // Valid slots are 0..table.Length - 1, so the table is already full when tableLength equals table.Length.
+ if (this.tableLength >= this.table.Length)
+ {
+ TiffThrowHelper.ThrowImageFormatException($"TIFF LZW with more than {MaxBits} bits per code encountered (table overflow)");
+ }
+
+ this.table[this.tableLength++] = lzwString;
+
+ if (this.tableLength > this.maxCode)
+ {
+ this.bitsPerCode++;
- oldCode = inCode;
+ if (this.bitsPerCode > MaxBits)
+ {
+ // Continue reading MaxBits (12 bit) length codes.
+ this.bitsPerCode = MaxBits;
}
- // Pop a pixel off the pixel stack.
- top--;
+ this.bitMask = BitmaskFor(this.bitsPerCode);
+ this.maxCode = this.MaxCode();
+ }
+
+ // Keep track of the longest string added since the last reset.
+ if (lzwString.Length > this.maxString)
+ {
+ this.maxString = lzwString.Length;
+ }
+ }
+
+ /// <summary>
+ /// Reads the next code of bitsPerCode bits from the stream, most significant bits first.
+ /// Returns EoiCode once the stream has no more bytes, and on every call after that.
+ /// </summary>
+ /// <returns>The next code, or EoiCode at the end of the stream.</returns>
+ private int GetNextCode()
+ {
+ if (this.eofReached)
+ {
+ return EoiCode;
+ }
- // Clear missing pixels
- pixels[xyz++] = (byte)pixelStack[top];
+ // ReadByte() returns a negative value when no byte is available.
+ int read = this.stream.ReadByte();
+ if (read < 0)
+ {
+ this.eofReached = true;
+ return EoiCode;
}
+
+ // New bytes are shifted in from the right, so the oldest unread bits are the most significant ones.
+ this.nextData = (this.nextData << 8) | read;
+ this.nextBits += 8;
+
+ // One byte may not be enough for a full code; read a second one if needed.
+ if (this.nextBits < this.bitsPerCode)
+ {
+ read = this.stream.ReadByte();
+ if (read < 0)
+ {
+ this.eofReached = true;
+ return EoiCode;
+ }
+
+ this.nextData = (this.nextData << 8) | read;
+ this.nextBits += 8;
+ }
+
+ // Take the top bitsPerCode bits of the unread bits; the remaining low bits are kept for the next call.
+ var code = (this.nextData >> (this.nextBits - this.bitsPerCode)) & this.bitMask;
+ this.nextBits -= this.bitsPerCode;
+
+ return code;
}
+
+ /// <summary>
+ /// Checks whether a code lies below the current table length.
+ /// </summary>
+ private bool IsInTable(int code) => code < this.tableLength;
+
+ /// <summary>
+ /// Gets the current bit mask minus one; AddStringToTable moves to the next code length once the table length exceeds this value.
+ /// </summary>
+ private int MaxCode() => this.bitMask - 1;
+
+ /// <summary>
+ /// Gets a mask with the lowest <paramref name="bits"/> bits set.
+ /// </summary>
+ private static int BitmaskFor(int bits) => (1 << bits) - 1;
}
}
diff --git a/tests/ImageSharp.Tests/Formats/Tiff/Compression/LzwTiffCompressionTests.cs b/tests/ImageSharp.Tests/Formats/Tiff/Compression/LzwTiffCompressionTests.cs
index 94835962da..410ead84d2 100644
--- a/tests/ImageSharp.Tests/Formats/Tiff/Compression/LzwTiffCompressionTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Tiff/Compression/LzwTiffCompressionTests.cs
@@ -30,6 +30,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tiff.Compression
[Theory]
[InlineData(new byte[] { })]
[InlineData(new byte[] { 42 })] // One byte
+ [InlineData(new byte[] { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 })]
[InlineData(new byte[] { 42, 16, 128, 53, 96, 218, 7, 64, 3, 4, 97 })] // Random bytes
[InlineData(new byte[] { 1, 2, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 3, 4 })] // Repeated bytes
[InlineData(new byte[] { 1, 2, 42, 53, 42, 53, 42, 53, 42, 53, 42, 53, 3, 4 })] // Repeated sequence