mirror of https://github.com/SixLabors/ImageSharp
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1495 lines
60 KiB
1495 lines
60 KiB
// Copyright (c) Six Labors.
|
|
// Licensed under the Six Labors Split License.
|
|
|
|
using System.Numerics;
|
|
using System.Runtime.CompilerServices;
|
|
using System.Runtime.InteropServices;
|
|
using System.Runtime.Intrinsics;
|
|
using System.Runtime.Intrinsics.X86;
|
|
using SixLabors.ImageSharp.Common.Helpers;
|
|
using SixLabors.ImageSharp.Memory;
|
|
|
|
namespace SixLabors.ImageSharp.Formats.Webp.Lossless;
|
|
|
|
/// <summary>
|
|
/// Utility functions for the lossless decoder.
|
|
/// </summary>
|
|
internal static unsafe class LosslessUtils
|
|
{
|
|
private const int PrefixLookupIdxMax = 512;
|
|
|
|
private const int LogLookupIdxMax = 256;
|
|
|
|
private const int ApproxLogMax = 4096;
|
|
|
|
private const int ApproxLogWithCorrectionMax = 65536;
|
|
|
|
private const double Log2Reciprocal = 1.44269504088896338700465094007086;
|
|
|
|
/// <summary>
|
|
/// Returns the exact index where array1 and array2 are different. For an index
|
|
/// inferior or equal to bestLenMatch, the return value just has to be strictly
|
|
/// inferior to bestLenMatch match. The current behavior is to return 0 if this index
|
|
/// is bestLenMatch, and the index itself otherwise.
|
|
/// If no two elements are the same, it returns maxLimit.
|
|
/// </summary>
|
|
public static int FindMatchLength(ReadOnlySpan<uint> array1, ReadOnlySpan<uint> array2, int bestLenMatch, int maxLimit)
|
|
{
|
|
// Before 'expensive' linear match, check if the two arrays match at the
|
|
// current best length index.
|
|
if (array1[bestLenMatch] != array2[bestLenMatch])
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
return VectorMismatch(array1, array2, maxLimit);
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static int VectorMismatch(ReadOnlySpan<uint> array1, ReadOnlySpan<uint> array2, int length)
|
|
{
|
|
int matchLen = 0;
|
|
ref uint array1Ref = ref MemoryMarshal.GetReference(array1);
|
|
ref uint array2Ref = ref MemoryMarshal.GetReference(array2);
|
|
|
|
while (matchLen < length && Unsafe.Add(ref array1Ref, (uint)matchLen) == Unsafe.Add(ref array2Ref, (uint)matchLen))
|
|
{
|
|
matchLen++;
|
|
}
|
|
|
|
return matchLen;
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static int MaxFindCopyLength(int len) => len < BackwardReferenceEncoder.MaxLength ? len : BackwardReferenceEncoder.MaxLength;
|
|
|
|
public static int PrefixEncodeBits(int distance, ref int extraBits)
|
|
{
|
|
if (distance < PrefixLookupIdxMax)
|
|
{
|
|
(int code, int bits) = WebpLookupTables.PrefixEncodeCode[distance];
|
|
extraBits = bits;
|
|
return code;
|
|
}
|
|
|
|
return PrefixEncodeBitsNoLut(distance, ref extraBits);
|
|
}
|
|
|
|
public static int PrefixEncode(int distance, ref int extraBits, ref int extraBitsValue)
|
|
{
|
|
if (distance < PrefixLookupIdxMax)
|
|
{
|
|
(int code, int bits) = WebpLookupTables.PrefixEncodeCode[distance];
|
|
extraBits = bits;
|
|
extraBitsValue = WebpLookupTables.PrefixEncodeExtraBitsValue[distance];
|
|
|
|
return code;
|
|
}
|
|
|
|
return PrefixEncodeNoLut(distance, ref extraBits, ref extraBitsValue);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Add green to blue and red channels (i.e. perform the inverse transform of 'subtract green').
|
|
/// </summary>
|
|
/// <param name="pixelData">The pixel data to apply the transformation.</param>
|
|
public static void AddGreenToBlueAndRed(Span<uint> pixelData)
|
|
{
|
|
if (Vector256.IsHardwareAccelerated && pixelData.Length >= 8)
|
|
{
|
|
// The `255` values disable the write for alpha (A), since 0x80 is set in the control byte (high bit set).
|
|
// Each byte index is within its respective 128-bit lane (0�15 and 16�31), so this is safe for per-lane shuffle.
|
|
// The high bits are not set for the index bytes, and the values are always < 16 per lane, satisfying AVX2 lane rules.
|
|
Vector256<byte> addGreenToBlueAndRedMask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
|
|
nuint numPixels = (uint)pixelData.Length;
|
|
nuint i = 0;
|
|
do
|
|
{
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
|
|
Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
|
|
Vector256<byte> in0g0g = Vector256_.ShufflePerLane(input, addGreenToBlueAndRedMask);
|
|
Vector256<byte> output = input + in0g0g;
|
|
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
|
|
i += 8;
|
|
}
|
|
while (i <= numPixels - 8);
|
|
|
|
if (i != numPixels)
|
|
{
|
|
AddGreenToBlueAndRedScalar(pixelData[(int)i..]);
|
|
}
|
|
}
|
|
else if (Vector128.IsHardwareAccelerated && pixelData.Length >= 4)
|
|
{
|
|
Vector128<byte> addGreenToBlueAndRedMask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
|
|
nuint numPixels = (uint)pixelData.Length;
|
|
nuint i = 0;
|
|
do
|
|
{
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
|
|
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
|
|
Vector128<byte> in0g0g = Vector128_.ShuffleNative(input, addGreenToBlueAndRedMask);
|
|
Vector128<byte> output = input + in0g0g;
|
|
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
|
|
i += 4;
|
|
}
|
|
while (i <= numPixels - 4);
|
|
|
|
if (i != numPixels)
|
|
{
|
|
AddGreenToBlueAndRedScalar(pixelData[(int)i..]);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
AddGreenToBlueAndRedScalar(pixelData);
|
|
}
|
|
}
|
|
|
|
private static void AddGreenToBlueAndRedScalar(Span<uint> pixelData)
|
|
{
|
|
int numPixels = pixelData.Length;
|
|
for (int i = 0; i < numPixels; i++)
|
|
{
|
|
uint argb = pixelData[i];
|
|
uint green = (argb >> 8) & 0xff;
|
|
uint redBlue = argb & 0x00ff00ffu;
|
|
redBlue += (green << 16) | green;
|
|
redBlue &= 0x00ff00ffu;
|
|
pixelData[i] = (argb & 0xff00ff00u) | redBlue;
|
|
}
|
|
}
|
|
|
|
public static void SubtractGreenFromBlueAndRed(Span<uint> pixelData)
|
|
{
|
|
if (Vector256.IsHardwareAccelerated && pixelData.Length >= 8)
|
|
{
|
|
Vector256<byte> subtractGreenFromBlueAndRedMask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
|
|
nuint numPixels = (uint)pixelData.Length;
|
|
nuint i = 0;
|
|
do
|
|
{
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
|
|
Vector256<byte> input = Unsafe.As<uint, Vector256<uint>>(ref pos).AsByte();
|
|
Vector256<byte> in0g0g = Vector256_.ShufflePerLane(input, subtractGreenFromBlueAndRedMask);
|
|
Vector256<byte> output = input - in0g0g;
|
|
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
|
|
i += 8;
|
|
}
|
|
while (i <= numPixels - 8);
|
|
|
|
if (i != numPixels)
|
|
{
|
|
SubtractGreenFromBlueAndRedScalar(pixelData[(int)i..]);
|
|
}
|
|
}
|
|
else if (Vector128.IsHardwareAccelerated && pixelData.Length >= 4)
|
|
{
|
|
Vector128<byte> subtractGreenFromBlueAndRedMask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
|
|
nuint numPixels = (uint)pixelData.Length;
|
|
nuint i = 0;
|
|
do
|
|
{
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i);
|
|
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte();
|
|
Vector128<byte> in0g0g = Vector128_.ShuffleNative(input, subtractGreenFromBlueAndRedMask);
|
|
Vector128<byte> output = input - in0g0g;
|
|
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
|
|
i += 4;
|
|
}
|
|
while (i <= numPixels - 4);
|
|
|
|
if (i != numPixels)
|
|
{
|
|
SubtractGreenFromBlueAndRedScalar(pixelData[(int)i..]);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
SubtractGreenFromBlueAndRedScalar(pixelData);
|
|
}
|
|
}
|
|
|
|
private static void SubtractGreenFromBlueAndRedScalar(Span<uint> pixelData)
|
|
{
|
|
int numPixels = pixelData.Length;
|
|
for (int i = 0; i < numPixels; i++)
|
|
{
|
|
uint argb = pixelData[i];
|
|
uint green = (argb >> 8) & 0xff;
|
|
uint newR = (((argb >> 16) & 0xff) - green) & 0xff;
|
|
uint newB = (((argb >> 0) & 0xff) - green) & 0xff;
|
|
pixelData[i] = (argb & 0xff00ff00u) | (newR << 16) | newB;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// If there are not many unique pixel values, it is more efficient to create a color index array and replace the pixel values by the array's indices.
|
|
/// This will reverse the color index transform.
|
|
/// </summary>
|
|
/// <param name="transform">The transform data contains color table size and the entries in the color table.</param>
|
|
/// <param name="pixelData">The pixel data to apply the reverse transform on.</param>
|
|
/// <param name="outputSpan">The resulting pixel data with the reversed transformation data.</param>
|
|
public static void ColorIndexInverseTransform(
|
|
Vp8LTransform transform,
|
|
Span<uint> pixelData,
|
|
Span<uint> outputSpan)
|
|
{
|
|
int bitsPerPixel = 8 >> transform.Bits;
|
|
int width = transform.XSize;
|
|
int height = transform.YSize;
|
|
Span<uint> colorMap = transform.Data.GetSpan();
|
|
int decodedPixels = 0;
|
|
if (bitsPerPixel < 8)
|
|
{
|
|
int pixelsPerByte = 1 << transform.Bits;
|
|
int countMask = pixelsPerByte - 1;
|
|
int bitMask = (1 << bitsPerPixel) - 1;
|
|
|
|
int pixelDataPos = 0;
|
|
for (int y = 0; y < height; y++)
|
|
{
|
|
uint packedPixels = 0;
|
|
for (int x = 0; x < width; x++)
|
|
{
|
|
// We need to load fresh 'packed_pixels' once every
|
|
// 'pixelsPerByte' increments of x. Fortunately, pixelsPerByte
|
|
// is a power of 2, so we can just use a mask for that, instead of
|
|
// decrementing a counter.
|
|
if ((x & countMask) == 0)
|
|
{
|
|
packedPixels = GetArgbIndex(pixelData[pixelDataPos++]);
|
|
}
|
|
|
|
outputSpan[decodedPixels++] = colorMap[(int)(packedPixels & bitMask)];
|
|
packedPixels >>= bitsPerPixel;
|
|
}
|
|
}
|
|
|
|
outputSpan.CopyTo(pixelData);
|
|
}
|
|
else
|
|
{
|
|
for (int y = 0; y < height; y++)
|
|
{
|
|
for (int x = 0; x < width; x++)
|
|
{
|
|
uint colorMapIndex = GetArgbIndex(pixelData[decodedPixels]);
|
|
pixelData[decodedPixels] = colorMap[(int)colorMapIndex];
|
|
decodedPixels++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// The goal of the color transform is to de-correlate the R, G and B values of each pixel.
|
|
/// Color transform keeps the green (G) value as it is, transforms red (R) based on green and transforms blue (B) based on green and then based on red.
|
|
/// </summary>
|
|
/// <param name="transform">The transform data.</param>
|
|
/// <param name="pixelData">The pixel data to apply the inverse transform on.</param>
|
|
public static void ColorSpaceInverseTransform(Vp8LTransform transform, Span<uint> pixelData)
|
|
{
|
|
int width = transform.XSize;
|
|
int yEnd = transform.YSize;
|
|
int tileWidth = 1 << transform.Bits;
|
|
int mask = tileWidth - 1;
|
|
int safeWidth = width & ~mask;
|
|
int remainingWidth = width - safeWidth;
|
|
int tilesPerRow = SubSampleSize(width, transform.Bits);
|
|
int y = 0;
|
|
int predRowIdxStart = (y >> transform.Bits) * tilesPerRow;
|
|
Span<uint> transformData = transform.Data.GetSpan();
|
|
|
|
int pixelPos = 0;
|
|
while (y < yEnd)
|
|
{
|
|
int predRowIdx = predRowIdxStart;
|
|
Vp8LMultipliers m = default;
|
|
int srcSafeEnd = pixelPos + safeWidth;
|
|
int srcEnd = pixelPos + width;
|
|
while (pixelPos < srcSafeEnd)
|
|
{
|
|
uint colorCode = transformData[predRowIdx++];
|
|
ColorCodeToMultipliers(colorCode, ref m);
|
|
TransformColorInverse(m, pixelData.Slice(pixelPos, tileWidth));
|
|
pixelPos += tileWidth;
|
|
}
|
|
|
|
if (pixelPos < srcEnd)
|
|
{
|
|
uint colorCode = transformData[predRowIdx];
|
|
ColorCodeToMultipliers(colorCode, ref m);
|
|
TransformColorInverse(m, pixelData.Slice(pixelPos, remainingWidth));
|
|
pixelPos += remainingWidth;
|
|
}
|
|
|
|
y++;
|
|
if ((y & mask) == 0)
|
|
{
|
|
predRowIdxStart += tilesPerRow;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Color transform keeps the green (G) value as it is, transforms red (R) based on green and transforms blue (B) based on green and then based on red.
|
|
/// </summary>
|
|
/// <param name="m">The Vp8LMultipliers.</param>
|
|
/// <param name="pixelData">The pixel data to transform.</param>
|
|
/// <param name="numPixels">The number of pixels to process.</param>
|
|
public static void TransformColor(Vp8LMultipliers m, Span<uint> pixelData, int numPixels)
|
|
{
|
|
if (Avx2.IsSupported && numPixels >= 8)
|
|
{
|
|
Vector256<byte> transformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
|
Vector256<byte> transformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
|
|
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
|
|
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
|
|
|
|
nuint idx = 0;
|
|
do
|
|
{
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
|
|
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos);
|
|
Vector256<byte> a = Avx2.And(input.AsByte(), transformColorAlphaGreenMask256);
|
|
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
|
|
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
|
|
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
|
|
Vector256<short> e = Avx2.ShiftLeftLogical(input.AsInt16(), 8);
|
|
Vector256<short> f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
|
|
Vector256<int> g = Avx2.ShiftRightLogical(f.AsInt32(), 16);
|
|
Vector256<byte> h = Avx2.Add(g.AsByte(), d.AsByte());
|
|
Vector256<byte> i = Avx2.And(h, transformColorRedBlueMask256);
|
|
Vector256<byte> output = Avx2.Subtract(input.AsByte(), i);
|
|
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
|
|
idx += 8;
|
|
}
|
|
while (idx <= (uint)numPixels - 8);
|
|
|
|
if (idx != (uint)numPixels)
|
|
{
|
|
TransformColorScalar(m, pixelData[(int)idx..], numPixels - (int)idx);
|
|
}
|
|
}
|
|
else if (Vector128.IsHardwareAccelerated && numPixels >= 4)
|
|
{
|
|
Vector128<byte> transformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
|
Vector128<byte> transformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
|
|
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
|
|
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
|
|
nuint idx = 0;
|
|
do
|
|
{
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
|
|
Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos);
|
|
Vector128<byte> a = input.AsByte() & transformColorAlphaGreenMask;
|
|
Vector128<short> b = Vector128_.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
|
|
Vector128<short> c = Vector128_.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
|
|
Vector128<short> d = Vector128_.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
|
|
Vector128<short> e = Vector128_.ShiftLeftLogical(input.AsInt16(), 8);
|
|
Vector128<short> f = Vector128_.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
|
|
Vector128<int> g = Vector128.ShiftRightLogical(f.AsInt32(), 16);
|
|
Vector128<byte> h = g.AsByte() + d.AsByte();
|
|
Vector128<byte> i = h & transformColorRedBlueMask;
|
|
Vector128<byte> output = input.AsByte() - i;
|
|
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
|
|
idx += 4;
|
|
}
|
|
while ((int)idx <= numPixels - 4);
|
|
|
|
if ((int)idx != numPixels)
|
|
{
|
|
TransformColorScalar(m, pixelData[(int)idx..], numPixels - (int)idx);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
TransformColorScalar(m, pixelData, numPixels);
|
|
}
|
|
}
|
|
|
|
private static void TransformColorScalar(Vp8LMultipliers m, Span<uint> data, int numPixels)
|
|
{
|
|
for (int i = 0; i < numPixels; i++)
|
|
{
|
|
uint argb = data[i];
|
|
sbyte green = U32ToS8(argb >> 8);
|
|
sbyte red = U32ToS8(argb >> 16);
|
|
int newRed = red & 0xff;
|
|
int newBlue = (int)(argb & 0xff);
|
|
newRed -= ColorTransformDelta((sbyte)m.GreenToRed, green);
|
|
newRed &= 0xff;
|
|
newBlue -= ColorTransformDelta((sbyte)m.GreenToBlue, green);
|
|
newBlue -= ColorTransformDelta((sbyte)m.RedToBlue, red);
|
|
newBlue &= 0xff;
|
|
data[i] = (argb & 0xff00ff00u) | ((uint)newRed << 16) | (uint)newBlue;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Reverses the color space transform.
|
|
/// </summary>
|
|
/// <param name="m">The color transform element.</param>
|
|
/// <param name="pixelData">The pixel data to apply the inverse transform on.</param>
|
|
public static void TransformColorInverse(Vp8LMultipliers m, Span<uint> pixelData)
|
|
{
|
|
if (Avx2.IsSupported && pixelData.Length >= 8)
|
|
{
|
|
Vector256<byte> transformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
|
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
|
|
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0);
|
|
nuint idx;
|
|
for (idx = 0; idx <= (uint)pixelData.Length - 8; idx += 8)
|
|
{
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
|
|
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos);
|
|
Vector256<byte> a = Avx2.And(input.AsByte(), transformColorInverseAlphaGreenMask256);
|
|
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
|
|
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
|
|
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
|
|
Vector256<byte> e = Avx2.Add(input.AsByte(), d.AsByte());
|
|
Vector256<short> f = Avx2.ShiftLeftLogical(e.AsInt16(), 8);
|
|
Vector256<short> g = Avx2.MultiplyHigh(f, multsb2.AsInt16());
|
|
Vector256<int> h = Avx2.ShiftRightLogical(g.AsInt32(), 8);
|
|
Vector256<byte> i = Avx2.Add(h.AsByte(), f.AsByte());
|
|
Vector256<short> j = Avx2.ShiftRightLogical(i.AsInt16(), 8);
|
|
Vector256<byte> output = Avx2.Or(j.AsByte(), a);
|
|
Unsafe.As<uint, Vector256<uint>>(ref pos) = output.AsUInt32();
|
|
}
|
|
|
|
if (idx != (uint)pixelData.Length)
|
|
{
|
|
TransformColorInverseScalar(m, pixelData[(int)idx..]);
|
|
}
|
|
}
|
|
else if (Vector128.IsHardwareAccelerated && pixelData.Length >= 4)
|
|
{
|
|
Vector128<byte> transformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
|
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
|
|
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
|
|
|
|
nuint idx;
|
|
for (idx = 0; idx <= (uint)pixelData.Length - 4; idx += 4)
|
|
{
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx);
|
|
Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos);
|
|
Vector128<byte> a = input.AsByte() & transformColorInverseAlphaGreenMask;
|
|
Vector128<short> b = Vector128_.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
|
|
Vector128<short> c = Vector128_.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
|
|
Vector128<short> d = Vector128_.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
|
|
Vector128<byte> e = input.AsByte() + d.AsByte();
|
|
Vector128<short> f = Vector128_.ShiftLeftLogical(e.AsInt16(), 8);
|
|
Vector128<short> g = Vector128_.MultiplyHigh(f, multsb2.AsInt16());
|
|
Vector128<int> h = Vector128.ShiftRightLogical(g.AsInt32(), 8);
|
|
Vector128<byte> i = h.AsByte() + f.AsByte();
|
|
Vector128<short> j = Vector128.ShiftRightLogical(i.AsInt16(), 8);
|
|
Vector128<byte> output = j.AsByte() | a;
|
|
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32();
|
|
}
|
|
|
|
if (idx != (uint)pixelData.Length)
|
|
{
|
|
TransformColorInverseScalar(m, pixelData[(int)idx..]);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
TransformColorInverseScalar(m, pixelData);
|
|
}
|
|
}
|
|
|
|
private static void TransformColorInverseScalar(Vp8LMultipliers m, Span<uint> pixelData)
|
|
{
|
|
for (int i = 0; i < pixelData.Length; i++)
|
|
{
|
|
uint argb = pixelData[i];
|
|
sbyte green = (sbyte)(argb >> 8);
|
|
uint red = argb >> 16;
|
|
int newRed = (int)(red & 0xff);
|
|
int newBlue = (int)argb & 0xff;
|
|
newRed += ColorTransformDelta((sbyte)m.GreenToRed, green);
|
|
newRed &= 0xff;
|
|
newBlue += ColorTransformDelta((sbyte)m.GreenToBlue, green);
|
|
newBlue += ColorTransformDelta((sbyte)m.RedToBlue, (sbyte)newRed);
|
|
newBlue &= 0xff;
|
|
|
|
pixelData[i] = (argb & 0xff00ff00u) | ((uint)newRed << 16) | (uint)newBlue;
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// This will reverse the predictor transform.
|
|
/// The predictor transform can be used to reduce entropy by exploiting the fact that neighboring pixels are often correlated.
|
|
/// In the predictor transform, the current pixel value is predicted from the pixels already decoded (in scan-line order) and only the residual value (actual - predicted) is encoded.
|
|
/// The prediction mode determines the type of prediction to use. The image is divided into squares and all the pixels in a square use same prediction mode.
|
|
/// </summary>
|
|
/// <param name="transform">The transform data.</param>
|
|
/// <param name="pixelData">The pixel data to apply the inverse transform.</param>
|
|
/// <param name="outputSpan">The resulting pixel data with the reversed transformation data.</param>
|
|
public static void PredictorInverseTransform(
|
|
Vp8LTransform transform,
|
|
Span<uint> pixelData,
|
|
Span<uint> outputSpan)
|
|
{
|
|
fixed (uint* inputFixed = pixelData)
|
|
{
|
|
fixed (uint* outputFixed = outputSpan)
|
|
{
|
|
uint* input = inputFixed;
|
|
uint* output = outputFixed;
|
|
|
|
int width = transform.XSize;
|
|
Span<uint> transformData = transform.Data.GetSpan();
|
|
|
|
// First Row follows the L (mode=1) mode.
|
|
PredictorAdd0(input, 1, output);
|
|
PredictorAdd1(input + 1, width - 1, output + 1);
|
|
input += width;
|
|
output += width;
|
|
|
|
int y = 1;
|
|
int yEnd = transform.YSize;
|
|
int tileWidth = 1 << transform.Bits;
|
|
int mask = tileWidth - 1;
|
|
int tilesPerRow = SubSampleSize(width, transform.Bits);
|
|
int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow;
|
|
Span<short> scratch = stackalloc short[8];
|
|
while (y < yEnd)
|
|
{
|
|
int predictorModeIdx = predictorModeIdxBase;
|
|
int x = 1;
|
|
|
|
// First pixel follows the T (mode=2) mode.
|
|
PredictorAdd2(input, output - width, 1, output);
|
|
|
|
// .. the rest:
|
|
while (x < width)
|
|
{
|
|
uint predictorMode = (transformData[predictorModeIdx++] >> 8) & 0xf;
|
|
int xEnd = (x & ~mask) + tileWidth;
|
|
if (xEnd > width)
|
|
{
|
|
xEnd = width;
|
|
}
|
|
|
|
// There are 14 different prediction modes.
|
|
// In each prediction mode, the current pixel value is predicted from one
|
|
// or more neighboring pixels whose values are already known.
|
|
switch (predictorMode)
|
|
{
|
|
case 0:
|
|
PredictorAdd0(input + x, xEnd - x, output + x);
|
|
break;
|
|
case 1:
|
|
PredictorAdd1(input + x, xEnd - x, output + x);
|
|
break;
|
|
case 2:
|
|
PredictorAdd2(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 3:
|
|
PredictorAdd3(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 4:
|
|
PredictorAdd4(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 5:
|
|
PredictorAdd5(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 6:
|
|
PredictorAdd6(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 7:
|
|
PredictorAdd7(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 8:
|
|
PredictorAdd8(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 9:
|
|
PredictorAdd9(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 10:
|
|
PredictorAdd10(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 11:
|
|
PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch);
|
|
break;
|
|
case 12:
|
|
PredictorAdd12(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
case 13:
|
|
PredictorAdd13(input + x, output + x - width, xEnd - x, output + x);
|
|
break;
|
|
}
|
|
|
|
x = xEnd;
|
|
}
|
|
|
|
input += width;
|
|
output += width;
|
|
y++;
|
|
|
|
if ((y & mask) == 0)
|
|
{
|
|
// Use the same mask, since tiles are squares.
|
|
predictorModeIdxBase += tilesPerRow;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
outputSpan.CopyTo(pixelData);
|
|
}
|
|
|
|
public static void ExpandColorMap(int numColors, Span<uint> transformData, Span<uint> newColorMap)
|
|
{
|
|
newColorMap[0] = transformData[0];
|
|
Span<byte> data = MemoryMarshal.Cast<uint, byte>(transformData);
|
|
Span<byte> newData = MemoryMarshal.Cast<uint, byte>(newColorMap);
|
|
int numColorsX4 = 4 * numColors;
|
|
int i;
|
|
for (i = 4; i < numColorsX4; i++)
|
|
{
|
|
// Equivalent to AddPixelEq(), on a byte-basis.
|
|
newData[i] = (byte)((data[i] + newData[i - 4]) & 0xff);
|
|
}
|
|
|
|
int colorMapLength4 = 4 * newColorMap.Length;
|
|
for (; i < colorMapLength4; i++)
|
|
{
|
|
newData[i] = 0; // black tail.
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Difference of each component, mod 256.
|
|
/// </summary>
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint SubPixels(uint a, uint b)
|
|
{
|
|
uint alphaAndGreen = 0x00ff00ffu + (a & 0xff00ff00u) - (b & 0xff00ff00u);
|
|
uint redAndBlue = 0xff00ff00u + (a & 0x00ff00ffu) - (b & 0x00ff00ffu);
|
|
return (alphaAndGreen & 0xff00ff00u) | (redAndBlue & 0x00ff00ffu);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
|
/// </summary>
|
|
public static void BundleColorMap(Span<byte> row, int width, int xBits, Span<uint> dst)
|
|
{
|
|
int x;
|
|
if (xBits > 0)
|
|
{
|
|
int bitDepth = 1 << (3 - xBits);
|
|
int mask = (1 << xBits) - 1;
|
|
uint code = 0xff000000;
|
|
for (x = 0; x < width; x++)
|
|
{
|
|
int xsub = x & mask;
|
|
if (xsub == 0)
|
|
{
|
|
code = 0xff000000;
|
|
}
|
|
|
|
code |= (uint)(row[x] << (8 + (bitDepth * xsub)));
|
|
dst[x >> xBits] = code;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (x = 0; x < width; x++)
|
|
{
|
|
dst[x] = (uint)(0xff000000 | (row[x] << 8));
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Compute the combined Shanon's entropy for distribution {X} and {X+Y}.
|
|
/// </summary>
|
|
/// <returns>Shanon entropy.</returns>
|
|
public static float CombinedShannonEntropy(Span<int> x, Span<int> y)
|
|
{
|
|
if (Avx2.IsSupported)
|
|
{
|
|
double retVal = 0.0d;
|
|
Vector256<int> tmp = Vector256<int>.Zero; // has the size of the scratch space of sizeof(int) * 8
|
|
ref int xRef = ref MemoryMarshal.GetReference(x);
|
|
ref int yRef = ref MemoryMarshal.GetReference(y);
|
|
Vector256<int> sumXY256 = Vector256<int>.Zero;
|
|
Vector256<int> sumX256 = Vector256<int>.Zero;
|
|
ref int tmpRef = ref Unsafe.As<Vector256<int>, int>(ref tmp);
|
|
for (nuint i = 0; i < 256; i += 8)
|
|
{
|
|
Vector256<int> xVec = Unsafe.As<int, Vector256<int>>(ref Unsafe.Add(ref xRef, i));
|
|
Vector256<int> yVec = Unsafe.As<int, Vector256<int>>(ref Unsafe.Add(ref yRef, i));
|
|
|
|
// Check if any X is non-zero: this actually provides a speedup as X is usually sparse.
|
|
int mask = Avx2.MoveMask(Avx2.CompareEqual(xVec, Vector256<int>.Zero).AsByte());
|
|
if (mask != -1)
|
|
{
|
|
Vector256<int> xy256 = Avx2.Add(xVec, yVec);
|
|
sumXY256 = Avx2.Add(sumXY256, xy256);
|
|
sumX256 = Avx2.Add(sumX256, xVec);
|
|
|
|
// Analyze the different X + Y.
|
|
Unsafe.As<int, Vector256<int>>(ref tmpRef) = xy256;
|
|
if (tmpRef != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)tmpRef);
|
|
if (Unsafe.Add(ref xRef, i) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i));
|
|
}
|
|
}
|
|
|
|
if (Unsafe.Add(ref tmpRef, 1) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 1));
|
|
if (Unsafe.Add(ref xRef, i + 1) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 1));
|
|
}
|
|
}
|
|
|
|
if (Unsafe.Add(ref tmpRef, 2) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 2));
|
|
if (Unsafe.Add(ref xRef, i + 2) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 2));
|
|
}
|
|
}
|
|
|
|
if (Unsafe.Add(ref tmpRef, 3) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 3));
|
|
if (Unsafe.Add(ref xRef, i + 3) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 3));
|
|
}
|
|
}
|
|
|
|
if (Unsafe.Add(ref tmpRef, 4) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 4));
|
|
if (Unsafe.Add(ref xRef, i + 4) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 4));
|
|
}
|
|
}
|
|
|
|
if (Unsafe.Add(ref tmpRef, 5) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 5));
|
|
if (Unsafe.Add(ref xRef, i + 5) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 5));
|
|
}
|
|
}
|
|
|
|
if (Unsafe.Add(ref tmpRef, 6) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 6));
|
|
if (Unsafe.Add(ref xRef, i + 6) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 6));
|
|
}
|
|
}
|
|
|
|
if (Unsafe.Add(ref tmpRef, 7) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref tmpRef, 7));
|
|
if (Unsafe.Add(ref xRef, i + 7) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref xRef, i + 7));
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// X is fully 0, so only deal with Y.
|
|
sumXY256 = Avx2.Add(sumXY256, yVec);
|
|
|
|
if (Unsafe.Add(ref yRef, i) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i));
|
|
}
|
|
|
|
if (Unsafe.Add(ref yRef, i + 1) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 1));
|
|
}
|
|
|
|
if (Unsafe.Add(ref yRef, i + 2) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 2));
|
|
}
|
|
|
|
if (Unsafe.Add(ref yRef, i + 3) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 3));
|
|
}
|
|
|
|
if (Unsafe.Add(ref yRef, i + 4) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 4));
|
|
}
|
|
|
|
if (Unsafe.Add(ref yRef, i + 5) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 5));
|
|
}
|
|
|
|
if (Unsafe.Add(ref yRef, i + 6) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 6));
|
|
}
|
|
|
|
if (Unsafe.Add(ref yRef, i + 7) != 0)
|
|
{
|
|
retVal -= FastSLog2((uint)Unsafe.Add(ref yRef, i + 7));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sum up sumX256 to get sumX and sum up sumXY256 to get sumXY.
|
|
int sumX = Numerics.ReduceSum(sumX256);
|
|
int sumXY = Numerics.ReduceSum(sumXY256);
|
|
|
|
retVal += FastSLog2((uint)sumX) + FastSLog2((uint)sumXY);
|
|
|
|
return (float)retVal;
|
|
}
|
|
else
|
|
{
|
|
double retVal = 0.0d;
|
|
uint sumX = 0, sumXY = 0;
|
|
for (int i = 0; i < 256; i++)
|
|
{
|
|
uint xi = (uint)x[i];
|
|
if (xi != 0)
|
|
{
|
|
uint xy = xi + (uint)y[i];
|
|
sumX += xi;
|
|
retVal -= FastSLog2(xi);
|
|
sumXY += xy;
|
|
retVal -= FastSLog2(xy);
|
|
}
|
|
else if (y[i] != 0)
|
|
{
|
|
sumXY += (uint)y[i];
|
|
retVal -= FastSLog2((uint)y[i]);
|
|
}
|
|
}
|
|
|
|
retVal += FastSLog2(sumX) + FastSLog2(sumXY);
|
|
return (float)retVal;
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static byte TransformColorRed(sbyte greenToRed, uint argb)
|
|
{
|
|
sbyte green = U32ToS8(argb >> 8);
|
|
int newRed = (int)(argb >> 16);
|
|
newRed -= ColorTransformDelta(greenToRed, green);
|
|
return (byte)(newRed & 0xff);
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint argb)
|
|
{
|
|
sbyte green = U32ToS8(argb >> 8);
|
|
sbyte red = U32ToS8(argb >> 16);
|
|
int newBlue = (int)(argb & 0xff);
|
|
newBlue -= ColorTransformDelta(greenToBlue, green);
|
|
newBlue -= ColorTransformDelta(redToBlue, red);
|
|
return (byte)(newBlue & 0xff);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Fast calculation of log2(v) for integer input.
|
|
/// </summary>
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v);
|
|
|
|
/// <summary>
|
|
/// Fast calculation of v * log2(v) for integer input.
|
|
/// </summary>
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static float FastSLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.SLog2Table[v] : FastSLog2Slow(v);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void ColorCodeToMultipliers(uint colorCode, ref Vp8LMultipliers m)
|
|
{
|
|
m.GreenToRed = (byte)(colorCode & 0xff);
|
|
m.GreenToBlue = (byte)((colorCode >> 8) & 0xff);
|
|
m.RedToBlue = (byte)((colorCode >> 16) & 0xff);
|
|
}
|
|
|
|
// Converts near lossless quality into max number of bits shaved off.
|
|
// 100 -> 0
|
|
// 80..99 -> 1
|
|
// 60..79 -> 2
|
|
// 40..59 -> 3
|
|
// 20..39 -> 4
|
|
// 0..19 -> 5
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static int NearLosslessBits(int nearLosslessQuality) => 5 - (nearLosslessQuality / 20);
|
|
|
|
private static float FastSLog2Slow(uint v)
|
|
{
|
|
DebugGuard.MustBeGreaterThanOrEqualTo<uint>(v, LogLookupIdxMax, nameof(v));
|
|
|
|
if (v < ApproxLogWithCorrectionMax)
|
|
{
|
|
int logCnt = 0;
|
|
uint y = 1;
|
|
float vF = v;
|
|
uint origV = v;
|
|
do
|
|
{
|
|
++logCnt;
|
|
v >>= 1;
|
|
y <<= 1;
|
|
}
|
|
while (v >= LogLookupIdxMax);
|
|
|
|
// vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
|
|
// Xf = floor(Xf) * (1 + (v % y) / v)
|
|
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
|
|
// The correction factor: log(1 + d) ~ d; for very small d values, so
|
|
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
|
|
// LOG_2_RECIPROCAL ~ 23/16
|
|
int correction = (int)((23 * (origV & (y - 1))) >> 4);
|
|
return (vF * (WebpLookupTables.Log2Table[v] + logCnt)) + correction;
|
|
}
|
|
|
|
return (float)(Log2Reciprocal * v * Math.Log(v));
|
|
}
|
|
|
|
private static float FastLog2Slow(uint v)
|
|
{
|
|
DebugGuard.MustBeGreaterThanOrEqualTo<uint>(v, LogLookupIdxMax, nameof(v));
|
|
|
|
if (v < ApproxLogWithCorrectionMax)
|
|
{
|
|
int logCnt = 0;
|
|
uint y = 1;
|
|
uint origV = v;
|
|
do
|
|
{
|
|
++logCnt;
|
|
v >>= 1;
|
|
y <<= 1;
|
|
}
|
|
while (v >= LogLookupIdxMax);
|
|
|
|
double log2 = WebpLookupTables.Log2Table[v] + logCnt;
|
|
if (origV >= ApproxLogMax)
|
|
{
|
|
// Since the division is still expensive, add this correction factor only
|
|
// for large values of 'v'.
|
|
int correction = (int)(23 * (origV & (y - 1))) >> 4;
|
|
log2 += (double)correction / origV;
|
|
}
|
|
|
|
return (float)log2;
|
|
}
|
|
|
|
return (float)(Log2Reciprocal * Math.Log(v));
|
|
}
|
|
|
|
/// <summary>
|
|
/// Splitting of distance and length codes into prefixes and
|
|
/// extra bits. The prefixes are encoded with an entropy code
|
|
/// while the extra bits are stored just as normal bits.
|
|
/// </summary>
|
|
private static int PrefixEncodeBitsNoLut(int distance, ref int extraBits)
|
|
{
|
|
int highestBit = BitOperations.Log2((uint)--distance);
|
|
int secondHighestBit = (distance >> (highestBit - 1)) & 1;
|
|
extraBits = highestBit - 1;
|
|
int code = (2 * highestBit) + secondHighestBit;
|
|
return code;
|
|
}
|
|
|
|
private static int PrefixEncodeNoLut(int distance, ref int extraBits, ref int extraBitsValue)
|
|
{
|
|
int highestBit = BitOperations.Log2((uint)--distance);
|
|
int secondHighestBit = (distance >> (highestBit - 1)) & 1;
|
|
extraBits = highestBit - 1;
|
|
extraBitsValue = distance & ((1 << extraBits) - 1);
|
|
int code = (2 * highestBit) + secondHighestBit;
|
|
return code;
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd0(uint* input, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
output[x] = AddPixels(input[x], WebpConstants.ArgbBlack);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd1(uint* input, int numberOfPixels, uint* output)
|
|
{
|
|
uint left = output[-1];
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
output[x] = left = AddPixels(input[x], left);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd2(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor2(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd3(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor3(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd4(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor4(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd5(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor5(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd6(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor6(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd7(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor7(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd8(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor8(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd9(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor9(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd10(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor10(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span<short> scratch)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor11(output[x - 1], upper + x, scratch);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd12(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor12(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static void PredictorAdd13(uint* input, uint* upper, int numberOfPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numberOfPixels; x++)
|
|
{
|
|
uint pred = Predictor13(output[x - 1], upper + x);
|
|
output[x] = AddPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor2(uint left, uint* top) => top[0];
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor3(uint left, uint* top) => top[1];
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor4(uint left, uint* top) => top[-1];
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor5(uint left, uint* top) => Average3(left, top[0], top[1]);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor6(uint left, uint* top) => Average2(left, top[-1]);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor7(uint left, uint* top) => Average2(left, top[0]);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor8(uint left, uint* top) => Average2(top[-1], top[0]);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor9(uint left, uint* top) => Average2(top[0], top[1]);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor11(uint left, uint* top, Span<short> scratch) => Select(top[0], left, top[-1], scratch);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint Predictor13(uint left, uint* top) => ClampedAddSubtractHalf(left, top[0], top[-1]);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub0(uint* input, int numPixels, uint* output)
|
|
{
|
|
for (int i = 0; i < numPixels; i++)
|
|
{
|
|
output[i] = SubPixels(input[i], WebpConstants.ArgbBlack);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub1(uint* input, int numPixels, uint* output)
|
|
{
|
|
for (int i = 0; i < numPixels; i++)
|
|
{
|
|
output[i] = SubPixels(input[i], input[i - 1]);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub2(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor2(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub3(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor3(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub4(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor4(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub5(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor5(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub6(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor6(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub7(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor7(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub8(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor8(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub9(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor9(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub10(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor10(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span<short> scratch)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor11(input[x - 1], upper + x, scratch);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub12(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor12(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static void PredictorSub13(uint* input, uint* upper, int numPixels, uint* output)
|
|
{
|
|
for (int x = 0; x < numPixels; x++)
|
|
{
|
|
uint pred = Predictor13(input[x - 1], upper + x);
|
|
output[x] = SubPixels(input[x], pred);
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Computes sampled size of 'size' when sampling using 'sampling bits'.
|
|
/// </summary>
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static int SubSampleSize(int size, int samplingBits) => (size + (1 << samplingBits) - 1) >> samplingBits;
|
|
|
|
/// <summary>
|
|
/// Sum of each component, mod 256.
|
|
/// </summary>
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static uint AddPixels(uint a, uint b)
|
|
{
|
|
uint alphaAndGreen = (a & 0xff00ff00u) + (b & 0xff00ff00u);
|
|
uint redAndBlue = (a & 0x00ff00ffu) + (b & 0x00ff00ffu);
|
|
return (alphaAndGreen & 0xff00ff00u) | (redAndBlue & 0x00ff00ffu);
|
|
}
|
|
|
|
// For sign-extended multiplying constants, pre-shifted by 5:
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
public static short Cst5b(int x) => (short)(((short)(x << 8)) >> 5);
|
|
|
|
private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
|
|
{
|
|
if (Vector128.IsHardwareAccelerated)
|
|
{
|
|
Vector128<byte> c0Vec = Vector128_.UnpackLow(Vector128.CreateScalar(c0).AsByte(), Vector128<byte>.Zero);
|
|
Vector128<byte> c1Vec = Vector128_.UnpackLow(Vector128.CreateScalar(c1).AsByte(), Vector128<byte>.Zero);
|
|
Vector128<byte> c2Vec = Vector128_.UnpackLow(Vector128.CreateScalar(c2).AsByte(), Vector128<byte>.Zero);
|
|
Vector128<short> v1 = c0Vec.AsInt16() + c1Vec.AsInt16();
|
|
Vector128<short> v2 = v1 - c2Vec.AsInt16();
|
|
Vector128<byte> b = Vector128_.PackUnsignedSaturate(v2, v2);
|
|
return b.AsUInt32().ToScalar();
|
|
}
|
|
|
|
{
|
|
int a = AddSubtractComponentFull(
|
|
(int)(c0 >> 24),
|
|
(int)(c1 >> 24),
|
|
(int)(c2 >> 24));
|
|
int r = AddSubtractComponentFull(
|
|
(int)((c0 >> 16) & 0xff),
|
|
(int)((c1 >> 16) & 0xff),
|
|
(int)((c2 >> 16) & 0xff));
|
|
int g = AddSubtractComponentFull(
|
|
(int)((c0 >> 8) & 0xff),
|
|
(int)((c1 >> 8) & 0xff),
|
|
(int)((c2 >> 8) & 0xff));
|
|
int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
|
|
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
|
|
}
|
|
}
|
|
|
|
private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
|
|
{
|
|
if (Vector128.IsHardwareAccelerated)
|
|
{
|
|
Vector128<byte> c0Vec = Vector128_.UnpackLow(Vector128.CreateScalar(c0).AsByte(), Vector128<byte>.Zero);
|
|
Vector128<byte> c1Vec = Vector128_.UnpackLow(Vector128.CreateScalar(c1).AsByte(), Vector128<byte>.Zero);
|
|
Vector128<byte> b0 = Vector128_.UnpackLow(Vector128.CreateScalar(c2).AsByte(), Vector128<byte>.Zero);
|
|
Vector128<short> avg = c1Vec.AsInt16() + c0Vec.AsInt16();
|
|
Vector128<short> a0 = Vector128.ShiftRightLogical(avg, 1);
|
|
Vector128<short> a1 = a0 - b0.AsInt16();
|
|
Vector128<short> bgta = Vector128.GreaterThan(b0.AsInt16(), a0.AsInt16());
|
|
Vector128<short> a2 = a1 - bgta;
|
|
Vector128<short> a3 = Vector128.ShiftRightArithmetic(a2, 1);
|
|
Vector128<short> a4 = (a0 + a3).AsInt16();
|
|
Vector128<byte> a5 = Vector128_.PackUnsignedSaturate(a4, a4);
|
|
return a5.AsUInt32().ToScalar();
|
|
}
|
|
|
|
{
|
|
uint ave = Average2(c0, c1);
|
|
int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
|
|
int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
|
|
int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
|
|
int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
|
|
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static int AddSubtractComponentHalf(int a, int b) => (int)Clip255((uint)(a + ((a - b) / 2)));
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static int AddSubtractComponentFull(int a, int b, int c) => (int)Clip255((uint)(a + b - c));
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static uint Clip255(uint a) => a < 256 ? a : ~a >> 24;
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static Vector128<int> MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static Vector256<int> MkCst32(int hi, int lo) => Vector256.Create((hi << 16) | (lo & 0xffff));
|
|
|
|
private static uint Select(uint a, uint b, uint c, Span<short> scratch)
|
|
{
|
|
if (Vector128.IsHardwareAccelerated)
|
|
{
|
|
fixed (short* ptr = &MemoryMarshal.GetReference(scratch))
|
|
{
|
|
Vector128<byte> a0 = Vector128.CreateScalar(a).AsByte();
|
|
Vector128<byte> b0 = Vector128.CreateScalar(b).AsByte();
|
|
Vector128<byte> c0 = Vector128.CreateScalar(c).AsByte();
|
|
Vector128<byte> ac0 = Vector128_.SubtractSaturate(a0, c0);
|
|
Vector128<byte> ca0 = Vector128_.SubtractSaturate(c0, a0);
|
|
Vector128<byte> bc0 = Vector128_.SubtractSaturate(b0, c0);
|
|
Vector128<byte> cb0 = Vector128_.SubtractSaturate(c0, b0);
|
|
Vector128<byte> ac = ac0 | ca0;
|
|
Vector128<byte> bc = bc0 | cb0;
|
|
Vector128<byte> pa = Vector128_.UnpackLow(ac, Vector128<byte>.Zero); // |a - c|
|
|
Vector128<byte> pb = Vector128_.UnpackLow(bc, Vector128<byte>.Zero); // |b - c|
|
|
Vector128<ushort> diff = pb.AsUInt16() - pa.AsUInt16();
|
|
diff.Store((ushort*)ptr);
|
|
int paMinusPb = ptr[3] + ptr[2] + ptr[1] + ptr[0];
|
|
return (paMinusPb <= 0) ? a : b;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int paMinusPb =
|
|
Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
|
|
Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
|
|
Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
|
|
Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
|
|
return paMinusPb <= 0 ? a : b;
|
|
}
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static int Sub3(int a, int b, int c)
|
|
{
|
|
int pb = b - c;
|
|
int pa = a - c;
|
|
return Math.Abs(pb) - Math.Abs(pa);
|
|
}
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static uint Average2(uint a0, uint a1) => (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static uint Average3(uint a0, uint a1, uint a2) => Average2(Average2(a0, a2), a1);
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static uint Average4(uint a0, uint a1, uint a2, uint a3) => Average2(Average2(a0, a1), Average2(a2, a3));
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static uint GetArgbIndex(uint idx) => (idx >> 8) & 0xff;
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static int ColorTransformDelta(sbyte colorPred, sbyte color) => (colorPred * color) >> 5;
|
|
|
|
[MethodImpl(InliningOptions.ShortMethod)]
|
|
private static sbyte U32ToS8(uint v) => (sbyte)(v & 0xff);
|
|
}
|
|
|