📷 A modern, cross-platform, 2D Graphics library for .NET
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

1495 lines
60 KiB

// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.Memory;
namespace SixLabors.ImageSharp.Formats.Webp.Lossless;
/// <summary>
/// Utility functions for the lossless decoder.
/// </summary>
internal static unsafe class LosslessUtils
{
// Distances below this value are resolved through the WebpLookupTables prefix-code tables
// (see PrefixEncodeBits / PrefixEncode); larger distances are computed arithmetically.
private const int PrefixLookupIdxMax = 512;
// Inputs below this value are answered directly from Log2Table / SLog2Table (see FastLog2 / FastSLog2).
private const int LogLookupIdxMax = 256;
// FastLog2Slow only adds its (division based) correction term for inputs at or above this value.
private const int ApproxLogMax = 4096;
// Inputs below this value use the shift-and-lookup approximation; larger inputs fall back to Math.Log.
private const int ApproxLogWithCorrectionMax = 65536;
// 1 / ln(2): multiplying a natural logarithm by this constant yields a base-2 logarithm.
private const double Log2Reciprocal = 1.44269504088896338700465094007086;
/// <summary>
/// Returns the index of the first element at which <paramref name="array1"/> and <paramref name="array2"/> differ.
/// When the mismatch lies at or before <paramref name="bestLenMatch"/> the result only has to be strictly smaller
/// than <paramref name="bestLenMatch"/>; this implementation returns 0 when the elements at
/// <paramref name="bestLenMatch"/> differ, without scanning the arrays.
/// When the first <paramref name="maxLimit"/> elements are all equal, <paramref name="maxLimit"/> is returned.
/// </summary>
/// <param name="array1">The first sequence.</param>
/// <param name="array2">The second sequence.</param>
/// <param name="bestLenMatch">The best match length found so far; used as a quick rejection probe.</param>
/// <param name="maxLimit">The maximum number of elements to compare.</param>
/// <returns>The match length, or 0 when the candidate cannot exceed the current best.</returns>
public static int FindMatchLength(ReadOnlySpan<uint> array1, ReadOnlySpan<uint> array2, int bestLenMatch, int maxLimit)
{
    // Probe the element at the current best length first: if it differs there is
    // no point in running the linear comparison.
    bool probeMatches = array1[bestLenMatch] == array2[bestLenMatch];
    return probeMatches ? VectorMismatch(array1, array2, maxLimit) : 0;
}
[MethodImpl(InliningOptions.ShortMethod)]
public static int VectorMismatch(ReadOnlySpan<uint> array1, ReadOnlySpan<uint> array2, int length)
{
    // Counts the leading elements that are equal in both spans, comparing at most 'length' elements.
    // Elements are read through unchecked references, so the caller must ensure both spans
    // contain at least 'length' elements.
    ref uint first = ref MemoryMarshal.GetReference(array1);
    ref uint second = ref MemoryMarshal.GetReference(array2);
    int index;
    for (index = 0; index < length; index++)
    {
        if (Unsafe.Add(ref first, (uint)index) != Unsafe.Add(ref second, (uint)index))
        {
            break;
        }
    }

    return index;
}
/// <summary>
/// Caps a candidate copy length at <c>BackwardReferenceEncoder.MaxLength</c>.
/// </summary>
/// <param name="len">The candidate length.</param>
/// <returns>The smaller of <paramref name="len"/> and <c>BackwardReferenceEncoder.MaxLength</c>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static int MaxFindCopyLength(int len)
{
    if (len < BackwardReferenceEncoder.MaxLength)
    {
        return len;
    }

    return BackwardReferenceEncoder.MaxLength;
}
/// <summary>
/// Splits a distance (or length) into a prefix code and the number of extra bits that follow it.
/// </summary>
/// <param name="distance">The value to encode.</param>
/// <param name="extraBits">Receives the number of extra bits.</param>
/// <returns>The prefix code.</returns>
public static int PrefixEncodeBits(int distance, ref int extraBits)
{
    // Values outside the lookup table range are computed arithmetically.
    if (distance >= PrefixLookupIdxMax)
    {
        return PrefixEncodeBitsNoLut(distance, ref extraBits);
    }

    (int prefixCode, int extraBitCount) = WebpLookupTables.PrefixEncodeCode[distance];
    extraBits = extraBitCount;
    return prefixCode;
}
/// <summary>
/// Splits a distance (or length) into a prefix code, the number of extra bits and the value of those extra bits.
/// </summary>
/// <param name="distance">The value to encode.</param>
/// <param name="extraBits">Receives the number of extra bits.</param>
/// <param name="extraBitsValue">Receives the value stored in the extra bits.</param>
/// <returns>The prefix code.</returns>
public static int PrefixEncode(int distance, ref int extraBits, ref int extraBitsValue)
{
    // Values outside the lookup table range are computed arithmetically.
    if (distance >= PrefixLookupIdxMax)
    {
        return PrefixEncodeNoLut(distance, ref extraBits, ref extraBitsValue);
    }

    (int prefixCode, int extraBitCount) = WebpLookupTables.PrefixEncodeCode[distance];
    extraBits = extraBitCount;
    extraBitsValue = WebpLookupTables.PrefixEncodeExtraBitsValue[distance];
    return prefixCode;
}
/// <summary>
/// Adds the green channel of every pixel to its red and blue channels (modulo 256),
/// i.e. performs the inverse of the 'subtract green' transform.
/// </summary>
/// <param name="pixelData">The pixel data to transform in place.</param>
public static void AddGreenToBlueAndRed(Span<uint> pixelData)
{
    int length = pixelData.Length;

    if (Vector256.IsHardwareAccelerated && length >= 8)
    {
        // For every pixel the selector picks the green byte (offsets 1, 5, 9, ...) for the blue and red
        // positions. The 255 entries have their high bit set; they are intended to yield zero for the
        // green and alpha positions so those bytes are left unchanged by the addition.
        // NOTE(review): relies on Vector256_.ShufflePerLane using per-128-bit-lane byte-shuffle semantics - confirm.
        Vector256<byte> greenSelector = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
        ref uint first = ref MemoryMarshal.GetReference(pixelData);
        nuint count = (uint)length;
        nuint lastFullBlock = count - 8;
        nuint offset = 0;
        do
        {
            ref Vector256<uint> block = ref Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref first, offset));
            Vector256<byte> argb = block.AsByte();
            Vector256<byte> greenForRedBlue = Vector256_.ShufflePerLane(argb, greenSelector);
            block = (argb + greenForRedBlue).AsUInt32();
            offset += 8;
        }
        while (offset <= lastFullBlock);

        // Pixels that do not fill a whole vector are handled by the scalar version.
        if (offset != count)
        {
            AddGreenToBlueAndRedScalar(pixelData.Slice((int)offset));
        }

        return;
    }

    if (Vector128.IsHardwareAccelerated && length >= 4)
    {
        // Same selector layout as above, for four pixels at a time.
        Vector128<byte> greenSelector = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
        ref uint first = ref MemoryMarshal.GetReference(pixelData);
        nuint count = (uint)length;
        nuint lastFullBlock = count - 4;
        nuint offset = 0;
        do
        {
            ref Vector128<uint> block = ref Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref first, offset));
            Vector128<byte> argb = block.AsByte();
            Vector128<byte> greenForRedBlue = Vector128_.ShuffleNative(argb, greenSelector);
            block = (argb + greenForRedBlue).AsUInt32();
            offset += 4;
        }
        while (offset <= lastFullBlock);

        if (offset != count)
        {
            AddGreenToBlueAndRedScalar(pixelData.Slice((int)offset));
        }

        return;
    }

    AddGreenToBlueAndRedScalar(pixelData);
}
/// <summary>
/// Scalar version of the 'add green' transform: adds green to red and blue (modulo 256) for every pixel.
/// </summary>
/// <param name="pixelData">The pixel data to transform in place.</param>
private static void AddGreenToBlueAndRedScalar(Span<uint> pixelData)
{
    const uint RedBlueMask = 0x00ff00ffu;
    const uint AlphaGreenMask = 0xff00ff00u;

    foreach (ref uint pixel in pixelData)
    {
        uint green = (pixel >> 8) & 0xff;

        // Red and blue are summed in one operation; masking afterwards drops the carry of each channel.
        uint greenTwice = (green << 16) | green;
        uint redBlue = ((pixel & RedBlueMask) + greenTwice) & RedBlueMask;
        pixel = (pixel & AlphaGreenMask) | redBlue;
    }
}
/// <summary>
/// Subtracts the green channel of every pixel from its red and blue channels (modulo 256).
/// </summary>
/// <param name="pixelData">The pixel data to transform in place.</param>
public static void SubtractGreenFromBlueAndRed(Span<uint> pixelData)
{
    int length = pixelData.Length;

    if (Vector256.IsHardwareAccelerated && length >= 8)
    {
        // For every pixel the selector picks the green byte (offsets 1, 5, 9, ...) for the blue and red
        // positions. The 255 entries have their high bit set; they are intended to yield zero for the
        // green and alpha positions so those bytes are left unchanged by the subtraction.
        // NOTE(review): relies on Vector256_.ShufflePerLane using per-128-bit-lane byte-shuffle semantics - confirm.
        Vector256<byte> greenSelector = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
        ref uint first = ref MemoryMarshal.GetReference(pixelData);
        nuint count = (uint)length;
        nuint lastFullBlock = count - 8;
        nuint offset = 0;
        do
        {
            ref Vector256<uint> block = ref Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref first, offset));
            Vector256<byte> argb = block.AsByte();
            Vector256<byte> greenForRedBlue = Vector256_.ShufflePerLane(argb, greenSelector);
            block = (argb - greenForRedBlue).AsUInt32();
            offset += 8;
        }
        while (offset <= lastFullBlock);

        // Pixels that do not fill a whole vector are handled by the scalar version.
        if (offset != count)
        {
            SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)offset));
        }

        return;
    }

    if (Vector128.IsHardwareAccelerated && length >= 4)
    {
        // Same selector layout as above, for four pixels at a time.
        Vector128<byte> greenSelector = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
        ref uint first = ref MemoryMarshal.GetReference(pixelData);
        nuint count = (uint)length;
        nuint lastFullBlock = count - 4;
        nuint offset = 0;
        do
        {
            ref Vector128<uint> block = ref Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref first, offset));
            Vector128<byte> argb = block.AsByte();
            Vector128<byte> greenForRedBlue = Vector128_.ShuffleNative(argb, greenSelector);
            block = (argb - greenForRedBlue).AsUInt32();
            offset += 4;
        }
        while (offset <= lastFullBlock);

        if (offset != count)
        {
            SubtractGreenFromBlueAndRedScalar(pixelData.Slice((int)offset));
        }

        return;
    }

    SubtractGreenFromBlueAndRedScalar(pixelData);
}
/// <summary>
/// Scalar version of the 'subtract green' transform: subtracts green from red and blue (modulo 256) for every pixel.
/// </summary>
/// <param name="pixelData">The pixel data to transform in place.</param>
private static void SubtractGreenFromBlueAndRedScalar(Span<uint> pixelData)
{
    foreach (ref uint pixel in pixelData)
    {
        uint green = (pixel >> 8) & 0xff;
        uint red = (pixel >> 16) & 0xff;
        uint blue = pixel & 0xff;

        // Unsigned wrap-around followed by the mask gives the difference modulo 256.
        uint newRed = (red - green) & 0xff;
        uint newBlue = (blue - green) & 0xff;
        pixel = (pixel & 0xff00ff00u) | (newRed << 16) | newBlue;
    }
}
/// <summary>
/// Reverses the color index (palette) transform: every stored index is replaced by its color map entry.
/// When a palette index needs fewer than 8 bits, several indices are packed into one input value and are unpacked here.
/// </summary>
/// <param name="transform">The transform data; supplies the bit shift, the image size and the color map.</param>
/// <param name="pixelData">The index data on input; receives the decoded pixels.</param>
/// <param name="outputSpan">Scratch buffer that receives the unpacked pixels before they are copied back (packed case only).</param>
public static void ColorIndexInverseTransform(
    Vp8LTransform transform,
    Span<uint> pixelData,
    Span<uint> outputSpan)
{
    int bitsPerPixel = 8 >> transform.Bits;
    int width = transform.XSize;
    int height = transform.YSize;
    Span<uint> colorMap = transform.Data.GetSpan();

    if (bitsPerPixel >= 8)
    {
        // One index per input value: translate in place.
        int position = 0;
        for (int row = 0; row < height; row++)
        {
            for (int column = 0; column < width; column++, position++)
            {
                uint index = GetArgbIndex(pixelData[position]);
                pixelData[position] = colorMap[(int)index];
            }
        }

        return;
    }

    int pixelsPerByte = 1 << transform.Bits;
    int countMask = pixelsPerByte - 1;
    int bitMask = (1 << bitsPerPixel) - 1;
    int readPosition = 0;
    int writePosition = 0;
    for (int row = 0; row < height; row++)
    {
        uint packed = 0;
        for (int column = 0; column < width; column++)
        {
            // pixelsPerByte is a power of two, so a mask tells when the packed
            // value is exhausted and the next input value has to be loaded.
            bool needsReload = (column & countMask) == 0;
            if (needsReload)
            {
                packed = GetArgbIndex(pixelData[readPosition++]);
            }

            outputSpan[writePosition++] = colorMap[(int)(packed & bitMask)];
            packed >>= bitsPerPixel;
        }
    }

    outputSpan.CopyTo(pixelData);
}
/// <summary>
/// Reverses the color space transform for a whole image.
/// The image is processed row by row in tiles of <c>1 &lt;&lt; transform.Bits</c> pixels; every tile has its own
/// multipliers stored in the transform data.
/// </summary>
/// <param name="transform">The transform data.</param>
/// <param name="pixelData">The pixel data to apply the inverse transform on, in place.</param>
public static void ColorSpaceInverseTransform(Vp8LTransform transform, Span<uint> pixelData)
{
    int width = transform.XSize;
    int height = transform.YSize;
    int tileWidth = 1 << transform.Bits;
    int tileMask = tileWidth - 1;

    // Portion of a row covered by complete tiles, and the width of the trailing partial tile.
    int fullTilesWidth = width & ~tileMask;
    int partialTileWidth = width - fullTilesWidth;
    int tilesPerRow = SubSampleSize(width, transform.Bits);
    Span<uint> transformData = transform.Data.GetSpan();

    int rowTileStart = 0;
    int pixelPos = 0;
    for (int row = 0; row < height; row++)
    {
        int tileIndex = rowTileStart;
        Vp8LMultipliers m = default;
        int fullTilesEnd = pixelPos + fullTilesWidth;
        int rowEnd = pixelPos + width;

        for (; pixelPos < fullTilesEnd; pixelPos += tileWidth)
        {
            ColorCodeToMultipliers(transformData[tileIndex++], ref m);
            TransformColorInverse(m, pixelData.Slice(pixelPos, tileWidth));
        }

        if (pixelPos < rowEnd)
        {
            ColorCodeToMultipliers(transformData[tileIndex], ref m);
            TransformColorInverse(m, pixelData.Slice(pixelPos, partialTileWidth));
            pixelPos += partialTileWidth;
        }

        // Advance to the next row of tiles once a full tile height has been processed.
        if (((row + 1) & tileMask) == 0)
        {
            rowTileStart += tilesPerRow;
        }
    }
}
/// <summary>
/// Applies the forward color transform: green is kept, a green based delta is subtracted from red,
/// and a green based plus a red based delta is subtracted from blue.
/// </summary>
/// <param name="m">The Vp8LMultipliers.</param>
/// <param name="pixelData">The pixel data to transform in place.</param>
/// <param name="numPixels">The number of pixels to process.</param>
public static void TransformColor(Vp8LMultipliers m, Span<uint> pixelData, int numPixels)
{
    if (Avx2.IsSupported && numPixels >= 8)
    {
        // Byte masks keeping the odd bytes (green, alpha) respectively the even bytes (blue, red) of each pixel.
        Vector256<byte> alphaGreenMask = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
        Vector256<byte> redBlueMask = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
        Vector256<short> greenMultipliers = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)).AsInt16();
        Vector256<short> redMultipliers = MkCst32(Cst5b(m.RedToBlue), 0).AsInt16();
        ref uint first = ref MemoryMarshal.GetReference(pixelData);
        nuint offset = 0;
        do
        {
            ref Vector256<uint> block = ref Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref first, offset));
            Vector256<uint> argb = block;
            Vector256<short> alphaGreen = Avx2.And(argb.AsByte(), alphaGreenMask).AsInt16();
            Vector256<short> greenLow = Avx2.ShuffleLow(alphaGreen, SimdUtils.Shuffle.MMShuffle2200);
            Vector256<short> greenBoth = Avx2.ShuffleHigh(greenLow, SimdUtils.Shuffle.MMShuffle2200);
            Vector256<short> greenDelta = Avx2.MultiplyHigh(greenBoth, greenMultipliers);
            Vector256<short> shifted = Avx2.ShiftLeftLogical(argb.AsInt16(), 8);
            Vector256<short> redDelta = Avx2.MultiplyHigh(shifted, redMultipliers);
            Vector256<int> redDeltaForBlue = Avx2.ShiftRightLogical(redDelta.AsInt32(), 16);
            Vector256<byte> delta = Avx2.Add(redDeltaForBlue.AsByte(), greenDelta.AsByte());
            Vector256<byte> redBlueDelta = Avx2.And(delta, redBlueMask);
            block = Avx2.Subtract(argb.AsByte(), redBlueDelta).AsUInt32();
            offset += 8;
        }
        while (offset <= (uint)numPixels - 8);

        // Remaining pixels that do not fill a vector.
        if (offset != (uint)numPixels)
        {
            TransformColorScalar(m, pixelData.Slice((int)offset), numPixels - (int)offset);
        }

        return;
    }

    if (Vector128.IsHardwareAccelerated && numPixels >= 4)
    {
        Vector128<byte> alphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
        Vector128<byte> redBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
        Vector128<short> greenMultipliers = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)).AsInt16();
        Vector128<short> redMultipliers = MkCst16(Cst5b(m.RedToBlue), 0).AsInt16();
        ref uint first = ref MemoryMarshal.GetReference(pixelData);
        nuint offset = 0;
        do
        {
            ref Vector128<uint> block = ref Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref first, offset));
            Vector128<uint> argb = block;
            Vector128<short> alphaGreen = (argb.AsByte() & alphaGreenMask).AsInt16();
            Vector128<short> greenLow = Vector128_.ShuffleLow(alphaGreen, SimdUtils.Shuffle.MMShuffle2200);
            Vector128<short> greenBoth = Vector128_.ShuffleHigh(greenLow, SimdUtils.Shuffle.MMShuffle2200);
            Vector128<short> greenDelta = Vector128_.MultiplyHigh(greenBoth, greenMultipliers);
            Vector128<short> shifted = Vector128_.ShiftLeftLogical(argb.AsInt16(), 8);
            Vector128<short> redDelta = Vector128_.MultiplyHigh(shifted, redMultipliers);
            Vector128<int> redDeltaForBlue = Vector128.ShiftRightLogical(redDelta.AsInt32(), 16);
            Vector128<byte> delta = redDeltaForBlue.AsByte() + greenDelta.AsByte();
            Vector128<byte> redBlueDelta = delta & redBlueMask;
            block = (argb.AsByte() - redBlueDelta).AsUInt32();
            offset += 4;
        }
        while ((int)offset <= numPixels - 4);

        if ((int)offset != numPixels)
        {
            TransformColorScalar(m, pixelData.Slice((int)offset), numPixels - (int)offset);
        }

        return;
    }

    TransformColorScalar(m, pixelData, numPixels);
}
/// <summary>
/// Scalar version of the forward color transform.
/// </summary>
/// <param name="m">The multipliers to apply.</param>
/// <param name="data">The pixel data to transform in place.</param>
/// <param name="numPixels">The number of pixels to process.</param>
private static void TransformColorScalar(Vp8LMultipliers m, Span<uint> data, int numPixels)
{
    // The multipliers are interpreted as signed values.
    sbyte greenToRed = (sbyte)m.GreenToRed;
    sbyte greenToBlue = (sbyte)m.GreenToBlue;
    sbyte redToBlue = (sbyte)m.RedToBlue;

    for (int index = 0; index < numPixels; index++)
    {
        uint pixel = data[index];
        sbyte green = U32ToS8(pixel >> 8);
        sbyte red = U32ToS8(pixel >> 16);

        int transformedRed = red & 0xff;
        transformedRed -= ColorTransformDelta(greenToRed, green);
        transformedRed &= 0xff;

        // Blue uses the original (untransformed) red value.
        int transformedBlue = (int)(pixel & 0xff);
        transformedBlue -= ColorTransformDelta(greenToBlue, green);
        transformedBlue -= ColorTransformDelta(redToBlue, red);
        transformedBlue &= 0xff;

        data[index] = (pixel & 0xff00ff00u) | ((uint)transformedRed << 16) | (uint)transformedBlue;
    }
}
/// <summary>
/// Reverses the color space transform for a run of pixels sharing the same multipliers.
/// </summary>
/// <param name="m">The color transform element.</param>
/// <param name="pixelData">The pixel data to apply the inverse transform on, in place.</param>
public static void TransformColorInverse(Vp8LMultipliers m, Span<uint> pixelData)
{
    int length = pixelData.Length;

    if (Avx2.IsSupported && length >= 8)
    {
        // Byte mask keeping the odd bytes (green, alpha) of each pixel.
        Vector256<byte> alphaGreenMask = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
        Vector256<short> greenMultipliers = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)).AsInt16();
        Vector256<short> redMultipliers = MkCst32(Cst5b(m.RedToBlue), 0).AsInt16();
        ref uint first = ref MemoryMarshal.GetReference(pixelData);
        nuint lastFullBlock = (uint)length - 8;
        nuint offset = 0;
        while (offset <= lastFullBlock)
        {
            ref Vector256<uint> block = ref Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref first, offset));
            Vector256<uint> argb = block;
            Vector256<byte> alphaGreen = Avx2.And(argb.AsByte(), alphaGreenMask);
            Vector256<short> greenLow = Avx2.ShuffleLow(alphaGreen.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
            Vector256<short> greenBoth = Avx2.ShuffleHigh(greenLow, SimdUtils.Shuffle.MMShuffle2200);
            Vector256<short> greenDelta = Avx2.MultiplyHigh(greenBoth, greenMultipliers);
            Vector256<byte> withGreenDelta = Avx2.Add(argb.AsByte(), greenDelta.AsByte());
            Vector256<short> redBlueHigh = Avx2.ShiftLeftLogical(withGreenDelta.AsInt16(), 8);
            Vector256<short> redDelta = Avx2.MultiplyHigh(redBlueHigh, redMultipliers);
            Vector256<int> redDeltaForBlue = Avx2.ShiftRightLogical(redDelta.AsInt32(), 8);
            Vector256<byte> summed = Avx2.Add(redDeltaForBlue.AsByte(), redBlueHigh.AsByte());
            Vector256<short> redBlue = Avx2.ShiftRightLogical(summed.AsInt16(), 8);
            block = Avx2.Or(redBlue.AsByte(), alphaGreen).AsUInt32();
            offset += 8;
        }

        // Remaining pixels that do not fill a vector.
        if (offset != (uint)length)
        {
            TransformColorInverseScalar(m, pixelData.Slice((int)offset));
        }

        return;
    }

    if (Vector128.IsHardwareAccelerated && length >= 4)
    {
        Vector128<byte> alphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
        Vector128<short> greenMultipliers = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)).AsInt16();
        Vector128<short> redMultipliers = MkCst16(Cst5b(m.RedToBlue), 0).AsInt16();
        ref uint first = ref MemoryMarshal.GetReference(pixelData);
        nuint lastFullBlock = (uint)length - 4;
        nuint offset = 0;
        while (offset <= lastFullBlock)
        {
            ref Vector128<uint> block = ref Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref first, offset));
            Vector128<uint> argb = block;
            Vector128<byte> alphaGreen = argb.AsByte() & alphaGreenMask;
            Vector128<short> greenLow = Vector128_.ShuffleLow(alphaGreen.AsInt16(), SimdUtils.Shuffle.MMShuffle2200);
            Vector128<short> greenBoth = Vector128_.ShuffleHigh(greenLow, SimdUtils.Shuffle.MMShuffle2200);
            Vector128<short> greenDelta = Vector128_.MultiplyHigh(greenBoth, greenMultipliers);
            Vector128<byte> withGreenDelta = argb.AsByte() + greenDelta.AsByte();
            Vector128<short> redBlueHigh = Vector128_.ShiftLeftLogical(withGreenDelta.AsInt16(), 8);
            Vector128<short> redDelta = Vector128_.MultiplyHigh(redBlueHigh, redMultipliers);
            Vector128<int> redDeltaForBlue = Vector128.ShiftRightLogical(redDelta.AsInt32(), 8);
            Vector128<byte> summed = redDeltaForBlue.AsByte() + redBlueHigh.AsByte();
            Vector128<short> redBlue = Vector128.ShiftRightLogical(summed.AsInt16(), 8);
            block = (redBlue.AsByte() | alphaGreen).AsUInt32();
            offset += 4;
        }

        if (offset != (uint)length)
        {
            TransformColorInverseScalar(m, pixelData.Slice((int)offset));
        }

        return;
    }

    TransformColorInverseScalar(m, pixelData);
}
/// <summary>
/// Scalar version of the inverse color transform.
/// </summary>
/// <param name="m">The multipliers to apply.</param>
/// <param name="pixelData">The pixel data to transform in place.</param>
private static void TransformColorInverseScalar(Vp8LMultipliers m, Span<uint> pixelData)
{
    // The multipliers are interpreted as signed values.
    sbyte greenToRed = (sbyte)m.GreenToRed;
    sbyte greenToBlue = (sbyte)m.GreenToBlue;
    sbyte redToBlue = (sbyte)m.RedToBlue;

    foreach (ref uint pixel in pixelData)
    {
        uint argb = pixel;
        sbyte green = (sbyte)(argb >> 8);

        int restoredRed = (int)((argb >> 16) & 0xff);
        restoredRed += ColorTransformDelta(greenToRed, green);
        restoredRed &= 0xff;

        // Blue depends on the already restored red value.
        int restoredBlue = (int)argb & 0xff;
        restoredBlue += ColorTransformDelta(greenToBlue, green);
        restoredBlue += ColorTransformDelta(redToBlue, (sbyte)restoredRed);
        restoredBlue &= 0xff;

        pixel = (argb & 0xff00ff00u) | ((uint)restoredRed << 16) | (uint)restoredBlue;
    }
}
/// <summary>
/// This will reverse the predictor transform.
/// The predictor transform can be used to reduce entropy by exploiting the fact that neighboring pixels are often correlated.
/// In the predictor transform, the current pixel value is predicted from the pixels already decoded (in scan-line order) and only the residual value (actual - predicted) is encoded.
/// The prediction mode determines the type of prediction to use. The image is divided into squares and all the pixels in a square use same prediction mode.
/// </summary>
/// <remarks>
/// The mode is read from a 4 bit field, so the values 14 and 15 can occur in a bitstream although only
/// modes 0 to 13 are defined. Those unassigned modes are decoded like mode 0, so that every tile of
/// <paramref name="outputSpan"/> is always written and no stale buffer content ends up in the decoded image.
/// </remarks>
/// <param name="transform">The transform data.</param>
/// <param name="pixelData">The pixel data to apply the inverse transform.</param>
/// <param name="outputSpan">The resulting pixel data with the reversed transformation data.</param>
public static void PredictorInverseTransform(
    Vp8LTransform transform,
    Span<uint> pixelData,
    Span<uint> outputSpan)
{
    fixed (uint* inputFixed = pixelData)
    {
        fixed (uint* outputFixed = outputSpan)
        {
            uint* input = inputFixed;
            uint* output = outputFixed;
            int width = transform.XSize;
            Span<uint> transformData = transform.Data.GetSpan();

            // First Row follows the L (mode=1) mode.
            PredictorAdd0(input, 1, output);
            PredictorAdd1(input + 1, width - 1, output + 1);
            input += width;
            output += width;

            int y = 1;
            int yEnd = transform.YSize;
            int tileWidth = 1 << transform.Bits;
            int mask = tileWidth - 1;
            int tilesPerRow = SubSampleSize(width, transform.Bits);
            int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow;

            // Scratch buffer handed to the mode 11 predictor.
            Span<short> scratch = stackalloc short[8];
            while (y < yEnd)
            {
                int predictorModeIdx = predictorModeIdxBase;
                int x = 1;

                // First pixel follows the T (mode=2) mode.
                PredictorAdd2(input, output - width, 1, output);

                // .. the rest:
                while (x < width)
                {
                    // The mode is stored in bits 8..11 of the transform data entry of the tile.
                    uint predictorMode = (transformData[predictorModeIdx++] >> 8) & 0xf;
                    int xEnd = (x & ~mask) + tileWidth;
                    if (xEnd > width)
                    {
                        xEnd = width;
                    }

                    // There are 14 different prediction modes.
                    // In each prediction mode, the current pixel value is predicted from one
                    // or more neighboring pixels whose values are already known.
                    switch (predictorMode)
                    {
                        case 1:
                            PredictorAdd1(input + x, xEnd - x, output + x);
                            break;
                        case 2:
                            PredictorAdd2(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 3:
                            PredictorAdd3(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 4:
                            PredictorAdd4(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 5:
                            PredictorAdd5(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 6:
                            PredictorAdd6(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 7:
                            PredictorAdd7(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 8:
                            PredictorAdd8(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 9:
                            PredictorAdd9(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 10:
                            PredictorAdd10(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 11:
                            PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch);
                            break;
                        case 12:
                            PredictorAdd12(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        case 13:
                            PredictorAdd13(input + x, output + x - width, xEnd - x, output + x);
                            break;
                        default:
                            // Mode 0, and the unassigned modes 14 and 15: without this fallback a tile
                            // using mode 14 or 15 would be skipped and keep whatever the output buffer
                            // contained before.
                            PredictorAdd0(input + x, xEnd - x, output + x);
                            break;
                    }

                    x = xEnd;
                }

                input += width;
                output += width;
                y++;
                if ((y & mask) == 0)
                {
                    // Use the same mask, since tiles are squares.
                    predictorModeIdxBase += tilesPerRow;
                }
            }
        }
    }

    outputSpan.CopyTo(pixelData);
}
/// <summary>
/// Expands a delta coded color map: every entry after the first is stored as the per-byte difference
/// to its predecessor. Entries of <paramref name="newColorMap"/> beyond <paramref name="numColors"/> are set to zero.
/// </summary>
/// <param name="numColors">The number of colors stored in <paramref name="transformData"/>.</param>
/// <param name="transformData">The delta coded color map.</param>
/// <param name="newColorMap">Receives the expanded color map.</param>
public static void ExpandColorMap(int numColors, Span<uint> transformData, Span<uint> newColorMap)
{
    // The first color is stored as is.
    newColorMap[0] = transformData[0];

    Span<byte> deltas = MemoryMarshal.Cast<uint, byte>(transformData);
    Span<byte> expanded = MemoryMarshal.Cast<uint, byte>(newColorMap);

    int usedBytes = 4 * numColors;
    int position = 4;
    while (position < usedBytes)
    {
        // Per-byte addition modulo 256 with the same channel of the previous color.
        int sum = deltas[position] + expanded[position - 4];
        expanded[position] = (byte)(sum & 0xff);
        position++;
    }

    // Black tail: clear the entries that are not backed by a stored color.
    int totalBytes = 4 * newColorMap.Length;
    while (position < totalBytes)
    {
        expanded[position] = 0;
        position++;
    }
}
/// <summary>
/// Difference of each component, mod 256.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint SubPixels(uint a, uint b)
{
    const uint AlphaGreenMask = 0xff00ff00u;
    const uint RedBlueMask = 0x00ff00ffu;

    // Two channels are subtracted at once. Setting all bits of the two unused bytes beforehand
    // absorbs the borrow of each channel, so it cannot leak into the neighboring channel.
    uint alphaGreen = RedBlueMask + (a & AlphaGreenMask) - (b & AlphaGreenMask);
    uint redBlue = AlphaGreenMask + (a & RedBlueMask) - (b & RedBlueMask);

    alphaGreen &= AlphaGreenMask;
    redBlue &= RedBlueMask;
    return alphaGreen | redBlue;
}
/// <summary>
/// Bundles multiple (1, 2, 4 or 8) palette indices into a single pixel.
/// The indices are packed into the bits starting at bit 8; the alpha byte is always set to 0xff.
/// </summary>
/// <param name="row">The palette indices of one row.</param>
/// <param name="width">The number of indices to bundle.</param>
/// <param name="xBits">The log2 of the number of indices stored per output pixel; 0 stores one index per pixel.</param>
/// <param name="dst">Receives the bundled pixels.</param>
public static void BundleColorMap(Span<byte> row, int width, int xBits, Span<uint> dst)
{
    const uint OpaqueAlpha = 0xff000000;

    if (xBits <= 0)
    {
        // One index per pixel.
        for (int column = 0; column < width; column++)
        {
            dst[column] = (uint)(OpaqueAlpha | (row[column] << 8));
        }

        return;
    }

    int bitsPerIndex = 1 << (3 - xBits);
    int slotMask = (1 << xBits) - 1;
    uint bundled = OpaqueAlpha;
    for (int column = 0; column < width; column++)
    {
        // Start a fresh pixel whenever the first slot of a bundle is reached.
        int slot = column & slotMask;
        if (slot == 0)
        {
            bundled = OpaqueAlpha;
        }

        int shift = 8 + (bitsPerIndex * slot);
        bundled |= (uint)(row[column] << shift);

        // The (partially filled) bundle is stored after every index, so an incomplete
        // bundle at the end of the row is still written.
        dst[column >> xBits] = bundled;
    }
}
/// <summary>
/// Computes the combined Shannon entropy for the distributions {X} and {X+Y}.
/// Both inputs are read as histograms with 256 entries.
/// </summary>
/// <param name="x">The first histogram; must contain at least 256 entries.</param>
/// <param name="y">The second histogram; must contain at least 256 entries.</param>
/// <returns>Shannon entropy.</returns>
public static float CombinedShannonEntropy(Span<int> x, Span<int> y)
{
    double entropy = 0.0d;

    if (!Avx2.IsSupported)
    {
        uint totalX = 0;
        uint totalXY = 0;
        for (int i = 0; i < 256; i++)
        {
            uint xi = (uint)x[i];
            uint yi = (uint)y[i];
            if (xi != 0)
            {
                uint xy = xi + yi;
                totalX += xi;
                entropy -= FastSLog2(xi);
                totalXY += xy;
                entropy -= FastSLog2(xy);
            }
            else if (yi != 0)
            {
                totalXY += yi;
                entropy -= FastSLog2(yi);
            }
        }

        entropy += FastSLog2(totalX) + FastSLog2(totalXY);
        return (float)entropy;
    }

    // The elements are read without bounds checks in this path.
    ref int xRef = ref MemoryMarshal.GetReference(x);
    ref int yRef = ref MemoryMarshal.GetReference(y);

    // Scratch space of sizeof(int) * 8 used to read the individual lanes of X + Y.
    Vector256<int> lanes = Vector256<int>.Zero;
    ref int lanesRef = ref Unsafe.As<Vector256<int>, int>(ref lanes);

    Vector256<int> totalX256 = Vector256<int>.Zero;
    Vector256<int> totalXY256 = Vector256<int>.Zero;

    for (nuint i = 0; i < 256; i += 8)
    {
        ref int xBlock = ref Unsafe.Add(ref xRef, i);
        ref int yBlock = ref Unsafe.Add(ref yRef, i);
        Vector256<int> xVec = Unsafe.As<int, Vector256<int>>(ref xBlock);
        Vector256<int> yVec = Unsafe.As<int, Vector256<int>>(ref yBlock);

        // Check if any X is non-zero: this actually provides a speedup as X is usually sparse.
        int zeroMask = Avx2.MoveMask(Avx2.CompareEqual(xVec, Vector256<int>.Zero).AsByte());
        if (zeroMask == -1)
        {
            // X is fully 0, so only deal with Y.
            totalXY256 = Avx2.Add(totalXY256, yVec);
            for (nuint lane = 0; lane < 8; lane++)
            {
                int yValue = Unsafe.Add(ref yBlock, lane);
                if (yValue != 0)
                {
                    entropy -= FastSLog2((uint)yValue);
                }
            }

            continue;
        }

        Vector256<int> xy256 = Avx2.Add(xVec, yVec);
        totalXY256 = Avx2.Add(totalXY256, xy256);
        totalX256 = Avx2.Add(totalX256, xVec);

        // Analyze the different X + Y.
        Unsafe.As<int, Vector256<int>>(ref lanesRef) = xy256;
        for (nuint lane = 0; lane < 8; lane++)
        {
            int xyValue = Unsafe.Add(ref lanesRef, lane);
            if (xyValue == 0)
            {
                continue;
            }

            entropy -= FastSLog2((uint)xyValue);

            int xValue = Unsafe.Add(ref xBlock, lane);
            if (xValue != 0)
            {
                entropy -= FastSLog2((uint)xValue);
            }
        }
    }

    // Reduce the vector accumulators to the scalar totals of X and X + Y.
    int sumX = Numerics.ReduceSum(totalX256);
    int sumXY = Numerics.ReduceSum(totalXY256);
    entropy += FastSLog2((uint)sumX) + FastSLog2((uint)sumXY);
    return (float)entropy;
}
/// <summary>
/// Computes the transformed red channel of a pixel: red minus the green based delta, modulo 256.
/// </summary>
/// <param name="greenToRed">The green to red multiplier.</param>
/// <param name="argb">The pixel.</param>
/// <returns>The transformed red value.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorRed(sbyte greenToRed, uint argb)
{
    sbyte green = U32ToS8(argb >> 8);

    // The alpha bits that are still part of the shifted value are removed by the final mask.
    int transformed = (int)(argb >> 16);
    transformed -= ColorTransformDelta(greenToRed, green);
    transformed &= 0xff;
    return (byte)transformed;
}
/// <summary>
/// Computes the transformed blue channel of a pixel: blue minus the green based and the red based delta, modulo 256.
/// </summary>
/// <param name="greenToBlue">The green to blue multiplier.</param>
/// <param name="redToBlue">The red to blue multiplier.</param>
/// <param name="argb">The pixel.</param>
/// <returns>The transformed blue value.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint argb)
{
    sbyte green = U32ToS8(argb >> 8);
    sbyte red = U32ToS8(argb >> 16);

    int transformed = (int)(argb & 0xff);
    transformed -= ColorTransformDelta(greenToBlue, green);
    transformed -= ColorTransformDelta(redToBlue, red);
    transformed &= 0xff;
    return (byte)transformed;
}
/// <summary>
/// Fast calculation of log2(v) for integer input.
/// Small inputs are answered from a lookup table, larger ones by <c>FastLog2Slow</c>.
/// </summary>
/// <param name="v">The input value.</param>
/// <returns>The (approximated) base-2 logarithm of <paramref name="v"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static float FastLog2(uint v)
{
    if (v >= LogLookupIdxMax)
    {
        return FastLog2Slow(v);
    }

    return WebpLookupTables.Log2Table[v];
}
/// <summary>
/// Fast calculation of v * log2(v) for integer input.
/// Small inputs are answered from a lookup table, larger ones by <c>FastSLog2Slow</c>.
/// </summary>
/// <param name="v">The input value.</param>
/// <returns>The (approximated) value of v * log2(v).</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static float FastSLog2(uint v)
{
    if (v >= LogLookupIdxMax)
    {
        return FastSLog2Slow(v);
    }

    return WebpLookupTables.SLog2Table[v];
}
/// <summary>
/// Unpacks a color code into its three multipliers:
/// bits 0..7 green to red, bits 8..15 green to blue and bits 16..23 red to blue.
/// </summary>
/// <param name="colorCode">The packed color code.</param>
/// <param name="m">The multipliers to fill.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ColorCodeToMultipliers(uint colorCode, ref Vp8LMultipliers m)
{
    uint greenToRed = colorCode & 0xff;
    uint greenToBlue = (colorCode >> 8) & 0xff;
    uint redToBlue = (colorCode >> 16) & 0xff;

    m.GreenToRed = (byte)greenToRed;
    m.GreenToBlue = (byte)greenToBlue;
    m.RedToBlue = (byte)redToBlue;
}
// Converts near lossless quality into max number of bits shaved off.
// 100 -> 0
// 80..99 -> 1
// 60..79 -> 2
// 40..59 -> 3
// 20..39 -> 4
// 0..19 -> 5
[MethodImpl(InliningOptions.ShortMethod)]
public static int NearLosslessBits(int nearLosslessQuality)
{
    // Every full step of 20 quality points reduces the maximum of 5 shaved bits by one.
    int qualitySteps = nearLosslessQuality / 20;
    return 5 - qualitySteps;
}
/// <summary>
/// Calculates v * log2(v) for inputs that are not covered by the lookup table.
/// </summary>
/// <param name="v">The input value; must not be smaller than the lookup table size.</param>
/// <returns>The (approximated) value of v * log2(v).</returns>
private static float FastSLog2Slow(uint v)
{
    DebugGuard.MustBeGreaterThanOrEqualTo<uint>(v, LogLookupIdxMax, nameof(v));

    // Large inputs are calculated exactly.
    if (v >= ApproxLogWithCorrectionMax)
    {
        return (float)(Log2Reciprocal * v * Math.Log(v));
    }

    uint original = v;
    float originalAsFloat = v;

    // Halve v until it fits the lookup table, remembering the number of shifts and 2^shifts.
    int shifts = 0;
    uint scale = 1;
    do
    {
        shifts++;
        v >>= 1;
        scale <<= 1;
    }
    while (v >= LogLookupIdxMax);

    // original = scale * Xf with Xf < 256, and Xf = floor(Xf) * (1 + (original % scale) / original), so
    // log2(Xf) = log2(floor(Xf)) + log2(1 + (original % scale) / original).
    // For small d, log(1 + d) ~ d, which gives the correction
    // LOG_2_RECIPROCAL * (original % scale) / original, with LOG_2_RECIPROCAL ~ 23/16.
    uint remainder = original & (scale - 1);
    int correction = (int)((23 * remainder) >> 4);
    return (originalAsFloat * (WebpLookupTables.Log2Table[v] + shifts)) + correction;
}
/// <summary>
/// Calculates log2(v) for inputs that are not covered by the lookup table.
/// </summary>
/// <param name="v">The input value; must not be smaller than the lookup table size.</param>
/// <returns>The (approximated) base-2 logarithm of <paramref name="v"/>.</returns>
private static float FastLog2Slow(uint v)
{
    DebugGuard.MustBeGreaterThanOrEqualTo<uint>(v, LogLookupIdxMax, nameof(v));

    // Large inputs are calculated exactly.
    if (v >= ApproxLogWithCorrectionMax)
    {
        return (float)(Log2Reciprocal * Math.Log(v));
    }

    uint original = v;

    // Halve v until it fits the lookup table, remembering the number of shifts and 2^shifts.
    int shifts = 0;
    uint scale = 1;
    do
    {
        shifts++;
        v >>= 1;
        scale <<= 1;
    }
    while (v >= LogLookupIdxMax);

    double result = WebpLookupTables.Log2Table[v] + shifts;
    if (original >= ApproxLogMax)
    {
        // Since the division is still expensive, add this correction factor only
        // for large values of 'v'.
        uint remainder = original & (scale - 1);
        int correction = (int)(23 * remainder) >> 4;
        result += (double)correction / original;
    }

    return (float)result;
}
/// <summary>
/// Splits a distance or length into a prefix code and the number of extra bits, without using the lookup table.
/// The prefixes are encoded with an entropy code while the extra bits are stored just as normal bits.
/// </summary>
/// <param name="distance">The value to encode.</param>
/// <param name="extraBits">Receives the number of extra bits.</param>
/// <returns>The prefix code.</returns>
private static int PrefixEncodeBitsNoLut(int distance, ref int extraBits)
{
    // The code is derived from the zero based value.
    distance -= 1;

    int topBit = BitOperations.Log2((uint)distance);
    int nextBit = (distance >> (topBit - 1)) & 1;

    extraBits = topBit - 1;
    return (2 * topBit) + nextBit;
}
/// <summary>
/// Splits a distance or length into a prefix code, the number of extra bits and the value of the extra bits,
/// without using the lookup table.
/// </summary>
/// <param name="distance">The value to encode.</param>
/// <param name="extraBits">Receives the number of extra bits.</param>
/// <param name="extraBitsValue">Receives the value stored in the extra bits.</param>
/// <returns>The prefix code.</returns>
private static int PrefixEncodeNoLut(int distance, ref int extraBits, ref int extraBitsValue)
{
    // The code is derived from the zero based value.
    distance -= 1;

    int topBit = BitOperations.Log2((uint)distance);
    int nextBit = (distance >> (topBit - 1)) & 1;

    extraBits = topBit - 1;

    // The extra bits hold everything below the two leading bits.
    int extraMask = (1 << extraBits) - 1;
    extraBitsValue = distance & extraMask;
    return (2 * topBit) + nextBit;
}
/// <summary>
/// Inverse prediction for mode 0: adds <c>WebpConstants.ArgbBlack</c> to every residual.
/// </summary>
/// <param name="input">The residuals.</param>
/// <param name="numberOfPixels">The number of pixels to process.</param>
/// <param name="output">Receives the reconstructed pixels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd0(uint* input, int numberOfPixels, uint* output)
{
    int index = 0;
    while (index < numberOfPixels)
    {
        output[index] = AddPixels(input[index], WebpConstants.ArgbBlack);
        index++;
    }
}
/// <summary>
/// Inverse prediction for mode 1: every pixel is predicted from the reconstructed pixel to its left.
/// </summary>
/// <param name="input">The residuals.</param>
/// <param name="numberOfPixels">The number of pixels to process.</param>
/// <param name="output">Receives the reconstructed pixels; the pixel before <paramref name="output"/> must already be valid.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd1(uint* input, int numberOfPixels, uint* output)
{
    // Start from the pixel that precedes the run.
    uint previous = *(output - 1);
    for (int index = 0; index < numberOfPixels; index++)
    {
        previous = AddPixels(input[index], previous);
        output[index] = previous;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor2</c> (the upper-row value at the same position)
/// and stores <c>AddPixels(input, prediction)</c> in <paramref name="output"/>.
/// The left neighbor is read from the element before the current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd2(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor2(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor3</c> (the upper-row value one position to the right)
/// and stores <c>AddPixels(input, prediction)</c> in <paramref name="output"/>.
/// The left neighbor is read from the element before the current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd3(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor3(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor4</c> (the upper-row value one position to the left)
/// and stores <c>AddPixels(input, prediction)</c> in <paramref name="output"/>.
/// The left neighbor is read from the element before the current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd4(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor4(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor5</c> (an average of the left neighbor and two
/// upper-row values) and stores <c>AddPixels(input, prediction)</c> in <paramref name="output"/>.
/// The left neighbor is read from the element before the current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd5(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor5(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor6</c> (average of the left neighbor and the upper-row
/// value one position to the left) and stores <c>AddPixels(input, prediction)</c> in
/// <paramref name="output"/>. The left neighbor is read from the element before the
/// current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd6(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor6(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor7</c> (average of the left neighbor and the upper-row
/// value at the same position) and stores <c>AddPixels(input, prediction)</c> in
/// <paramref name="output"/>. The left neighbor is read from the element before the
/// current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd7(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor7(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor8</c> (average of the upper-row values one position
/// to the left and at the same position) and stores <c>AddPixels(input, prediction)</c> in
/// <paramref name="output"/>. The left neighbor is read from the element before the
/// current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd8(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor8(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor9</c> (average of the upper-row values at the same
/// position and one position to the right) and stores <c>AddPixels(input, prediction)</c> in
/// <paramref name="output"/>. The left neighbor is read from the element before the
/// current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd9(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor9(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor10</c> (an average of the left neighbor and three
/// upper-row values) and stores <c>AddPixels(input, prediction)</c> in <paramref name="output"/>.
/// The left neighbor is read from the element before the current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd10(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor10(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor11</c> (a selection between the upper and the left
/// neighbor) and stores <c>AddPixels(input, prediction)</c> in <paramref name="output"/>.
/// The left neighbor is read from the element before the current output position;
/// <paramref name="scratch"/> is forwarded unchanged to the predictor.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span<short> scratch)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor11(dst[-1], top, scratch);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor12</c> (clamped left + upper - upper-left) and stores
/// <c>AddPixels(input, prediction)</c> in <paramref name="output"/>.
/// The left neighbor is read from the element before the current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd12(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor12(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor13</c> (the clamped "half" add/subtract of the left,
/// upper and upper-left neighbors) and stores <c>AddPixels(input, prediction)</c> in
/// <paramref name="output"/>. The left neighbor is read from the element before the
/// current output position.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd13(uint* input, uint* upper, int numberOfPixels, uint* output)
{
    uint* src = input;
    uint* top = upper;
    uint* dst = output;
    for (int remaining = numberOfPixels; remaining > 0; remaining--)
    {
        uint prediction = Predictor13(dst[-1], top);
        *dst = AddPixels(*src, prediction);
        src++;
        top++;
        dst++;
    }
}
/// <summary>
/// Returns the value <paramref name="top"/> points at. <paramref name="left"/> is not used.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor2(uint left, uint* top)
{
    return *top;
}
/// <summary>
/// Returns the value one element after <paramref name="top"/>. <paramref name="left"/> is not used.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor3(uint left, uint* top)
{
    return *(top + 1);
}
/// <summary>
/// Returns the value one element before <paramref name="top"/>. <paramref name="left"/> is not used.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor4(uint left, uint* top)
{
    return *(top - 1);
}
/// <summary>
/// Returns <c>Average3</c> of <paramref name="left"/>, the value at <paramref name="top"/>
/// and the value one element after it.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor5(uint left, uint* top)
{
    uint above = top[0];
    uint aboveRight = top[1];
    return Average3(left, above, aboveRight);
}
/// <summary>
/// Returns <c>Average2</c> of <paramref name="left"/> and the value one element
/// before <paramref name="top"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor6(uint left, uint* top)
{
    uint aboveLeft = top[-1];
    return Average2(left, aboveLeft);
}
/// <summary>
/// Returns <c>Average2</c> of <paramref name="left"/> and the value at <paramref name="top"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor7(uint left, uint* top)
{
    uint above = top[0];
    return Average2(left, above);
}
/// <summary>
/// Returns <c>Average2</c> of the value one element before <paramref name="top"/> and the
/// value at <paramref name="top"/>. <paramref name="left"/> is not used.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor8(uint left, uint* top)
{
    uint aboveLeft = top[-1];
    uint above = top[0];
    return Average2(aboveLeft, above);
}
/// <summary>
/// Returns <c>Average2</c> of the value at <paramref name="top"/> and the value one element
/// after it. <paramref name="left"/> is not used.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor9(uint left, uint* top)
{
    uint above = top[0];
    uint aboveRight = top[1];
    return Average2(above, aboveRight);
}
/// <summary>
/// Returns <c>Average4</c> of <paramref name="left"/> and the three values located one element
/// before, at, and one element after <paramref name="top"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor10(uint left, uint* top)
{
    uint aboveLeft = top[-1];
    uint above = top[0];
    uint aboveRight = top[1];
    return Average4(left, aboveLeft, above, aboveRight);
}
/// <summary>
/// Returns the result of <c>Select</c> applied to the value at <paramref name="top"/>,
/// <paramref name="left"/>, and the value one element before <paramref name="top"/>.
/// <paramref name="scratch"/> is passed through to <c>Select</c>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor11(uint left, uint* top, Span<short> scratch)
{
    uint above = top[0];
    uint aboveLeft = top[-1];
    return Select(above, left, aboveLeft, scratch);
}
/// <summary>
/// Returns <c>ClampedAddSubtractFull</c> of <paramref name="left"/>, the value at
/// <paramref name="top"/> and the value one element before it.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor12(uint left, uint* top)
{
    uint above = top[0];
    uint aboveLeft = top[-1];
    return ClampedAddSubtractFull(left, above, aboveLeft);
}
/// <summary>
/// Returns <c>ClampedAddSubtractHalf</c> of <paramref name="left"/>, the value at
/// <paramref name="top"/> and the value one element before it.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor13(uint left, uint* top)
{
    uint above = top[0];
    uint aboveLeft = top[-1];
    return ClampedAddSubtractHalf(left, above, aboveLeft);
}
/// <summary>
/// Writes <c>SubPixels(input[x], WebpConstants.ArgbBlack)</c> to <paramref name="output"/>
/// for each of the <paramref name="numPixels"/> pixels.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub0(uint* input, int numPixels, uint* output)
{
    int x = 0;
    while (x < numPixels)
    {
        output[x] = SubPixels(input[x], WebpConstants.ArgbBlack);
        x++;
    }
}
/// <summary>
/// Writes <c>SubPixels(input[x], input[x - 1])</c> to <paramref name="output"/> for each pixel.
/// The first iteration reads <c>input[-1]</c>, so the element before
/// <paramref name="input"/> must be readable.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub1(uint* input, int numPixels, uint* output)
{
    for (int x = 0; x < numPixels; x++)
    {
        uint current = input[x];
        uint left = input[x - 1];
        output[x] = SubPixels(current, left);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor2</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub2(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor2(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor3</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub3(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor3(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor4</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub4(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor4(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor5</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub5(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor5(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor6</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub6(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor6(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor7</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub7(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor7(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor8</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub8(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor8(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor9</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub9(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor9(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor10</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub10(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor10(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor11</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// <paramref name="scratch"/> is forwarded unchanged to the predictor.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span<short> scratch)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor11(left, upper + i, scratch);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor12</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub12(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor12(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// For each pixel, evaluates <c>Predictor13</c> with the left neighbor taken from
/// <c>input[x - 1]</c> and the upper row at position x, then stores
/// <c>SubPixels(input[x], prediction)</c> in <paramref name="output"/>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub13(uint* input, uint* upper, int numPixels, uint* output)
{
    for (int i = 0; i < numPixels; i++)
    {
        uint left = input[i - 1];
        uint prediction = Predictor13(left, upper + i);
        output[i] = SubPixels(input[i], prediction);
    }
}
/// <summary>
/// Computes the number of samples needed to cover <paramref name="size"/> when each sample
/// spans 2^<paramref name="samplingBits"/> units, i.e. the size divided by the block size,
/// rounded up.
/// </summary>
/// <param name="size">The size to subsample.</param>
/// <param name="samplingBits">The base-2 logarithm of the sampling block size.</param>
/// <returns>The subsampled size.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static int SubSampleSize(int size, int samplingBits)
{
    int roundUp = (1 << samplingBits) - 1;
    return (size + roundUp) >> samplingBits;
}
/// <summary>
/// Adds two packed 32-bit pixels channel by channel; every 8-bit channel wraps modulo 256.
/// </summary>
/// <param name="a">The first packed pixel.</param>
/// <param name="b">The second packed pixel.</param>
/// <returns>The packed per-channel sum.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint AddPixels(uint a, uint b)
{
    // Channels are summed in two interleaved pairs so that a carry out of one
    // channel lands in a masked-off gap instead of the neighbouring channel.
    uint highPair = ((a & 0xff00ff00u) + (b & 0xff00ff00u)) & 0xff00ff00u;
    uint lowPair = ((a & 0x00ff00ffu) + (b & 0x00ff00ffu)) & 0x00ff00ffu;
    return highPair | lowPair;
}
/// <summary>
/// Builds a sign-extended multiplying constant that is pre-shifted by 5: the low byte of
/// <paramref name="x"/> is moved into the high byte of a 16-bit value and then
/// arithmetically shifted right by 5.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static short Cst5b(int x)
{
    short highByte = (short)(x << 8);
    return (short)(highByte >> 5);
}
/// <summary>
/// Computes, for each 8-bit channel of the packed pixels, <c>c0 + c1 - c2</c> clamped to
/// the range 0..255, and repacks the four channels.
/// </summary>
/// <param name="c0">The first addend.</param>
/// <param name="c1">The second addend.</param>
/// <param name="c2">The value to subtract.</param>
/// <returns>The packed, clamped result.</returns>
private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
{
    if (!Vector128.IsHardwareAccelerated)
    {
        // Scalar fallback: handle the four channels one at a time.
        int alpha = AddSubtractComponentFull((int)(c0 >> 24), (int)(c1 >> 24), (int)(c2 >> 24));
        int red = AddSubtractComponentFull((int)((c0 >> 16) & 0xff), (int)((c1 >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
        int green = AddSubtractComponentFull((int)((c0 >> 8) & 0xff), (int)((c1 >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
        int blue = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
        return ((uint)alpha << 24) | ((uint)red << 16) | ((uint)green << 8) | (uint)blue;
    }

    // Widen every channel to 16 bits so the intermediate sum cannot wrap.
    Vector128<short> wide0 = Vector128_.UnpackLow(Vector128.CreateScalar(c0).AsByte(), Vector128<byte>.Zero).AsInt16();
    Vector128<short> wide1 = Vector128_.UnpackLow(Vector128.CreateScalar(c1).AsByte(), Vector128<byte>.Zero).AsInt16();
    Vector128<short> wide2 = Vector128_.UnpackLow(Vector128.CreateScalar(c2).AsByte(), Vector128<byte>.Zero).AsInt16();

    Vector128<short> combined = (wide0 + wide1) - wide2;

    // The saturating pack performs the clamp to 0..255.
    Vector128<byte> packed = Vector128_.PackUnsignedSaturate(combined, combined);
    return packed.AsUInt32().ToScalar();
}
/// <summary>
/// Computes, for each 8-bit channel, <c>avg + (avg - c2) / 2</c> clamped to 0..255, where
/// <c>avg</c> is the per-channel average of <paramref name="c0"/> and <paramref name="c1"/>.
/// </summary>
/// <param name="c0">The first value to average.</param>
/// <param name="c1">The second value to average.</param>
/// <param name="c2">The value whose half-difference to the average is applied.</param>
/// <returns>The packed, clamped result.</returns>
private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
{
    if (!Vector128.IsHardwareAccelerated)
    {
        // Scalar fallback: handle the four channels one at a time.
        uint average = Average2(c0, c1);
        int alpha = AddSubtractComponentHalf((int)(average >> 24), (int)(c2 >> 24));
        int red = AddSubtractComponentHalf((int)((average >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
        int green = AddSubtractComponentHalf((int)((average >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
        int blue = AddSubtractComponentHalf((int)(average & 0xff), (int)(c2 & 0xff));
        return ((uint)alpha << 24) | ((uint)red << 16) | ((uint)green << 8) | (uint)blue;
    }

    // Widen every channel to 16 bits.
    Vector128<short> wide0 = Vector128_.UnpackLow(Vector128.CreateScalar(c0).AsByte(), Vector128<byte>.Zero).AsInt16();
    Vector128<short> wide1 = Vector128_.UnpackLow(Vector128.CreateScalar(c1).AsByte(), Vector128<byte>.Zero).AsInt16();
    Vector128<short> wide2 = Vector128_.UnpackLow(Vector128.CreateScalar(c2).AsByte(), Vector128<byte>.Zero).AsInt16();

    Vector128<short> half = Vector128.ShiftRightLogical(wide1 + wide0, 1);
    Vector128<short> delta = half - wide2;

    // Lanes where c2 > average hold -1; subtracting them adds one to negative
    // deltas so the arithmetic shift below rounds toward zero like integer division.
    Vector128<short> negativeMask = Vector128.GreaterThan(wide2, half);
    Vector128<short> halfDelta = Vector128.ShiftRightArithmetic(delta - negativeMask, 1);

    Vector128<short> sum = half + halfDelta;

    // The saturating pack performs the clamp to 0..255.
    Vector128<byte> packed = Vector128_.PackUnsignedSaturate(sum, sum);
    return packed.AsUInt32().ToScalar();
}
/// <summary>
/// Returns <c>a + (a - b) / 2</c> passed through <c>Clip255</c>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static int AddSubtractComponentHalf(int a, int b)
{
    int halfDifference = (a - b) / 2;
    uint unclamped = (uint)(a + halfDifference);
    return (int)Clip255(unclamped);
}
/// <summary>
/// Returns <c>a + b - c</c> passed through <c>Clip255</c>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static int AddSubtractComponentFull(int a, int b, int c)
{
    uint unclamped = (uint)(a + b - c);
    return (int)Clip255(unclamped);
}
/// <summary>
/// Clamps a value that was computed as a signed integer and reinterpreted as unsigned:
/// values below 256 are returned unchanged; otherwise the top byte of the complement is
/// returned, which is 0 for reinterpreted negative values and 255 for positive overflow.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static uint Clip255(uint a)
{
    if (a < 256)
    {
        return a;
    }

    return (~a) >> 24;
}
/// <summary>
/// Creates a 128-bit vector whose 32-bit lanes all hold <paramref name="hi"/> in the upper
/// 16 bits and the low 16 bits of <paramref name="lo"/> in the lower 16 bits.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector128<int> MkCst16(int hi, int lo)
{
    int packed = (hi << 16) | (lo & 0xffff);
    return Vector128.Create(packed);
}
/// <summary>
/// Creates a 256-bit vector whose 32-bit lanes all hold <paramref name="hi"/> in the upper
/// 16 bits and the low 16 bits of <paramref name="lo"/> in the lower 16 bits.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector256<int> MkCst32(int hi, int lo)
{
    int packed = (hi << 16) | (lo & 0xffff);
    return Vector256.Create(packed);
}
/// <summary>
/// Chooses between <paramref name="a"/> and <paramref name="b"/> by comparing, summed over
/// the four 8-bit channels, the absolute distance of each to <paramref name="c"/>.
/// </summary>
/// <param name="a">Returned when sum(|b - c|) - sum(|a - c|) is zero or negative.</param>
/// <param name="b">Returned otherwise.</param>
/// <param name="c">The reference pixel both candidates are compared against.</param>
/// <param name="scratch">
/// Temporary buffer used by the vectorized path; it must hold at least
/// <c>Vector128&lt;short&gt;.Count</c> (8) elements and is overwritten.
/// </param>
/// <returns>Either <paramref name="a"/> or <paramref name="b"/>.</returns>
/// <exception cref="ArgumentException">
/// The vectorized path is taken and <paramref name="scratch"/> is too short.
/// </exception>
private static uint Select(uint a, uint b, uint c, Span<short> scratch)
{
    if (Vector128.IsHardwareAccelerated)
    {
        Vector128<byte> a0 = Vector128.CreateScalar(a).AsByte();
        Vector128<byte> b0 = Vector128.CreateScalar(b).AsByte();
        Vector128<byte> c0 = Vector128.CreateScalar(c).AsByte();

        // |x - y| for unsigned bytes: one of the two saturating differences is zero.
        Vector128<byte> ac0 = Vector128_.SubtractSaturate(a0, c0);
        Vector128<byte> ca0 = Vector128_.SubtractSaturate(c0, a0);
        Vector128<byte> bc0 = Vector128_.SubtractSaturate(b0, c0);
        Vector128<byte> cb0 = Vector128_.SubtractSaturate(c0, b0);
        Vector128<byte> ac = ac0 | ca0;
        Vector128<byte> bc = bc0 | cb0;
        Vector128<byte> pa = Vector128_.UnpackLow(ac, Vector128<byte>.Zero); // |a - c|
        Vector128<byte> pb = Vector128_.UnpackLow(bc, Vector128<byte>.Zero); // |b - c|
        Vector128<short> diff = pb.AsInt16() - pa.AsInt16();

        // Bounds-checked store: throws on a too-short scratch span instead of
        // writing 16 bytes through an unchecked pinned pointer.
        diff.CopyTo(scratch);
        int paMinusPb = scratch[3] + scratch[2] + scratch[1] + scratch[0];
        return (paMinusPb <= 0) ? a : b;
    }
    else
    {
        int paMinusPb =
            Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
            Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
            Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
            Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
        return paMinusPb <= 0 ? a : b;
    }
}
/// <summary>
/// Returns <c>|b - c| - |a - c|</c>.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static int Sub3(int a, int b, int c)
{
    int distanceB = Math.Abs(b - c);
    int distanceA = Math.Abs(a - c);
    return distanceB - distanceA;
}
/// <summary>
/// Averages two packed 32-bit values byte by byte, rounding down, without letting
/// carries cross byte boundaries.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static uint Average2(uint a0, uint a1)
{
    // a + b == 2 * (a AND b) + (a XOR b); the mask drops each byte's low bit so the
    // shift cannot leak it into the byte below.
    uint common = a0 & a1;
    uint halfDifference = ((a0 ^ a1) & 0xfefefefeu) >> 1;
    return halfDifference + common;
}
/// <summary>
/// Averages <paramref name="a0"/> with <paramref name="a2"/> first, then averages that
/// result with <paramref name="a1"/>, using <c>Average2</c> for both steps.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static uint Average3(uint a0, uint a1, uint a2)
{
    uint outer = Average2(a0, a2);
    return Average2(outer, a1);
}
/// <summary>
/// Averages the pair (<paramref name="a0"/>, <paramref name="a1"/>) and the pair
/// (<paramref name="a2"/>, <paramref name="a3"/>), then averages the two results,
/// using <c>Average2</c> for every step.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static uint Average4(uint a0, uint a1, uint a2, uint a3)
{
    uint first = Average2(a0, a1);
    uint second = Average2(a2, a3);
    return Average2(first, second);
}
/// <summary>
/// Extracts bits 8..15 of <paramref name="idx"/> (the second-lowest byte).
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static uint GetArgbIndex(uint idx)
{
    return (idx & 0xff00u) >> 8;
}
/// <summary>
/// Multiplies the two signed 8-bit values and arithmetically shifts the product right by 5.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static int ColorTransformDelta(sbyte colorPred, sbyte color)
{
    int product = colorPred * color;
    return product >> 5;
}
/// <summary>
/// Reinterprets the lowest byte of <paramref name="v"/> as a signed 8-bit value.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
private static sbyte U32ToS8(uint v)
{
    uint lowByte = v & 0xff;
    return (sbyte)lowByte;
}
}