Browse Source

Add sse2 version of select

pull/1804/head
Brian Popow 4 years ago
parent
commit
35d2afa0bb
  1. 60
      src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
  2. 27
      src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

60
src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

@ -27,6 +27,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const double Log2Reciprocal = 1.44269504088896338700465094007086;
#if SUPPORTS_RUNTIME_INTRINSICS
private static readonly Vector128<byte> Zero = Vector128.Create(0).AsByte();
#endif
/// <summary>
/// Returns the exact index where array1 and array2 are different. For an index
/// less than or equal to bestLenMatch, the return value just has to be strictly
@ -551,6 +555,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int mask = tileWidth - 1;
int tilesPerRow = SubSampleSize(width, transform.Bits);
int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow;
Span<short> scratch = stackalloc short[8];
while (y < yEnd)
{
int predictorModeIdx = predictorModeIdxBase;
@ -608,7 +613,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
PredictorAdd10(input + x, output + x - width, xEnd - x, output + x);
break;
case 11:
PredictorAdd11(input + x, output + x - width, xEnd - x, output + x);
PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch);
break;
case 12:
PredictorAdd12(input + x, output + x - width, xEnd - x, output + x);
@ -974,11 +979,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
/// <summary>
/// For each of the <paramref name="numberOfPixels"/> positions, computes a prediction with Predictor11 from the
/// previously written pixel (output[x - 1]) and the row pointer upper + x, then stores
/// AddPixels(input[x], prediction) into output[x].
/// </summary>
/// <param name="input">Pointer to the values that are combined with each prediction.</param>
/// <param name="upper">Pointer into the row above; Predictor11 reads top[0] and top[-1] relative to upper + x.</param>
/// <param name="numberOfPixels">Number of pixels to process.</param>
/// <param name="output">Destination pointer. output[-1] is read on the first iteration, so one valid pixel must precede it.</param>
/// <param name="scratch">Temporary buffer forwarded unchanged to Predictor11.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output)
private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span<short> scratch)
{
for (int x = 0; x < numberOfPixels; x++)
{
// The left neighbour is taken from the output written so far, not from input.
uint pred = Predictor11(output[x - 1], upper + x);
uint pred = Predictor11(output[x - 1], upper + x, scratch);
output[x] = AddPixels(input[x], pred);
}
}
@ -1031,7 +1036,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Predictor 10: Average4 of the left pixel and the three pixels top[-1], top[0], top[1] around the top pointer.
public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]);
/// <summary>
/// Predictor 11: delegates to Select with top[0] and <paramref name="left"/> as the two candidates
/// and top[-1] as the third argument.
/// </summary>
/// <param name="left">The pixel to the left of the current position.</param>
/// <param name="top">Pointer to the pixel above the current position; top[-1] is also read.</param>
/// <param name="scratch">Temporary buffer passed through to Select.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor11(uint left, uint* top) => Select(top[0], left, top[-1]);
public static uint Predictor11(uint left, uint* top, Span<short> scratch) => Select(top[0], left, top[-1], scratch);
/// <summary>
/// Predictor 12: ClampedAddSubtractFull of <paramref name="left"/>, top[0] and top[-1].
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]);
@ -1148,11 +1153,11 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
/// <summary>
/// For each of the <paramref name="numPixels"/> positions, computes a prediction with Predictor11 from the
/// left input pixel (input[x - 1]) and the row pointer upper + x, then stores
/// SubPixels(input[x], prediction) into output[x].
/// </summary>
/// <param name="input">Pointer to the source pixels. input[-1] is read on the first iteration, so one valid pixel must precede it.</param>
/// <param name="upper">Pointer into the row above; Predictor11 reads top[0] and top[-1] relative to upper + x.</param>
/// <param name="numPixels">Number of pixels to process.</param>
/// <param name="output">Destination pointer receiving one value per processed pixel.</param>
/// <param name="scratch">Temporary buffer forwarded unchanged to Predictor11.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output)
public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span<short> scratch)
{
for (int x = 0; x < numPixels; x++)
{
// Unlike PredictorAdd11, the left neighbour comes from input rather than from output.
uint pred = Predictor11(input[x - 1], upper + x);
uint pred = Predictor11(input[x - 1], upper + x, scratch);
output[x] = SubPixels(input[x], pred);
}
}
@ -1240,14 +1245,43 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
// Packs hi into the upper 16 bits and the low 16 bits of lo into the lower 16 bits of an int,
// then broadcasts that value to every 32-bit lane of the vector.
private static Vector128<int> MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
#endif
/// <summary>
/// Returns either <paramref name="a"/> or <paramref name="b"/>, decided per 8-bit channel against <paramref name="c"/>.
/// In the SSE2 path the decision value is the sum over the four channels of |b - c| - |a - c|;
/// <paramref name="a"/> is returned when that sum is less than or equal to zero, otherwise <paramref name="b"/>.
/// The scalar path makes the same kind of decision from the sum of Sub3 over the four channels.
/// </summary>
/// <param name="a">First candidate pixel.</param>
/// <param name="b">Second candidate pixel.</param>
/// <param name="c">Pixel both candidates are compared against.</param>
/// <param name="scratch">
/// Buffer the SSE2 path uses to spill the 128-bit difference vector; it must hold at least 8 elements.
/// The scalar path does not touch it.
/// </param>
private static uint Select(uint a, uint b, uint c)
private static uint Select(uint a, uint b, uint c, Span<short> scratch)
{
// Sum Sub3 over the four 8-bit channels of a, b and c (bits 24-31, 16-23, 8-15 and 0-7).
int paMinusPb =
Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
return paMinusPb <= 0 ? a : b;
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
{
// Alias of the caller-provided buffer; the vector result is stored here so its lanes can be read back as scalars.
Span<short> output = scratch;
// Pin the buffer so Sse2.Store can write through a raw pointer.
fixed (short* p = output)
{
// Place each 32-bit pixel in the lowest lane of its own vector; the upper lanes are zero.
Vector128<byte> a0 = Sse2.ConvertScalarToVector128UInt32(a).AsByte();
Vector128<byte> b0 = Sse2.ConvertScalarToVector128UInt32(b).AsByte();
Vector128<byte> c0 = Sse2.ConvertScalarToVector128UInt32(c).AsByte();
// Unsigned saturating subtraction clamps negative results to zero, so for every byte
// at most one of (x - c) and (c - x) is non-zero.
Vector128<byte> ac0 = Sse2.SubtractSaturate(a0, c0);
Vector128<byte> ca0 = Sse2.SubtractSaturate(c0, a0);
Vector128<byte> bc0 = Sse2.SubtractSaturate(b0, c0);
Vector128<byte> cb0 = Sse2.SubtractSaturate(c0, b0);
// OR-ing the two one-sided differences yields the per-byte absolute difference.
Vector128<byte> ac = Sse2.Or(ac0, ca0);
Vector128<byte> bc = Sse2.Or(bc0, cb0);
// Interleave with zero bytes to widen the low eight bytes into 16-bit lanes.
Vector128<byte> pa = Sse2.UnpackLow(ac, Zero); // |a - c|
Vector128<byte> pb = Sse2.UnpackLow(bc, Zero); // |b - c|
// Per-lane |b - c| - |a - c|; the result can be negative, which is why the lanes are read back as short.
Vector128<ushort> diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
// Writes the full 128 bits (eight 16-bit lanes) into the scratch buffer.
Sse2.Store((ushort*)p, diff);
}
// Lanes 0..3 hold the four channels of the pixel; lanes 4..7 stem from the zeroed upper bytes and are ignored.
// NOTE(review): despite its name, this sum is |b - c| - |a - c| over the channels; presumably Sub3
// (not visible here) uses the same sign convention — confirm.
int paMinusPb = output[0] + output[1] + output[2] + output[3];
return (paMinusPb <= 0) ? a : b;
}
else
#endif
{
// Scalar fallback: same per-channel Sub3 sum as used when SSE2 is unavailable or intrinsics are compiled out.
int paMinusPb =
Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
return paMinusPb <= 0 ? a : b;
}
}
[MethodImpl(InliningOptions.ShortMethod)]

27
src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

@ -50,6 +50,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int tilesPerRow = LosslessUtils.SubSampleSize(width, bits);
int tilesPerCol = LosslessUtils.SubSampleSize(height, bits);
int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality);
Span<short> scratch = stackalloc short[8];
// TODO: Can we optimize this?
int[][] histo = new int[4][];
@ -84,7 +85,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
transparentColorMode,
usedSubtractGreen,
nearLossless,
image);
image,
scratch);
image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8));
}
@ -192,7 +194,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen,
bool nearLossless,
Span<uint> modes)
Span<uint> modes,
Span<short> scratch)
{
const int numPredModes = 14;
int startX = tileX << bits;
@ -272,7 +275,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
}
}
GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals);
GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals, scratch);
for (int relativeX = 0; relativeX < maxX; ++relativeX)
{
UpdateHisto(histoArgb, residuals[relativeX]);
@ -333,11 +336,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
WebpTransparentColorMode transparentColorMode,
bool usedSubtractGreen,
bool nearLossless,
Span<uint> output)
Span<uint> output,
Span<short> scratch)
{
if (transparentColorMode == WebpTransparentColorMode.Preserve)
{
PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output);
PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output, scratch);
}
else
{
@ -395,7 +399,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x);
break;
case 11:
predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x);
predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x, scratch);
break;
case 12:
predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x);
@ -583,6 +587,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<byte> currentMaxDiffs = MemoryMarshal.Cast<uint, byte>(currentRow.Slice(width + 1));
Span<byte> lowerMaxDiffs = currentMaxDiffs.Slice(width);
Span<short> scratch = stackalloc short[8];
for (int y = 0; y < height; y++)
{
Span<uint> tmp32 = upperRow;
@ -593,7 +598,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (lowEffort)
{
PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width));
PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width), scratch);
}
else
{
@ -634,7 +639,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
transparentColorMode,
usedSubtractGreen,
nearLossless,
argb.Slice((y * width) + x));
argb.Slice((y * width) + x),
scratch);
x = xEnd;
}
@ -649,7 +655,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
int numPixels,
Span<uint> currentSpan,
Span<uint> upperSpan,
Span<uint> outputSpan)
Span<uint> outputSpan,
Span<short> scratch)
{
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* current = currentSpan)
@ -718,7 +725,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output);
break;
case 11:
LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output);
LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output, scratch);
break;
case 12:
LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output);

Loading…
Cancel
Save