Browse Source

Define SSE and AVX masks as static readonly fields

pull/1799/head
Brian Popow 5 years ago
parent
commit
15a10126d2
  1. 65
      src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
  2. 43
      src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs
  3. 56
      src/ImageSharp/Formats/Webp/WebpCommonUtils.cs

65
src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

@ -27,6 +27,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const double Log2Reciprocal = 1.44269504088896338700465094007086; private const double Log2Reciprocal = 1.44269504088896338700465094007086;
#if SUPPORTS_RUNTIME_INTRINSICS
// Cached SIMD constants. They replace locals that were previously rebuilt on every call.

// Control vector for Avx2.Shuffle in the add-green path: within every 4-byte pixel, byte 1 is copied
// into bytes 0 and 2, while bytes 1 and 3 become zero (an index of 255 has its high bit set, which
// makes the byte shuffle write zero). The 256-bit shuffle works per 128-bit lane on the low four bits
// of each index, so 17/21/25/29 address bytes 1/5/9/13 of the upper lane.
private static readonly Vector256<byte> AddGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
// 128-bit counterpart of the mask above, passed to Ssse3.Shuffle; the result is added to the input.
private static readonly Vector128<byte> AddGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
// Control byte passed to Sse2.ShuffleLow and Sse2.ShuffleHigh in the SSE2 add-green fallback; the
// call sites annotate the shuffled result as "0g0g".
// NOTE(review): presumably MmShuffle(2, 2, 0, 0) selects 16-bit elements 0, 0, 2, 2 of each half - confirm against SimdUtils.Shuffle.
// NOTE(review): ShuffleLow/ShuffleHigh take an immediate operand; confirm that reading it from a static readonly field does not hurt codegen.
private static readonly byte AddGreenToBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
// Same byte pattern as AddGreenToBlueAndRedMaskAvx2; used with Avx2.Shuffle before the shuffled
// bytes are subtracted from the input.
private static readonly Vector256<byte> SubtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
// Same byte pattern as AddGreenToBlueAndRedMaskSsse3; used with Ssse3.Shuffle in the subtract-green path.
private static readonly Vector128<byte> SubtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
// Same value as AddGreenToBlueAndRedShuffleMask; used by the SSE2 subtract-green fallback.
private static readonly byte SubtractGreenFromBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
// AND mask that keeps the odd-indexed bytes (bytes 1 and 3 of every 4-byte pixel) and clears the even ones.
private static readonly Vector128<byte> TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
// AND mask that keeps the even-indexed bytes (bytes 0 and 2 of every 4-byte pixel) and clears the odd ones;
// applied to the computed delta before it is subtracted from the input.
private static readonly Vector128<byte> TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
// Control byte for Sse2.ShuffleLow / Sse2.ShuffleHigh applied to the alpha/green-masked input.
private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
// Same byte pattern as TransformColorAlphaGreenMask; used by the inverse color transform.
private static readonly Vector128<byte> TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
// Same value as TransformColorShuffleMask; used by the inverse color transform.
private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
#endif
/// <summary> /// <summary>
/// Returns the exact index where array1 and array2 are different. For an index /// Returns the exact index where array1 and array2 are different. For an index
/// inferior or equal to bestLenMatch, the return value just has to be strictly /// inferior or equal to bestLenMatch, the return value just has to be strictly
@ -97,7 +121,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -106,7 +129,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* idx = p + i; uint* idx = p + i;
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte(); Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, mask); Vector256<byte> in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Add(input, in0g0g); Vector256<byte> output = Avx2.Add(input, in0g0g);
Avx.Store((byte*)idx, output); Avx.Store((byte*)idx, output);
} }
@ -119,7 +142,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else if (Ssse3.IsSupported) else if (Ssse3.IsSupported)
{ {
var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -128,7 +150,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* idx = p + i; uint* idx = p + i;
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte(); Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, mask); Vector128<byte> in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Add(input, in0g0g); Vector128<byte> output = Sse2.Add(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte()); Sse2.Store((byte*)idx, output.AsByte());
} }
@ -141,7 +163,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else if (Sse2.IsSupported) else if (Sse2.IsSupported)
{ {
byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -151,8 +172,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* idx = p + i; uint* idx = p + i;
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx); Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, mask); Vector128<ushort> b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g Vector128<ushort> c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte()); Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output); Sse2.Store((byte*)idx, output);
} }
@ -189,7 +210,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -198,7 +218,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* idx = p + i; uint* idx = p + i;
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte(); Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte();
Vector256<byte> in0g0g = Avx2.Shuffle(input, mask); Vector256<byte> in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2);
Vector256<byte> output = Avx2.Subtract(input, in0g0g); Vector256<byte> output = Avx2.Subtract(input, in0g0g);
Avx.Store((byte*)idx, output); Avx.Store((byte*)idx, output);
} }
@ -211,7 +231,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else if (Ssse3.IsSupported) else if (Ssse3.IsSupported)
{ {
var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -220,7 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* idx = p + i; uint* idx = p + i;
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte(); Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte();
Vector128<byte> in0g0g = Ssse3.Shuffle(input, mask); Vector128<byte> in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3);
Vector128<byte> output = Sse2.Subtract(input, in0g0g); Vector128<byte> output = Sse2.Subtract(input, in0g0g);
Sse2.Store((byte*)idx, output.AsByte()); Sse2.Store((byte*)idx, output.AsByte());
} }
@ -233,7 +252,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
} }
else if (Sse2.IsSupported) else if (Sse2.IsSupported)
{ {
byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
int numPixels = pixelData.Length; int numPixels = pixelData.Length;
fixed (uint* p = pixelData) fixed (uint* p = pixelData)
{ {
@ -243,8 +261,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* idx = p + i; uint* idx = p + i;
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx); Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
Vector128<ushort> b = Sse2.ShuffleLow(a, mask); Vector128<ushort> b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask);
Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g Vector128<ushort> c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte()); Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte());
Sse2.Store((byte*)idx, output); Sse2.Store((byte*)idx, output);
} }
@ -394,9 +412,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
fixed (uint* src = data) fixed (uint* src = data)
{ {
int idx; int idx;
@ -404,15 +419,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* pos = src + idx; uint* pos = src + idx;
Vector128<uint> input = Sse2.LoadVector128(pos); Vector128<uint> input = Sse2.LoadVector128(pos);
Vector128<byte> a = Sse2.And(input.AsByte(), maskalphagreen); Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8);
Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16());
Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16); Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16);
Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte()); Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte());
Vector128<byte> i = Sse2.And(h, maskredblue); Vector128<byte> i = Sse2.And(h, TransformColorRedBlueMask);
Vector128<byte> output = Sse2.Subtract(input.AsByte(), i); Vector128<byte> output = Sse2.Subtract(input.AsByte(), i);
Sse2.Store((byte*)pos, output); Sse2.Store((byte*)pos, output);
} }
@ -460,8 +475,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue));
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);
var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0);
fixed (uint* src = pixelData) fixed (uint* src = pixelData)
{ {
int idx; int idx;
@ -469,9 +482,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{ {
uint* pos = src + idx; uint* pos = src + idx;
Vector128<uint> input = Sse2.LoadVector128(pos); Vector128<uint> input = Sse2.LoadVector128(pos);
Vector128<byte> a = Sse2.And(input.AsByte(), maskalphagreen); Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask);
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask);
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16());
Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte()); Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte());
Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8);

43
src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

@ -36,6 +36,22 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private const int PredLowEffort = 11; private const int PredLowEffort = 11;
#if SUPPORTS_RUNTIME_INTRINSICS
// Cached SIMD constants for the SSE4.1 color-transform histogram code paths.

// 0x0000ff00 broadcast to every 32-bit lane: an AND mask that keeps only byte 1 of each 4-byte pixel
// (the call site annotates the result as "0 0 | g 0").
private static readonly Vector128<byte> CollectColorRedTransformsGreenMask = Vector128.Create(0x00ff00).AsByte();
// 0x00ff broadcast to every 16-bit lane: an AND mask that keeps the low byte of each 16-bit element
// (the call site annotates the result as "0 r'").
private static readonly Vector128<byte> CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte();
// AND mask that keeps the high byte of each 16-bit element; applied to the packed green/blue values.
private static readonly Vector128<byte> CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
// AND mask that keeps bytes 0 and 1 of every 4-byte pixel and clears bytes 2 and 3, so the values fit
// the subsequent 32-to-16-bit unsigned pack.
private static readonly Vector128<byte> CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
// AND mask that keeps the low byte of each 16-bit element; applied to the final difference before it is stored.
private static readonly Vector128<byte> CollectColorBlueTransformsBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
// Ssse3.Shuffle control for the first four pixels: byte 2 of each pixel is moved into the high byte of
// 16-bit elements 0..3; every other byte is zeroed (index 255).
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
// Ssse3.Shuffle control for the next four pixels: byte 2 of each pixel is moved into the high byte of
// 16-bit elements 4..7, so OR-ing with the low-half result yields one vector covering all eight pixels.
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
#endif
/// <summary> /// <summary>
/// Finds the best predictor for each tile, and converts the image to residuals /// Finds the best predictor for each tile, and converts the image to residuals
/// with respect to predictions. If nearLosslessQuality &lt; 100, applies /// with respect to predictions. If nearLosslessQuality &lt; 100, applies
@ -1039,9 +1055,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
if (Sse41.IsSupported) if (Sse41.IsSupported)
{ {
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed)); var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed));
var maskgreen = Vector128.Create(0x00ff00);
var mask = Vector128.Create((short)0xff);
const int span = 8; const int span = 8;
Span<ushort> values = stackalloc ushort[span]; Span<ushort> values = stackalloc ushort[span];
for (int y = 0; y < tileHeight; y++) for (int y = 0; y < tileHeight; y++)
@ -1057,15 +1070,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* input1Idx = src + x + (span / 2); uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte(); Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte(); Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> g0 = Sse2.And(input0, maskgreen.AsByte()); // 0 0 | g 0 Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, maskgreen.AsByte()); Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0 Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16); Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r' Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, mask.AsByte()); // 0 r' Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
Sse2.Store(dst, d.AsUInt16()); Sse2.Store(dst, d.AsUInt16());
for (int i = 0; i < span; i++) for (int i = 0; i < span; i++)
{ {
@ -1113,12 +1126,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
Span<ushort> values = stackalloc ushort[span]; Span<ushort> values = stackalloc ushort[span];
var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue)); var multsr = Vector128.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue)); var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToBlue));
var maskgreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
var maskgreenblue = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
var maskblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
var shufflerLow = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
var shufflerHigh = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
for (int y = 0; y < tileHeight; y++) for (int y = 0; y < tileHeight; y++)
{ {
Span<uint> srcSpan = bgra.Slice(y * stride); Span<uint> srcSpan = bgra.Slice(y * stride);
@ -1132,18 +1139,18 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
uint* input1Idx = src + x + (span / 2); uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte(); Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte(); Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> r0 = Ssse3.Shuffle(input0, shufflerLow); Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
Vector128<byte> r1 = Ssse3.Shuffle(input1, shufflerHigh); Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128<byte> r = Sse2.Or(r0, r1); Vector128<byte> r = Sse2.Or(r0, r1);
Vector128<byte> gb0 = Sse2.And(input0, maskgreenblue); Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
Vector128<byte> gb1 = Sse2.And(input1, maskgreenblue); Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32()); Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector128<byte> g = Sse2.And(gb.AsByte(), maskgreen); Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr); Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte()); Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte()); Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, maskblue); Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
Sse2.Store(dst, e.AsUInt16()); Sse2.Store(dst, e.AsUInt16());
for (int i = 0; i < span; i++) for (int i = 0; i < span; i++)
{ {

56
src/ImageSharp/Formats/Webp/WebpCommonUtils.cs

@ -16,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp
/// </summary> /// </summary>
internal static class WebpCommonUtils internal static class WebpCommonUtils
{ {
#if SUPPORTS_RUNTIME_INTRINSICS
// Cached SIMD constants for the opacity check; shared by the AVX2 path and the SSE2 helpers
// IsNoneOpaque64Bytes / IsNoneOpaque32Bytes, which formerly received them as parameters.

// AND mask that keeps byte 3 of every 4-byte pixel and clears bytes 0..2 (256-bit variant).
private static readonly Vector256<byte> AlphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
// Every byte set to 0x80; compared with the masked values after two rounds of signed saturating packing.
// The trailing AsByte() is a no-op because Create((byte)...) already yields a byte vector.
// NOTE(review): signed saturation appears to map every masked value whose top byte has its high bit set
// (0x80..0xFF) to 0x80, so values 0x80..0xFE may be indistinguishable from 0xFF here - confirm.
private static readonly Vector256<byte> All0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
// 128-bit variant of AlphaMaskVector256.
private static readonly Vector128<byte> AlphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
// 128-bit variant of All0x80Vector256; the same review note applies.
private static readonly Vector128<byte> All0x80 = Vector128.Create((byte)0x80).AsByte();
#endif
/// <summary> /// <summary>
/// Checks if the pixel row is not opaque. /// Checks if the pixel row is not opaque.
/// </summary> /// </summary>
@ -27,11 +37,6 @@ namespace SixLabors.ImageSharp.Formats.Webp
if (Avx2.IsSupported) if (Avx2.IsSupported)
{ {
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row); ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
var alphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector256<byte> all0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
int i = 0; int i = 0;
int length = (row.Length * 4) - 3; int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes) fixed (byte* src = rowBytes)
@ -42,14 +47,14 @@ namespace SixLabors.ImageSharp.Formats.Webp
Vector256<byte> a1 = Avx.LoadVector256(src + i + 32).AsByte(); Vector256<byte> a1 = Avx.LoadVector256(src + i + 32).AsByte();
Vector256<byte> a2 = Avx.LoadVector256(src + i + 64).AsByte(); Vector256<byte> a2 = Avx.LoadVector256(src + i + 64).AsByte();
Vector256<byte> a3 = Avx.LoadVector256(src + i + 96).AsByte(); Vector256<byte> a3 = Avx.LoadVector256(src + i + 96).AsByte();
Vector256<int> b0 = Avx2.And(a0, alphaMaskVector256).AsInt32(); Vector256<int> b0 = Avx2.And(a0, AlphaMaskVector256).AsInt32();
Vector256<int> b1 = Avx2.And(a1, alphaMaskVector256).AsInt32(); Vector256<int> b1 = Avx2.And(a1, AlphaMaskVector256).AsInt32();
Vector256<int> b2 = Avx2.And(a2, alphaMaskVector256).AsInt32(); Vector256<int> b2 = Avx2.And(a2, AlphaMaskVector256).AsInt32();
Vector256<int> b3 = Avx2.And(a3, alphaMaskVector256).AsInt32(); Vector256<int> b3 = Avx2.And(a3, AlphaMaskVector256).AsInt32();
Vector256<short> c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16(); Vector256<short> c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16();
Vector256<short> c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16(); Vector256<short> c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16();
Vector256<byte> d = Avx2.PackSignedSaturate(c0, c1).AsByte(); Vector256<byte> d = Avx2.PackSignedSaturate(c0, c1).AsByte();
Vector256<byte> bits = Avx2.CompareEqual(d, all0x80Vector256); Vector256<byte> bits = Avx2.CompareEqual(d, All0x80Vector256);
int mask = Avx2.MoveMask(bits); int mask = Avx2.MoveMask(bits);
if (mask != -1) if (mask != -1)
{ {
@ -59,7 +64,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 64 <= length; i += 64) for (; i + 64 <= length; i += 64)
{ {
if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque64Bytes(src, i))
{ {
return true; return true;
} }
@ -67,7 +72,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 32 <= length; i += 32) for (; i + 32 <= length; i += 32)
{ {
if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque32Bytes(src, i))
{ {
return true; return true;
} }
@ -85,16 +90,13 @@ namespace SixLabors.ImageSharp.Formats.Webp
else if (Sse2.IsSupported) else if (Sse2.IsSupported)
{ {
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row); ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
int i = 0; int i = 0;
int length = (row.Length * 4) - 3; int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes) fixed (byte* src = rowBytes)
{ {
for (; i + 64 <= length; i += 64) for (; i + 64 <= length; i += 64)
{ {
if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque64Bytes(src, i))
{ {
return true; return true;
} }
@ -102,7 +104,7 @@ namespace SixLabors.ImageSharp.Formats.Webp
for (; i + 32 <= length; i += 32) for (; i + 32 <= length; i += 32)
{ {
if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80)) if (IsNoneOpaque32Bytes(src, i))
{ {
return true; return true;
} }
@ -133,20 +135,20 @@ namespace SixLabors.ImageSharp.Formats.Webp
} }
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80) private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i)
{ {
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte(); Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte(); Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte();
Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte(); Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte();
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32(); Vector128<int> b0 = Sse2.And(a0, AlphaMask).AsInt32();
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32(); Vector128<int> b1 = Sse2.And(a1, AlphaMask).AsInt32();
Vector128<int> b2 = Sse2.And(a2, alphaMask).AsInt32(); Vector128<int> b2 = Sse2.And(a2, AlphaMask).AsInt32();
Vector128<int> b3 = Sse2.And(a3, alphaMask).AsInt32(); Vector128<int> b3 = Sse2.And(a3, AlphaMask).AsInt32();
Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16(); Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte(); Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80); Vector128<byte> bits = Sse2.CompareEqual(d, All0x80);
int mask = Sse2.MoveMask(bits); int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF) if (mask != 0xFFFF)
{ {
@ -156,15 +158,15 @@ namespace SixLabors.ImageSharp.Formats.Webp
return false; return false;
} }
private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80) private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i)
{ {
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte(); Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte(); Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32(); Vector128<int> b0 = Sse2.And(a0, AlphaMask).AsInt32();
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32(); Vector128<int> b1 = Sse2.And(a1, AlphaMask).AsInt32();
Vector128<short> c = Sse2.PackSignedSaturate(b0, b1).AsInt16(); Vector128<short> c = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c, c).AsByte(); Vector128<byte> d = Sse2.PackSignedSaturate(c, c).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80); Vector128<byte> bits = Sse2.CompareEqual(d, All0x80);
int mask = Sse2.MoveMask(bits); int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF) if (mask != 0xFFFF)
{ {

Loading…
Cancel
Save