|
|
|
@ -27,6 +27,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
|
|
|
|
private const double Log2Reciprocal = 1.44269504088896338700465094007086; |
|
|
|
|
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
private static readonly Vector256<byte> AddGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); |
|
|
|
|
|
|
|
private static readonly Vector128<byte> AddGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); |
|
|
|
|
|
|
|
private static readonly byte AddGreenToBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); |
|
|
|
|
|
|
|
private static readonly Vector256<byte> SubtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); |
|
|
|
|
|
|
|
private static readonly Vector128<byte> SubtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); |
|
|
|
|
|
|
|
private static readonly byte SubtractGreenFromBlueAndRedShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); |
|
|
|
|
|
|
|
private static readonly Vector128<byte> TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
|
|
|
|
private static readonly Vector128<byte> TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); |
|
|
|
|
|
|
|
private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); |
|
|
|
|
|
|
|
private static readonly Vector128<byte> TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
|
|
|
|
private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); |
|
|
|
#endif
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// Returns the exact index where array1 and array2 are different. For an index
|
|
|
|
/// inferior or equal to bestLenMatch, the return value just has to be strictly
|
|
|
|
@ -97,7 +121,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
if (Avx2.IsSupported) |
|
|
|
{ |
|
|
|
var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
fixed (uint* p = pixelData) |
|
|
|
{ |
|
|
|
@ -106,7 +129,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
{ |
|
|
|
uint* idx = p + i; |
|
|
|
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte(); |
|
|
|
Vector256<byte> in0g0g = Avx2.Shuffle(input, mask); |
|
|
|
Vector256<byte> in0g0g = Avx2.Shuffle(input, AddGreenToBlueAndRedMaskAvx2); |
|
|
|
Vector256<byte> output = Avx2.Add(input, in0g0g); |
|
|
|
Avx.Store((byte*)idx, output); |
|
|
|
} |
|
|
|
@ -119,7 +142,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
} |
|
|
|
else if (Ssse3.IsSupported) |
|
|
|
{ |
|
|
|
var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
fixed (uint* p = pixelData) |
|
|
|
{ |
|
|
|
@ -128,7 +150,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
{ |
|
|
|
uint* idx = p + i; |
|
|
|
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte(); |
|
|
|
Vector128<byte> in0g0g = Ssse3.Shuffle(input, mask); |
|
|
|
Vector128<byte> in0g0g = Ssse3.Shuffle(input, AddGreenToBlueAndRedMaskSsse3); |
|
|
|
Vector128<byte> output = Sse2.Add(input, in0g0g); |
|
|
|
Sse2.Store((byte*)idx, output.AsByte()); |
|
|
|
} |
|
|
|
@ -141,7 +163,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
} |
|
|
|
else if (Sse2.IsSupported) |
|
|
|
{ |
|
|
|
byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
fixed (uint* p = pixelData) |
|
|
|
{ |
|
|
|
@ -151,8 +172,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
uint* idx = p + i; |
|
|
|
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx); |
|
|
|
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
|
|
|
|
Vector128<ushort> b = Sse2.ShuffleLow(a, mask); |
|
|
|
Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g
|
|
|
|
Vector128<ushort> b = Sse2.ShuffleLow(a, AddGreenToBlueAndRedShuffleMask); |
|
|
|
Vector128<ushort> c = Sse2.ShuffleHigh(b, AddGreenToBlueAndRedShuffleMask); // 0g0g
|
|
|
|
Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte()); |
|
|
|
Sse2.Store((byte*)idx, output); |
|
|
|
} |
|
|
|
@ -189,7 +210,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
#if SUPPORTS_RUNTIME_INTRINSICS
|
|
|
|
if (Avx2.IsSupported) |
|
|
|
{ |
|
|
|
var mask = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
fixed (uint* p = pixelData) |
|
|
|
{ |
|
|
|
@ -198,7 +218,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
{ |
|
|
|
uint* idx = p + i; |
|
|
|
Vector256<byte> input = Avx.LoadVector256((ushort*)idx).AsByte(); |
|
|
|
Vector256<byte> in0g0g = Avx2.Shuffle(input, mask); |
|
|
|
Vector256<byte> in0g0g = Avx2.Shuffle(input, SubtractGreenFromBlueAndRedMaskAvx2); |
|
|
|
Vector256<byte> output = Avx2.Subtract(input, in0g0g); |
|
|
|
Avx.Store((byte*)idx, output); |
|
|
|
} |
|
|
|
@ -211,7 +231,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
} |
|
|
|
else if (Ssse3.IsSupported) |
|
|
|
{ |
|
|
|
var mask = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
fixed (uint* p = pixelData) |
|
|
|
{ |
|
|
|
@ -220,7 +239,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
{ |
|
|
|
uint* idx = p + i; |
|
|
|
Vector128<byte> input = Sse2.LoadVector128((ushort*)idx).AsByte(); |
|
|
|
Vector128<byte> in0g0g = Ssse3.Shuffle(input, mask); |
|
|
|
Vector128<byte> in0g0g = Ssse3.Shuffle(input, SubtractGreenFromBlueAndRedMaskSsse3); |
|
|
|
Vector128<byte> output = Sse2.Subtract(input, in0g0g); |
|
|
|
Sse2.Store((byte*)idx, output.AsByte()); |
|
|
|
} |
|
|
|
@ -233,7 +252,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
} |
|
|
|
else if (Sse2.IsSupported) |
|
|
|
{ |
|
|
|
byte mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
fixed (uint* p = pixelData) |
|
|
|
{ |
|
|
|
@ -243,8 +261,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
uint* idx = p + i; |
|
|
|
Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx); |
|
|
|
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
|
|
|
|
Vector128<ushort> b = Sse2.ShuffleLow(a, mask); |
|
|
|
Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g
|
|
|
|
Vector128<ushort> b = Sse2.ShuffleLow(a, SubtractGreenFromBlueAndRedShuffleMask); |
|
|
|
Vector128<ushort> c = Sse2.ShuffleHigh(b, SubtractGreenFromBlueAndRedShuffleMask); // 0g0g
|
|
|
|
Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte()); |
|
|
|
Sse2.Store((byte*)idx, output); |
|
|
|
} |
|
|
|
@ -394,9 +412,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
{ |
|
|
|
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); |
|
|
|
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); |
|
|
|
var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
var maskredblue = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); |
|
|
|
byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); |
|
|
|
fixed (uint* src = data) |
|
|
|
{ |
|
|
|
int idx; |
|
|
|
@ -404,15 +419,15 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
{ |
|
|
|
uint* pos = src + idx; |
|
|
|
Vector128<uint> input = Sse2.LoadVector128(pos); |
|
|
|
Vector128<byte> a = Sse2.And(input.AsByte(), maskalphagreen); |
|
|
|
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); |
|
|
|
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); |
|
|
|
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorAlphaGreenMask); |
|
|
|
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask); |
|
|
|
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask); |
|
|
|
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); |
|
|
|
Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); |
|
|
|
Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); |
|
|
|
Vector128<int> g = Sse2.ShiftRightLogical(f.AsInt32(), 16); |
|
|
|
Vector128<byte> h = Sse2.Add(g.AsByte(), d.AsByte()); |
|
|
|
Vector128<byte> i = Sse2.And(h, maskredblue); |
|
|
|
Vector128<byte> i = Sse2.And(h, TransformColorRedBlueMask); |
|
|
|
Vector128<byte> output = Sse2.Subtract(input.AsByte(), i); |
|
|
|
Sse2.Store((byte*)pos, output); |
|
|
|
} |
|
|
|
@ -460,8 +475,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
{ |
|
|
|
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); |
|
|
|
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); |
|
|
|
var maskalphagreen = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
byte shufflemask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); |
|
|
|
fixed (uint* src = pixelData) |
|
|
|
{ |
|
|
|
int idx; |
|
|
|
@ -469,9 +482,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless |
|
|
|
{ |
|
|
|
uint* pos = src + idx; |
|
|
|
Vector128<uint> input = Sse2.LoadVector128(pos); |
|
|
|
Vector128<byte> a = Sse2.And(input.AsByte(), maskalphagreen); |
|
|
|
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), shufflemask); |
|
|
|
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), shufflemask); |
|
|
|
Vector128<byte> a = Sse2.And(input.AsByte(), TransformColorInverseAlphaGreenMask); |
|
|
|
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask); |
|
|
|
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask); |
|
|
|
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); |
|
|
|
Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte()); |
|
|
|
Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); |
|
|
|
|