|
|
|
@ -96,7 +96,7 @@ internal static unsafe class LosslessUtils |
|
|
|
{ |
|
|
|
if (Avx2.IsSupported) |
|
|
|
{ |
|
|
|
var addGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); |
|
|
|
Vector256<byte> addGreenToBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
nint i; |
|
|
|
for (i = 0; i <= numPixels - 8; i += 8) |
|
|
|
@ -115,7 +115,7 @@ internal static unsafe class LosslessUtils |
|
|
|
} |
|
|
|
else if (Ssse3.IsSupported) |
|
|
|
{ |
|
|
|
var addGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); |
|
|
|
Vector128<byte> addGreenToBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
nint i; |
|
|
|
for (i = 0; i <= numPixels - 4; i += 4) |
|
|
|
@ -138,13 +138,11 @@ internal static unsafe class LosslessUtils |
|
|
|
nint i; |
|
|
|
for (i = 0; i <= numPixels - 4; i += 4) |
|
|
|
{ |
|
|
|
const byte mmShuffle_2200 = 0b_10_10_00_00; |
|
|
|
|
|
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); |
|
|
|
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte(); |
|
|
|
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
|
|
|
|
Vector128<ushort> b = Sse2.ShuffleLow(a, mmShuffle_2200); |
|
|
|
Vector128<ushort> c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g
|
|
|
|
Vector128<ushort> b = Sse2.ShuffleLow(a, SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector128<ushort> c = Sse2.ShuffleHigh(b, SimdUtils.Shuffle.MMShuffle2200); // 0g0g
|
|
|
|
Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte()); |
|
|
|
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32(); |
|
|
|
} |
|
|
|
@ -178,7 +176,7 @@ internal static unsafe class LosslessUtils |
|
|
|
{ |
|
|
|
if (Avx2.IsSupported) |
|
|
|
{ |
|
|
|
var subtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); |
|
|
|
Vector256<byte> subtractGreenFromBlueAndRedMaskAvx2 = Vector256.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255, 17, 255, 17, 255, 21, 255, 21, 255, 25, 255, 25, 255, 29, 255, 29, 255); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
nint i; |
|
|
|
for (i = 0; i <= numPixels - 8; i += 8) |
|
|
|
@ -197,7 +195,7 @@ internal static unsafe class LosslessUtils |
|
|
|
} |
|
|
|
else if (Ssse3.IsSupported) |
|
|
|
{ |
|
|
|
var subtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); |
|
|
|
Vector128<byte> subtractGreenFromBlueAndRedMaskSsse3 = Vector128.Create(1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255); |
|
|
|
int numPixels = pixelData.Length; |
|
|
|
nint i; |
|
|
|
for (i = 0; i <= numPixels - 4; i += 4) |
|
|
|
@ -220,13 +218,11 @@ internal static unsafe class LosslessUtils |
|
|
|
nint i; |
|
|
|
for (i = 0; i <= numPixels - 4; i += 4) |
|
|
|
{ |
|
|
|
const byte mmShuffle_2200 = 0b_10_10_00_00; |
|
|
|
|
|
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), i); |
|
|
|
Vector128<byte> input = Unsafe.As<uint, Vector128<uint>>(ref pos).AsByte(); |
|
|
|
Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
|
|
|
|
Vector128<ushort> b = Sse2.ShuffleLow(a, mmShuffle_2200); |
|
|
|
Vector128<ushort> c = Sse2.ShuffleHigh(b, mmShuffle_2200); // 0g0g
|
|
|
|
Vector128<ushort> b = Sse2.ShuffleLow(a, SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector128<ushort> c = Sse2.ShuffleHigh(b, SimdUtils.Shuffle.MMShuffle2200); // 0g0g
|
|
|
|
Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte()); |
|
|
|
Unsafe.As<uint, Vector128<uint>>(ref pos) = output.AsUInt32(); |
|
|
|
} |
|
|
|
@ -334,7 +330,7 @@ internal static unsafe class LosslessUtils |
|
|
|
while (y < yEnd) |
|
|
|
{ |
|
|
|
int predRowIdx = predRowIdxStart; |
|
|
|
var m = default(Vp8LMultipliers); |
|
|
|
Vp8LMultipliers m = default; |
|
|
|
int srcSafeEnd = pixelPos + safeWidth; |
|
|
|
int srcEnd = pixelPos + width; |
|
|
|
while (pixelPos < srcSafeEnd) |
|
|
|
@ -371,21 +367,19 @@ internal static unsafe class LosslessUtils |
|
|
|
{ |
|
|
|
if (Avx2.IsSupported && numPixels >= 8) |
|
|
|
{ |
|
|
|
var transformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
var transformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); |
|
|
|
Vector256<byte> transformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
Vector256<byte> transformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); |
|
|
|
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); |
|
|
|
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); |
|
|
|
|
|
|
|
nint idx; |
|
|
|
for (idx = 0; idx <= numPixels - 8; idx += 8) |
|
|
|
{ |
|
|
|
const byte mmShuffle_2200 = 0b_10_10_00_00; |
|
|
|
|
|
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); |
|
|
|
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos); |
|
|
|
Vector256<byte> a = Avx2.And(input.AsByte(), transformColorAlphaGreenMask256); |
|
|
|
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); |
|
|
|
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); |
|
|
|
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); |
|
|
|
Vector256<short> e = Avx2.ShiftLeftLogical(input.AsInt16(), 8); |
|
|
|
Vector256<short> f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); |
|
|
|
@ -403,20 +397,18 @@ internal static unsafe class LosslessUtils |
|
|
|
} |
|
|
|
else if (Sse2.IsSupported) |
|
|
|
{ |
|
|
|
var transformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
var transformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); |
|
|
|
Vector128<byte> transformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
Vector128<byte> transformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); |
|
|
|
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); |
|
|
|
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); |
|
|
|
nint idx; |
|
|
|
for (idx = 0; idx <= numPixels - 4; idx += 4) |
|
|
|
{ |
|
|
|
const byte mmShuffle_2200 = 0b_10_10_00_00; |
|
|
|
|
|
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); |
|
|
|
Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos); |
|
|
|
Vector128<byte> a = Sse2.And(input.AsByte(), transformColorAlphaGreenMask); |
|
|
|
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); |
|
|
|
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); |
|
|
|
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); |
|
|
|
Vector128<short> e = Sse2.ShiftLeftLogical(input.AsInt16(), 8); |
|
|
|
Vector128<short> f = Sse2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); |
|
|
|
@ -465,19 +457,17 @@ internal static unsafe class LosslessUtils |
|
|
|
{ |
|
|
|
if (Avx2.IsSupported && pixelData.Length >= 8) |
|
|
|
{ |
|
|
|
var transformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
Vector256<byte> transformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
Vector256<int> multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); |
|
|
|
Vector256<int> multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); |
|
|
|
nint idx; |
|
|
|
for (idx = 0; idx <= pixelData.Length - 8; idx += 8) |
|
|
|
{ |
|
|
|
const byte mmShuffle_2200 = 0b_10_10_00_00; |
|
|
|
|
|
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); |
|
|
|
Vector256<uint> input = Unsafe.As<uint, Vector256<uint>>(ref pos); |
|
|
|
Vector256<byte> a = Avx2.And(input.AsByte(), transformColorInverseAlphaGreenMask256); |
|
|
|
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), mmShuffle_2200); |
|
|
|
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); |
|
|
|
Vector256<short> b = Avx2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector256<short> c = Avx2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector256<short> d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); |
|
|
|
Vector256<byte> e = Avx2.Add(input.AsByte(), d.AsByte()); |
|
|
|
Vector256<short> f = Avx2.ShiftLeftLogical(e.AsInt16(), 8); |
|
|
|
@ -496,20 +486,18 @@ internal static unsafe class LosslessUtils |
|
|
|
} |
|
|
|
else if (Sse2.IsSupported) |
|
|
|
{ |
|
|
|
var transformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
Vector128<byte> transformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); |
|
|
|
Vector128<int> multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); |
|
|
|
Vector128<int> multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); |
|
|
|
|
|
|
|
nint idx; |
|
|
|
for (idx = 0; idx <= pixelData.Length - 4; idx += 4) |
|
|
|
{ |
|
|
|
const byte mmShuffle_2200 = 0b_10_10_00_00; |
|
|
|
|
|
|
|
ref uint pos = ref Unsafe.Add(ref MemoryMarshal.GetReference(pixelData), idx); |
|
|
|
Vector128<uint> input = Unsafe.As<uint, Vector128<uint>>(ref pos); |
|
|
|
Vector128<byte> a = Sse2.And(input.AsByte(), transformColorInverseAlphaGreenMask); |
|
|
|
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), mmShuffle_2200); |
|
|
|
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), mmShuffle_2200); |
|
|
|
Vector128<short> b = Sse2.ShuffleLow(a.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector128<short> c = Sse2.ShuffleHigh(b.AsInt16(), SimdUtils.Shuffle.MMShuffle2200); |
|
|
|
Vector128<short> d = Sse2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); |
|
|
|
Vector128<byte> e = Sse2.Add(input.AsByte(), d.AsByte()); |
|
|
|
Vector128<short> f = Sse2.ShiftLeftLogical(e.AsInt16(), 8); |
|
|
|
|