Browse Source

Avoid pinning

pull/1824/head
Brian Popow 5 years ago
parent
commit
c15e62ce50
  1. 144
      src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

144
src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

@ -1079,34 +1079,32 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (int x = 0; x + span <= tileWidth; x += span)
{
for (int x = 0; x + span <= tileWidth; x += span)
int input0Idx = x;
int input1Idx = x + (span / 2);
Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = d.AsUInt16();
for (int i = 0; i < span; i++)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> g0 = Sse2.And(input0, CollectColorRedTransformsGreenMask); // 0 0 | g 0
Vector128<byte> g1 = Sse2.And(input1, CollectColorRedTransformsGreenMask);
Vector128<ushort> g = Sse41.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0
Vector128<int> a0 = Sse2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r
Vector128<int> a1 = Sse2.ShiftRightLogical(input1.AsInt32(), 16);
Vector128<ushort> a = Sse41.PackUnsignedSaturate(a0, a1); // x r
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg); // x dr
Vector128<byte> c = Sse2.Subtract(a.AsByte(), b.AsByte()); // x r'
Vector128<byte> d = Sse2.And(c, CollectColorRedTransformsAndMask); // 0 r'
Sse2.Store(dst, d.AsUInt16());
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
++histo[values[i]];
}
}
}
#pragma warning restore SA1503 // Braces should not be omitted
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
@ -1148,36 +1146,34 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (int x = 0; x + span <= tileWidth; x += span)
{
for (int x = 0; x + span <= tileWidth; x += span)
int input0Idx = x;
int input1Idx = x + (span / 2);
Vector256<byte> input0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector256<byte> input1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector256<byte> r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256);
Vector256<byte> r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256);
Vector256<byte> r = Avx2.Or(r0, r1);
Vector256<byte> gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256);
Vector256<byte> gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256);
Vector256<ushort> gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector256<byte> g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256);
Vector256<short> a = Avx2.MultiplyHigh(r.AsInt16(), multsr);
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg);
Vector256<byte> c = Avx2.Subtract(gb.AsByte(), b.AsByte());
Vector256<byte> d = Avx2.Subtract(c, a.AsByte());
Vector256<byte> e = Avx2.And(d, CollectColorBlueTransformsBlueMask256);
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector256<ushort>>(ref outputRef) = e.AsUInt16();
for (int i = 0; i < span; i++)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
Vector256<byte> input0 = Avx.LoadVector256(input0Idx).AsByte();
Vector256<byte> input1 = Avx.LoadVector256(input1Idx).AsByte();
Vector256<byte> r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256);
Vector256<byte> r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256);
Vector256<byte> r = Avx2.Or(r0, r1);
Vector256<byte> gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256);
Vector256<byte> gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256);
Vector256<ushort> gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector256<byte> g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256);
Vector256<short> a = Avx2.MultiplyHigh(r.AsInt16(), multsr);
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg);
Vector256<byte> c = Avx2.Subtract(gb.AsByte(), b.AsByte());
Vector256<byte> d = Avx2.Subtract(c, a.AsByte());
Vector256<byte> e = Avx2.And(d, CollectColorBlueTransformsBlueMask256);
Avx.Store(dst, e.AsUInt16());
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
++histo[values[i]];
}
}
#pragma warning restore SA1503 // Braces should not be omitted
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
@ -1195,37 +1191,35 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan);
for (int x = 0; x + span <= tileWidth; x += span)
{
for (int x = 0; x + span <= tileWidth; x += span)
int input0Idx = x;
int input1Idx = x + (span / 2);
Vector128<byte> input0 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
Vector128<byte> input1 = Unsafe.As<uint, Vector128<uint>>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128<byte> r = Sse2.Or(r0, r1);
Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
ref ushort outputRef = ref MemoryMarshal.GetReference(values);
Unsafe.As<ushort, Vector128<ushort>>(ref outputRef) = e.AsUInt16();
for (int i = 0; i < span; i++)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
Vector128<byte> input0 = Sse2.LoadVector128((ushort*)input0Idx).AsByte();
Vector128<byte> input1 = Sse2.LoadVector128((ushort*)input1Idx).AsByte();
Vector128<byte> r0 = Ssse3.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask);
Vector128<byte> r1 = Ssse3.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask);
Vector128<byte> r = Sse2.Or(r0, r1);
Vector128<byte> gb0 = Sse2.And(input0, CollectColorBlueTransformsGreenBlueMask);
Vector128<byte> gb1 = Sse2.And(input1, CollectColorBlueTransformsGreenBlueMask);
Vector128<ushort> gb = Sse41.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector128<byte> g = Sse2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask);
Vector128<short> a = Sse2.MultiplyHigh(r.AsInt16(), multsr);
Vector128<short> b = Sse2.MultiplyHigh(g.AsInt16(), multsg);
Vector128<byte> c = Sse2.Subtract(gb.AsByte(), b.AsByte());
Vector128<byte> d = Sse2.Subtract(c, a.AsByte());
Vector128<byte> e = Sse2.And(d, CollectColorBlueTransformsBlueMask);
Sse2.Store(dst, e.AsUInt16());
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
++histo[values[i]];
}
}
}
#pragma warning restore SA1503 // Braces should not be omitted
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)

Loading…
Cancel
Save