Browse Source

Avx version of CollectColorBlueTransforms

pull/1824/head
Brian Popow 5 years ago
parent
commit
a2c0900f61
  1. 2
      src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
  2. 60
      src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

2
src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

@ -744,6 +744,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (float)retVal;
}
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorRed(sbyte greenToRed, uint argb)
{
sbyte green = U32ToS8(argb >> 8);
@ -752,6 +753,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return (byte)(newRed & 0xff);
}
[MethodImpl(InliningOptions.ShortMethod)]
public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint argb)
{
sbyte green = U32ToS8(argb >> 8);

60
src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

@ -48,6 +48,17 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleLowMask = Vector128.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector128<byte> CollectColorBlueTransformsShuffleHighMask = Vector128.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14);
private static readonly Vector256<byte> CollectColorBlueTransformsShuffleLowMask256 = Vector256.Create(255, 2, 255, 6, 255, 10, 255, 14, 255, 18, 255, 22, 255, 26, 255, 30, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255);
private static readonly Vector256<byte> CollectColorBlueTransformsShuffleHighMask256 = Vector256.Create(255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 2, 255, 6, 255, 10, 255, 14, 255, 18, 255, 22, 255, 26, 255, 30, 255);
private static readonly Vector256<byte> CollectColorBlueTransformsGreenBlueMask256 = Vector256.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
private static readonly Vector256<byte> CollectColorBlueTransformsBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0);
private static readonly Vector256<byte> CollectColorBlueTransformsGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
#endif
// This uses C#'s compiler optimization to refer to assembly's static data directly.
@ -1128,7 +1139,54 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
private static void CollectColorBlueTransforms(Span<uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span<int> histo)
{
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse41.IsSupported)
if (Avx2.IsSupported && tileWidth > 16)
{
const int span = 16;
Span<ushort> values = stackalloc ushort[span];
var multsr = Vector256.Create(LosslessUtils.Cst5b(redToBlue));
var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToBlue));
for (int y = 0; y < tileHeight; y++)
{
Span<uint> srcSpan = bgra.Slice(y * stride);
#pragma warning disable SA1503 // Braces should not be omitted
fixed (uint* src = srcSpan)
fixed (ushort* dst = values)
{
for (int x = 0; x + span <= tileWidth; x += span)
{
uint* input0Idx = src + x;
uint* input1Idx = src + x + (span / 2);
Vector256<byte> input0 = Avx.LoadVector256(input0Idx).AsByte();
Vector256<byte> input1 = Avx.LoadVector256(input1Idx).AsByte();
Vector256<byte> r0 = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256);
Vector256<byte> r1 = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256);
Vector256<byte> r = Avx2.Or(r0, r1);
Vector256<byte> gb0 = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256);
Vector256<byte> gb1 = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256);
Vector256<ushort> gb = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
Vector256<byte> g = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256);
Vector256<short> a = Avx2.MultiplyHigh(r.AsInt16(), multsr);
Vector256<short> b = Avx2.MultiplyHigh(g.AsInt16(), multsg);
Vector256<byte> c = Avx2.Subtract(gb.AsByte(), b.AsByte());
Vector256<byte> d = Avx2.Subtract(c, a.AsByte());
Vector256<byte> e = Avx2.And(d, CollectColorBlueTransformsBlueMask256);
Avx.Store(dst, e.AsUInt16());
for (int i = 0; i < span; i++)
{
++histo[values[i]];
}
}
}
#pragma warning restore SA1503 // Braces should not be omitted
int leftOver = tileWidth & (span - 1);
if (leftOver > 0)
{
CollectColorBlueTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToBlue, redToBlue, histo);
}
}
}
else if (Sse41.IsSupported)
{
const int span = 8;
Span<ushort> values = stackalloc ushort[span];

Loading…
Cancel
Save