Browse Source

Add Avx2 version of CheckNonOpaque

pull/1552/head
Brian Popow 5 years ago
parent
commit
415836fdcc
  1. 129
      src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs
  2. 66
      tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs

129
src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs

@ -101,13 +101,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// </summary> /// </summary>
/// <param name="row">The row to check.</param> /// <param name="row">The row to check.</param>
/// <returns>Returns true if alpha has non-0xff values.</returns> /// <returns>Returns true if alpha has non-0xff values.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static unsafe bool CheckNonOpaque(Span<Rgba32> row) public static unsafe bool CheckNonOpaque(Span<Rgba32> row)
{ {
#if SUPPORTS_RUNTIME_INTRINSICS #if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported) if (Avx2.IsSupported)
{ {
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row); ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
var alphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector256<byte> all0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte(); Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
@ -115,22 +116,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int length = (row.Length * 4) - 3; int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes) fixed (byte* src = rowBytes)
{ {
for (; i + 128 <= length; i += 128)
{
Vector256<byte> a0 = Avx.LoadVector256(src + i).AsByte();
Vector256<byte> a1 = Avx.LoadVector256(src + i + 32).AsByte();
Vector256<byte> a2 = Avx.LoadVector256(src + i + 64).AsByte();
Vector256<byte> a3 = Avx.LoadVector256(src + i + 96).AsByte();
Vector256<int> b0 = Avx2.And(a0, alphaMaskVector256).AsInt32();
Vector256<int> b1 = Avx2.And(a1, alphaMaskVector256).AsInt32();
Vector256<int> b2 = Avx2.And(a2, alphaMaskVector256).AsInt32();
Vector256<int> b3 = Avx2.And(a3, alphaMaskVector256).AsInt32();
Vector256<short> c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16();
Vector256<short> c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16();
Vector256<byte> d = Avx2.PackSignedSaturate(c0, c1).AsByte();
Vector256<byte> bits = Avx2.CompareEqual(d, all0x80Vector256);
int mask = Avx2.MoveMask(bits);
if (mask != -1)
{
return true;
}
}
for (; i + 64 <= length; i += 64) for (; i + 64 <= length; i += 64)
{ {
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte(); if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80))
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte();
Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte();
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32();
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32();
Vector128<int> b2 = Sse2.And(a2, alphaMask).AsInt32();
Vector128<int> b3 = Sse2.And(a3, alphaMask).AsInt32();
Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80);
int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF)
{ {
return true; return true;
} }
@ -138,15 +147,42 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
for (; i + 32 <= length; i += 32) for (; i + 32 <= length; i += 32)
{ {
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte(); if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80))
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte(); {
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32(); return true;
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32(); }
Vector128<short> c = Sse2.PackSignedSaturate(b0, b1).AsInt16(); }
Vector128<byte> d = Sse2.PackSignedSaturate(c, c).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80); for (; i <= length; i += 4)
int mask = Sse2.MoveMask(bits); {
if (mask != 0xFFFF) if (src[i + 3] != 0xFF)
{
return true;
}
}
}
}
else if (Sse2.IsSupported)
{
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector128<byte> all0x80 = Vector128.Create((byte)0x80).AsByte();
int i = 0;
int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes)
{
for (; i + 64 <= length; i += 64)
{
if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80))
{
return true;
}
}
for (; i + 32 <= length; i += 32)
{
if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80))
{ {
return true; return true;
} }
@ -176,6 +212,49 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return false; return false;
} }
#if SUPPORTS_RUNTIME_INTRINSICS
private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80)
{
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte();
Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte();
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32();
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32();
Vector128<int> b2 = Sse2.And(a2, alphaMask).AsInt32();
Vector128<int> b3 = Sse2.And(a3, alphaMask).AsInt32();
Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80);
int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF)
{
return true;
}
return false;
}
private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80)
{
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32();
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32();
Vector128<short> c = Sse2.PackSignedSaturate(b0, b1).AsInt16();
Vector128<byte> d = Sse2.PackSignedSaturate(c, c).AsByte();
Vector128<byte> bits = Sse2.CompareEqual(d, all0x80);
int mask = Sse2.MoveMask(bits);
if (mask != 0xFFFF)
{
return true;
}
return false;
}
#endif
/// <summary> /// <summary>
/// Converts a rgba pixel row to Y. /// Converts a rgba pixel row to Y.
/// </summary> /// </summary>

66
tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs

@ -287,13 +287,45 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
148, 158, 158, 255, 148, 158, 158, 255,
171, 165, 151, 255, 171, 165, 151, 255,
209, 208, 210, 255, 209, 208, 210, 255,
174, 183, 189, 100, 174, 183, 189, 255,
148, 158, 158, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255, 148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255, 148, 158, 158, 255,
171, 165, 151, 255, 171, 165, 151, 255,
209, 208, 210, 255, 209, 208, 210, 255,
174, 183, 189, 255, 174, 183, 189, 255,
148, 158, 158, 255, 148, 158, 158, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
148, 158, 158, 100,
171, 165, 151, 0,
209, 208, 210, 100,
174, 183, 189, 255,
148, 158, 158, 255,
}; };
Span<Rgba32> row = MemoryMarshal.Cast<byte, Rgba32>(rowBytes); Span<Rgba32> row = MemoryMarshal.Cast<byte, Rgba32>(rowBytes);
@ -334,6 +366,38 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
148, 158, 158, 255, 148, 158, 158, 255,
171, 165, 151, 255, 171, 165, 151, 255,
209, 208, 210, 255, 209, 208, 210, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255, 174, 183, 189, 255,
148, 158, 158, 255, 148, 158, 158, 255,
}; };

Loading…
Cancel
Save