diff --git a/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs b/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs
index 4bd70add6..3c67bfb57 100644
--- a/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs
+++ b/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs
@@ -101,13 +101,14 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
///
/// The row to check.
/// Returns true if alpha has non-0xff values.
- [MethodImpl(InliningOptions.ShortMethod)]
public static unsafe bool CheckNonOpaque(Span row)
{
#if SUPPORTS_RUNTIME_INTRINSICS
- if (Sse2.IsSupported)
+ if (Avx2.IsSupported)
{
ReadOnlySpan rowBytes = MemoryMarshal.AsBytes(row);
+ var alphaMaskVector256 = Vector256.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
+ Vector256 all0x80Vector256 = Vector256.Create((byte)0x80).AsByte();
var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
Vector128 all0x80 = Vector128.Create((byte)0x80).AsByte();
@@ -115,22 +116,30 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
int length = (row.Length * 4) - 3;
fixed (byte* src = rowBytes)
{
+ for (; i + 128 <= length; i += 128)
+ {
+ Vector256 a0 = Avx.LoadVector256(src + i).AsByte();
+ Vector256 a1 = Avx.LoadVector256(src + i + 32).AsByte();
+ Vector256 a2 = Avx.LoadVector256(src + i + 64).AsByte();
+ Vector256 a3 = Avx.LoadVector256(src + i + 96).AsByte();
+ Vector256 b0 = Avx2.And(a0, alphaMaskVector256).AsInt32();
+ Vector256 b1 = Avx2.And(a1, alphaMaskVector256).AsInt32();
+ Vector256 b2 = Avx2.And(a2, alphaMaskVector256).AsInt32();
+ Vector256 b3 = Avx2.And(a3, alphaMaskVector256).AsInt32();
+ Vector256 c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16();
+ Vector256 c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16();
+ Vector256 d = Avx2.PackSignedSaturate(c0, c1).AsByte();
+ Vector256 bits = Avx2.CompareEqual(d, all0x80Vector256);
+ int mask = Avx2.MoveMask(bits);
+ if (mask != -1)
+ {
+ return true;
+ }
+ }
+
for (; i + 64 <= length; i += 64)
{
- Vector128 a0 = Sse2.LoadVector128(src + i).AsByte();
- Vector128 a1 = Sse2.LoadVector128(src + i + 16).AsByte();
- Vector128 a2 = Sse2.LoadVector128(src + i + 32).AsByte();
- Vector128 a3 = Sse2.LoadVector128(src + i + 48).AsByte();
- Vector128 b0 = Sse2.And(a0, alphaMask).AsInt32();
- Vector128 b1 = Sse2.And(a1, alphaMask).AsInt32();
- Vector128 b2 = Sse2.And(a2, alphaMask).AsInt32();
- Vector128 b3 = Sse2.And(a3, alphaMask).AsInt32();
- Vector128 c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16();
- Vector128 c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16();
- Vector128 d = Sse2.PackSignedSaturate(c0, c1).AsByte();
- Vector128 bits = Sse2.CompareEqual(d, all0x80);
- int mask = Sse2.MoveMask(bits);
- if (mask != 0xFFFF)
+ if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80))
{
return true;
}
@@ -138,15 +147,42 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
for (; i + 32 <= length; i += 32)
{
- Vector128 a0 = Sse2.LoadVector128(src + i).AsByte();
- Vector128 a1 = Sse2.LoadVector128(src + i + 16).AsByte();
- Vector128 b0 = Sse2.And(a0, alphaMask).AsInt32();
- Vector128 b1 = Sse2.And(a1, alphaMask).AsInt32();
- Vector128 c = Sse2.PackSignedSaturate(b0, b1).AsInt16();
- Vector128 d = Sse2.PackSignedSaturate(c, c).AsByte();
- Vector128 bits = Sse2.CompareEqual(d, all0x80);
- int mask = Sse2.MoveMask(bits);
- if (mask != 0xFFFF)
+ if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80))
+ {
+ return true;
+ }
+ }
+
+ for (; i <= length; i += 4)
+ {
+ if (src[i + 3] != 0xFF)
+ {
+ return true;
+ }
+ }
+ }
+ }
+ else if (Sse2.IsSupported)
+ {
+ ReadOnlySpan rowBytes = MemoryMarshal.AsBytes(row);
+ var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
+ Vector128 all0x80 = Vector128.Create((byte)0x80).AsByte();
+
+ int i = 0;
+ int length = (row.Length * 4) - 3;
+ fixed (byte* src = rowBytes)
+ {
+ for (; i + 64 <= length; i += 64)
+ {
+ if (IsNoneOpaque64Bytes(src, i, alphaMask, all0x80))
+ {
+ return true;
+ }
+ }
+
+ for (; i + 32 <= length; i += 32)
+ {
+ if (IsNoneOpaque32Bytes(src, i, alphaMask, all0x80))
{
return true;
}
@@ -176,6 +212,49 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
return false;
}
+#if SUPPORTS_RUNTIME_INTRINSICS
+ /// <summary>
+ /// Checks 64 bytes (16 four-byte pixels), starting at <paramref name="src"/> + <paramref name="i"/>,
+ /// for an alpha byte that is not 0xFF.
+ /// </summary>
+ /// <param name="src">Pointer to the first byte of the pixel row.</param>
+ /// <param name="i">Byte offset into the row at which the 64 byte block starts.</param>
+ /// <param name="alphaMask">Byte mask selecting the alpha byte of each pixel (0xFF at alpha positions, 0 elsewhere).</param>
+ /// <param name="all0x80">No longer used; kept so existing call sites keep compiling.</param>
+ /// <returns>True if at least one selected alpha byte differs from 0xFF.</returns>
+ private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80)
+ {
+     // The former implementation narrowed the masked 32 bit lanes with PackSignedSaturate and compared with 0x80.
+     // Because the alpha byte is the most significant byte of each lane, every alpha in 0x80..0xFF is a negative
+     // lane and saturates to 0x80, so alpha values 0x80..0xFE were wrongly treated as fully opaque.
+     _ = all0x80;
+
+     Vector128<byte> a0 = Sse2.LoadVector128(src + i);
+     Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16);
+     Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32);
+     Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48);
+
+     // A bit survives the AND-reduction only if it is set in all four vectors,
+     // so an alpha byte stays 0xFF only when all four corresponding pixels have alpha 0xFF.
+     Vector128<byte> combined = Sse2.And(Sse2.And(a0, a1), Sse2.And(a2, a3));
+     Vector128<byte> alpha = Sse2.And(combined, alphaMask);
+
+     // Non-alpha positions are 0 on both sides and always compare equal;
+     // alpha positions compare equal only when the byte is exactly 0xFF.
+     Vector128<byte> bits = Sse2.CompareEqual(alpha, alphaMask);
+     return Sse2.MoveMask(bits) != 0xFFFF;
+ }
+
+ /// <summary>
+ /// Checks 32 bytes (8 four-byte pixels), starting at <paramref name="src"/> + <paramref name="i"/>,
+ /// for an alpha byte that is not 0xFF.
+ /// </summary>
+ /// <param name="src">Pointer to the first byte of the pixel row.</param>
+ /// <param name="i">Byte offset into the row at which the 32 byte block starts.</param>
+ /// <param name="alphaMask">Byte mask selecting the alpha byte of each pixel (0xFF at alpha positions, 0 elsewhere).</param>
+ /// <param name="all0x80">No longer used; kept so existing call sites keep compiling.</param>
+ /// <returns>True if at least one selected alpha byte differs from 0xFF.</returns>
+ private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i, Vector128<byte> alphaMask, Vector128<byte> all0x80)
+ {
+     // The former implementation narrowed the masked 32 bit lanes with PackSignedSaturate and compared with 0x80.
+     // Because the alpha byte is the most significant byte of each lane, every alpha in 0x80..0xFF is a negative
+     // lane and saturates to 0x80, so alpha values 0x80..0xFE were wrongly treated as fully opaque.
+     _ = all0x80;
+
+     Vector128<byte> a0 = Sse2.LoadVector128(src + i);
+     Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16);
+
+     // A bit survives the AND only if it is set in both vectors,
+     // so an alpha byte stays 0xFF only when both corresponding pixels have alpha 0xFF.
+     Vector128<byte> alpha = Sse2.And(Sse2.And(a0, a1), alphaMask);
+
+     // Non-alpha positions are 0 on both sides and always compare equal;
+     // alpha positions compare equal only when the byte is exactly 0xFF.
+     Vector128<byte> bits = Sse2.CompareEqual(alpha, alphaMask);
+     return Sse2.MoveMask(bits) != 0xFFFF;
+ }
+#endif
+
///
/// Converts a rgba pixel row to Y.
///
diff --git a/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs b/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs
index 79b2315a2..59ef221bf 100644
--- a/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs
+++ b/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs
@@ -287,13 +287,45 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
- 174, 183, 189, 100,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 148, 158, 158, 100,
+ 171, 165, 151, 0,
+ 209, 208, 210, 100,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
};
Span row = MemoryMarshal.Cast(rowBytes);
@@ -334,6 +366,38 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
148, 158, 158, 255,
171, 165, 151, 255,
209, 208, 210, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
174, 183, 189, 255,
148, 158, 158, 255,
};