diff --git a/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs b/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs
index c49d2048c0..4bd70add64 100644
--- a/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs
+++ b/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs
@@ -4,9 +4,15 @@
using System;
using System.Buffers;
using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
internal static class YuvConversion
@@ -96,13 +102,74 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// The row to check.
/// Returns true if alpha has non-0xff values.
[MethodImpl(InliningOptions.ShortMethod)]
- public static bool CheckNonOpaque(Span row)
+        public static unsafe bool CheckNonOpaque(Span row)
{
- for (int x = 0; x < row.Length; x++)
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Sse2.IsSupported)
+            {
+                ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
+                // Alpha is byte 3 of every pixel, i.e. the top byte of each 32-bit lane: shift it down so a lane holds 0..255.
+                Vector128<byte> all0xFF = Vector128.Create((byte)0xFF);
+                // (Masking the top byte and packing with signed saturation would collapse every alpha >= 0x80 to one value.)
+                int i = 0;
+                int length = (row.Length * 4) - 3; // "i <= length" holds exactly while a whole pixel still starts at byte i
+                fixed (byte* src = rowBytes)
+                {
+                    for (; i + 64 <= length; i += 64)
+                    {
+                        Vector128<int> a0 = Sse2.LoadVector128(src + i).AsInt32();
+                        Vector128<int> a1 = Sse2.LoadVector128(src + i + 16).AsInt32();
+                        Vector128<int> a2 = Sse2.LoadVector128(src + i + 32).AsInt32();
+                        Vector128<int> a3 = Sse2.LoadVector128(src + i + 48).AsInt32();
+                        Vector128<int> b0 = Sse2.ShiftRightLogical(a0, 24);
+                        Vector128<int> b1 = Sse2.ShiftRightLogical(a1, 24);
+                        Vector128<int> b2 = Sse2.ShiftRightLogical(a2, 24);
+                        Vector128<int> b3 = Sse2.ShiftRightLogical(a3, 24);
+                        Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1);     // 0..255 fits in int16, nothing saturates
+                        Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3);
+                        Vector128<byte> d = Sse2.PackUnsignedSaturate(c0, c1);     // 16 alpha bytes, values preserved
+                        Vector128<byte> bits = Sse2.CompareEqual(d, all0xFF);
+                        int mask = Sse2.MoveMask(bits);
+                        if (mask != 0xFFFF)
+                        {
+                            return true;
+                        }
+                    }
+                    // Same check for one remaining 32-byte (8 pixel) group; the 8 packed alphas are duplicated to fill 16 bytes.
+                    for (; i + 32 <= length; i += 32)
+                    {
+                        Vector128<int> a0 = Sse2.LoadVector128(src + i).AsInt32();
+                        Vector128<int> a1 = Sse2.LoadVector128(src + i + 16).AsInt32();
+                        Vector128<int> b0 = Sse2.ShiftRightLogical(a0, 24);
+                        Vector128<int> b1 = Sse2.ShiftRightLogical(a1, 24);
+                        Vector128<short> c = Sse2.PackSignedSaturate(b0, b1);
+                        Vector128<byte> d = Sse2.PackUnsignedSaturate(c, c);
+                        Vector128<byte> bits = Sse2.CompareEqual(d, all0xFF);
+                        int mask = Sse2.MoveMask(bits);
+                        if (mask != 0xFFFF)
+                        {
+                            return true;
+                        }
+                    }
+                    // Scalar tail: inspect the alpha byte of each pixel the vector loops did not cover.
+                    for (; i <= length; i += 4)
+                    {
+                        if (src[i + 3] != 0xFF)
+                        {
+                            return true;
+                        }
+                    }
+                }
+            }
+            else
+#endif
{
- if (row[x].A != 255)
+                for (int x = 0; x < row.Length; x++)
{
- return true;
+                    if (row[x].A != 0xFF)
+                    {
+                        return true;
+                    }
}
}
diff --git a/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs b/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs
index 19820c2efa..79b2315a20 100644
--- a/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs
+++ b/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs
@@ -2,17 +2,45 @@
// Licensed under the Apache License, Version 2.0.
using System;
+using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
using Xunit;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using SixLabors.ImageSharp.Tests.TestUtilities;
+#endif
namespace SixLabors.ImageSharp.Tests.Formats.Webp
{
[Trait("Format", "Webp")]
public class YuvConversionTests
{
+ [Fact] // Baseline: calls the all-opaque helper directly, without FeatureTestRunner.
+ public void CheckNonOpaque_WithOpaquePixels_Works() => RunCheckNoneOpaqueWithOpaquePixelsTest();
+
+ [Fact] // Baseline: calls the one-translucent-pixel helper directly, without FeatureTestRunner.
+ public void CheckNonOpaque_WithNoneOpaquePixels_Works() => RunCheckNoneOpaqueWithNoneOpaquePixelsTest();
+
+#if SUPPORTS_RUNTIME_INTRINSICS
+ [Fact] // All-opaque helper, run through FeatureTestRunner with HwIntrinsics.AllowAll.
+ public void CheckNonOpaque_WithOpaquePixels_WithHardwareIntrinsics_Works()
+ => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCheckNoneOpaqueWithOpaquePixelsTest, HwIntrinsics.AllowAll);
+
+ [Fact] // All-opaque helper with HwIntrinsics.DisableSSE2; presumably forces the scalar fallback - confirm against FeatureTestRunner.
+ public void CheckNonOpaque_WithOpaquePixels_WithoutSse2_Works()
+ => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCheckNoneOpaqueWithOpaquePixelsTest, HwIntrinsics.DisableSSE2);
+
+ [Fact] // One-translucent-pixel helper, run through FeatureTestRunner with HwIntrinsics.AllowAll.
+ public void CheckNonOpaque_WithNoneOpaquePixels_WithHardwareIntrinsics_Works()
+ => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCheckNoneOpaqueWithNoneOpaquePixelsTest, HwIntrinsics.AllowAll);
+
+ [Fact] // One-translucent-pixel helper with HwIntrinsics.DisableSSE2; presumably forces the scalar fallback - confirm against FeatureTestRunner.
+ public void CheckNonOpaque_WithNoneOpaquePixels_WithoutSse2_Works()
+ => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCheckNoneOpaqueWithNoneOpaquePixelsTest, HwIntrinsics.DisableSSE2);
+#endif
+
[Theory]
[WithFile(TestImages.WebP.Yuv, PixelTypes.Rgba32)]
public void ConvertRgbToYuv_Works(TestImageProvider provider)
@@ -233,5 +261,89 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
Assert.True(expectedU.AsSpan().SequenceEqual(u.Slice(0, expectedU.Length)));
Assert.True(expectedV.AsSpan().SequenceEqual(v.Slice(0, expectedV.Length)));
}
+
+ private static void RunCheckNoneOpaqueWithNoneOpaquePixelsTest()
+ {
+ // arrange: 27 pixels of 4 bytes each (108 bytes); the 4th byte of each group is the one CheckNonOpaque reads as alpha.
+ byte[] rowBytes =
+ {
+ 122, 120, 101, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 122, 120, 101, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 100, // the only alpha below 255: pixel 21, bytes 80..83 of the row
+ 148, 158, 158, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ };
+ Span row = MemoryMarshal.Cast(rowBytes);
+
+ // act: scan the row for any alpha value other than 255.
+ bool noneOpaque = YuvConversion.CheckNonOpaque(row);
+
+ // assert: the single alpha of 100 must be reported. NOTE(review): 100 is below 0x80; consider also covering an alpha in 0x80..0xFE.
+ Assert.True(noneOpaque);
+ }
+
+ private static void RunCheckNoneOpaqueWithOpaquePixelsTest()
+ {
+ // arrange: 27 pixels of 4 bytes each (108 bytes); the 4th byte of every group (read as alpha by CheckNonOpaque) is 255.
+ byte[] rowBytes =
+ {
+ 122, 120, 101, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 122, 120, 101, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ 148, 158, 158, 255,
+ 171, 165, 151, 255,
+ 209, 208, 210, 255,
+ 174, 183, 189, 255,
+ 148, 158, 158, 255,
+ };
+ Span row = MemoryMarshal.Cast(rowBytes);
+
+ // act: scan the row for any alpha value other than 255.
+ bool noneOpaque = YuvConversion.CheckNonOpaque(row);
+
+ // assert: with every alpha at 255 the row must be reported as fully opaque.
+ Assert.False(noneOpaque);
+ }
}
}