diff --git a/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs b/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs index c49d2048c0..4bd70add64 100644 --- a/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs +++ b/src/ImageSharp/Formats/WebP/Lossy/YuvConversion.cs @@ -4,9 +4,15 @@ using System; using System.Buffers; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif + namespace SixLabors.ImageSharp.Formats.Webp.Lossy { internal static class YuvConversion @@ -96,13 +102,74 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy /// The row to check. /// Returns true if alpha has non-0xff values. [MethodImpl(InliningOptions.ShortMethod)] - public static bool CheckNonOpaque(Span row) + public static unsafe bool CheckNonOpaque(Span row) { - for (int x = 0; x < row.Length; x++) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + ReadOnlySpan rowBytes = MemoryMarshal.AsBytes(row); + var alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255); + Vector128 all0x80 = Vector128.Create((byte)0x80).AsByte(); + + int i = 0; + int length = (row.Length * 4) - 3; + fixed (byte* src = rowBytes) + { + for (; i + 64 <= length; i += 64) + { + Vector128 a0 = Sse2.LoadVector128(src + i).AsByte(); + Vector128 a1 = Sse2.LoadVector128(src + i + 16).AsByte(); + Vector128 a2 = Sse2.LoadVector128(src + i + 32).AsByte(); + Vector128 a3 = Sse2.LoadVector128(src + i + 48).AsByte(); + Vector128 b0 = Sse2.And(a0, alphaMask).AsInt32(); + Vector128 b1 = Sse2.And(a1, alphaMask).AsInt32(); + Vector128 b2 = Sse2.And(a2, alphaMask).AsInt32(); + Vector128 b3 = Sse2.And(a3, alphaMask).AsInt32(); + Vector128 c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16(); + Vector128 c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16(); + Vector128 d = Sse2.PackSignedSaturate(c0, c1).AsByte(); + Vector128 bits = Sse2.CompareEqual(d, all0x80); + int mask = Sse2.MoveMask(bits); + if (mask != 0xFFFF) + { + return true; + } + } + + for (; i + 32 <= length; i += 32) + { + Vector128 a0 = Sse2.LoadVector128(src + i).AsByte(); + Vector128 a1 = Sse2.LoadVector128(src + i + 16).AsByte(); + Vector128 b0 = Sse2.And(a0, alphaMask).AsInt32(); + Vector128 b1 = Sse2.And(a1, alphaMask).AsInt32(); + Vector128 c = Sse2.PackSignedSaturate(b0, b1).AsInt16(); + Vector128 d = Sse2.PackSignedSaturate(c, c).AsByte(); + Vector128 bits = Sse2.CompareEqual(d, all0x80); + int mask = Sse2.MoveMask(bits); + if (mask != 0xFFFF) + { + return true; + } + } + + for (; i <= length; i += 4) + { + if (src[i + 3] != 0xFF) + { + return true; + } + } + } + } + else +#endif { - if (row[x].A != 255) + for (int x = 0; x < row.Length; x++) { - return true; + if (row[x].A != 0xFF) + { + return true; + } } } diff --git a/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs b/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs index 19820c2efa..79b2315a20 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/YuvConversionTests.cs @@ -2,17 +2,45 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Runtime.InteropServices; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.Formats.Webp.Lossy; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; using Xunit; +#if SUPPORTS_RUNTIME_INTRINSICS +using SixLabors.ImageSharp.Tests.TestUtilities; +#endif namespace SixLabors.ImageSharp.Tests.Formats.Webp { [Trait("Format", "Webp")] public class YuvConversionTests { + [Fact] + public void CheckNonOpaque_WithOpaquePixels_Works() => RunCheckNoneOpaqueWithOpaquePixelsTest(); + + [Fact] + public void CheckNonOpaque_WithNoneOpaquePixels_Works() => RunCheckNoneOpaqueWithNoneOpaquePixelsTest(); + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void CheckNonOpaque_WithOpaquePixels_WithHardwareIntrinsics_Works() + => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCheckNoneOpaqueWithOpaquePixelsTest, HwIntrinsics.AllowAll); + + [Fact] + public void CheckNonOpaque_WithOpaquePixels_WithoutSse2_Works() + => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCheckNoneOpaqueWithOpaquePixelsTest, HwIntrinsics.DisableSSE2); + + [Fact] + public void CheckNonOpaque_WithNoneOpaquePixels_WithHardwareIntrinsics_Works() + => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCheckNoneOpaqueWithNoneOpaquePixelsTest, HwIntrinsics.AllowAll); + + [Fact] + public void CheckNonOpaque_WithNoneOpaquePixels_WithoutSse2_Works() + => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunCheckNoneOpaqueWithNoneOpaquePixelsTest, HwIntrinsics.DisableSSE2); +#endif + [Theory] [WithFile(TestImages.WebP.Yuv, PixelTypes.Rgba32)] public void ConvertRgbToYuv_Works(TestImageProvider provider) @@ -233,5 +261,89 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp Assert.True(expectedU.AsSpan().SequenceEqual(u.Slice(0, expectedU.Length))); Assert.True(expectedV.AsSpan().SequenceEqual(v.Slice(0, expectedV.Length))); } + + private static void RunCheckNoneOpaqueWithNoneOpaquePixelsTest() + { + // arrange + byte[] rowBytes = + { + 122, 120, 101, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + 122, 120, 101, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 100, + 148, 158, 158, 255, + 148, 158, 158, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + }; + Span row = MemoryMarshal.Cast(rowBytes); + + // act + bool noneOpaque = YuvConversion.CheckNonOpaque(row); + + // assert + Assert.True(noneOpaque); + } + + private static void RunCheckNoneOpaqueWithOpaquePixelsTest() + { + // arrange + byte[] rowBytes = + { + 122, 120, 101, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + 122, 120, 101, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + 148, 158, 158, 255, + 171, 165, 151, 255, + 209, 208, 210, 255, + 174, 183, 189, 255, + 148, 158, 158, 255, + }; + Span row = MemoryMarshal.Cast(rowBytes); + + // act + bool noneOpaque = YuvConversion.CheckNonOpaque(row); + + // assert + Assert.False(noneOpaque); + } } }