From c8f1f2c89df590e5a8804ddce21c03696fd8b468 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Foidl?= Date: Thu, 27 Jul 2023 22:26:20 +0200 Subject: [PATCH] Simplified check if there are any non-equal bytes I remembered that movemask isn't the fastest and that ptest (TestZ in .NET terms) is faster, but current benchmarks didn't confirm this, and Intel's instruction tables don't show any benefit in terms of latency or throughput either. Thus the check is simplified: the mask is extracted once and compared against zero. --- src/ImageSharp/Formats/Gif/GifEncoderCore.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs index 5d18823274..be08c0da90 100644 --- a/src/ImageSharp/Formats/Gif/GifEncoderCore.cs +++ b/src/ImageSharp/Formats/Gif/GifEncoderCore.cs @@ -435,11 +435,11 @@ internal sealed class GifEncoderCore : IImageEncoderInternals { Vector256<byte> vec = Vector256.LoadUnsafe(ref rowPtr, (nuint)x); Vector256<byte> notEquals = ~Vector256.Equals(vec, trimmableVec256); + uint mask = notEquals.ExtractMostSignificantBits(); - if (notEquals != Vector256<byte>.Zero) + if (mask != 0) { isTransparentRow = false; - uint mask = notEquals.ExtractMostSignificantBits(); nint start = x + (nint)uint.TrailingZeroCount(mask); nint end = (nint)uint.LeadingZeroCount(mask); @@ -463,11 +463,11 @@ internal sealed class GifEncoderCore : IImageEncoderInternals { Vector128<byte> vec = Vector128.LoadUnsafe(ref rowPtr, (nuint)x); Vector128<byte> notEquals = ~Vector128.Equals(vec, trimmableVec); + uint mask = notEquals.ExtractMostSignificantBits(); - if (notEquals != Vector128<byte>.Zero) + if (mask != 0) { isTransparentRow = false; - uint mask = notEquals.ExtractMostSignificantBits(); nint start = x + (nint)uint.TrailingZeroCount(mask); nint end = (nint)uint.LeadingZeroCount(mask) - Vector128<byte>.Count; @@ -493,11 +493,11 @@ internal sealed class GifEncoderCore : IImageEncoderInternals Vector256<byte> vec = Unsafe.ReadUnaligned<Vector256<byte>>(ref Unsafe.Add(ref rowPtr, x)); Vector256<byte>
notEquals = Avx2.CompareEqual(vec, trimmableVec256); notEquals = Avx2.Xor(notEquals, Vector256<byte>.AllBitsSet); + int mask = Avx2.MoveMask(notEquals); - if (!Avx.TestZ(notEquals, notEquals)) + if (mask != 0) { isTransparentRow = false; - int mask = Avx2.MoveMask(notEquals); nint start = x + (nint)(uint)BitOperations.TrailingZeroCount(mask); nint end = (nint)(uint)BitOperations.LeadingZeroCount((uint)mask); @@ -522,11 +522,11 @@ internal sealed class GifEncoderCore : IImageEncoderInternals Vector128<byte> vec = Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.Add(ref rowPtr, x)); Vector128<byte> notEquals = Sse2.CompareEqual(vec, trimmableVec); notEquals = Sse2.Xor(notEquals, Vector128<byte>.AllBitsSet); + int mask = Sse2.MoveMask(notEquals); - if (!Sse41.TestZ(notEquals, notEquals)) + if (mask != 0) { isTransparentRow = false; - int mask = Sse2.MoveMask(notEquals); nint start = x + (nint)(uint)BitOperations.TrailingZeroCount(mask); nint end = (nint)(uint)BitOperations.LeadingZeroCount((uint)mask) - Vector128<byte>.Count;