diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 9a6d974bd..94ad343c8 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -52,6 +52,8 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static readonly Vector128 TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + private static readonly Vector256 TransformColorInverseAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + private static readonly byte TransformColorInverseShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); #endif @@ -505,7 +507,38 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless public static void TransformColorInverse(Vp8LMultipliers m, Span pixelData) { #if SUPPORTS_RUNTIME_INTRINSICS - if (Sse2.IsSupported) + if (Avx2.IsSupported && pixelData.Length >= 8) + { + Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); + Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); + fixed (uint* src = pixelData) + { + int idx; + for (idx = 0; idx + 8 <= pixelData.Length; idx += 8) + { + uint* pos = src + idx; + Vector256 input = Avx.LoadVector256(pos); + Vector256 a = Avx2.And(input.AsByte(), TransformColorInverseAlphaGreenMask256); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), TransformColorInverseShuffleMask); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorInverseShuffleMask); + Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); + Vector256 e = Avx2.Add(input.AsByte(), d.AsByte()); + Vector256 f = Avx2.ShiftLeftLogical(e.AsInt16(), 8); + Vector256 g = Avx2.MultiplyHigh(f, multsb2.AsInt16()); + Vector256 h = Avx2.ShiftRightLogical(g.AsInt32(), 8); + Vector256 i = Avx2.Add(h.AsByte(), f.AsByte()); + Vector256 j = Avx2.ShiftRightLogical(i.AsInt16(), 8); + Vector256 output = Avx2.Or(j.AsByte(), a); + Avx.Store((byte*)pos, output); + } + + if (idx != pixelData.Length) + { + TransformColorInverseNoneVectorized(m, pixelData.Slice(idx)); + } + } + } + else if (Sse2.IsSupported) { Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0);