diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index f9b97c6c44..9a6d974bdd 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -42,8 +42,12 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static readonly Vector128 TransformColorAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + private static readonly Vector256 TransformColorAlphaGreenMask256 = Vector256.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + private static readonly Vector128 TransformColorRedBlueMask = Vector128.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + private static readonly Vector256 TransformColorRedBlueMask256 = Vector256.Create(255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0); + private static readonly byte TransformColorShuffleMask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); private static readonly Vector128 TransformColorInverseAlphaGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); @@ -408,7 +412,37 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless public static void TransformColor(Vp8LMultipliers m, Span data, int numPixels) { #if SUPPORTS_RUNTIME_INTRINSICS - if (Sse2.IsSupported) + if (Avx2.IsSupported && numPixels >= 8) + { + Vector256 multsrb = MkCst32(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); + Vector256 multsb2 = MkCst32(Cst5b(m.RedToBlue), 0); + fixed (uint* src = data) + { + int idx; + for (idx = 0; idx + 8 <= numPixels; idx += 8) + { + uint* pos = src + idx; + Vector256 input = Avx.LoadVector256(pos); + Vector256 a = Avx2.And(input.AsByte(), TransformColorAlphaGreenMask256); + Vector256 b = Avx2.ShuffleLow(a.AsInt16(), TransformColorShuffleMask); + Vector256 c = Avx2.ShuffleHigh(b.AsInt16(), TransformColorShuffleMask); + Vector256 d = Avx2.MultiplyHigh(c.AsInt16(), multsrb.AsInt16()); + Vector256 e = Avx2.ShiftLeftLogical(input.AsInt16(), 8); + Vector256 f = Avx2.MultiplyHigh(e.AsInt16(), multsb2.AsInt16()); + Vector256 g = Avx2.ShiftRightLogical(f.AsInt32(), 16); + Vector256 h = Avx2.Add(g.AsByte(), d.AsByte()); + Vector256 i = Avx2.And(h, TransformColorRedBlueMask256); + Vector256 output = Avx2.Subtract(input.AsByte(), i); + Avx.Store((byte*)pos, output); + } + + if (idx != numPixels) + { + TransformColorNoneVectorized(m, data.Slice(idx), numPixels - idx); + } + } + } + else if (Sse2.IsSupported) { Vector128 multsrb = MkCst16(Cst5b(m.GreenToRed), Cst5b(m.GreenToBlue)); Vector128 multsb2 = MkCst16(Cst5b(m.RedToBlue), 0); @@ -1288,6 +1322,9 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless #if SUPPORTS_RUNTIME_INTRINSICS [MethodImpl(InliningOptions.ShortMethod)] private static Vector128 MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff)); + + [MethodImpl(InliningOptions.ShortMethod)] + private static Vector256 MkCst32(int hi, int lo) => Vector256.Create((hi << 16) | (lo & 0xffff)); #endif private static uint Select(uint a, uint b, uint c, Span scratch)