From 8806d6bd24fe58910b68b22d925a3e92c5385e5a Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 12 Nov 2021 10:14:34 +0100 Subject: [PATCH] Add Avx version of CollectColorRedTransforms --- .../Formats/Webp/Lossless/PredictorEncoder.cs | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs index d11102c404..48c02f0d36 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs @@ -39,6 +39,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static readonly Vector128 CollectColorRedTransformsAndMask = Vector128.Create((short)0xff).AsByte(); + private static readonly Vector256 CollectColorRedTransformsGreenMask256 = Vector256.Create(0x00ff00).AsByte(); + + private static readonly Vector256 CollectColorRedTransformsAndMask256 = Vector256.Create((short)0xff).AsByte(); + private static readonly Vector128 CollectColorBlueTransformsGreenMask = Vector128.Create(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); private static readonly Vector128 CollectColorBlueTransformsGreenBlueMask = Vector128.Create(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); @@ -1071,7 +1075,48 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless private static void CollectColorRedTransforms(Span bgra, int stride, int tileWidth, int tileHeight, int greenToRed, Span histo) { #if SUPPORTS_RUNTIME_INTRINSICS - if (Sse41.IsSupported) + if (Avx2.IsSupported && tileWidth > 16) + { + var multsg = Vector256.Create(LosslessUtils.Cst5b(greenToRed)); + const int span = 16; + Span values = stackalloc ushort[span]; + for (int y = 0; y < tileHeight; y++) + { + Span srcSpan = bgra.Slice(y * stride); + ref uint inputRef = ref MemoryMarshal.GetReference(srcSpan); + for (int x = 0; x + span <= tileWidth; x += span) + { + int input0Idx = x; + int input1Idx = x + (span / 2); + Vector256 input0 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte(); + Vector256 input1 = Unsafe.As>(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte(); + Vector256 g0 = Avx2.And(input0, CollectColorRedTransformsGreenMask256); // 0 0 | g 0 + Vector256 g1 = Avx2.And(input1, CollectColorRedTransformsGreenMask256); + Vector256 g = Avx2.PackUnsignedSaturate(g0.AsInt32(), g1.AsInt32()); // g 0 + Vector256 a0 = Avx2.ShiftRightLogical(input0.AsInt32(), 16); // 0 0 | x r + Vector256 a1 = Avx2.ShiftRightLogical(input1.AsInt32(), 16); + Vector256 a = Avx2.PackUnsignedSaturate(a0, a1); // x r + Vector256 b = Avx2.MultiplyHigh(g.AsInt16(), multsg); // x dr + Vector256 c = Avx2.Subtract(a.AsByte(), b.AsByte()); // x r' + Vector256 d = Avx2.And(c, CollectColorRedTransformsAndMask256); // 0 r' + + ref ushort outputRef = ref MemoryMarshal.GetReference(values); + Unsafe.As>(ref outputRef) = d.AsUInt16(); + + for (int i = 0; i < span; i++) + { + ++histo[values[i]]; + } + } + } + + int leftOver = tileWidth & (span - 1); + if (leftOver > 0) + { + CollectColorRedTransformsNoneVectorized(bgra.Slice(tileWidth - leftOver), stride, leftOver, tileHeight, greenToRed, histo); + } + } + else if (Sse41.IsSupported) { var multsg = Vector128.Create(LosslessUtils.Cst5b(greenToRed)); const int span = 8;