From 8b8871b3ba75581ee2ff5f3fcb294bd640743136 Mon Sep 17 00:00:00 2001
From: Brian Popow
Date: Sun, 7 Nov 2021 16:39:42 +0100
Subject: [PATCH] Make Mean16x4 static and move to LossyUtils

---
Review notes (not part of the commit message):
- The line structure of this patch was restored and generic type arguments
  are written out.
- The copy of Mean16x4Mask added to LossyUtils uses
  Vector128.Create((short)0x00ff). The bare literal 0x00ff selects the int
  overload, whose byte pattern is FF 00 00 00; the SSE2 path then dropped the
  third byte of every four input bytes and disagreed with the scalar fallback.
  The lines removed from Vp8EncIterator are left exactly as they were.
- Hunk sizes and the diffstat account for the added comments; the blob ids on
  the LossyUtils "index" line still name the original revisions.

 .../Formats/Webp/Lossy/LossyUtils.cs          | 76 ++++++++++++++++++-
 .../Formats/Webp/Lossy/Vp8EncIterator.cs      | 72 +-----------------
 2 files changed, 78 insertions(+), 70 deletions(-)

diff --git a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
index d5db3dffa..c3f6e522a 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs
@@ -4,12 +4,22 @@
 using System;
 using System.Buffers.Binary;
 using System.Runtime.CompilerServices;
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif

 // ReSharper disable InconsistentNaming
 namespace SixLabors.ImageSharp.Formats.Webp.Lossy
 {
-    internal static class LossyUtils
+    internal static unsafe class LossyUtils
     {
+#if SUPPORTS_RUNTIME_INTRINSICS
+        // Keeps the low byte of every 16-bit lane. The (short) cast is required: a bare 0x00ff binds to
+        // Vector128.Create(int), which would keep only one byte out of every four.
+        private static readonly Vector128<byte> Mean16x4Mask = Vector128.Create((short)0x00ff).AsByte();
+#endif
+
         [MethodImpl(InliningOptions.ShortMethod)]
         public static int Vp8Sse16X16(Span<byte> a, Span<byte> b) => GetSse(a, b, 16, 16);

@@ -801,6 +811,70 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
             FilterLoop24(v, offsetPlus4, 1, stride, 8, thresh, ithresh, hevThresh);
         }

+        /// <summary>
+        /// Sums the samples of four side-by-side 4x4 blocks read from a 16x4 strip.
+        /// </summary>
+        /// <param name="input">The strip; consecutive rows are <see cref="WebpConstants.Bps"/> bytes apart.</param>
+        /// <param name="dc">Receives the four block sums in elements 0 to 3.</param>
+        /// <param name="tmp">Scratch space; the SSE2 path clears it and stores eight values into it.</param>
+        public static void Mean16x4(Span<byte> input, Span<uint> dc, Span<ushort> tmp)
+        {
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Sse2.IsSupported)
+            {
+#pragma warning disable SA1503 // Braces should not be omitted
+                tmp.Clear();
+                fixed (byte* inputPtr = input)
+                fixed (ushort* tmpPtr = tmp)
+                {
+                    Vector128<byte> a0 = Sse2.LoadVector128(inputPtr);
+                    Vector128<byte> a1 = Sse2.LoadVector128(inputPtr + WebpConstants.Bps);
+                    Vector128<byte> a2 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 2));
+                    Vector128<byte> a3 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 3));
+                    Vector128<short> b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte
+                    Vector128<short> b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8);
+                    Vector128<short> b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8);
+                    Vector128<short> b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8);
+                    Vector128<byte> c0 = Sse2.And(a0, Mean16x4Mask); // lo byte
+                    Vector128<byte> c1 = Sse2.And(a1, Mean16x4Mask);
+                    Vector128<byte> c2 = Sse2.And(a2, Mean16x4Mask);
+                    Vector128<byte> c3 = Sse2.And(a3, Mean16x4Mask);
+                    Vector128<int> d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32());
+                    Vector128<int> d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32());
+                    Vector128<int> d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32());
+                    Vector128<int> d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32());
+                    Vector128<int> e0 = Sse2.Add(d0, d1);
+                    Vector128<int> e1 = Sse2.Add(d2, d3);
+                    Vector128<int> f0 = Sse2.Add(e0, e1);
+                    Sse2.Store(tmpPtr, f0.AsUInt16());
+                }
+#pragma warning restore SA1503 // Braces should not be omitted
+
+                dc[0] = (uint)(tmp[1] + tmp[0]);
+                dc[1] = (uint)(tmp[3] + tmp[2]);
+                dc[2] = (uint)(tmp[5] + tmp[4]);
+                dc[3] = (uint)(tmp[7] + tmp[6]);
+            }
+            else
+#endif
+            {
+                for (int k = 0; k < 4; k++)
+                {
+                    uint avg = 0;
+                    for (int y = 0; y < 4; y++)
+                    {
+                        for (int x = 0; x < 4; x++)
+                        {
+                            avg += input[x + (y * WebpConstants.Bps)];
+                        }
+                    }
+
+                    dc[k] = avg;
+                    input = input.Slice(4); // go to next 4x4 block.
+                }
+            }
+        }
+
         [MethodImpl(InliningOptions.ShortMethod)]
         public static uint LoadUv(byte u, byte v) =>
             (uint)(u | (v << 16)); // We process u and v together stashed into 32bit(16bit each).
diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
index 489977cb8..57e18832e 100644
--- a/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
+++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8EncIterator.cs
@@ -2,10 +2,6 @@
 // Licensed under the Apache License, Version 2.0.

 using System;
-#if SUPPORTS_RUNTIME_INTRINSICS
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
-#endif

 namespace SixLabors.ImageSharp.Formats.Webp.Lossy
 {
@@ -13,7 +9,7 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
     /// Iterator structure to iterate through macroblocks, pointing to the
     /// right neighbouring data (samples, predictions, contexts, ...)
     /// </summary>
-    internal unsafe class Vp8EncIterator
+    internal class Vp8EncIterator
     {
         public const int YOffEnc = 0;

@@ -33,10 +29,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy

         private readonly int mbh;

-#if SUPPORTS_RUNTIME_INTRINSICS
-        private static readonly Vector128<byte> Mean16x4Mask = Vector128.Create(0x00ff).AsByte();
-#endif
-
         /// <summary>
         /// Stride of the prediction plane(=4*mbw + 1).
         /// </summary>
@@ -371,10 +363,10 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
             uint m2;
             for (k = 0; k < 16; k += 4)
             {
-                this.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.Slice(k, 4), tmp);
+                LossyUtils.Mean16x4(this.YuvIn.AsSpan(YOffEnc + (k * WebpConstants.Bps)), dc.Slice(k, 4), tmp);
             }

-            for (m = 0, m2 = 0, k = 0; k < 16; ++k)
+            for (m = 0, m2 = 0, k = 0; k < 16; k++)
             {
                 m += dc[k];
                 m2 += dc[k] * dc[k];
@@ -832,64 +824,6 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
             this.Nz[this.nzIdx] = nz;
         }

-        private void Mean16x4(Span<byte> input, Span<uint> dc, Span<ushort> tmp)
-        {
-#if SUPPORTS_RUNTIME_INTRINSICS
-            if (Sse2.IsSupported)
-            {
-#pragma warning disable SA1503 // Braces should not be omitted
-                tmp.Clear();
-                fixed (byte* inputPtr = input)
-                fixed (ushort* tmpPtr = tmp)
-                {
-                    Vector128<byte> a0 = Sse2.LoadVector128(inputPtr);
-                    Vector128<byte> a1 = Sse2.LoadVector128(inputPtr + WebpConstants.Bps);
-                    Vector128<byte> a2 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 2));
-                    Vector128<byte> a3 = Sse2.LoadVector128(inputPtr + (WebpConstants.Bps * 3));
-                    Vector128<short> b0 = Sse2.ShiftRightLogical(a0.AsInt16(), 8); // hi byte
-                    Vector128<short> b1 = Sse2.ShiftRightLogical(a1.AsInt16(), 8);
-                    Vector128<short> b2 = Sse2.ShiftRightLogical(a2.AsInt16(), 8);
-                    Vector128<short> b3 = Sse2.ShiftRightLogical(a3.AsInt16(), 8);
-                    Vector128<byte> c0 = Sse2.And(a0, Mean16x4Mask); // lo byte
-                    Vector128<byte> c1 = Sse2.And(a1, Mean16x4Mask);
-                    Vector128<byte> c2 = Sse2.And(a2, Mean16x4Mask);
-                    Vector128<byte> c3 = Sse2.And(a3, Mean16x4Mask);
-                    Vector128<int> d0 = Sse2.Add(b0.AsInt32(), c0.AsInt32());
-                    Vector128<int> d1 = Sse2.Add(b1.AsInt32(), c1.AsInt32());
-                    Vector128<int> d2 = Sse2.Add(b2.AsInt32(), c2.AsInt32());
-                    Vector128<int> d3 = Sse2.Add(b3.AsInt32(), c3.AsInt32());
-                    Vector128<int> e0 = Sse2.Add(d0, d1);
-                    Vector128<int> e1 = Sse2.Add(d2, d3);
-                    Vector128<int> f0 = Sse2.Add(e0, e1);
-                    Sse2.Store(tmpPtr, f0.AsUInt16());
-                }
-#pragma warning restore SA1503 // Braces should not be omitted
-
-                dc[0] = (uint)(tmp[1] + tmp[0]);
-                dc[1] = (uint)(tmp[3] + tmp[2]);
-                dc[2] = (uint)(tmp[5] + tmp[4]);
-                dc[3] = (uint)(tmp[7] + tmp[6]);
-            }
-            else
-#endif
-            {
-                for (int k = 0; k < 4; k++)
-                {
-                    uint avg = 0;
-                    for (int y = 0; y < 4; y++)
-                    {
-                        for (int x = 0; x < 4; x++)
-                        {
-                            avg += input[x + (y * WebpConstants.Bps)];
-                        }
-                    }
-
-                    dc[k] = avg;
-                    input = input.Slice(4); // go to next 4x4 block.
-                }
-            }
-        }
-
         private void ImportBlock(Span<byte> src, int srcStride, Span<byte> dst, int w, int h, int size)
         {
             int dstIdx = 0;