From ea031c803d71b3a69101aa56aa5d7391b53cf605 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 18 Dec 2020 11:36:16 +0100 Subject: [PATCH 1/2] Attempt to use SSE in Subtract-Green Transform --- .../Formats/WebP/Lossless/LosslessUtils.cs | 80 ++++++++++++++++++- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs index b37a9dd3a6..7121a06cd2 100644 --- a/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs @@ -4,9 +4,13 @@ using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; - using SixLabors.ImageSharp.Memory; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif + namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless { /// @@ -98,7 +102,42 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless /// The pixel data to apply the transformation. public static void AddGreenToBlueAndRed(Span pixelData) { - for (int i = 0; i < pixelData.Length; i++) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse.IsSupported) + { + var mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); + int numPixels = pixelData.Length; + int i; + fixed (uint* p = pixelData) + { + for (i = 0; i < numPixels; i += 4) + { + var idx = (ushort*)p + i; + Vector128 input = Sse2.LoadVector128(idx); + Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g + Vector128 b = Sse2.ShuffleLow(a, mask); + Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g + Vector128 output = Sse2.Add(input, c); + Sse2.Store(idx, output); + } + + if (i != numPixels) + { + AddGreenToBlueAndRedSequential(pixelData.Slice(i)); + } + } + } + else +#endif + { + AddGreenToBlueAndRedSequential(pixelData); + } + } + + private static void AddGreenToBlueAndRedSequential(Span pixelData) + { + int numPixels = pixelData.Length; + for (int i = 0; i < numPixels; i++) { uint argb = pixelData[i]; uint green = (argb >> 8) & 0xff; @@ -109,8 +148,43 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless } } - public static void SubtractGreenFromBlueAndRed(Span pixelData, int numPixels) + public static void SubtractGreenFromBlueAndRed(Span pixelData) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse.IsSupported) + { + var mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); + int numPixels = pixelData.Length; + int i; + fixed (uint* p = pixelData) + { + for (i = 0; i < numPixels; i += 4) + { + var idx = (ushort*)p + i; + Vector128 input = Sse2.LoadVector128(idx); + Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g + Vector128 b = Sse2.ShuffleLow(a, mask); + Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g + Vector128 output = Sse2.Subtract(input, c); + Sse2.Store(idx, output); + } + + if (i != numPixels) + { + SubtractGreenFromBlueAndRedSequential(pixelData.Slice(i)); + } + } + } + else +#endif + { + SubtractGreenFromBlueAndRedSequential(pixelData); + } + } + + private static void SubtractGreenFromBlueAndRedSequential(Span pixelData) { + int numPixels = pixelData.Length; for (int i = 0; i < numPixels; i++) { uint argb = pixelData[i]; From 469c5d62640641a7f2220aab24dcef36928bbd36 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 18 Dec 2020 12:38:34 +0100 Subject: [PATCH 2/2] Sse2.Subtract and Sse2.Add need to operate on bytes --- .../Formats/WebP/Lossless/LosslessUtils.cs | 20 +++++++++---------- .../Formats/WebP/Lossless/Vp8LEncoder.cs | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs index 7121a06cd2..523b0d804b 100644 --- a/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs @@ -110,15 +110,15 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless int i; fixed (uint* p = pixelData) { - for (i = 0; i < numPixels; i += 4) + for (i = 0; i + 4 <= numPixels; i += 4) { - var idx = (ushort*)p + i; - Vector128 input = Sse2.LoadVector128(idx); + var idx = p + i; + Vector128 input = Sse2.LoadVector128((ushort*)idx); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g Vector128 b = Sse2.ShuffleLow(a, mask); Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g - Vector128 output = Sse2.Add(input, c); - Sse2.Store(idx, output); + Vector128 output = Sse2.Add(input.AsByte(), c.AsByte()); + Sse2.Store((byte*)idx, output); } if (i != numPixels) @@ -158,15 +158,15 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless int i; fixed (uint* p = pixelData) { - for (i = 0; i < numPixels; i += 4) + for (i = 0; i + 4 <= numPixels; i += 4) { - var idx = (ushort*)p + i; - Vector128 input = Sse2.LoadVector128(idx); + var idx = p + i; + Vector128 input = Sse2.LoadVector128((ushort*)idx); Vector128 a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g Vector128 b = Sse2.ShuffleLow(a, mask); Vector128 c = Sse2.ShuffleHigh(b, mask); // 0g0g - Vector128 output = Sse2.Subtract(input, c); - Sse2.Store(idx, output); + Vector128 output = Sse2.Subtract(input.AsByte(), c.AsByte()); + Sse2.Store((byte*)idx, output); } if (i != numPixels) diff --git a/src/ImageSharp/Formats/WebP/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/WebP/Lossless/Vp8LEncoder.cs index f1952c77ab..e0d2cd093d 100644 --- a/src/ImageSharp/Formats/WebP/Lossless/Vp8LEncoder.cs +++ b/src/ImageSharp/Formats/WebP/Lossless/Vp8LEncoder.cs @@ -567,7 +567,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless { this.bitWriter.PutBits(WebpConstants.TransformPresent, 1); this.bitWriter.PutBits((uint)Vp8LTransformType.SubtractGreen, 2); - LosslessUtils.SubtractGreenFromBlueAndRed(this.Bgra.GetSpan(), width * height); + LosslessUtils.SubtractGreenFromBlueAndRed(this.Bgra.GetSpan()); } private void ApplyPredictFilter(int width, int height, bool usedSubtractGreen)