diff --git a/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs
index b37a9dd3a6..523b0d804b 100644
--- a/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs
+++ b/src/ImageSharp/Formats/WebP/Lossless/LosslessUtils.cs
@@ -4,9 +4,13 @@
 using System;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
-
 using SixLabors.ImageSharp.Memory;
 
+#if SUPPORTS_RUNTIME_INTRINSICS
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
 namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
 {
     /// <summary>
@@ -98,7 +102,48 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
         /// <param name="pixelData">The pixel data to apply the transformation.</param>
         public static void AddGreenToBlueAndRed(Span<uint> pixelData)
         {
-            for (int i = 0; i < pixelData.Length; i++)
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Every intrinsic used below is an SSE2 instruction, so SSE2 (not SSE) is the feature to test.
+            if (Sse2.IsSupported)
+            {
+                var mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); // presumably selects 16-bit lanes 0,0,2,2 - confirm against SimdUtils
+                int numPixels = pixelData.Length;
+                int i;
+                fixed (uint* p = pixelData)
+                {
+                    for (i = 0; i + 4 <= numPixels; i += 4)
+                    {
+                        var idx = p + i;
+                        Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
+                        Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
+                        Vector128<ushort> b = Sse2.ShuffleLow(a, mask);
+                        Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g
+                        Vector128<byte> output = Sse2.Add(input.AsByte(), c.AsByte()); // byte-wise add
+                        Sse2.Store((byte*)idx, output);
+                    }
+
+                    // Fewer than four pixels remain: finish them with the scalar path.
+                    if (i != numPixels)
+                    {
+                        AddGreenToBlueAndRedSequential(pixelData.Slice(i));
+                    }
+                }
+            }
+            else
+#endif
+            {
+                AddGreenToBlueAndRedSequential(pixelData);
+            }
+        }
+
+        /// <summary>
+        /// Scalar (non-SIMD) path of <see cref="AddGreenToBlueAndRed"/>; handles one pixel per iteration.
+        /// </summary>
+        /// <param name="pixelData">The pixel data to apply the transformation.</param>
+        private static void AddGreenToBlueAndRedSequential(Span<uint> pixelData)
+        {
+            int numPixels = pixelData.Length;
+            for (int i = 0; i < numPixels; i++)
             {
                 uint argb = pixelData[i];
                 uint green = (argb >> 8) & 0xff;
@@ -109,8 +154,53 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
             }
         }
 
-        public static void SubtractGreenFromBlueAndRed(Span<uint> pixelData, int numPixels)
+        /// <summary>
+        /// Subtracts the green channel from the blue and red channels of every pixel, in place.
+        /// </summary>
+        /// <param name="pixelData">The pixel data to apply the transformation.</param>
+        public static void SubtractGreenFromBlueAndRed(Span<uint> pixelData)
+        {
+#if SUPPORTS_RUNTIME_INTRINSICS
+            // Every intrinsic used below is an SSE2 instruction, so SSE2 (not SSE) is the feature to test.
+            if (Sse2.IsSupported)
+            {
+                var mask = SimdUtils.Shuffle.MmShuffle(2, 2, 0, 0); // presumably selects 16-bit lanes 0,0,2,2 - confirm against SimdUtils
+                int numPixels = pixelData.Length;
+                int i;
+                fixed (uint* p = pixelData)
+                {
+                    for (i = 0; i + 4 <= numPixels; i += 4)
+                    {
+                        var idx = p + i;
+                        Vector128<ushort> input = Sse2.LoadVector128((ushort*)idx);
+                        Vector128<ushort> a = Sse2.ShiftRightLogical(input.AsUInt16(), 8); // 0 a 0 g
+                        Vector128<ushort> b = Sse2.ShuffleLow(a, mask);
+                        Vector128<ushort> c = Sse2.ShuffleHigh(b, mask); // 0g0g
+                        Vector128<byte> output = Sse2.Subtract(input.AsByte(), c.AsByte()); // byte-wise subtract
+                        Sse2.Store((byte*)idx, output);
+                    }
+
+                    // Fewer than four pixels remain: finish them with the scalar path.
+                    if (i != numPixels)
+                    {
+                        SubtractGreenFromBlueAndRedSequential(pixelData.Slice(i));
+                    }
+                }
+            }
+            else
+#endif
+            {
+                SubtractGreenFromBlueAndRedSequential(pixelData);
+            }
+        }
+
+        /// <summary>
+        /// Scalar (non-SIMD) path of <see cref="SubtractGreenFromBlueAndRed"/>; handles one pixel per iteration.
+        /// </summary>
+        /// <param name="pixelData">The pixel data to apply the transformation.</param>
+        private static void SubtractGreenFromBlueAndRedSequential(Span<uint> pixelData)
         {
+            int numPixels = pixelData.Length;
             for (int i = 0; i < numPixels; i++)
             {
                 uint argb = pixelData[i];
diff --git a/src/ImageSharp/Formats/WebP/Lossless/Vp8LEncoder.cs b/src/ImageSharp/Formats/WebP/Lossless/Vp8LEncoder.cs
index f1952c77ab..e0d2cd093d 100644
--- a/src/ImageSharp/Formats/WebP/Lossless/Vp8LEncoder.cs
+++ b/src/ImageSharp/Formats/WebP/Lossless/Vp8LEncoder.cs
@@ -567,7 +567,7 @@ namespace SixLabors.ImageSharp.Formats.Experimental.Webp.Lossless
         {
             this.bitWriter.PutBits(WebpConstants.TransformPresent, 1);
             this.bitWriter.PutBits((uint)Vp8LTransformType.SubtractGreen, 2);
-            LosslessUtils.SubtractGreenFromBlueAndRed(this.Bgra.GetSpan(), width * height);
+            LosslessUtils.SubtractGreenFromBlueAndRed(this.Bgra.GetSpan());
         }
 
         private void ApplyPredictFilter(int width, int height, bool usedSubtractGreen)