diff --git a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs
index ed129dc26b..39ff0dbbb4 100644
--- a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs
+++ b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs
@@ -311,13 +311,50 @@ namespace SixLabors.ImageSharp.Formats.Webp
 
         private static void HorizontalUnfilter(Span<byte> prev, Span<byte> input, Span<byte> dst, int width)
         {
-            byte pred = (byte)(prev == null ? 0 : prev[0]);
+#if SUPPORTS_RUNTIME_INTRINSICS
+            if (Sse2.IsSupported)
+            {
+                dst[0] = (byte)(input[0] + (prev == null ? 0 : prev[0]));
+                if (width <= 1)
+                {
+                    return;
+                }
 
-            for (int i = 0; i < width; i++)
+                int i;
+                var last = Vector128.Create(dst[0], 0, 0, 0);
+                ref byte srcRef = ref MemoryMarshal.GetReference(input);
+                for (i = 1; i + 8 <= width; i += 8)
+                {
+                    var a0 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, i)), 0);
+                    Vector128<byte> a1 = Sse2.Add(a0.AsByte(), last.AsByte());
+                    Vector128<byte> a2 = Sse2.ShiftLeftLogical128BitLane(a1, 1);
+                    Vector128<byte> a3 = Sse2.Add(a1, a2);
+                    Vector128<byte> a4 = Sse2.ShiftLeftLogical128BitLane(a3, 2);
+                    Vector128<byte> a5 = Sse2.Add(a3, a4);
+                    Vector128<byte> a6 = Sse2.ShiftLeftLogical128BitLane(a5, 4);
+                    Vector128<byte> a7 = Sse2.Add(a5, a6);
+
+                    ref byte outputRef = ref Unsafe.Add(ref MemoryMarshal.GetReference(dst), i);
+                    Unsafe.As<byte, Vector64<byte>>(ref outputRef) = a7.GetLower();
+                    last = Sse2.ShiftRightLogical(a7.AsInt64(), 56).AsInt32();
+                }
+
+                for (; i < width; ++i)
+                {
+                    dst[i] = (byte)(input[i] + dst[i - 1]);
+                }
+            }
+            else
+#endif
             {
-                byte val = (byte)(pred + input[i]);
-                pred = val;
-                dst[i] = val;
+                byte pred = (byte)(prev == null ? 0 : prev[0]);
+
+                for (int i = 0; i < width; i++)
+                {
+                    byte val = (byte)(pred + input[i]);
+                    pred = val;
+                    dst[i] = val;
+                }
             }
         }
 
diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
index ebb198a2d8..e7782b0ef4 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
@@ -2,7 +2,6 @@
 // Licensed under the Apache License, Version 2.0.
 
 using System;
-using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using SixLabors.ImageSharp.Memory;