From 2d9ac58b29c40dd52eceb20f88e19a0da44c9abb Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 6 Jan 2022 17:24:16 +0100 Subject: [PATCH 1/6] AVX2 version of VerticalUnfilter --- src/ImageSharp/Formats/Webp/AlphaDecoder.cs | 31 +++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs index e63cd27b5..ed129dc26 100644 --- a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs @@ -9,6 +9,10 @@ using System.Runtime.InteropServices; using SixLabors.ImageSharp.Formats.Webp.BitReader; using SixLabors.ImageSharp.Formats.Webp.Lossless; using SixLabors.ImageSharp.Memory; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp.Formats.Webp { @@ -325,9 +329,32 @@ namespace SixLabors.ImageSharp.Formats.Webp } else { - for (int i = 0; i < width; i++) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) { - dst[i] = (byte)(prev[i] + input[i]); + int i; + int maxPos = width & ~31; + for (i = 0; i < maxPos; i += 32) + { + Vector256 a0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(input), i)); + Vector256 b0 = Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(prev), i)); + Vector256 c0 = Avx2.Add(a0.AsByte(), b0.AsByte()); + ref byte outputRef = ref Unsafe.Add(ref MemoryMarshal.GetReference(dst), i); + Unsafe.As>(ref outputRef) = c0; + } + + for (; i < width; i++) + { + dst[i] = (byte)(prev[i] + input[i]); + } + } + else +#endif + { + for (int i = 0; i < width; i++) + { + dst[i] = (byte)(prev[i] + input[i]); + } } } } From 4289b8c3984f2d4a1926c6eb26f4707e6c3e8d62 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 6 Jan 2022 17:24:46 +0100 Subject: [PATCH 2/6] Fix indenting --- tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs b/tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs index 5a8425c1f..89e65b211 100644 --- a/tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs +++ b/tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs @@ -85,11 +85,11 @@ namespace SixLabors.ImageSharp.Tests.Formats public static readonly TheoryData QuantizerNames = new() { - nameof(KnownQuantizers.Octree), - nameof(KnownQuantizers.WebSafe), - nameof(KnownQuantizers.Werner), - nameof(KnownQuantizers.Wu) - }; + nameof(KnownQuantizers.Octree), + nameof(KnownQuantizers.WebSafe), + nameof(KnownQuantizers.Werner), + nameof(KnownQuantizers.Wu) + }; [Theory] [WithFile(TestImages.Png.CalliphoraPartial, nameof(QuantizerNames), PixelTypes.Rgba32)] From d7ecfb58660585fc1bee2511a2169cbf31d23355 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 6 Jan 2022 19:36:44 +0100 Subject: [PATCH 3/6] SSE2 version of HorizontalUnfilter --- src/ImageSharp/Formats/Webp/AlphaDecoder.cs | 47 +++++++++++++++++-- .../Formats/Webp/Lossless/LosslessUtils.cs | 1 - 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs index ed129dc26..39ff0dbbb 100644 --- a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs @@ -311,13 +311,50 @@ namespace SixLabors.ImageSharp.Formats.Webp private static void HorizontalUnfilter(Span prev, Span input, Span dst, int width) { - byte pred = (byte)(prev == null ? 0 : prev[0]); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + dst[0] = (byte)(input[0] + (prev == null ? 0 : prev[0])); + if (width <= 1) + { + return; + } - for (int i = 0; i < width; i++) + int i; + var last = Vector128.Create(dst[0], 0, 0, 0); + ref byte srcRef = ref MemoryMarshal.GetReference(input); + for (i = 1; i + 8 <= width; i += 8) + { + var a0 = Vector128.Create(Unsafe.As(ref Unsafe.Add(ref srcRef, i)), 0); + Vector128 a1 = Sse2.Add(a0.AsByte(), last.AsByte()); + Vector128 a2 = Sse2.ShiftLeftLogical128BitLane(a1, 1); + Vector128 a3 = Sse2.Add(a1, a2); + Vector128 a4 = Sse2.ShiftLeftLogical128BitLane(a3, 2); + Vector128 a5 = Sse2.Add(a3, a4); + Vector128 a6 = Sse2.ShiftLeftLogical128BitLane(a5, 4); + Vector128 a7 = Sse2.Add(a5, a6); + + ref byte outputRef = ref Unsafe.Add(ref MemoryMarshal.GetReference(dst), i); + Unsafe.As>(ref outputRef) = a7.GetLower(); + last = Sse2.ShiftRightLogical(a7.AsInt64(), 56).AsInt32(); + } + + for (; i < width; ++i) + { + dst[i] = (byte)(input[i] + dst[i - 1]); + } + } + else +#endif { - byte val = (byte)(pred + input[i]); - pred = val; - dst[i] = val; + byte pred = (byte)(prev == null ? 0 : prev[0]); + + for (int i = 0; i < width; i++) + { + byte val = (byte)(pred + input[i]); + pred = val; + dst[i] = val; + } } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index ebb198a2d..e7782b0ef 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. using System; -using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Memory; From dae65033634fd814f9b2491737d51fdd0db2a243 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Fri, 7 Jan 2022 15:19:26 +0100 Subject: [PATCH 4/6] Add tests for horizontal and vertical filter without hardware intrinsics --- .../Formats/WebP/WebpDecoderTests.cs | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs index 22342e612..1c92fdf33 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs @@ -19,6 +19,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp private static MagickReferenceDecoder ReferenceDecoder => new(); + private static string TestImageLossyHorizontalFilterPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, Lossy.AlphaCompressedHorizontalFilter); + + private static string TestImageLossyVerticalFilterPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, Lossy.AlphaCompressedVerticalFilter); + private static string TestImageLossySimpleFilterPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, Lossy.SimpleFilter02); private static string TestImageLossyComplexFilterPath => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, Lossy.BikeComplexFilter); @@ -365,6 +369,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp }); #if SUPPORTS_RUNTIME_INTRINSICS + private static void RunDecodeLossyWithHorizontalFilter() + { + var provider = TestImageProvider.File(TestImageLossyHorizontalFilterPath); + using (Image image = provider.GetImage(WebpDecoder)) + { + image.DebugSave(provider); + image.CompareToOriginal(provider, ReferenceDecoder); + } + } + + private static void RunDecodeLossyWithVerticalFilter() + { + var provider = TestImageProvider.File(TestImageLossyVerticalFilterPath); + using (Image image = provider.GetImage(WebpDecoder)) + { + image.DebugSave(provider); + image.CompareToOriginal(provider, ReferenceDecoder); + } + } + private static void RunDecodeLossyWithSimpleFilterTest() { var provider = TestImageProvider.File(TestImageLossySimpleFilterPath); @@ -385,6 +409,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp } } + [Fact] + public void DecodeLossyWithHorizontalFilter_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunDecodeLossyWithHorizontalFilter, HwIntrinsics.DisableHWIntrinsic); + + [Fact] + public void DecodeLossyWithVerticalFilter_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunDecodeLossyWithVerticalFilter, HwIntrinsics.DisableHWIntrinsic); + [Fact] public void DecodeLossyWithSimpleFilterTest_WithoutHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunDecodeLossyWithSimpleFilterTest, HwIntrinsics.DisableHWIntrinsic); From d8db6539d507486e93b13bbcad12150e8bc817a4 Mon Sep 17 00:00:00 2001 From: Brian Popow <38701097+brianpopow@users.noreply.github.com> Date: Sat, 15 Jan 2022 16:11:30 +0100 Subject: [PATCH 5/6] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Günther Foidl --- src/ImageSharp/Formats/Webp/AlphaDecoder.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs index 39ff0dbbb..24104369e 100644 --- a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs @@ -314,14 +314,14 @@ namespace SixLabors.ImageSharp.Formats.Webp #if SUPPORTS_RUNTIME_INTRINSICS if (Sse2.IsSupported) { - dst[0] = (byte)(input[0] + (prev == null ? 0 : prev[0])); + dst[0] = (byte)(input[0] + (prev.IsEmpty ? 0 : prev[0])); if (width <= 1) { return; } - int i; - var last = Vector128.Create(dst[0], 0, 0, 0); + nint i; + var last = Vector128.Zero.WithElement(0, dst[0]); ref byte srcRef = ref MemoryMarshal.GetReference(input); for (i = 1; i + 8 <= width; i += 8) { @@ -347,7 +347,7 @@ namespace SixLabors.ImageSharp.Formats.Webp else #endif { - byte pred = (byte)(prev == null ? 0 : prev[0]); + byte pred = (byte)(prev.IsEmpty ? 0 : prev[0]); for (int i = 0; i < width; i++) { @@ -369,7 +369,7 @@ namespace SixLabors.ImageSharp.Formats.Webp #if SUPPORTS_RUNTIME_INTRINSICS if (Avx2.IsSupported) { - int i; + nint i; int maxPos = width & ~31; for (i = 0; i < maxPos; i += 32) { From 6fdf643d55c4140580c9a6810d77ff2f096868d8 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Sat, 15 Jan 2022 16:24:15 +0100 Subject: [PATCH 6/6] Fix build errors --- src/ImageSharp/Formats/Webp/AlphaDecoder.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs index 24104369e..b1e155c1d 100644 --- a/src/ImageSharp/Formats/Webp/AlphaDecoder.cs +++ b/src/ImageSharp/Formats/Webp/AlphaDecoder.cs @@ -321,7 +321,7 @@ namespace SixLabors.ImageSharp.Formats.Webp } nint i; - var last = Vector128.Zero.WithElement(0, dst[0]); + Vector128 last = Vector128.Zero.WithElement(0, dst[0]); ref byte srcRef = ref MemoryMarshal.GetReference(input); for (i = 1; i + 8 <= width; i += 8) { @@ -341,7 +341,7 @@ namespace SixLabors.ImageSharp.Formats.Webp for (; i < width; ++i) { - dst[i] = (byte)(input[i] + dst[i - 1]); + dst[(int)i] = (byte)(input[(int)i] + dst[(int)i - 1]); } } else @@ -360,7 +360,7 @@ namespace SixLabors.ImageSharp.Formats.Webp private static void VerticalUnfilter(Span prev, Span input, Span dst, int width) { - if (prev == null) + if (prev.IsEmpty) { HorizontalUnfilter(null, input, dst, width); } @@ -382,7 +382,7 @@ namespace SixLabors.ImageSharp.Formats.Webp for (; i < width; i++) { - dst[i] = (byte)(prev[i] + input[i]); + dst[(int)i] = (byte)(prev[(int)i] + input[(int)i]); } } else @@ -398,7 +398,7 @@ namespace SixLabors.ImageSharp.Formats.Webp private static void GradientUnfilter(Span prev, Span input, Span dst, int width) { - if (prev == null) + if (prev.IsEmpty) { HorizontalUnfilter(null, input, dst, width); }