From 0c8dbeae8c6854e75ccefa3902df85f01160f2dc Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Thu, 24 Feb 2022 13:58:16 +0100 Subject: [PATCH] Add Avx version of up filter --- .../Formats/Png/Filters/UpFilter.cs | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs index 2d2a442a1..20a828e52 100644 --- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs @@ -31,7 +31,11 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline)); #if SUPPORTS_RUNTIME_INTRINSICS - if (Sse2.IsSupported) + if (Avx2.IsSupported) + { + DecodeAvx2(scanline, previousScanline); + } + else if (Sse2.IsSupported) { DecodeSse2(scanline, previousScanline); } @@ -43,6 +47,38 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters } #if SUPPORTS_RUNTIME_INTRINSICS + private static void DecodeAvx2(Span scanline, Span previousScanline) + { + ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); + ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); + + // Up(x) + Prior(x) + int rb = scanline.Length; + int offset = 1; + const int bytesPerBatch = 32; + while (rb >= bytesPerBatch) + { + ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); + Vector256 current = Unsafe.As>(ref scanRef); + Vector256 up = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, offset)); + + Vector256 sum = Avx2.Add(up, current); + Unsafe.As>(ref scanRef) = sum; + + offset += bytesPerBatch; + rb -= bytesPerBatch; + } + + // Handle left over. + for (int i = offset; i < scanline.Length; i++) + { + ref byte scan = ref Unsafe.Add(ref scanBaseRef, offset); + byte above = Unsafe.Add(ref prevBaseRef, offset); + scan = (byte)(scan + above); + offset++; + } + } + private static void DecodeSse2(Span scanline, Span previousScanline) { ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);