From 8192e116f2b88eb65f80a366c63e1abee6415915 Mon Sep 17 00:00:00 2001 From: Brian Popow Date: Tue, 23 Nov 2021 15:09:48 +0100 Subject: [PATCH] Add AVX2 version of AddVector --- .../Formats/Webp/Lossless/Vp8LHistogram.cs | 51 +++++++++++++++++-- .../Formats/Webp/Lossy/Vp8Histogram.cs | 2 +- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs index bdb53f5c6a..ac8cc0f655 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs @@ -3,10 +3,16 @@ using System; using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp.Formats.Webp.Lossless { - internal class Vp8LHistogram : IDeepCloneable + internal sealed class Vp8LHistogram : IDeepCloneable { private const uint NonTrivialSym = 0xffffffff; @@ -505,11 +511,48 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless return cost; } - private static void AddVector(uint[] a, uint[] b, uint[] output, int size) + private static void AddVector(Span<uint> a, Span<uint> b, Span<uint> output, int size) { - for (int i = 0; i < size; i++) +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported) { - output[i] = a[i] + b[i]; + ref uint aRef = ref MemoryMarshal.GetReference(a); + ref uint bRef = ref MemoryMarshal.GetReference(b); + ref uint outputRef = ref MemoryMarshal.GetReference(output); + int i; + + for (i = 0; i + 32 <= size; i += 32) + { + // Load values. 
+ Vector256<uint> a0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i)); + Vector256<uint> a1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i + 8)); + Vector256<uint> a2 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i + 16)); + Vector256<uint> a3 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i + 24)); + Vector256<uint> b0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i)); + Vector256<uint> b1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i + 8)); + Vector256<uint> b2 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i + 16)); + Vector256<uint> b3 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i + 24)); + + // Note we are adding uint32_t's as *signed* int32's (using _mm256_add_epi32). But + // that's ok since the histogram values are less than 1<<28 (max picture size). + Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i)) = Avx2.Add(a0, b0); + Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i + 8)) = Avx2.Add(a1, b1); + Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i + 16)) = Avx2.Add(a2, b2); + Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i + 24)) = Avx2.Add(a3, b3); + } + + for (; i < size; i++) + { + output[i] = a[i] + b[i]; + } + } + else +#endif + { + for (int i = 0; i < size; i++) + { + output[i] = a[i] + b[i]; + } } } } diff --git a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs index 6e724e4758..89e7baff39 100644 --- a/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs +++ b/src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs @@ -6,7 +6,7 @@ using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.Formats.Webp.Lossy { - internal class Vp8Histogram + internal sealed class Vp8Histogram { private readonly int[] scratch = new int[16];