Browse Source

Add AVX2 version of AddVector

pull/1849/head
Brian Popow 5 years ago
parent
commit
8192e116f2
  1. 51
      src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs
  2. 2
      src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs

51
src/ImageSharp/Formats/Webp/Lossless/Vp8LHistogram.cs

@ -3,10 +3,16 @@
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
internal class Vp8LHistogram : IDeepCloneable
internal sealed class Vp8LHistogram : IDeepCloneable
{
private const uint NonTrivialSym = 0xffffffff;
@ -505,11 +511,48 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossless
return cost;
}
private static void AddVector(uint[] a, uint[] b, uint[] output, int size)
private static void AddVector(Span<uint> a, Span<uint> b, Span<uint> output, int size)
{
for (int i = 0; i < size; i++)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
output[i] = a[i] + b[i];
ref uint aRef = ref MemoryMarshal.GetReference(a);
ref uint bRef = ref MemoryMarshal.GetReference(b);
ref uint outputRef = ref MemoryMarshal.GetReference(output);
int i;
for (i = 0; i + 32 <= size; i += 32)
{
// Load values.
Vector256<uint> a0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i));
Vector256<uint> a1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i + 8));
Vector256<uint> a2 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i + 16));
Vector256<uint> a3 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref aRef, i + 24));
Vector256<uint> b0 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i));
Vector256<uint> b1 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i + 8));
Vector256<uint> b2 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i + 16));
Vector256<uint> b3 = Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref bRef, i + 24));
// Note we are adding uint32_t's as *signed* int32's (using _mm256_add_epi32). But
// that's ok since the histogram values are less than 1<<28 (max picture size).
Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i)) = Avx2.Add(a0, b0);
Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i + 8)) = Avx2.Add(a1, b1);
Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i + 16)) = Avx2.Add(a2, b2);
Unsafe.As<uint, Vector256<uint>>(ref Unsafe.Add(ref outputRef, i + 24)) = Avx2.Add(a3, b3);
}
for (; i < size; i++)
{
output[i] = a[i] + b[i];
}
}
else
#endif
{
for (int i = 0; i < size; i++)
{
output[i] = a[i] + b[i];
}
}
}
}

2
src/ImageSharp/Formats/Webp/Lossy/Vp8Histogram.cs

@ -6,7 +6,7 @@ using System.Runtime.CompilerServices;
namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
internal class Vp8Histogram
internal sealed class Vp8Histogram
{
private readonly int[] scratch = new int[16];

Loading…
Cancel
Save