Browse Source

Don't use Vector512 (SLOW)

pull/2793/head
James Jackson-South 4 days ago
parent
commit
a96c78d60c
  1. 2
      README.md
  2. 34
      src/ImageSharp/Common/Helpers/Numerics.cs
  3. 110
      src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs

2
README.md

@@ -8,7 +8,7 @@ SixLabors.ImageSharp
<div align="center">
[![Build Status](https://img.shields.io/github/actions/workflow/status/SixLabors/ImageSharp/build-and-test.yml?branch=main)](https://github.com/SixLabors/ImageSharp/actions)
[![Code coverage](https://codecov.io/gh/SixLabors/ImageSharp/branch/main/graph/badge.svg)](https://codecov.io/gh/SixLabors/ImageSharp)
[![codecov](https://codecov.io/gh/SixLabors/ImageSharp/graph/badge.svg?token=g2WJwz770q)](https://codecov.io/gh/SixLabors/ImageSharp)
[![License: Six Labors Split](https://img.shields.io/badge/license-Six%20Labors%20Split-%23e30183)](https://github.com/SixLabors/ImageSharp/blob/main/LICENSE)
[![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=flat&logo=twitter)](https://twitter.com/intent/tweet?hashtags=imagesharp,dotnet,oss&text=ImageSharp.+A+new+cross-platform+2D+graphics+API+in+C%23&url=https%3a%2f%2fgithub.com%2fSixLabors%2fImageSharp&via=sixlabors)

34
src/ImageSharp/Common/Helpers/Numerics.cs

@@ -1089,39 +1089,7 @@ internal static class Numerics
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Normalize(Span<float> span, float sum)
{
if (Vector512.IsHardwareAccelerated)
{
ref float startRef = ref MemoryMarshal.GetReference(span);
ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~15);
Vector512<float> sum512 = Vector512.Create(sum);
while (Unsafe.IsAddressLessThan(ref startRef, ref endRef))
{
Unsafe.As<float, Vector512<float>>(ref startRef) /= sum512;
startRef = ref Unsafe.Add(ref startRef, (nuint)16);
}
if ((span.Length & 15) >= 8)
{
Unsafe.As<float, Vector256<float>>(ref startRef) /= sum512.GetLower();
startRef = ref Unsafe.Add(ref startRef, (nuint)8);
}
if ((span.Length & 7) >= 4)
{
Unsafe.As<float, Vector128<float>>(ref startRef) /= sum512.GetLower().GetLower();
startRef = ref Unsafe.Add(ref startRef, (nuint)4);
}
endRef = ref Unsafe.Add(ref startRef, span.Length & 3);
while (Unsafe.IsAddressLessThan(ref startRef, ref endRef))
{
startRef /= sum;
startRef = ref Unsafe.Add(ref startRef, (nuint)1);
}
}
else if (Vector256.IsHardwareAccelerated)
if (Vector256.IsHardwareAccelerated)
{
ref float startRef = ref MemoryMarshal.GetReference(span);
ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~7);

110
src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs

@@ -94,97 +94,43 @@ internal readonly unsafe struct ResizeKernel
{
if (IsHardwareAccelerated)
{
if (Vector512.IsHardwareAccelerated)
{
float* bufferStart = this.bufferPtr;
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~7);
Vector512<float> result512_0 = Vector512<float>.Zero;
Vector512<float> result512_1 = Vector512<float>.Zero;
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef))
{
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef);
Vector512<float> pixels512_1 = Unsafe.As<Vector4, Vector512<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)4));
result512_0 = Vector512_.MultiplyAdd(result512_0, Vector512.Load(bufferStart), pixels512_0);
result512_1 = Vector512_.MultiplyAdd(result512_1, Vector512.Load(bufferStart + 16), pixels512_1);
bufferStart += 32;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)8);
}
result512_0 += result512_1;
if ((this.Length & 7) >= 4)
{
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef);
result512_0 = Vector512_.MultiplyAdd(result512_0, Vector512.Load(bufferStart), pixels512_0);
bufferStart += 16;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4);
}
Vector256<float> result256 = result512_0.GetLower() + result512_0.GetUpper();
if ((this.Length & 3) >= 2)
{
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
result256 = Vector256_.MultiplyAdd(result256, Vector256.Load(bufferStart), pixels256_0);
bufferStart += 8;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2);
}
Vector128<float> result128 = result256.GetLower() + result256.GetUpper();
if ((this.Length & 1) != 0)
{
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128);
}
float* bufferStart = this.bufferPtr;
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3);
Vector256<float> result256_0 = Vector256<float>.Zero;
Vector256<float> result256_1 = Vector256<float>.Zero;
return result128.AsVector4();
}
else
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef))
{
float* bufferStart = this.bufferPtr;
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3);
Vector256<float> result256_0 = Vector256<float>.Zero;
Vector256<float> result256_1 = Vector256<float>.Zero;
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
Vector256<float> pixels256_1 = Unsafe.As<Vector4, Vector256<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)2));
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef))
{
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
Vector256<float> pixels256_1 = Unsafe.As<Vector4, Vector256<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)2));
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
result256_1 = Vector256_.MultiplyAdd(result256_1, Vector256.Load(bufferStart + 8), pixels256_1);
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
result256_1 = Vector256_.MultiplyAdd(result256_1, Vector256.Load(bufferStart + 8), pixels256_1);
bufferStart += 16;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4);
}
result256_0 += result256_1;
bufferStart += 16;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4);
}
if ((this.Length & 3) >= 2)
{
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
result256_0 += result256_1;
bufferStart += 8;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2);
}
if ((this.Length & 3) >= 2)
{
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
Vector128<float> result128 = result256_0.GetLower() + result256_0.GetUpper();
bufferStart += 8;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2);
}
if ((this.Length & 1) != 0)
{
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128);
}
Vector128<float> result128 = result256_0.GetLower() + result256_0.GetUpper();
return result128.AsVector4();
if ((this.Length & 1) != 0)
{
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128);
}
return result128.AsVector4();
}
else
{
@@ -219,7 +165,7 @@ internal readonly unsafe struct ResizeKernel
{
DebugGuard.IsTrue(values.Length == this.Length, nameof(values), "ResizeKernel.Fill: values.Length != this.Length!");
if (Vector256.IsHardwareAccelerated)
if (IsHardwareAccelerated)
{
Vector4* bufferStart = (Vector4*)this.bufferPtr;
ref float valuesStart = ref MemoryMarshal.GetReference(values);

Loading…
Cancel
Save