Browse Source

Don't use Vector512 (SLOW)

pull/2793/head
James Jackson-South 3 months ago
parent
commit
a96c78d60c
  1. 2
      README.md
  2. 34
      src/ImageSharp/Common/Helpers/Numerics.cs
  3. 110
      src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs

2
README.md

@@ -8,7 +8,7 @@ SixLabors.ImageSharp
<div align="center"> <div align="center">
[![Build Status](https://img.shields.io/github/actions/workflow/status/SixLabors/ImageSharp/build-and-test.yml?branch=main)](https://github.com/SixLabors/ImageSharp/actions) [![Build Status](https://img.shields.io/github/actions/workflow/status/SixLabors/ImageSharp/build-and-test.yml?branch=main)](https://github.com/SixLabors/ImageSharp/actions)
[![Code coverage](https://codecov.io/gh/SixLabors/ImageSharp/branch/main/graph/badge.svg)](https://codecov.io/gh/SixLabors/ImageSharp) [![codecov](https://codecov.io/gh/SixLabors/ImageSharp/graph/badge.svg?token=g2WJwz770q)](https://codecov.io/gh/SixLabors/ImageSharp)
[![License: Six Labors Split](https://img.shields.io/badge/license-Six%20Labors%20Split-%23e30183)](https://github.com/SixLabors/ImageSharp/blob/main/LICENSE) [![License: Six Labors Split](https://img.shields.io/badge/license-Six%20Labors%20Split-%23e30183)](https://github.com/SixLabors/ImageSharp/blob/main/LICENSE)
[![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=flat&logo=twitter)](https://twitter.com/intent/tweet?hashtags=imagesharp,dotnet,oss&text=ImageSharp.+A+new+cross-platform+2D+graphics+API+in+C%23&url=https%3a%2f%2fgithub.com%2fSixLabors%2fImageSharp&via=sixlabors) [![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=flat&logo=twitter)](https://twitter.com/intent/tweet?hashtags=imagesharp,dotnet,oss&text=ImageSharp.+A+new+cross-platform+2D+graphics+API+in+C%23&url=https%3a%2f%2fgithub.com%2fSixLabors%2fImageSharp&via=sixlabors)

34
src/ImageSharp/Common/Helpers/Numerics.cs

@@ -1089,39 +1089,7 @@ internal static class Numerics
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Normalize(Span<float> span, float sum) public static void Normalize(Span<float> span, float sum)
{ {
if (Vector512.IsHardwareAccelerated) if (Vector256.IsHardwareAccelerated)
{
ref float startRef = ref MemoryMarshal.GetReference(span);
ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~15);
Vector512<float> sum512 = Vector512.Create(sum);
while (Unsafe.IsAddressLessThan(ref startRef, ref endRef))
{
Unsafe.As<float, Vector512<float>>(ref startRef) /= sum512;
startRef = ref Unsafe.Add(ref startRef, (nuint)16);
}
if ((span.Length & 15) >= 8)
{
Unsafe.As<float, Vector256<float>>(ref startRef) /= sum512.GetLower();
startRef = ref Unsafe.Add(ref startRef, (nuint)8);
}
if ((span.Length & 7) >= 4)
{
Unsafe.As<float, Vector128<float>>(ref startRef) /= sum512.GetLower().GetLower();
startRef = ref Unsafe.Add(ref startRef, (nuint)4);
}
endRef = ref Unsafe.Add(ref startRef, span.Length & 3);
while (Unsafe.IsAddressLessThan(ref startRef, ref endRef))
{
startRef /= sum;
startRef = ref Unsafe.Add(ref startRef, (nuint)1);
}
}
else if (Vector256.IsHardwareAccelerated)
{ {
ref float startRef = ref MemoryMarshal.GetReference(span); ref float startRef = ref MemoryMarshal.GetReference(span);
ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~7); ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~7);

110
src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs

@@ -94,97 +94,43 @@ internal readonly unsafe struct ResizeKernel
{ {
if (IsHardwareAccelerated) if (IsHardwareAccelerated)
{ {
if (Vector512.IsHardwareAccelerated) float* bufferStart = this.bufferPtr;
{ ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3);
float* bufferStart = this.bufferPtr; Vector256<float> result256_0 = Vector256<float>.Zero;
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~7); Vector256<float> result256_1 = Vector256<float>.Zero;
Vector512<float> result512_0 = Vector512<float>.Zero;
Vector512<float> result512_1 = Vector512<float>.Zero;
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef))
{
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef);
Vector512<float> pixels512_1 = Unsafe.As<Vector4, Vector512<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)4));
result512_0 = Vector512_.MultiplyAdd(result512_0, Vector512.Load(bufferStart), pixels512_0);
result512_1 = Vector512_.MultiplyAdd(result512_1, Vector512.Load(bufferStart + 16), pixels512_1);
bufferStart += 32;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)8);
}
result512_0 += result512_1;
if ((this.Length & 7) >= 4)
{
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef);
result512_0 = Vector512_.MultiplyAdd(result512_0, Vector512.Load(bufferStart), pixels512_0);
bufferStart += 16;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4);
}
Vector256<float> result256 = result512_0.GetLower() + result512_0.GetUpper();
if ((this.Length & 3) >= 2)
{
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
result256 = Vector256_.MultiplyAdd(result256, Vector256.Load(bufferStart), pixels256_0);
bufferStart += 8;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2);
}
Vector128<float> result128 = result256.GetLower() + result256.GetUpper();
if ((this.Length & 1) != 0)
{
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128);
}
return result128.AsVector4(); while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef))
}
else
{ {
float* bufferStart = this.bufferPtr; Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3); Vector256<float> pixels256_1 = Unsafe.As<Vector4, Vector256<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)2));
Vector256<float> result256_0 = Vector256<float>.Zero;
Vector256<float> result256_1 = Vector256<float>.Zero;
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef)) result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
{ result256_1 = Vector256_.MultiplyAdd(result256_1, Vector256.Load(bufferStart + 8), pixels256_1);
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
Vector256<float> pixels256_1 = Unsafe.As<Vector4, Vector256<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)2));
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0); bufferStart += 16;
result256_1 = Vector256_.MultiplyAdd(result256_1, Vector256.Load(bufferStart + 8), pixels256_1); rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4);
}
bufferStart += 16;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4);
}
result256_0 += result256_1;
if ((this.Length & 3) >= 2) result256_0 += result256_1;
{
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
bufferStart += 8; if ((this.Length & 3) >= 2)
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2); {
} Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
Vector128<float> result128 = result256_0.GetLower() + result256_0.GetUpper(); bufferStart += 8;
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2);
}
if ((this.Length & 1) != 0) Vector128<float> result128 = result256_0.GetLower() + result256_0.GetUpper();
{
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128);
}
return result128.AsVector4(); if ((this.Length & 1) != 0)
{
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128);
} }
return result128.AsVector4();
} }
else else
{ {
@@ -219,7 +165,7 @@ internal readonly unsafe struct ResizeKernel
{ {
DebugGuard.IsTrue(values.Length == this.Length, nameof(values), "ResizeKernel.Fill: values.Length != this.Length!"); DebugGuard.IsTrue(values.Length == this.Length, nameof(values), "ResizeKernel.Fill: values.Length != this.Length!");
if (Vector256.IsHardwareAccelerated) if (IsHardwareAccelerated)
{ {
Vector4* bufferStart = (Vector4*)this.bufferPtr; Vector4* bufferStart = (Vector4*)this.bufferPtr;
ref float valuesStart = ref MemoryMarshal.GetReference(values); ref float valuesStart = ref MemoryMarshal.GetReference(values);

Loading…
Cancel
Save