Browse Source

fixed benchmarks and optimized implementation

af/merge-core
Anton Firszov 8 years ago
parent
commit
3e92875a9a
  1. 61
      src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
  2. 67
      tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
  3. 197
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
  4. 52
      tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs
  5. 6
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

61
src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs

@ -39,9 +39,8 @@ namespace SixLabors.ImageSharp
ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));
ref Vector<uint> destBaseU = ref Unsafe.As<Vector<float>, Vector<uint>>(ref destBase);
const float Scale = 1f / 255f;
var scale = new Vector<float>(1f / 255f);
for (int i = 0; i < n; i++)
{
@ -51,26 +50,28 @@ namespace SixLabors.ImageSharp
Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1);
Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3);
ref Vector<uint> d = ref Unsafe.Add(ref destBaseU, i * 4);
d = w0;
Unsafe.Add(ref d, 1) = w1;
Unsafe.Add(ref d, 2) = w2;
Unsafe.Add(ref d, 3) = w3;
}
n = dest.Length / Vector<float>.Count;
Vector<float> f0 = ConvertToSingle(w0, scale);
Vector<float> f1 = ConvertToSingle(w1, scale);
Vector<float> f2 = ConvertToSingle(w2, scale);
Vector<float> f3 = ConvertToSingle(w3, scale);
for (int i = 0; i < n; i++)
{
ref Vector<float> df = ref Unsafe.Add(ref destBase, i);
ref Vector<uint> du = ref Unsafe.As<Vector<float>, Vector<uint>>(ref df);
Vector<float> v = Vector.ConvertToSingle(du);
v *= Scale;
df = v;
ref Vector<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}
/// <summary>
/// Converts every lane of <paramref name="u"/> to <see cref="float"/> and multiplies the result by <paramref name="scale"/>.
/// </summary>
/// <param name="u">The integer lanes to convert.</param>
/// <param name="scale">The per-lane factor applied after the conversion.</param>
/// <returns>The converted and scaled lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToSingle(Vector<uint> u, Vector<float> scale)
{
    // The lanes are reinterpreted as signed 32-bit integers before converting,
    // so any lane above int.MaxValue would come out negative.
    Vector<int> signedLanes = Vector.AsVectorInt32(u);
    return Vector.ConvertToSingle(signedLanes) * scale;
}
/// <summary>
/// A variant of <see cref="SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows"/>, which is faster on new .NET runtime.
/// </summary>
@ -92,26 +93,21 @@ namespace SixLabors.ImageSharp
ref Vector<float> sourceBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(source));
ref Vector<byte> destBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(dest));
Vector<float> scale = new Vector<float>(255);
for (int i = 0; i < n; i++)
{
ref Vector<float> s = ref Unsafe.Add(ref sourceBase, i * 4);
Vector<float> f0 = s;
f0 = Clamp(f0);
Vector<float> f1 = Unsafe.Add(ref s, 1);
f1 = Clamp(f1);
Vector<float> f2 = Unsafe.Add(ref s, 2);
f2 = Clamp(f2);
Vector<float> f3 = Unsafe.Add(ref s, 3);
f3 = Clamp(f3);
Vector<uint> w0 = Vector.ConvertToUInt32(f0 * 255f);
Vector<uint> w1 = Vector.ConvertToUInt32(f1 * 255f);
Vector<uint> w2 = Vector.ConvertToUInt32(f2 * 255f);
Vector<uint> w3 = Vector.ConvertToUInt32(f3 * 255f);
Vector<uint> w0 = ConvertToUInt32(f0, scale);
Vector<uint> w1 = ConvertToUInt32(f1, scale);
Vector<uint> w2 = ConvertToUInt32(f2, scale);
Vector<uint> w3 = ConvertToUInt32(f3, scale);
Vector<ushort> u0 = Vector.Narrow(w0, w1);
Vector<ushort> u1 = Vector.Narrow(w2, w3);
@ -123,9 +119,12 @@ namespace SixLabors.ImageSharp
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> Clamp(Vector<float> x)
private static Vector<uint> ConvertToUInt32(Vector<float> vf, Vector<float> scale)
{
return Vector.Min(Vector.Max(x, Vector<float>.Zero), Vector<float>.One);
vf = Vector.Min(Vector.Max(vf, Vector<float>.Zero), Vector<float>.One);
vf *= scale;
Vector<int> vi = Vector.ConvertToInt32(vf);
return Vector.AsVectorUInt32(vi);
}
}
}

67
tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs

@ -26,7 +26,8 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Params(
//64,
2048)]
2048
)]
public int Count { get; set; }
[GlobalSetup]
@ -43,7 +44,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
this.source.Dispose();
}
[Benchmark]
//[Benchmark]
public void PerElement()
{
ref Vector4 s = ref MemoryMarshal.GetReference(this.source.GetSpan());
@ -55,14 +56,14 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
}
}
[Benchmark(Baseline = true)]
public void CommonBulk()
[Benchmark]
public void PixelOperations_Base()
{
new PixelOperations<TPixel>().PackFromVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
[Benchmark]
public void OptimizedBulk()
public void PixelOperations_Specialized()
{
PixelOperations<TPixel>.Instance.PackFromVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
@ -70,7 +71,30 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
public class PackFromVector4_Rgba32 : PackFromVector4<Rgba32>
{
//[Benchmark]
/// <summary>
/// Benchmark: packs the source <see cref="Vector4"/> values into <see cref="Rgba32"/> pixels
/// with a plain per-element loop (scale, round, clamp, truncate to bytes).
/// </summary>
[Benchmark]
public void FastDefault()
{
ref Vector4 sBase = ref this.source.GetSpan()[0];
ref Rgba32 dBase = ref this.destination.GetSpan()[0];
Vector4 maxBytes = new Vector4(255);
// Bias added before the truncating byte casts below, so values round to nearest instead of down.
Vector4 half = new Vector4(0.5f);
for (int i = 0; i < this.Count; i++)
{
Vector4 v = Unsafe.Add(ref sBase, i);
// Scale by 255 (presumably the source components are normalized to [0, 1] - confirm),
// add the rounding bias, then clamp to the byte range.
v *= maxBytes;
v += half;
v = Vector4.Clamp(v, Vector4.Zero, maxBytes);
ref Rgba32 d = ref Unsafe.Add(ref dBase, i);
d.R = (byte)v.X;
d.G = (byte)v.Y;
d.B = (byte)v.Z;
d.A = (byte)v.W;
}
}
[Benchmark(Baseline = true)]
public void BulkConvertNormalizedFloatToByteClampOverflows()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());
@ -88,29 +112,16 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
SimdUtils.ExtendedIntrinsics.BulkConvertNormalizedFloatToByteClampOverflows(sBytes, dFloats);
}
// TODO: Check again later!
// RESULTS:
//
// BenchmarkDotNet=v0.10.14, OS=Windows 10.0.17134
// Intel Core i7-7700HQ CPU 2.80GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores
// Frequency=2742187 Hz, Resolution=364.6724 ns, Timer=TSC
// .NET Core SDK=2.1.400-preview-009063
// [Host] : .NET Core 2.1.1 (CoreCLR 4.6.26606.02, CoreFX 4.6.26606.05), 64bit RyuJIT
// Job-XIFINS : .NET Framework 4.7.1 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3190.0
// Job-RTQZPN : .NET Core 2.1.1 (CoreCLR 4.6.26606.02, CoreFX 4.6.26606.05), 64bit RyuJIT
//
// LaunchCount=1 TargetCount=3 WarmupCount=3
// Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Allocated |
// ----------------------------------------------------------------- |-------- |------ |----------:|----------:|----------:|-------:|---------:|----------:|
// FastDefault | Clr | 2048 | 15.989 us | 6.1384 us | 0.3468 us | 4.07 | 0.08 | 0 B |
// BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 3.931 us | 0.6264 us | 0.0354 us | 1.00 | 0.00 | 0 B |
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 2.100 us | 0.4717 us | 0.0267 us | 0.53 | 0.01 | 0 B |
//
// Method | Runtime | Count | Mean | Error | StdDev | Scaled | ScaledSD | Allocated |
// ----------------------------------------------------------------- |-------- |------ |----------:|-----------:|----------:|-------:|---------:|----------:|
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Clr | 2048 | 3.755 us | 0.8959 us | 0.0506 us | 0.22 | 0.00 | 0 B |
// PerElement | Clr | 2048 | 17.387 us | 15.1569 us | 0.8564 us | 1.02 | 0.04 | 0 B |
// CommonBulk | Clr | 2048 | 17.121 us | 0.7634 us | 0.0431 us | 1.00 | 0.00 | 24 B |
// OptimizedBulk | Clr | 2048 | 4.018 us | 0.3858 us | 0.0218 us | 0.23 | 0.00 | 0 B |
// | | | | | | | | |
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 22.232 us | 1.6154 us | 0.0913 us | 1.31 | 0.04 | 0 B |
// PerElement | Core | 2048 | 16.741 us | 2.9254 us | 0.1653 us | 0.98 | 0.03 | 0 B |
// CommonBulk | Core | 2048 | 17.022 us | 11.4894 us | 0.6492 us | 1.00 | 0.00 | 24 B |
// OptimizedBulk | Core | 2048 | 3.707 us | 0.1500 us | 0.0085 us | 0.22 | 0.01 | 0 B |
// | | | | | | | | |
// FastDefault | Core | 2048 | 14.693 us | 0.5131 us | 0.0290 us | 3.76 | 0.03 | 0 B |
// BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 3.913 us | 0.5661 us | 0.0320 us | 1.00 | 0.00 | 0 B |
// ExtendedIntrinsic_BulkConvertNormalizedFloatToByteClampOverflows | Core | 2048 | 1.966 us | 0.4056 us | 0.0229 us | 0.50 | 0.01 | 0 B |
}
}

197
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs

@ -29,8 +29,9 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Params(
//64,
//512
256
//256,
//512,
2048
)]
public int Count { get; set; }
@ -60,70 +61,214 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
}
}
//[Benchmark]
public void CommonBulk()
[Benchmark]
public void PixelOperations_Base()
{
new PixelOperations<TPixel>().ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
//[Benchmark]
public void OptimizedBulk()
[Benchmark]
public void PixelOperations_Specialized()
{
PixelOperations<TPixel>.Instance.ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
}
[RyuJitX64Job]
[DisassemblyDiagnoser(printAsm: true, printSource: true)]
[Config(typeof(Config.ShortClr))]
public class ToVector4_Rgba32 : ToVector4<Rgba32>
{
class Config : ManualConfig
{
}
[Benchmark(Baseline = true)]
public void FastScalarBulk()
[Benchmark]
public void BasicBulk()
{
ref Rgba32 sBase = ref this.source.GetSpan()[0];
ref Vector4 dBase = ref this.destination.GetSpan()[0];
Vector4 scale = new Vector4(1f / 255f);
Vector4 v = default;
for (int i = 0; i < this.Count; i++)
{
ref Rgba32 s = ref Unsafe.Add(ref sBase, i);
ref Vector4 d = ref Unsafe.Add(ref dBase, i);
d.X = s.R;
d.Y = s.G;
d.Z = s.B;
d.W = s.A;
v.X = s.R;
v.Y = s.G;
v.Z = s.B;
v.W = s.A;
v *= scale;
Unsafe.Add(ref dBase, i) = v;
}
}
/// <summary>
/// Baseline benchmark: converts the source bytes to floats (byte / 255) in two passes over the
/// destination buffer. The first pass stores the bytes as 32-bit integers in place, the second
/// turns those integers into floats with a bit trick.
/// </summary>
[Benchmark(Baseline = true)]
public void BulkConvertByteToNormalizedFloat_2Loops()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
// Rescales b / 256 (what the bit trick yields) to b / 255.
var bVec = new Vector<float>(256.0f / 255.0f);
var magicFloat = new Vector<float>(32768.0f);
var magicInt = new Vector<uint>(1191182336); // reinterpreted value of 32768.0f
var mask = new Vector<uint>(255);
ref SimdUtils.Octet.OfByte sourceBase = ref Unsafe.As<byte, SimdUtils.Octet.OfByte>(ref MemoryMarshal.GetReference((ReadOnlySpan<byte>)sBytes));
ref SimdUtils.Octet.OfUInt32 destBaseAsWideOctet = ref Unsafe.As<float, SimdUtils.Octet.OfUInt32>(ref MemoryMarshal.GetReference(dFloats));
ref Vector<float> destBaseAsFloat = ref Unsafe.As<SimdUtils.Octet.OfUInt32, Vector<float>>(ref destBaseAsWideOctet);
int n = dFloats.Length / 8;
// Pass 1: LoadFrom is defined outside this view; presumably it widens the 8 source bytes
// into the 8 uint slots of the destination octet - confirm.
for (int i = 0; i < n; i++)
{
ref SimdUtils.Octet.OfByte s = ref Unsafe.Add(ref sourceBase, i);
ref SimdUtils.Octet.OfUInt32 d = ref Unsafe.Add(ref destBaseAsWideOctet, i);
d.LoadFrom(ref s);
}
// Pass 2: reinterpret the stored integers and convert them to floats without an int-to-float instruction.
// NOTE(review): the destination is indexed in Vector<float> steps while n counts groups of 8 floats,
// so this appears to assume Vector<float>.Count == 8 - confirm.
for (int i = 0; i < n; i++)
{
ref Vector<float> df = ref Unsafe.Add(ref destBaseAsFloat, i);
var vi = Vector.AsVectorUInt32(df);
// Keep the low 8 bits and OR them into the mantissa of 32768.0f; one mantissa step at
// that magnitude is 1/256, so the float now reads 32768 + b / 256.
vi &= mask;
vi |= magicInt;
var vf = Vector.AsVectorSingle(vi);
// (32768 + b / 256 - 32768) * (256 / 255) == b / 255
vf = (vf - magicFloat) * bVec;
df = vf;
}
}
//[Benchmark]
/// <summary>
/// Single-pass variant of the byte-to-float (byte / 255) conversion: each group of 8 bytes is
/// loaded into a local temporary, converted with the bit trick, and written to the destination
/// in the same loop iteration. Currently not registered as a benchmark (attribute commented out).
/// </summary>
public void BulkConvertByteToNormalizedFloat_ConvertInSameLoop()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
// Rescales b / 256 (what the bit trick yields) to b / 255.
var bVec = new Vector<float>(256.0f / 255.0f);
var magicFloat = new Vector<float>(32768.0f);
var magicInt = new Vector<uint>(1191182336); // reinterpreted value of 32768.0f
var mask = new Vector<uint>(255);
ref SimdUtils.Octet.OfByte sourceBase = ref Unsafe.As<byte, SimdUtils.Octet.OfByte>(ref MemoryMarshal.GetReference((ReadOnlySpan<byte>)sBytes));
ref SimdUtils.Octet.OfUInt32 destBaseAsWideOctet = ref Unsafe.As<float, SimdUtils.Octet.OfUInt32>(ref MemoryMarshal.GetReference(dFloats));
ref Vector<float> destBaseAsFloat = ref Unsafe.As<SimdUtils.Octet.OfUInt32, Vector<float>>(ref destBaseAsWideOctet);
int n = dFloats.Length / 8;
// Local scratch octet, also viewed as a Vector<uint> so it can be read back as one vector.
// NOTE(review): reading the 8-uint octet as a single Vector<uint> appears to assume
// Vector<uint>.Count == 8 - confirm.
var temp = default(SimdUtils.Octet.OfUInt32);
ref Vector<uint> tempRef = ref Unsafe.As<SimdUtils.Octet.OfUInt32, Vector<uint>>(ref temp);
for (int i = 0; i < n; i++)
{
ref SimdUtils.Octet.OfByte s = ref Unsafe.Add(ref sourceBase, i);
// LoadFrom is defined outside this view; presumably it widens the 8 bytes into temp - confirm.
temp.LoadFrom(ref s);
Vector<uint> vi = tempRef;
// Keep the low 8 bits and OR them into the mantissa of 32768.0f; one mantissa step at
// that magnitude is 1/256, so the float now reads 32768 + b / 256.
vi &= mask;
vi |= magicInt;
var vf = Vector.AsVectorSingle(vi);
// (32768 + b / 256 - 32768) * (256 / 255) == b / 255
vf = (vf - magicFloat) * bVec;
Unsafe.Add(ref destBaseAsFloat, i) = vf;
}
}
[Benchmark]
public void BulkConvertByteToNormalizedFloat()
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
SimdUtils.BulkConvertByteToNormalizedFloat(sBytes, dFloats);
int n = dFloats.Length / Vector<byte>.Count;
ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference((ReadOnlySpan<byte>)sBytes));
ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dFloats));
ref Vector<uint> destBaseU = ref Unsafe.As<Vector<float>, Vector<uint>>(ref destBase);
for (int i = 0; i < n; i++)
{
Vector<byte> b = Unsafe.Add(ref sourceBase, i);
Vector.Widen(b, out Vector<ushort> s0, out Vector<ushort> s1);
Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1);
Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3);
ref Vector<uint> d = ref Unsafe.Add(ref destBaseU, i * 4);
d = w0;
Unsafe.Add(ref d, 1) = w1;
Unsafe.Add(ref d, 2) = w2;
Unsafe.Add(ref d, 3) = w3;
}
n = dFloats.Length / Vector<float>.Count;
var scale = new Vector<float>(1f / 255f);
for (int i = 0; i < n; i++)
{
ref Vector<float> dRef = ref Unsafe.Add(ref destBase, i);
Vector<int> du = Vector.AsVectorInt32(dRef);
Vector<float> v = Vector.ConvertToSingle(du);
v *= scale;
dRef = v;
}
}
[Benchmark]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat()
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_ConvertInSameLoop()
{
Span<byte> sBytes = MemoryMarshal.Cast<Rgba32, byte>(this.source.GetSpan());
Span<float> dFloats = MemoryMarshal.Cast<Vector4, float>(this.destination.GetSpan());
SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(sBytes, dFloats);
int n = dFloats.Length / Vector<byte>.Count;
ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference((ReadOnlySpan<byte>)sBytes));
ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dFloats));
var scale = new Vector<float>(1f / 255f);
for (int i = 0; i < n; i++)
{
Vector<byte> b = Unsafe.Add(ref sourceBase, i);
Vector.Widen(b, out Vector<ushort> s0, out Vector<ushort> s1);
Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1);
Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3);
Vector<float> f0 = ConvertToNormalizedSingle(w0, scale);
Vector<float> f1 = ConvertToNormalizedSingle(w1, scale);
Vector<float> f2 = ConvertToNormalizedSingle(w2, scale);
Vector<float> f3 = ConvertToNormalizedSingle(w3, scale);
ref Vector<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
}
}
/// <summary>
/// Reinterprets the lanes of <paramref name="u"/> as signed integers, converts them to
/// <see cref="float"/> and multiplies each lane by <paramref name="scale"/>.
/// </summary>
/// <param name="u">The integer lanes to convert.</param>
/// <param name="scale">The per-lane multiplier applied to the converted values.</param>
/// <returns>The scaled floating point lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<float> ConvertToNormalizedSingle(Vector<uint> u, Vector<float> scale)
{
    Vector<float> converted = Vector.ConvertToSingle(Vector.AsVectorInt32(u));
    return converted * scale;
}
//[Benchmark]
public void Original()
public void OldImplementation()
{
ToVector4SimdAligned(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
ToVector4OldImplementation(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ToVector4SimdAligned(ReadOnlySpan<Rgba32> sourceColors, Span<Vector4> destVectors, int count)
private static void ToVector4OldImplementation(ReadOnlySpan<Rgba32> sourceColors, Span<Vector4> destVectors, int count)
{
if (!Vector.IsHardwareAccelerated)
{

52
tests/ImageSharp.Benchmarks/General/Vectorization/UInt32ToSingle.cs

@ -9,7 +9,7 @@ namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
{
private float[] data;
private const int Count = 64;
private const int Count = 32;
[GlobalSetup]
public void Setup()
@ -24,8 +24,10 @@ namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
int n = Count / Vector<float>.Count;
Vector<float> magick = new Vector<float>(32768.0f);
Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
var bVec = new Vector<float>(256.0f / 255.0f);
var magicFloat = new Vector<float>(32768.0f);
var magicInt = new Vector<uint>(1191182336); // reinterpreded value of 32768.0f
var mask = new Vector<uint>(255);
for (int i = 0; i < n; i++)
{
@ -33,13 +35,16 @@ namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
// u.f = 32768.0f + x * (255.0f / 256.0f);
// return (uint8_t)u.i;
ref Vector<float> d = ref Unsafe.Add(ref b, i);
Vector<float> x = d;
//x = Vector.Max(x, Vector<float>.Zero);
//x = Vector.Min(x, Vector<float>.One);
ref Vector<float> df = ref Unsafe.Add(ref b, i);
var vi = Vector.AsVectorUInt32(df);
vi &= mask;
vi |= magicInt;
var vf = Vector.AsVectorSingle(vi);
vf = (vf - magicFloat) * bVec;
x = (x * scale) + magick;
d = x;
df = vf;
}
}
@ -48,18 +53,37 @@ namespace SixLabors.ImageSharp.Benchmarks.General.Vectorization
{
int n = Count / Vector<float>.Count;
ref Vector<float> b = ref Unsafe.As<float, Vector<float>>(ref this.data[0]);
ref Vector<float> bf = ref Unsafe.As<float, Vector<float>>(ref this.data[0]);
ref Vector<uint> bu = ref Unsafe.As<Vector<float>, Vector<uint>>(ref bf);
var scale = new Vector<float>(1f / 255f);
for (int i = 0; i < n; i++)
{
ref Vector<float> df = ref Unsafe.Add(ref b, i);
Vector<uint> du = Unsafe.As<Vector<float>, Vector<uint>>(ref df);
Vector<uint> u = Unsafe.Add(ref bu, i);
Vector<float> v = Vector.ConvertToSingle(u);
v *= scale;
Unsafe.Add(ref bf, i) = v;
}
}
Vector<float> v = Vector.ConvertToSingle(du);
// This code is not correct at all; it is only here as a reference.
[Benchmark]
public void StandardSimdFromInt()
{
int n = Count / Vector<float>.Count;
ref Vector<float> bf = ref Unsafe.As<float, Vector<float>>(ref this.data[0]);
ref Vector<int> bu = ref Unsafe.As<Vector<float>, Vector<int>>(ref bf);
var scale = new Vector<float>(1f / 255f);
for (int i = 0; i < n; i++)
{
Vector<int> u = Unsafe.Add(ref bu, i);
Vector<float> v = Vector.ConvertToSingle(u);
v *= scale;
df = v;
Unsafe.Add(ref bf, i) = v;
}
}
}

6
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -212,15 +212,11 @@ namespace SixLabors.ImageSharp.Tests.Common
[InlineData(3, 128)]
public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat(int seed, int count)
{
if (!Vector.IsHardwareAccelerated)
{
return;
}
byte[] source = new Random(seed).GenerateRandomByteArray(count);
float[] result = new float[count];
float[] expected = source.Select(b => (float)b / 255f).ToArray();
SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(source, result);
Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5f));

Loading…
Cancel
Save