Browse Source

cleanup

af/merge-core
Anton Firszov 7 years ago
parent
commit
8f4e8a663a
  1. 32
      src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs
  2. 18
      src/ImageSharp/Common/Helpers/SimdUtils.cs
  3. 56
      src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs
  4. 69
      src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
  5. 112
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs
  6. 2
      tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs

32
src/ImageSharp/Common/Extensions/SimdUtils.ExtendedIntrinsics.cs → src/ImageSharp/Common/Helpers/SimdUtils.ExtendedIntrinsics.cs

@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp
{
public static bool IsAvailable { get; } =
#if NETCOREAPP2_1
// TODO: Add a build target for .NET 4.7.2
// TODO: Also available in .NET 4.7.2, we need to add a build target!
true;
#else
false;
@ -31,14 +31,15 @@ namespace SixLabors.ImageSharp
internal static void BulkConvertByteToNormalizedFloat(ReadOnlySpan<byte> source, Span<float> dest)
{
Guard.IsTrue(
source.Length % Vector<byte>.Count == 0,
dest.Length % Vector<byte>.Count == 0,
nameof(source),
"dest.Length should be divisable by Vector<byte>.Count!");
int n = source.Length / Vector<byte>.Count;
int n = dest.Length / Vector<byte>.Count;
ref Vector<byte> sourceBase = ref Unsafe.As<byte, Vector<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector<float> destBase = ref Unsafe.As<float, Vector<float>>(ref MemoryMarshal.GetReference(dest));
ref Vector<uint> destBaseU = ref Unsafe.As<Vector<float>, Vector<uint>>(ref destBase);
const float Scale = 1f / 255f;
@ -50,16 +51,23 @@ namespace SixLabors.ImageSharp
Vector.Widen(s0, out Vector<uint> w0, out Vector<uint> w1);
Vector.Widen(s1, out Vector<uint> w2, out Vector<uint> w3);
Vector<float> f0 = Vector.ConvertToSingle(w0) * Scale;
Vector<float> f1 = Vector.ConvertToSingle(w1) * Scale;
Vector<float> f2 = Vector.ConvertToSingle(w2) * Scale;
Vector<float> f3 = Vector.ConvertToSingle(w3) * Scale;
ref Vector<uint> d = ref Unsafe.Add(ref destBaseU, i * 4);
d = w0;
Unsafe.Add(ref d, 1) = w1;
Unsafe.Add(ref d, 2) = w2;
Unsafe.Add(ref d, 3) = w3;
}
n = dest.Length / Vector<float>.Count;
for (int i = 0; i < n; i++)
{
ref Vector<float> df = ref Unsafe.Add(ref destBase, i);
ref Vector<uint> du = ref Unsafe.As<Vector<float>, Vector<uint>>(ref df);
ref Vector<float> d = ref Unsafe.Add(ref destBase, i * 4);
d = f0;
Unsafe.Add(ref d, 1) = f1;
Unsafe.Add(ref d, 2) = f2;
Unsafe.Add(ref d, 3) = f3;
Vector<float> v = Vector.ConvertToSingle(du);
v *= Scale;
df = v;
}
}

18
src/ImageSharp/Common/Extensions/SimdUtils.cs → src/ImageSharp/Common/Helpers/SimdUtils.cs

@ -2,13 +2,10 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp
{
/// <summary>
@ -131,23 +128,26 @@ namespace SixLabors.ImageSharp
ref Vector<float> destBaseAsFloat = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref destBaseAsWideOctet);
int n = dest.Length / 8;
Octet.OfUInt32 temp = default;
for (int i = 0; i < n; i++)
{
Octet.OfByte sVal = Unsafe.Add(ref sourceBase, i);
ref Octet.OfByte s = ref Unsafe.Add(ref sourceBase, i);
ref Octet.OfUInt32 d = ref Unsafe.Add(ref destBaseAsWideOctet, i);
d.LoadFrom(ref s);
}
// This call is the bottleneck now:
temp.LoadFrom(ref sVal);
for (int i = 0; i < n; i++)
{
ref Vector<float> df = ref Unsafe.Add(ref destBaseAsFloat, i);
Vector<uint> vi = Unsafe.As<Octet.OfUInt32, Vector<uint>>(ref temp);
var vi = Vector.AsVectorUInt32(df);
vi &= mask;
vi |= magicInt;
var vf = Vector.AsVectorSingle(vi);
vf = (vf - magicFloat) * bVec;
Unsafe.Add(ref destBaseAsFloat, i) = vf;
df = vf;
}
}

56
src/ImageSharp/PixelFormats/PixelOperations{TPixel}.cs

@ -29,17 +29,7 @@ namespace SixLabors.ImageSharp.PixelFormats
/// <param name="count">The number of pixels to convert.</param>
internal virtual void PackFromVector4(ReadOnlySpan<Vector4> sourceVectors, Span<TPixel> destinationColors, int count)
{
GuardSpans(sourceVectors, nameof(sourceVectors), destinationColors, nameof(destinationColors), count);
ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceVectors);
ref TPixel destRef = ref MemoryMarshal.GetReference(destinationColors);
for (int i = 0; i < count; i++)
{
ref Vector4 sp = ref Unsafe.Add(ref sourceRef, i);
ref TPixel dp = ref Unsafe.Add(ref destRef, i);
dp.PackFromVector4(sp);
}
PackFromVector4Common(sourceVectors, destinationColors, count);
}
/// <summary>
@ -50,17 +40,7 @@ namespace SixLabors.ImageSharp.PixelFormats
/// <param name="count">The number of pixels to convert.</param>
internal virtual void ToVector4(ReadOnlySpan<TPixel> sourceColors, Span<Vector4> destinationVectors, int count)
{
GuardSpans(sourceColors, nameof(sourceColors), destinationVectors, nameof(destinationVectors), count);
ref TPixel sourceRef = ref MemoryMarshal.GetReference(sourceColors);
ref Vector4 destRef = ref MemoryMarshal.GetReference(destinationVectors);
for (int i = 0; i < count; i++)
{
ref TPixel sp = ref Unsafe.Add(ref sourceRef, i);
ref Vector4 dp = ref Unsafe.Add(ref destRef, i);
dp = sp.ToVector4();
}
ToVector4Common(sourceColors, destinationVectors, count);
}
/// <summary>
@ -126,5 +106,37 @@ namespace SixLabors.ImageSharp.PixelFormats
Guard.MustBeSizedAtLeast(source, minLength, sourceParamName);
Guard.MustBeSizedAtLeast(destination, minLength, destinationParamName);
}
/// <summary>
/// Scalar, per-element conversion of <paramref name="count"/> vectors into pixels.
/// Each destination pixel is filled by calling its own <c>PackFromVector4</c> with the matching source vector.
/// </summary>
/// <param name="sourceVectors">The vectors to read from.</param>
/// <param name="destinationColors">The pixels to write to.</param>
/// <param name="count">The number of elements to convert.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void PackFromVector4Common(ReadOnlySpan<Vector4> sourceVectors, Span<TPixel> destinationColors, int count)
{
    // Span sizes are validated once up front; the loop below uses unchecked ref arithmetic.
    GuardSpans(sourceVectors, nameof(sourceVectors), destinationColors, nameof(destinationColors), count);

    ref Vector4 firstVector = ref MemoryMarshal.GetReference(sourceVectors);
    ref TPixel firstPixel = ref MemoryMarshal.GetReference(destinationColors);

    int index = 0;
    while (index < count)
    {
        // Keep a ref to the destination element so the pixel is mutated in place.
        ref TPixel target = ref Unsafe.Add(ref firstPixel, index);
        target.PackFromVector4(Unsafe.Add(ref firstVector, index));
        index++;
    }
}
/// <summary>
/// Scalar, per-element conversion of <paramref name="count"/> pixels into vectors.
/// Each destination vector receives the result of the matching source pixel's <c>ToVector4()</c>.
/// </summary>
/// <param name="sourceColors">The pixels to read from.</param>
/// <param name="destinationVectors">The vectors to write to.</param>
/// <param name="count">The number of elements to convert.</param>
[MethodImpl(InliningOptions.ShortMethod)]
internal static void ToVector4Common(ReadOnlySpan<TPixel> sourceColors, Span<Vector4> destinationVectors, int count)
{
    // Span sizes are validated once up front; the loop below uses unchecked ref arithmetic.
    GuardSpans(sourceColors, nameof(sourceColors), destinationVectors, nameof(destinationVectors), count);

    ref TPixel firstPixel = ref MemoryMarshal.GetReference(sourceColors);
    ref Vector4 firstVector = ref MemoryMarshal.GetReference(destinationVectors);

    int index = 0;
    while (index < count)
    {
        ref TPixel pixel = ref Unsafe.Add(ref firstPixel, index);
        ref Vector4 target = ref Unsafe.Add(ref firstVector, index);
        target = pixel.ToVector4();
        index++;
    }
}
}
}

69
src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs

@ -27,28 +27,17 @@ namespace SixLabors.ImageSharp.PixelFormats
if (count < 128 || !SimdUtils.IsAvx2CompatibleArchitecture)
{
// Not worth bothering with SIMD:
base.ToVector4(sourceColors, destinationVectors, count);
ToVector4Common(sourceColors, destinationVectors, count);
return;
}
int remainder = count % 2;
int alignedCount = count - remainder;
if (alignedCount > 0)
if (SimdUtils.ExtendedIntrinsics.IsAvailable)
{
ReadOnlySpan<byte> rawSrc = MemoryMarshal.Cast<Rgba32, byte>(sourceColors);
Span<float> rawDest = MemoryMarshal.Cast<Vector4, float>(destinationVectors.Slice(0, alignedCount));
SimdUtils.BulkConvertByteToNormalizedFloat(
rawSrc,
rawDest);
ConvertToVector4UsingExtendedIntrinsics(sourceColors, destinationVectors, count);
}
if (remainder > 0)
else
{
// actually: remainder == 1
int lastIdx = count - 1;
destinationVectors[lastIdx] = sourceColors[lastIdx].ToVector4();
ConvertToVector4UsingStandardIntrinsics(sourceColors, destinationVectors, count);
}
}
@ -59,7 +48,7 @@ namespace SixLabors.ImageSharp.PixelFormats
if (count < 128 || !SimdUtils.IsAvx2CompatibleArchitecture)
{
base.PackFromVector4(sourceVectors, destinationColors, count);
PackFromVector4Common(sourceVectors, destinationColors, count);
return;
}
@ -109,6 +98,52 @@ namespace SixLabors.ImageSharp.PixelFormats
sourcePixels.Slice(0, count).CopyTo(dest);
}
/// <summary>
/// Converts <paramref name="count"/> pixels to vectors, sending whole groups of 8 pixels through
/// <c>SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat</c> and the leftover
/// (fewer than 8) pixels through <c>ToVector4Common</c>.
/// </summary>
private static void ConvertToVector4UsingExtendedIntrinsics(
    ReadOnlySpan<Rgba32> sourceColors,
    Span<Vector4> destinationVectors,
    int count)
{
    int tail = count % 8;
    int bulk = count - tail;

    if (bulk > 0)
    {
        // Only the destination is trimmed to the bulk length; the source is passed reinterpreted as-is.
        SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(
            MemoryMarshal.Cast<Rgba32, byte>(sourceColors),
            MemoryMarshal.Cast<Vector4, float>(destinationVectors.Slice(0, bulk)));
    }

    if (tail > 0)
    {
        ToVector4Common(sourceColors.Slice(bulk), destinationVectors.Slice(bulk), tail);
    }
}
/// <summary>
/// Converts <paramref name="count"/> pixels to vectors, sending an even number of pixels through
/// <c>SimdUtils.BulkConvertByteToNormalizedFloat</c> and converting a single odd trailing pixel
/// individually with <c>ToVector4()</c>.
/// </summary>
private static void ConvertToVector4UsingStandardIntrinsics(
    ReadOnlySpan<Rgba32> sourceColors,
    Span<Vector4> destinationVectors,
    int count)
{
    int tail = count % 2;
    int bulk = count - tail;

    if (bulk > 0)
    {
        // Only the destination is trimmed to the bulk length; the source is passed reinterpreted as-is.
        SimdUtils.BulkConvertByteToNormalizedFloat(
            MemoryMarshal.Cast<Rgba32, byte>(sourceColors),
            MemoryMarshal.Cast<Vector4, float>(destinationVectors.Slice(0, bulk)));
    }

    if (tail > 0)
    {
        // At most one pixel can be left over here, and it is the last one.
        destinationVectors[count - 1] = sourceColors[count - 1].ToVector4();
    }
}
}
}
}

112
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4.cs

@ -6,6 +6,7 @@
using System.Buffers;
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using BenchmarkDotNet.Attributes;
@ -28,7 +29,9 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Params(
//64,
2048)]
//512
256
)]
public int Count { get; set; }
[GlobalSetup]
@ -45,7 +48,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
this.destination.Dispose();
}
[Benchmark]
//[Benchmark]
public void PerElement()
{
Span<TPixel> s = this.source.GetSpan();
@ -53,32 +56,48 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
for (int i = 0; i < this.Count; i++)
{
TPixel c = s[i];
d[i] = c.ToVector4();
d[i] = s[i].ToVector4();
}
}
[Benchmark(Baseline = true)]
//[Benchmark]
// Benchmark case: bulk ToVector4 through a freshly constructed PixelOperations<TPixel>
// (not the shared Instance), over the first Count elements of the source/destination buffers.
public void CommonBulk()
{
new PixelOperations<TPixel>().ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
[Benchmark]
//[Benchmark]
// Benchmark case: bulk ToVector4 through PixelOperations<TPixel>.Instance
// (presumably the pixel-type-specific implementation when one exists; confirm against Instance).
public void OptimizedBulk()
{
PixelOperations<TPixel>.Instance.ToVector4(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
}
[CoreJob]
[ClrJob]
[RyuJitX64Job]
[DisassemblyDiagnoser(printAsm: true, printSource: true)]
public class ToVector4_Rgba32 : ToVector4<Rgba32>
{
class Config : ManualConfig
{
}
/// <summary>
/// Baseline benchmark: a plain scalar loop that copies the four components of each source pixel
/// into the X/Y/Z/W fields of the matching destination vector.
/// Note that the component values are assigned as-is; no scaling is applied here.
/// </summary>
[Benchmark(Baseline = true)]
public void FastScalarBulk()
{
// Taking element [0] by ref gives a base reference for the unchecked pointer-style walk below.
ref Rgba32 sBase = ref this.source.GetSpan()[0];
ref Vector4 dBase = ref this.destination.GetSpan()[0];
for (int i = 0; i < this.Count; i++)
{
ref Rgba32 s = ref Unsafe.Add(ref sBase, i);
ref Vector4 d = ref Unsafe.Add(ref dBase, i);
d.X = s.R;
d.Y = s.G;
d.Z = s.B;
d.W = s.A;
}
}
[Benchmark]
public void BulkConvertByteToNormalizedFloat()
{
@ -97,5 +116,82 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
SimdUtils.ExtendedIntrinsics.BulkConvertByteToNormalizedFloat(sBytes, dFloats);
}
/// <summary>
/// Benchmark wrapper (currently disabled) that runs <see cref="ToVector4SimdAligned"/> over the
/// first <c>Count</c> elements of the source and destination buffers.
/// </summary>
//[Benchmark]
public void Original()
{
ToVector4SimdAligned(this.source.GetSpan(), this.destination.GetSpan(), this.Count);
}
/// <summary>
/// Two-pass conversion of <paramref name="count"/> <see cref="Rgba32"/> pixels into <see cref="Vector4"/> values.
/// Pass 1 spreads every packed pixel over four 32-bit slots written directly into the destination memory;
/// pass 2 walks that same memory as <see cref="Vector{T}"/> lanes and turns each lane's low byte into a float
/// scaled by 1/255.
/// </summary>
/// <param name="sourceColors">The pixels to read.</param>
/// <param name="destVectors">The destination; also used as scratch space for the intermediate uint lanes.</param>
/// <param name="count">The number of pixels; asserted to be a multiple of <c>Vector&lt;uint&gt;.Count</c>.</param>
/// <exception cref="InvalidOperationException">Thrown when <see cref="Vector.IsHardwareAccelerated"/> is false.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ToVector4SimdAligned(ReadOnlySpan<Rgba32> sourceColors, Span<Vector4> destVectors, int count)
{
if (!Vector.IsHardwareAccelerated)
{
throw new InvalidOperationException(
"Rgba32.PixelOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!");
}
DebugGuard.IsTrue(
count % Vector<uint>.Count == 0,
nameof(count),
"Argument 'count' should divisible by Vector<uint>.Count!");
// Constants for the byte -> float trick used in pass 2:
// OR-ing a value b in [0, 255] into the low mantissa bits of 32768.0f yields 32768 + b / 256,
// so subtracting 32768 and multiplying by 256 / 255 gives b / 255.
var bVec = new Vector<float>(256.0f / 255.0f);
var magicFloat = new Vector<float>(32768.0f);
var magicInt = new Vector<uint>(1191182336); // reinterpreted value of 32768.0f
var mask = new Vector<uint>(255);
// Four 32-bit lanes per pixel.
int unpackedRawCount = count * 4;
// The source is read as one uint per pixel; the destination memory is viewed three ways:
// as UnpackedRGBA (pass 1 writes), and as Vector<uint> / Vector<float> (pass 2 reads / writes).
ref uint sourceBase = ref Unsafe.As<Rgba32, uint>(ref MemoryMarshal.GetReference(sourceColors));
ref UnpackedRGBA destBaseAsUnpacked = ref Unsafe.As<Vector4, UnpackedRGBA>(ref MemoryMarshal.GetReference(destVectors));
ref Vector<uint> destBaseAsUInt = ref Unsafe.As<UnpackedRGBA, Vector<uint>>(ref destBaseAsUnpacked);
ref Vector<float> destBaseAsFloat = ref Unsafe.As<UnpackedRGBA, Vector<float>>(ref destBaseAsUnpacked);
// Pass 1: store the shifted (not yet masked) packed value into the four uint slots of each destination element.
for (int i = 0; i < count; i++)
{
uint sVal = Unsafe.Add(ref sourceBase, i);
ref UnpackedRGBA dst = ref Unsafe.Add(ref destBaseAsUnpacked, i);
// This call is the bottleneck now:
dst.Load(sVal);
}
// Pass 2: convert the uint lanes to floats in place, one Vector<uint> at a time.
int numOfVectors = unpackedRawCount / Vector<uint>.Count;
for (int i = 0; i < numOfVectors; i++)
{
Vector<uint> vi = Unsafe.Add(ref destBaseAsUInt, i);
// Keep only the low byte of every lane, then splice it into the bit pattern of 32768.0f.
vi &= mask;
vi |= magicInt;
var vf = Vector.AsVectorSingle(vi);
vf = (vf - magicFloat) * bVec;
Unsafe.Add(ref destBaseAsFloat, i) = vf;
}
}
/// <summary>
/// Four sequential 32-bit slots, one per channel of a packed pixel.
/// <c>ToVector4SimdAligned</c> overlays this struct on the destination <see cref="Vector4"/> memory,
/// so the field order and sequential layout are significant.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
private struct UnpackedRGBA
{
private uint r;
private uint g;
private uint b;
private uint a;
/// <summary>
/// Stores <paramref name="p"/> shifted right by 0, 8, 16 and 24 bits into the four slots.
/// The slots are not masked here, so the first three still carry higher bits;
/// the caller masks each lane with 255 afterwards.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Load(uint p)
{
this.r = p;
this.g = p >> 8;
this.b = p >> 16;
this.a = p >> 24;
}
}
}
}

2
tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs

@ -90,7 +90,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
{
}
public static TheoryData<int> ArraySizesData => new TheoryData<int> { 0, 1, 2, 7, 16, 1111 };
public static TheoryData<int> ArraySizesData => new TheoryData<int> { 0, 1, 2, 7, 16, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 1111 };
private static PixelOperations<TPixel> Operations => PixelOperations<TPixel>.Instance;

Loading…
Cancel
Save