Browse Source

optimized Rgba32.PixelOperations.PackFromVector4()

pull/299/head
Anton Firszov 9 years ago
parent
commit
e342c5dbe0
  1. 97
      src/ImageSharp/Common/Extensions/SimdUtils.cs
  2. 30
      src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
  3. 65
      tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
  4. 39
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
  5. 19
      tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs

97
src/ImageSharp/Common/Extensions/SimdUtils.cs

@ -2,13 +2,13 @@
// Licensed under the Apache License, Version 2.0.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
using System.Diagnostics;
/// <summary>
/// Various extension and utility methods for <see cref="Vector4"/> and <see cref="Vector{T}"/> utilizing SIMD capabilities
/// </summary>
@ -19,7 +19,6 @@ namespace SixLabors.ImageSharp
/// </summary>
public static readonly bool IsAvx2 = Vector<float>.Count == 8 && Vector<int>.Count == 8;
[Conditional("DEBUG")]
internal static void GuardAvx2(string operation)
{
if (!IsAvx2)
@ -79,11 +78,16 @@ namespace SixLabors.ImageSharp
ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref source.DangerousGetPinnableReference());
ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref dest.DangerousGetPinnableReference());
int n = source.Length / 8;
Vector<float> magick = new Vector<float>(32768.0f);
Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
int n = source.Length;
// need to copy to a temporary struct, because
// SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
// does not work. TODO: This might be a CoreClr bug, need to ask/report
var temp = default(Octet.OfUInt32);
ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
for (int i = 0; i < n; i++)
{
@ -92,13 +96,10 @@ namespace SixLabors.ImageSharp
// return (uint8_t)u.i;
Vector<float> x = Unsafe.Add(ref srcBase, i);
x = (x * scale) + magick;
Vector<uint> u = Vector.AsVectorUInt32(x);
Octet.OfUInt32 ii = Unsafe.As<Vector<uint>, Octet.OfUInt32>(ref u);
tempRef = x;
ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
d.LoadFrom(ref ii);
d.LoadFrom(ref temp);
}
}
@ -118,11 +119,16 @@ namespace SixLabors.ImageSharp
ref Vector<float> srcBase = ref Unsafe.As<float, Vector<float>>(ref source.DangerousGetPinnableReference());
ref Octet.OfByte destBase = ref Unsafe.As<byte, Octet.OfByte>(ref dest.DangerousGetPinnableReference());
int n = source.Length / 8;
Vector<float> magick = new Vector<float>(32768.0f);
Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
int n = source.Length;
// need to copy to a temporary struct, because
// SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
// does not work. TODO: This might be a CoreClr bug, need to ask/report
var temp = default(Octet.OfUInt32);
ref Vector<float> tempRef = ref Unsafe.As<Octet.OfUInt32, Vector<float>>(ref temp);
for (int i = 0; i < n; i++)
{
@ -134,27 +140,80 @@ namespace SixLabors.ImageSharp
x = Vector.Min(x, Vector<float>.One);
x = (x * scale) + magick;
Vector<uint> u = Vector.AsVectorUInt32(x);
Octet.OfUInt32 ii = Unsafe.As<Vector<uint>, Octet.OfUInt32>(ref u);
tempRef = x;
ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
d.LoadFrom(ref ii);
d.LoadFrom(ref temp);
}
}
#pragma warning disable SA1132 // Do not combine fields
private static class Octet
// TODO: Replace these with T4-d library level tuples!
internal static class Octet
{
[StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))]
public struct OfUInt32
{
public uint V0, V1, V2, V3, V4, V5, V6, V7;
[FieldOffset(0 * sizeof(uint))]
public uint V0;
[FieldOffset(1 * sizeof(uint))]
public uint V1;
[FieldOffset(2 * sizeof(uint))]
public uint V2;
[FieldOffset(3 * sizeof(uint))]
public uint V3;
[FieldOffset(4 * sizeof(uint))]
public uint V4;
[FieldOffset(5 * sizeof(uint))]
public uint V5;
[FieldOffset(6 * sizeof(uint))]
public uint V6;
[FieldOffset(7 * sizeof(uint))]
public uint V7;
public override string ToString()
{
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
}
}
[StructLayout(LayoutKind.Explicit, Size = 8)]
public struct OfByte
{
public byte V0, V1, V2, V3, V4, V5, V6, V7;
[FieldOffset(0)]
public byte V0;
[FieldOffset(1)]
public byte V1;
[FieldOffset(2)]
public byte V2;
[FieldOffset(3)]
public byte V3;
[FieldOffset(4)]
public byte V4;
[FieldOffset(5)]
public byte V5;
[FieldOffset(6)]
public byte V6;
[FieldOffset(7)]
public byte V7;
public override string ToString()
{
return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
}
public void LoadFrom(ref OfUInt32 i)
{

30
src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs

@ -101,7 +101,6 @@ namespace SixLabors.ImageSharp
}
int remainder = count % Vector<uint>.Count;
int alignedCount = count - remainder;
if (alignedCount > 0)
@ -117,6 +116,35 @@ namespace SixLabors.ImageSharp
}
}
/// <inheritdoc />
internal override void PackFromVector4(Span<Vector4> sourceVectors, Span<Rgba32> destColors, int count)
{
    GuardSpans(sourceVectors, nameof(sourceVectors), destColors, nameof(destColors), count);

    if (SimdUtils.IsAvx2)
    {
        // The bulk converter is only handed an even number of Vector4 values
        // (SimdUtils.IsAvx2 implies Vector<float>.Count == 8, i.e. two Vector4 per vector);
        // an odd trailing element is packed separately below.
        int tail = count % 2;
        int bulkCount = count - tail;

        if (bulkCount > 0)
        {
            Span<float> components = sourceVectors.Slice(0, bulkCount).NonPortableCast<Vector4, float>();
            Span<byte> packedBytes = destColors.NonPortableCast<Rgba32, byte>();

            SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(components, packedBytes);
        }

        if (tail > 0)
        {
            // tail can only be 1 here, so exactly the last element is left to convert.
            int last = count - 1;
            destColors[last].PackFromVector4(sourceVectors[last]);
        }
    }
    else
    {
        // No AVX2: defer to the inherited implementation.
        base.PackFromVector4(sourceVectors, destColors, count);
    }
}
/// <inheritdoc />
internal override void PackFromRgba32(Span<Rgba32> source, Span<Rgba32> destPixels, int count)
{

65
tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs

@ -0,0 +1,65 @@
// ReSharper disable InconsistentNaming
namespace SixLabors.ImageSharp.Benchmarks.Color.Bulk
{
using System.Numerics;
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
/// <summary>
/// Compares three ways of packing <see cref="Vector4"/> values into <typeparamref name="TPixel"/> pixels:
/// an element-by-element loop, a freshly constructed <see cref="PixelOperations{TPixel}"/>,
/// and <see cref="PixelOperations{TPixel}.Instance"/>.
/// </summary>
/// <typeparam name="TPixel">The pixel format being packed into.</typeparam>
[Config(typeof(Config.Short))]
public abstract class PackFromVector4<TPixel>
    where TPixel : struct, IPixel<TPixel>
{
    private Buffer<Vector4> source;

    private Buffer<TPixel> destination;

    /// <summary>
    /// Gets or sets the number of elements converted by each benchmark invocation.
    /// </summary>
    [Params(16, 128, 512)]
    public int Count { get; set; }

    /// <summary>
    /// Allocates the source and destination buffers, each holding <see cref="Count"/> elements.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        this.destination = new Buffer<TPixel>(this.Count);
        this.source = new Buffer<Vector4>(this.Count);
    }

    /// <summary>
    /// Disposes both buffers created by <see cref="Setup"/>.
    /// </summary>
    [GlobalCleanup]
    public void Cleanup()
    {
        this.destination.Dispose();
        this.source.Dispose();
    }

    /// <summary>
    /// Baseline: packs every element individually through the pixel's own PackFromVector4.
    /// </summary>
    [Benchmark(Baseline = true)]
    public void PerElement()
    {
        Vector4[] vectors = this.source.Array;
        TPixel[] pixels = this.destination.Array;

        for (int index = 0; index < this.Count; index++)
        {
            pixels[index].PackFromVector4(vectors[index]);
        }
    }

    /// <summary>
    /// Bulk conversion through a newly constructed <see cref="PixelOperations{TPixel}"/>
    /// rather than the shared <see cref="PixelOperations{TPixel}.Instance"/>.
    /// </summary>
    [Benchmark]
    public void CommonBulk()
    {
        var operations = new PixelOperations<TPixel>();
        operations.PackFromVector4(this.source, this.destination, this.Count);
    }

    /// <summary>
    /// Bulk conversion through <see cref="PixelOperations{TPixel}.Instance"/>.
    /// </summary>
    [Benchmark]
    public void OptimizedBulk()
        => PixelOperations<TPixel>.Instance.PackFromVector4(this.source, this.destination, this.Count);
}
/// <summary>
/// Closes the generic <see cref="PackFromVector4{TPixel}"/> benchmark over <see cref="Rgba32"/>;
/// adds no members of its own.
/// </summary>
public class PackFromVector4_Rgba32 : PackFromVector4<Rgba32>
{
}
}

39
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -182,6 +182,21 @@ namespace SixLabors.ImageSharp.Tests.Common
Assert.Equal(expected, actual);
}
/// <summary>
/// Checks a single vector-wide step of the normalized-float-to-byte conversion:
/// eight values are divided by 255, run through <c>MagicConvert</c>, and compared
/// against <see cref="Math.Round(double)"/> of the original values.
/// </summary>
[Fact]
private void BulkConvertNormalizedFloatToByte_Step()
{
    // MagicConvert reinterprets the 8 input floats as ONE Vector<float>, which only
    // holds when the vector is 8 lanes wide. On other hardware the test would fail
    // (or throw) for reasons unrelated to the conversion itself, so skip it there.
    if (!SimdUtils.IsAvx2)
    {
        this.Output.WriteLine("Skipping AVX2 specific test case: " + nameof(this.BulkConvertNormalizedFloatToByte_Step));
        return;
    }

    float[] source = { 0, 7, 42, 255, 0.5f, 1.1f, 2.6f, 16f };

    // Expected bytes are computed from the un-normalized values before scaling down.
    byte[] expected = source.Select(f => (byte)Math.Round(f)).ToArray();

    source = source.Select(f => f / 255f).ToArray();

    byte[] dest = new byte[8];

    this.MagicConvert(source, dest);

    Assert.Equal(expected, dest);
}
private static byte MagicConvert(float x)
{
float f = 32768.0f + x;
@ -189,6 +204,30 @@ namespace SixLabors.ImageSharp.Tests.Common
return (byte)i;
}
/// <summary>
/// Converts the first <see cref="Vector{T}"/> of floats in <paramref name="source"/> to bytes
/// at the start of <paramref name="dest"/> by scaling with 255/256 and adding 32768,
/// then writes both the intermediate and the final octet to the test output.
/// </summary>
/// <param name="source">The input floats; only the first vector's worth is read.</param>
/// <param name="dest">The buffer receiving the converted bytes.</param>
private void MagicConvert(Span<float> source, Span<byte> dest)
{
    var bias = new Vector<float>(32768.0f);
    Vector<float> factor = new Vector<float>(255f) / new Vector<float>(256f);

    Vector<float> lanes = source.NonPortableCast<float, Vector<float>>()[0];
    lanes = (lanes * factor) + bias;

    // The vector is copied into the octet through a ref view of the octet, instead of
    // reinterpreting the vector directly:
    // SimdUtils.Octet.OfUInt32 raw = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref lanes);
    var raw = default(SimdUtils.Octet.OfUInt32);
    ref Vector<float> rawAsVector = ref Unsafe.As<SimdUtils.Octet.OfUInt32, Vector<float>>(ref raw);
    rawAsVector = lanes;

    ref SimdUtils.Octet.OfByte packed = ref dest.NonPortableCast<byte, SimdUtils.Octet.OfByte>()[0];
    packed.LoadFrom(ref raw);

    this.Output.WriteLine(raw.ToString());
    this.Output.WriteLine(packed.ToString());
}
private static void AssertEvenRoundIsCorrect(Vector<float> r, Vector<float> v)
{
for (int i = 0; i < Vector<float>.Count; i++)

19
tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs

@ -12,10 +12,9 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
{
public partial class PixelOperationsTests
{
public class Color32 : PixelOperationsTests<Rgba32>
public class Rgba32 : PixelOperationsTests<ImageSharp.Rgba32>
{
public Color32(ITestOutputHelper output)
public Rgba32(ITestOutputHelper output)
: base(output)
{
}
@ -26,19 +25,19 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
[Fact]
public void IsSpecialImplementation()
{
Assert.IsType<Rgba32.PixelOperations>(PixelOperations<Rgba32>.Instance);
Assert.IsType<ImageSharp.Rgba32.PixelOperations>(PixelOperations<ImageSharp.Rgba32>.Instance);
}
[Fact]
public void ToVector4SimdAligned()
{
Rgba32[] source = CreatePixelTestData(64);
ImageSharp.Rgba32[] source = CreatePixelTestData(64);
Vector4[] expected = CreateExpectedVector4Data(source);
TestOperation(
source,
expected,
(s, d) => Rgba32.PixelOperations.ToVector4SimdAligned(s, d, 64)
(s, d) => ImageSharp.Rgba32.PixelOperations.ToVector4SimdAligned(s, d, 64)
);
}
@ -51,23 +50,23 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
int times = 200000;
int count = 1024;
using (Buffer<Rgba32> source = new Buffer<Rgba32>(count))
using (Buffer<ImageSharp.Rgba32> source = new Buffer<ImageSharp.Rgba32>(count))
using (Buffer<Vector4> dest = new Buffer<Vector4>(count))
{
this.Measure(
times,
() =>
{
PixelOperations<Rgba32>.Instance.ToVector4(source, dest, count);
PixelOperations<ImageSharp.Rgba32>.Instance.ToVector4(source, dest, count);
});
}
}
}
public class Argb : PixelOperationsTests<Argb32>
public class Argb32 : PixelOperationsTests<ImageSharp.PixelFormats.Argb32>
{
// For 4.6 test runner MemberData does not work without redeclaring the public field in the derived test class:
public Argb(ITestOutputHelper output)
public Argb32(ITestOutputHelper output)
: base(output)
{
}

Loading…
Cancel
Save