diff --git a/src/ImageSharp/Common/Extensions/SimdUtils.cs b/src/ImageSharp/Common/Extensions/SimdUtils.cs
index c9acbc9fc..cb80a672a 100644
--- a/src/ImageSharp/Common/Extensions/SimdUtils.cs
+++ b/src/ImageSharp/Common/Extensions/SimdUtils.cs
@@ -2,13 +2,13 @@
// Licensed under the Apache License, Version 2.0.
using System;
+using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
- using System.Diagnostics;
-
///
/// Various extension and utility methods for and utilizing SIMD capabilities
///
@@ -19,7 +19,6 @@ namespace SixLabors.ImageSharp
///
public static readonly bool IsAvx2 = Vector.Count == 8 && Vector.Count == 8;
- [Conditional("DEBUG")]
internal static void GuardAvx2(string operation)
{
if (!IsAvx2)
@@ -79,11 +78,16 @@ namespace SixLabors.ImageSharp
ref Vector srcBase = ref Unsafe.As>(ref source.DangerousGetPinnableReference());
ref Octet.OfByte destBase = ref Unsafe.As(ref dest.DangerousGetPinnableReference());
+ int n = source.Length / 8;
Vector magick = new Vector(32768.0f);
Vector scale = new Vector(255f) / new Vector(256f);
- int n = source.Length;
+ // Need to copy to a temporary struct, because
+ // SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
+ // does not work. TODO: This might be a CoreCLR bug, need to ask/report.
+ var temp = default(Octet.OfUInt32);
+ ref Vector tempRef = ref Unsafe.As>(ref temp);
for (int i = 0; i < n; i++)
{
@@ -92,13 +96,10 @@ namespace SixLabors.ImageSharp
// return (uint8_t)u.i;
Vector x = Unsafe.Add(ref srcBase, i);
x = (x * scale) + magick;
-
- Vector u = Vector.AsVectorUInt32(x);
-
- Octet.OfUInt32 ii = Unsafe.As, Octet.OfUInt32>(ref u);
+ tempRef = x;
ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
- d.LoadFrom(ref ii);
+ d.LoadFrom(ref temp);
}
}
@@ -118,11 +119,16 @@ namespace SixLabors.ImageSharp
ref Vector srcBase = ref Unsafe.As>(ref source.DangerousGetPinnableReference());
ref Octet.OfByte destBase = ref Unsafe.As(ref dest.DangerousGetPinnableReference());
+ int n = source.Length / 8;
Vector magick = new Vector(32768.0f);
Vector scale = new Vector(255f) / new Vector(256f);
- int n = source.Length;
+ // Need to copy to a temporary struct, because
+ // SimdUtils.Octet.OfUInt32 temp = Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x)
+ // does not work. TODO: This might be a CoreCLR bug, need to ask/report.
+ var temp = default(Octet.OfUInt32);
+ ref Vector tempRef = ref Unsafe.As>(ref temp);
for (int i = 0; i < n; i++)
{
@@ -134,27 +140,80 @@ namespace SixLabors.ImageSharp
x = Vector.Min(x, Vector.One);
x = (x * scale) + magick;
-
- Vector u = Vector.AsVectorUInt32(x);
-
- Octet.OfUInt32 ii = Unsafe.As, Octet.OfUInt32>(ref u);
+ tempRef = x;
ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i);
- d.LoadFrom(ref ii);
+ d.LoadFrom(ref temp);
}
}
-#pragma warning disable SA1132 // Do not combine fields
- private static class Octet
+ // TODO: Replace these with T4-d library level tuples!
+ internal static class Octet
{
+ // Eight unsigned 32-bit fields placed at explicit, consecutive offsets
+ // (0, 4, ..., 28; total size 8 * sizeof(uint)). The conversion loops in this
+ // file write a vector into an instance of this struct through a reinterpreted
+ // reference, so the field offsets must stay contiguous and in this order.
+ [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))]
public struct OfUInt32
{
- public uint V0, V1, V2, V3, V4, V5, V6, V7;
+ [FieldOffset(0 * sizeof(uint))]
+ public uint V0;
+
+ [FieldOffset(1 * sizeof(uint))]
+ public uint V1;
+
+ [FieldOffset(2 * sizeof(uint))]
+ public uint V2;
+
+ [FieldOffset(3 * sizeof(uint))]
+ public uint V3;
+
+ [FieldOffset(4 * sizeof(uint))]
+ public uint V4;
+
+ [FieldOffset(5 * sizeof(uint))]
+ public uint V5;
+
+ [FieldOffset(6 * sizeof(uint))]
+ public uint V6;
+
+ [FieldOffset(7 * sizeof(uint))]
+ public uint V7;
+
+ // Formats the eight fields as "[V0,V1,...,V7]"; the tests write this to their output log.
+ public override string ToString()
+ {
+ return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+ }
}
+ [StructLayout(LayoutKind.Explicit, Size = 8)]
public struct OfByte
{
- public byte V0, V1, V2, V3, V4, V5, V6, V7;
+ [FieldOffset(0)]
+ public byte V0;
+
+ [FieldOffset(1)]
+ public byte V1;
+
+ [FieldOffset(2)]
+ public byte V2;
+
+ [FieldOffset(3)]
+ public byte V3;
+
+ [FieldOffset(4)]
+ public byte V4;
+
+ [FieldOffset(5)]
+ public byte V5;
+
+ [FieldOffset(6)]
+ public byte V6;
+
+ [FieldOffset(7)]
+ public byte V7;
+
+ public override string ToString()
+ {
+ return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]";
+ }
public void LoadFrom(ref OfUInt32 i)
{
diff --git a/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
index e80f0e9b8..6f4f93d87 100644
--- a/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
+++ b/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs
@@ -101,7 +101,6 @@ namespace SixLabors.ImageSharp
}
int remainder = count % Vector.Count;
-
int alignedCount = count - remainder;
if (alignedCount > 0)
@@ -117,6 +116,35 @@ namespace SixLabors.ImageSharp
}
}
+        /// <summary>
+        /// Bulk-converts <paramref name="count"/> vectors into <see cref="Rgba32"/> pixels.
+        /// Uses the SIMD byte conversion when <see cref="SimdUtils.IsAvx2"/> is set,
+        /// otherwise defers to the base implementation.
+        /// </summary>
+        /// <param name="sourceVectors">The source vectors.</param>
+        /// <param name="destColors">The destination pixels.</param>
+        /// <param name="count">The number of elements to convert.</param>
+        internal override void PackFromVector4(Span<Vector4> sourceVectors, Span<Rgba32> destColors, int count)
+        {
+            GuardSpans(sourceVectors, nameof(sourceVectors), destColors, nameof(destColors), count);
+
+            if (!SimdUtils.IsAvx2)
+            {
+                base.PackFromVector4(sourceVectors, destColors, count);
+                return;
+            }
+
+            // The bulk converter consumes 8 floats per step, i.e. two 4-component vectors,
+            // so only an even number of elements can go through the SIMD path.
+            int remainder = count % 2;
+            int alignedCount = count - remainder;
+
+            if (alignedCount > 0)
+            {
+                // Slice BOTH spans to the aligned element count so the flattened source
+                // (4 floats per element) and destination (4 bytes per element) agree in length.
+                Span<float> flatSrc = sourceVectors.Slice(0, alignedCount).NonPortableCast<Vector4, float>();
+                Span<byte> flatDest = destColors.Slice(0, alignedCount).NonPortableCast<Rgba32, byte>();
+
+                SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(flatSrc, flatDest);
+            }
+
+            if (remainder > 0)
+            {
+                // remainder can only be 1 here: convert the single trailing element on its own.
+                int lastIdx = count - 1;
+                destColors[lastIdx].PackFromVector4(sourceVectors[lastIdx]);
+            }
+        }
+
///
internal override void PackFromRgba32(Span source, Span destPixels, int count)
{
diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
new file mode 100644
index 000000000..e88981959
--- /dev/null
+++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs
@@ -0,0 +1,65 @@
+// ReSharper disable InconsistentNaming
+namespace SixLabors.ImageSharp.Benchmarks.Color.Bulk
+{
+    using System.Numerics;
+
+    using BenchmarkDotNet.Attributes;
+
+    using SixLabors.ImageSharp.Memory;
+    using SixLabors.ImageSharp.PixelFormats;
+
+    /// <summary>
+    /// Benchmarks three ways of packing <see cref="Vector4"/> values into <typeparamref name="TPixel"/>:
+    /// element by element, through a plain <see cref="PixelOperations{TPixel}"/> instance,
+    /// and through <see cref="PixelOperations{TPixel}.Instance"/>.
+    /// </summary>
+    /// <typeparam name="TPixel">The destination pixel type.</typeparam>
+    [Config(typeof(Config.Short))]
+    public abstract class PackFromVector4<TPixel>
+        where TPixel : struct, IPixel<TPixel>
+    {
+        private Buffer<Vector4> source;
+
+        private Buffer<TPixel> destination;
+
+        /// <summary>
+        /// Gets or sets the number of elements converted per benchmark invocation.
+        /// </summary>
+        [Params(16, 128, 512)]
+        public int Count { get; set; }
+
+        /// <summary>
+        /// Allocates the source and destination buffers, each holding <see cref="Count"/> elements.
+        /// </summary>
+        [GlobalSetup]
+        public void Setup()
+        {
+            this.destination = new Buffer<TPixel>(this.Count);
+            this.source = new Buffer<Vector4>(this.Count);
+        }
+
+        /// <summary>
+        /// Disposes the buffers allocated by <see cref="Setup"/>.
+        /// </summary>
+        [GlobalCleanup]
+        public void Cleanup()
+        {
+            this.destination.Dispose();
+            this.source.Dispose();
+        }
+
+        /// <summary>
+        /// Baseline: packs every element individually via the pixel's own PackFromVector4.
+        /// </summary>
+        [Benchmark(Baseline = true)]
+        public void PerElement()
+        {
+            Vector4[] s = this.source.Array;
+            TPixel[] d = this.destination.Array;
+
+            for (int i = 0; i < this.Count; i++)
+            {
+                d[i].PackFromVector4(s[i]);
+            }
+        }
+
+        /// <summary>
+        /// Bulk conversion through a directly constructed <see cref="PixelOperations{TPixel}"/>,
+        /// i.e. without any pixel-type-specific override.
+        /// </summary>
+        [Benchmark]
+        public void CommonBulk()
+        {
+            new PixelOperations<TPixel>().PackFromVector4(this.source, this.destination, this.Count);
+        }
+
+        /// <summary>
+        /// Bulk conversion through <see cref="PixelOperations{TPixel}.Instance"/>.
+        /// </summary>
+        [Benchmark]
+        public void OptimizedBulk()
+        {
+            PixelOperations<TPixel>.Instance.PackFromVector4(this.source, this.destination, this.Count);
+        }
+    }
+
+    /// <summary>
+    /// Runs the <see cref="PackFromVector4{TPixel}"/> benchmarks for <see cref="Rgba32"/>.
+    /// </summary>
+    public class PackFromVector4_Rgba32 : PackFromVector4<Rgba32>
+    {
+    }
+}
\ No newline at end of file
diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
index cb2591999..44762a243 100644
--- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs
@@ -182,6 +182,21 @@ namespace SixLabors.ImageSharp.Tests.Common
Assert.Equal(expected, actual);
}
+        /// <summary>
+        /// Checks a single 8-element step of the magic-number float-to-byte conversion
+        /// against <see cref="Math.Round(double)"/> on the un-normalized values.
+        /// </summary>
+        [Fact]
+        private void BulkConvertNormalizedFloatToByte_Step()
+        {
+            if (!SimdUtils.IsAvx2)
+            {
+                // MagicConvert reads ONE vector from the source and reinterprets it as 8 uint lanes.
+                // With narrower vectors only part of the 8 inputs would be converted, so the
+                // comparison below is only meaningful when SimdUtils.IsAvx2 is set.
+                return;
+            }
+
+            float[] source = { 0, 7, 42, 255, 0.5f, 1.1f, 2.6f, 16f };
+            byte[] expected = source.Select(f => (byte)Math.Round(f)).ToArray();
+
+            // Normalize to the 0..1 range expected by the conversion.
+            source = source.Select(f => f / 255f).ToArray();
+
+            byte[] dest = new byte[8];
+
+            this.MagicConvert(source, dest);
+
+            Assert.Equal(expected, dest);
+        }
+
private static byte MagicConvert(float x)
{
float f = 32768.0f + x;
@@ -189,6 +204,30 @@ namespace SixLabors.ImageSharp.Tests.Common
return (byte)i;
}
+        /// <summary>
+        /// Converts the first vector of normalized floats in <paramref name="source"/> to bytes in
+        /// <paramref name="dest"/> by scaling with 255/256 and adding 32768, then writes the
+        /// intermediate uint lanes and the resulting bytes to the test output.
+        /// </summary>
+        /// <param name="source">The normalized input values; only the first vector is read.</param>
+        /// <param name="dest">The destination bytes; only the first 8 are written.</param>
+        private void MagicConvert(Span<float> source, Span<byte> dest)
+        {
+            Vector<float> magick = new Vector<float>(32768.0f);
+            Vector<float> scale = new Vector<float>(255f) / new Vector<float>(256f);
+
+            Vector<float> x = source.NonPortableCast<float, Vector<float>>()[0];
+
+            x = (x * scale) + magick;
+
+            // Store the vector through a reinterpreted reference to a temporary struct.
+            // The direct form Unsafe.As<Vector<float>, SimdUtils.Octet.OfUInt32>(ref x) was tried
+            // and did not work, so the copy is deliberate.
+            SimdUtils.Octet.OfUInt32 ii = default(SimdUtils.Octet.OfUInt32);
+            ref Vector<float> iiRef = ref Unsafe.As<SimdUtils.Octet.OfUInt32, Vector<float>>(ref ii);
+            iiRef = x;
+
+            ref SimdUtils.Octet.OfByte d = ref dest.NonPortableCast<byte, SimdUtils.Octet.OfByte>()[0];
+            d.LoadFrom(ref ii);
+
+            this.Output.WriteLine(ii.ToString());
+            this.Output.WriteLine(d.ToString());
+        }
+
private static void AssertEvenRoundIsCorrect(Vector r, Vector v)
{
for (int i = 0; i < Vector.Count; i++)
diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs
index 6a108503b..dbb9a6c24 100644
--- a/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs
+++ b/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs
@@ -12,10 +12,9 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
{
public partial class PixelOperationsTests
{
-
- public class Color32 : PixelOperationsTests
+ public class Rgba32 : PixelOperationsTests
{
- public Color32(ITestOutputHelper output)
+ public Rgba32(ITestOutputHelper output)
: base(output)
{
}
@@ -26,19 +25,19 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
[Fact]
public void IsSpecialImplementation()
{
- Assert.IsType(PixelOperations.Instance);
+ Assert.IsType(PixelOperations.Instance);
}
[Fact]
public void ToVector4SimdAligned()
{
- Rgba32[] source = CreatePixelTestData(64);
+ ImageSharp.Rgba32[] source = CreatePixelTestData(64);
Vector4[] expected = CreateExpectedVector4Data(source);
TestOperation(
source,
expected,
- (s, d) => Rgba32.PixelOperations.ToVector4SimdAligned(s, d, 64)
+ (s, d) => ImageSharp.Rgba32.PixelOperations.ToVector4SimdAligned(s, d, 64)
);
}
@@ -51,23 +50,23 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats
int times = 200000;
int count = 1024;
- using (Buffer source = new Buffer(count))
+ using (Buffer source = new Buffer(count))
using (Buffer dest = new Buffer(count))
{
this.Measure(
times,
() =>
{
- PixelOperations.Instance.ToVector4(source, dest, count);
+ PixelOperations.Instance.ToVector4(source, dest, count);
});
}
}
}
- public class Argb : PixelOperationsTests
+ public class Argb32 : PixelOperationsTests
{
// For 4.6 test runner MemberData does not work without redeclaring the public field in the derived test class:
- public Argb(ITestOutputHelper output)
+ public Argb32(ITestOutputHelper output)
: base(output)
{
}