diff --git a/src/ImageSharp/Common/Extensions/SimdUtils.cs b/src/ImageSharp/Common/Extensions/SimdUtils.cs index c9acbc9fc..cb80a672a 100644 --- a/src/ImageSharp/Common/Extensions/SimdUtils.cs +++ b/src/ImageSharp/Common/Extensions/SimdUtils.cs @@ -2,13 +2,13 @@ // Licensed under the Apache License, Version 2.0. using System; +using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace SixLabors.ImageSharp { - using System.Diagnostics; - /// /// Various extension and utility methods for and utilizing SIMD capabilities /// @@ -19,7 +19,6 @@ namespace SixLabors.ImageSharp /// public static readonly bool IsAvx2 = Vector.Count == 8 && Vector.Count == 8; - [Conditional("DEBUG")] internal static void GuardAvx2(string operation) { if (!IsAvx2) @@ -79,11 +78,16 @@ namespace SixLabors.ImageSharp ref Vector srcBase = ref Unsafe.As>(ref source.DangerousGetPinnableReference()); ref Octet.OfByte destBase = ref Unsafe.As(ref dest.DangerousGetPinnableReference()); + int n = source.Length / 8; Vector magick = new Vector(32768.0f); Vector scale = new Vector(255f) / new Vector(256f); - int n = source.Length; + // need to copy to a temporal struct, because + // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x) + // does not work. TODO: This might be a CoreClr bug, need to ask/report + var temp = default(Octet.OfUInt32); + ref Vector tempRef = ref Unsafe.As>(ref temp); for (int i = 0; i < n; i++) { @@ -92,13 +96,10 @@ namespace SixLabors.ImageSharp // return (uint8_t)u.i; Vector x = Unsafe.Add(ref srcBase, i); x = (x * scale) + magick; - - Vector u = Vector.AsVectorUInt32(x); - - Octet.OfUInt32 ii = Unsafe.As, Octet.OfUInt32>(ref u); + tempRef = x; ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i); - d.LoadFrom(ref ii); + d.LoadFrom(ref temp); } } @@ -118,11 +119,16 @@ namespace SixLabors.ImageSharp ref Vector srcBase = ref Unsafe.As>(ref source.DangerousGetPinnableReference()); ref Octet.OfByte destBase = ref Unsafe.As(ref dest.DangerousGetPinnableReference()); + int n = source.Length / 8; Vector magick = new Vector(32768.0f); Vector scale = new Vector(255f) / new Vector(256f); - int n = source.Length; + // need to copy to a temporal struct, because + // SimdUtils.Octet.OfUInt32 temp = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x) + // does not work. TODO: This might be a CoreClr bug, need to ask/report + var temp = default(Octet.OfUInt32); + ref Vector tempRef = ref Unsafe.As>(ref temp); for (int i = 0; i < n; i++) { @@ -134,27 +140,80 @@ namespace SixLabors.ImageSharp x = Vector.Min(x, Vector.One); x = (x * scale) + magick; - - Vector u = Vector.AsVectorUInt32(x); - - Octet.OfUInt32 ii = Unsafe.As, Octet.OfUInt32>(ref u); + tempRef = x; ref Octet.OfByte d = ref Unsafe.Add(ref destBase, i); - d.LoadFrom(ref ii); + d.LoadFrom(ref temp); } } -#pragma warning disable SA1132 // Do not combine fields - private static class Octet + // TODO: Replace these with T4-d library level tuples! + internal static class Octet { + [StructLayout(LayoutKind.Explicit, Size = 8 * sizeof(uint))] public struct OfUInt32 { - public uint V0, V1, V2, V3, V4, V5, V6, V7; + [FieldOffset(0 * sizeof(uint))] + public uint V0; + + [FieldOffset(1 * sizeof(uint))] + public uint V1; + + [FieldOffset(2 * sizeof(uint))] + public uint V2; + + [FieldOffset(3 * sizeof(uint))] + public uint V3; + + [FieldOffset(4 * sizeof(uint))] + public uint V4; + + [FieldOffset(5 * sizeof(uint))] + public uint V5; + + [FieldOffset(6 * sizeof(uint))] + public uint V6; + + [FieldOffset(7 * sizeof(uint))] + public uint V7; + + public override string ToString() + { + return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; + } } + [StructLayout(LayoutKind.Explicit, Size = 8)] public struct OfByte { - public byte V0, V1, V2, V3, V4, V5, V6, V7; + [FieldOffset(0)] + public byte V0; + + [FieldOffset(1)] + public byte V1; + + [FieldOffset(2)] + public byte V2; + + [FieldOffset(3)] + public byte V3; + + [FieldOffset(4)] + public byte V4; + + [FieldOffset(5)] + public byte V5; + + [FieldOffset(6)] + public byte V6; + + [FieldOffset(7)] + public byte V7; + + public override string ToString() + { + return $"[{this.V0},{this.V1},{this.V2},{this.V3},{this.V4},{this.V5},{this.V6},{this.V7}]"; + } public void LoadFrom(ref OfUInt32 i) { diff --git a/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs b/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs index e80f0e9b8..6f4f93d87 100644 --- a/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs +++ b/src/ImageSharp/PixelFormats/Rgba32.PixelOperations.cs @@ -101,7 +101,6 @@ namespace SixLabors.ImageSharp } int remainder = count % Vector.Count; - int alignedCount = count - remainder; if (alignedCount > 0) @@ -117,6 +116,35 @@ namespace SixLabors.ImageSharp } } + internal override void PackFromVector4(Span sourceVectors, Span destColors, int count) + { + GuardSpans(sourceVectors, nameof(sourceVectors), destColors, nameof(destColors), count); + + if (!SimdUtils.IsAvx2) + { + base.PackFromVector4(sourceVectors, destColors, count); + return; + } + + int remainder = count % 2; + int alignedCount = count - remainder; + + if (alignedCount > 0) + { + Span flatSrc = sourceVectors.Slice(0, alignedCount).NonPortableCast(); + Span flatDest = destColors.NonPortableCast(); + + SimdUtils.BulkConvertNormalizedFloatToByteClampOverflows(flatSrc, flatDest); + } + + if (remainder > 0) + { + // actually: remainder == 1 + int lastIdx = count - 1; + destColors[lastIdx].PackFromVector4(sourceVectors[lastIdx]); + } + } + /// internal override void PackFromRgba32(Span source, Span destPixels, int count) { diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs new file mode 100644 index 000000000..e88981959 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PackFromVector4.cs @@ -0,0 +1,65 @@ +// ReSharper disable InconsistentNaming +namespace SixLabors.ImageSharp.Benchmarks.Color.Bulk +{ + using System.Numerics; + + using BenchmarkDotNet.Attributes; + + using SixLabors.ImageSharp.Memory; + using SixLabors.ImageSharp.PixelFormats; + + [Config(typeof(Config.Short))] + public abstract class PackFromVector4 + where TPixel : struct, IPixel + { + private Buffer source; + + private Buffer destination; + + [Params(16, 128, 512)] + public int Count { get; set; } + + [GlobalSetup] + public void Setup() + { + this.destination = new Buffer(this.Count); + this.source = new Buffer(this.Count); + } + + [GlobalCleanup] + public void Cleanup() + { + this.destination.Dispose(); + this.source.Dispose(); + } + + [Benchmark(Baseline = true)] + public void PerElement() + { + Vector4[] s = this.source.Array; + TPixel[] d = this.destination.Array; + + for (int i = 0; i < this.Count; i++) + { + d[i].PackFromVector4(s[i]); + } + } + + [Benchmark] + public void CommonBulk() + { + new PixelOperations().PackFromVector4(this.source, this.destination, this.Count); + } + + [Benchmark] + public void OptimizedBulk() + { + PixelOperations.Instance.PackFromVector4(this.source, this.destination, this.Count); + } + } + + public class PackFromVector4_Rgba32 : PackFromVector4 + { + + } +} \ No newline at end of file diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index cb2591999..44762a243 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -182,6 +182,21 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(expected, actual); } + [Fact] + private void BulkConvertNormalizedFloatToByte_Step() + { + float[] source = {0, 7, 42, 255, 0.5f, 1.1f, 2.6f, 16f}; + byte[] expected = source.Select(f => (byte)Math.Round(f)).ToArray(); + + source = source.Select(f => f / 255f).ToArray(); + + byte[] dest = new byte[8]; + + this.MagicConvert(source, dest); + + Assert.Equal(expected, dest); + } + private static byte MagicConvert(float x) { float f = 32768.0f + x; @@ -189,6 +204,30 @@ namespace SixLabors.ImageSharp.Tests.Common return (byte)i; } + private void MagicConvert(Span source, Span dest) + { + Vector magick = new Vector(32768.0f); + Vector scale = new Vector(255f) / new Vector(256f); + + Vector x = source.NonPortableCast>()[0]; + + x = (x * scale) + magick; + + SimdUtils.Octet.OfUInt32 ii = default(SimdUtils.Octet.OfUInt32); + + ref Vector iiRef = ref Unsafe.As>(ref ii); + + iiRef = x; + + //SimdUtils.Octet.OfUInt32 ii = Unsafe.As, SimdUtils.Octet.OfUInt32>(ref x); + + ref SimdUtils.Octet.OfByte d = ref dest.NonPortableCast()[0]; + d.LoadFrom(ref ii); + + this.Output.WriteLine(ii.ToString()); + this.Output.WriteLine(d.ToString()); + } + private static void AssertEvenRoundIsCorrect(Vector r, Vector v) { for (int i = 0; i < Vector.Count; i++) diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs index 6a108503b..dbb9a6c24 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelOperationsTests.cs @@ -12,10 +12,9 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats { public partial class PixelOperationsTests { - - public class Color32 : PixelOperationsTests + public class Rgba32 : PixelOperationsTests { - public Color32(ITestOutputHelper output) + public Rgba32(ITestOutputHelper output) : base(output) { } @@ -26,19 +25,19 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [Fact] public void IsSpecialImplementation() { - Assert.IsType(PixelOperations.Instance); + Assert.IsType(PixelOperations.Instance); } [Fact] public void ToVector4SimdAligned() { - Rgba32[] source = CreatePixelTestData(64); + ImageSharp.Rgba32[] source = CreatePixelTestData(64); Vector4[] expected = CreateExpectedVector4Data(source); TestOperation( source, expected, - (s, d) => Rgba32.PixelOperations.ToVector4SimdAligned(s, d, 64) + (s, d) => ImageSharp.Rgba32.PixelOperations.ToVector4SimdAligned(s, d, 64) ); } @@ -51,23 +50,23 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats int times = 200000; int count = 1024; - using (Buffer source = new Buffer(count)) + using (Buffer source = new Buffer(count)) using (Buffer dest = new Buffer(count)) { this.Measure( times, () => { - PixelOperations.Instance.ToVector4(source, dest, count); + PixelOperations.Instance.ToVector4(source, dest, count); }); } } } - public class Argb : PixelOperationsTests + public class Argb32 : PixelOperationsTests { // For 4.6 test runner MemberData does not work without redeclaring the public field in the derived test class: - public Argb(ITestOutputHelper output) + public Argb32(ITestOutputHelper output) : base(output) { }