diff --git a/src/ImageSharp/Common/Helpers/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs similarity index 56% rename from src/ImageSharp/Common/Helpers/IComponentShuffle.cs rename to src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs index e354a57b0..7687a5b95 100644 --- a/src/ImageSharp/Common/Helpers/IComponentShuffle.cs +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -6,6 +6,9 @@ using System.Buffers.Binary; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +// The JIT can detect and optimize rotation idioms ROTL (Rotate Left) +// and ROTR (Rotate Right) emitting efficient CPU instructions: +// https://github.com/dotnet/coreclr/pull/1830 namespace SixLabors.ImageSharp { /// @@ -28,15 +31,32 @@ namespace SixLabors.ImageSharp void RunFallbackShuffle(ReadOnlySpan source, Span dest); } - internal readonly struct DefaultShuffle4 : IComponentShuffle + /// + internal interface IShuffle4 : IComponentShuffle { + } + + internal readonly struct DefaultShuffle4 : IShuffle4 + { + private readonly byte p3; + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) - : this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0)) { + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + + this.p3 = p3; + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); } - public DefaultShuffle4(byte control) => this.Control = control; - public byte Control { get; } [MethodImpl(InliningOptions.ShortMethod)] @@ -44,12 +64,11 @@ namespace SixLabors.ImageSharp { ref byte sBase = ref MemoryMarshal.GetReference(source); ref byte dBase = ref MemoryMarshal.GetReference(dest); - 
SimdUtils.Shuffle.InverseMmShuffle( - this.Control, - out int p3, - out int p2, - out int p1, - out int p0); + + int p3 = this.p3; + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; for (int i = 0; i < source.Length; i += 4) { @@ -61,22 +80,22 @@ namespace SixLabors.ImageSharp } } - internal readonly struct WXYZShuffle4 : IComponentShuffle + internal readonly struct WXYZShuffle4 : IShuffle4 { - public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); - - // The JIT can detect and optimize rotation idioms ROTL (Rotate Left) - // and ROTR (Rotate Right) emitting efficient CPU instructions: - // https://github.com/dotnet/coreclr/pull/1830 - for (int i = 0; i < s.Length; i++) + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); @@ -87,19 +106,22 @@ namespace SixLabors.ImageSharp } } - internal readonly struct WZYXShuffle4 : IComponentShuffle + internal readonly struct WZYXShuffle4 : IShuffle4 { - public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref 
MemoryMarshal.GetReference(d); + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); @@ -110,19 +132,22 @@ namespace SixLabors.ImageSharp } } - internal readonly struct YZWXShuffle4 : IComponentShuffle + internal readonly struct YZWXShuffle4 : IShuffle4 { - public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); @@ -133,19 +158,22 @@ namespace SixLabors.ImageSharp } } - internal readonly struct ZYXWShuffle4 : IComponentShuffle + internal readonly struct ZYXWShuffle4 : IShuffle4 { - public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + } [MethodImpl(InliningOptions.ShortMethod)] public void RunFallbackShuffle(ReadOnlySpan source, Span dest) { - ReadOnlySpan s = MemoryMarshal.Cast(source); - Span d = MemoryMarshal.Cast(dest); - ref uint sBase = ref MemoryMarshal.GetReference(s); - ref uint dBase = ref MemoryMarshal.GetReference(d); + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref 
uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < n; i++) { uint packed = Unsafe.Add(ref sBase, i); diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs new file mode 100644 index 000000000..0c2b1d508 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -0,0 +1,103 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IPad3Shuffle4 : IComponentShuffle + { + } + + internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 + { + private readonly byte p3; + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) + { + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + + this.p3 = p3; + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p3 = this.p3; + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + Span temp = stackalloc byte[4]; + ref byte t = ref MemoryMarshal.GetReference(temp); + ref uint tu = ref Unsafe.As(ref t); + + for (int i = 0, j = 0; i < source.Length; i += 3, j += 4) + { + ref var s = ref Unsafe.Add(ref sBase, i); + tu = Unsafe.As(ref s) | 0xFF000000; + + 
Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0); + Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1); + Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2); + Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3); + } + } + } + + internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + ref byte sEnd = ref Unsafe.Add(ref sBase, source.Length); + ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4); + + while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd)) + { + Unsafe.As(ref dBase) = Unsafe.As(ref sBase) | 0xFF000000; + + sBase = ref Unsafe.Add(ref sBase, 3); + dBase = ref Unsafe.Add(ref dBase, 4); + } + + while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.Add(ref sBase, 0); + Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1); + Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2); + Unsafe.Add(ref dBase, 3) = byte.MaxValue; + + sBase = ref Unsafe.Add(ref sBase, 3); + dBase = ref Unsafe.Add(ref dBase, 4); + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs new file mode 100644 index 000000000..61e99890e --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -0,0 +1,53 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IShuffle3 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle3 : IShuffle3 + { + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle3(byte p2, byte p1, byte p0) + { + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); + + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0; i < source.Length; i += 3) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs new file mode 100644 index 000000000..86e4174f1 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -0,0 +1,101 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IShuffle4Slice3 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 + { + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) + { + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j); + } + } + } + + internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / 4; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + ref uint sLoopEnd = ref Unsafe.Add(ref sBase, u); + ref uint sEnd = ref 
Unsafe.Add(ref sBase, n); + + while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + Unsafe.Add(ref dBase, 1) = Unsafe.As(ref Unsafe.Add(ref sBase, 1)); + Unsafe.Add(ref dBase, 2) = Unsafe.As(ref Unsafe.Add(ref sBase, 2)); + Unsafe.Add(ref dBase, 3) = Unsafe.As(ref Unsafe.Add(ref sBase, 3)); + + sBase = ref Unsafe.Add(ref sBase, 4); + dBase = ref Unsafe.Add(ref dBase, 4); + } + + while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + + sBase = ref Unsafe.Add(ref sBase, 1); + dBase = ref Unsafe.Add(ref dBase, 1); + } + } + } + + [StructLayout(LayoutKind.Explicit, Size = 3)] + internal readonly struct Byte3 + { + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 782328edd..2ea7f2c9b 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -18,6 +18,10 @@ namespace SixLabors.ImageSharp public static ReadOnlySpan PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; + private static ReadOnlySpan ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 }; + + private static ReadOnlySpan ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 }; + /// /// Shuffle single-precision (32-bit) floating-point elements in /// using the control and store the results in . @@ -26,7 +30,7 @@ namespace SixLabors.ImageSharp /// The destination span of floats. /// The byte control. 
[MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4ChannelReduce( + public static void Shuffle4Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -41,7 +45,7 @@ namespace SixLabors.ImageSharp if (adjustedCount > 0) { - Shuffle4Channel( + Shuffle4( source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount), control); @@ -53,14 +57,14 @@ namespace SixLabors.ImageSharp } /// - /// Shuffle 8-bit integers in a within 128-bit lanes in + /// Shuffle 8-bit integers within 128-bit lanes in /// using the control and store the results in . /// /// The source span of bytes. /// The destination span of bytes. /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4ChannelReduce( + public static void Shuffle4Reduce( ref ReadOnlySpan source, ref Span dest, byte control) @@ -75,7 +79,7 @@ namespace SixLabors.ImageSharp if (adjustedCount > 0) { - Shuffle4Channel( + Shuffle4( source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount), control); @@ -86,8 +90,106 @@ namespace SixLabors.ImageSharp } } + /// + /// Shuffles 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 3); + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle3( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. 
+ /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Pad3Shuffle4Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 3); + + int sourceCount = source.Length - remainder; + int destCount = sourceCount * 4 / 3; + + if (sourceCount > 0) + { + Pad3Shuffle4( + source.Slice(0, sourceCount), + dest.Slice(0, destCount), + control); + + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); + } + } + } + + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. [MethodImpl(InliningOptions.ShortMethod)] - private static void Shuffle4Channel( + public static void Shuffle4Slice3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 4); + + int sourceCount = source.Length - remainder; + int destCount = sourceCount * 3 / 4; + + if (sourceCount > 0) + { + Shuffle4Slice3( + source.Slice(0, sourceCount), + dest.Slice(0, destCount), + control); + + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -165,7 +267,7 @@ namespace SixLabors.ImageSharp } [MethodImpl(InliningOptions.ShortMethod)] - private static void Shuffle4Channel( + private static void Shuffle4( ReadOnlySpan source, Span dest, byte control) @@ -177,7 +279,7 @@ namespace SixLabors.ImageSharp // We can add static ROS instances if need be in the future. 
Span bytes = stackalloc byte[Vector256.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector256 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -194,17 +296,17 @@ namespace SixLabors.ImageSharp ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); - vd0 = Avx2.Shuffle(vs0, vcm); - Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vcm); - Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vcm); - Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + vd0 = Avx2.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); } if (m > 0) { for (int i = u; i < n; i++) { - Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); } } } @@ -213,7 +315,7 @@ namespace SixLabors.ImageSharp // Ssse3 Span bytes = stackalloc byte[Vector128.Count]; Shuffle.MmShuffleSpan(ref bytes, control); - Vector128 vcm = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); ref Vector128 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); @@ -230,22 +332,186 @@ namespace SixLabors.ImageSharp ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); - vd0 = Ssse3.Shuffle(vs0, vcm); - Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vcm); - Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vcm); - Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vcm); + vd0 = 
Ssse3.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); } if (m > 0) { for (int i = u; i < n; i++) { - Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vcm); + Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); } } } } + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0; i < n; i += 3) + { + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); + + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle); + + v0 = Ssse3.Shuffle(v0, vmaske); + v1 = Ssse3.Shuffle(v1, vmasko); + v2 = 
Ssse3.Shuffle(v2, vmaske); + v3 = Ssse3.Shuffle(v3, vmasko); + + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, i); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); + Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 3, j += 4) + { + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 1) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 2) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 3) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4Slice3( + 
ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 4, j += 3) + { + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); + Vector128 v3 = Unsafe.Add(ref vs, 3); + + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vshuffle), vmaske); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko); + + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; + } + } + } + /// /// Performs a multiplication and an addition of the . /// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs index a4a40fb4f..07744566a 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -18,21 +18,21 @@ namespace SixLabors.ImageSharp /// The destination span of floats. /// The byte control. 
[MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Channel( + public static void Shuffle4( ReadOnlySpan source, Span dest, byte control) { - VerifyShuffleSpanInput(source, dest); + VerifyShuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control); + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control); #endif // Deal with the remainder: if (source.Length > 0) { - ShuffleRemainder4Channel(source, dest, control); + Shuffle4Remainder(source, dest, control); } } @@ -44,16 +44,16 @@ namespace SixLabors.ImageSharp /// The destination span of bytes. /// The type of shuffle to perform. [MethodImpl(InliningOptions.ShortMethod)] - public static void Shuffle4Channel( + public static void Shuffle4( ReadOnlySpan source, Span dest, TShuffle shuffle) - where TShuffle : struct, IComponentShuffle + where TShuffle : struct, IShuffle4 { - VerifyShuffleSpanInput(source, dest); + VerifyShuffle4SpanInput(source, dest); #if SUPPORTS_RUNTIME_INTRINSICS - HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control); + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); #endif // Deal with the remainder: @@ -63,7 +63,88 @@ namespace SixLabors.ImageSharp } } - public static void ShuffleRemainder4Channel( + /// + /// Shuffle 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle3 + { + VerifyShuffle3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IPad3Shuffle4 + { + VerifyPad3Shuffle4SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. 
+ [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Slice3( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle4Slice3 + { + VerifyShuffle4Slice3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + private static void Shuffle4Remainder( ReadOnlySpan source, Span dest, byte control) @@ -82,7 +163,22 @@ namespace SixLabors.ImageSharp } [Conditional("DEBUG")] - private static void VerifyShuffleSpanInput(ReadOnlySpan source, Span dest) + private static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) + where T : struct + { + DebugGuard.IsTrue( + source.Length == dest.Length, + nameof(source), + "Input spans must be of same length!"); + + DebugGuard.IsTrue( + source.Length % 4 == 0, + nameof(source), + "Input spans must be divisable by 4!"); + } + + [Conditional("DEBUG")] + private static void VerifyShuffle3SpanInput(ReadOnlySpan source, Span dest) where T : struct { DebugGuard.IsTrue( @@ -90,10 +186,48 @@ namespace SixLabors.ImageSharp nameof(source), "Input spans must be of same length!"); + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input spans must be divisable by 3!"); + } + + [Conditional("DEBUG")] + private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input span must be divisable by 3!"); + + DebugGuard.IsTrue( + dest.Length % 4 == 0, + nameof(dest), + "Output span must be divisable by 4!"); + + DebugGuard.IsTrue( + source.Length == dest.Length * 3 / 4, + nameof(source), + "Input span must be 3/4 the length of the output span!"); + } + + [Conditional("DEBUG")] + private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan source, Span dest) + { DebugGuard.IsTrue( 
source.Length % 4 == 0, nameof(source), - "Input spans must be divisiable by 4!"); + "Input span must be divisable by 4!"); + + DebugGuard.IsTrue( + dest.Length % 3 == 0, + nameof(dest), + "Output span must be divisable by 3!"); + + DebugGuard.IsTrue( + dest.Length >= source.Length * 3 / 4, + nameof(source), + "Output span must be at least 3/4 the length of the input span!"); } public static class Shuffle diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs index 3f48d2acc..d30616997 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs @@ -106,23 +106,59 @@ namespace SixLabors.ImageSharp.PixelFormats Span dest = MemoryMarshal.Cast(destinationPixels); PixelConverter.FromBgra32.ToArgb32(source, dest); } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, dest); + } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref 
MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); + } - dp.FromArgb32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); } /// @@ -197,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromArgb32(sp); - } - } - /// public override void 
ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs index b73bb8b83..50d4942ec 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs @@ -52,146 +52,182 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); 
+ Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); } - /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); } - /// - public override void ToL8(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = 
MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L8 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); } - /// - public override void ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L16 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = 
MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); } /// - public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToL8(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La16 dp = ref Unsafe.Add(ref destRef, i); + ref L8 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La32 dp = ref Unsafe.Add(ref destRef, i); + ref L16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); 
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); + ref La16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); + ref La32 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs index 8cf2d5850..b38e5f19d 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs @@ -106,23 +106,59 @@ namespace SixLabors.ImageSharp.PixelFormats Span dest = MemoryMarshal.Cast(destinationPixels); PixelConverter.FromArgb32.ToBgra32(source, dest); } + /// + public override void ToRgb24( + Configuration 
configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); + } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgra32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); + } - dp.FromBgra32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, 
nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); } /// @@ -197,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromBgra32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs index 332683fc7..9a4173892 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs @@ -52,59 +52,114 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); 
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span 
destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, dest); + } + /// + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgra32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = 
MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); + } - dp.FromRgb24(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); } /// @@ -179,24 +234,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgb24(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs index 9a36ec29a..5b60ec10e 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs @@ -95,23 +95,59 @@ namespace SixLabors.ImageSharp.PixelFormats Span dest = MemoryMarshal.Cast(destinationPixels); 
PixelConverter.FromBgra32.ToRgba32(source, dest); } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, dest); + } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); + } - dp.FromRgba32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, 
nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); } /// @@ -186,24 +222,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgba32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude index d8b5286cd..b728b0115 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude @@ -17,7 +17,7 @@ using System.Runtime.InteropServices; <#+ static readonly string[] CommonPixelTypes = { "Argb32", "Bgr24", "Bgra32", "L8", "L16", "La16", "La32", "Rgb24", "Rgba32", "Rgb48", "Rgba64", "Bgra5551" }; - static readonly string[] Optimized32BitTypes = { "Rgba32", "Argb32", "Bgra32" }; + static readonly string[] OptimizedPixelTypes = { "Rgba32", "Argb32", "Bgra32", "Rgb24", "Bgr24" }; // Types with Rgba32-combatible to/from Vector4 conversion static readonly string[] Rgba32CompatibleTypes = { "Argb32", "Bgra32", "Rgb24", 
"Bgr24" }; @@ -148,8 +148,8 @@ using System.Runtime.InteropServices; GenerateRgba32CompatibleVector4ConversionMethods(pixelType, pixelType.EndsWith("32")); } - var matching32BitTypes = Optimized32BitTypes.Contains(pixelType) ? - Optimized32BitTypes.Where(p => p != pixelType) : + var matching32BitTypes = OptimizedPixelTypes.Contains(pixelType) ? + OptimizedPixelTypes.Where(p => p != pixelType) : Enumerable.Empty(); foreach (string destPixelType in matching32BitTypes) diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index ab9011a5c..7215fa860 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); /// /// Converts a representing a collection of @@ -36,7 +36,25 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); } public static class FromArgb32 @@ -48,7 +66,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); /// /// Converts a representing a collection of @@ -57,7 +75,25 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToBgra32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); } public static class FromBgra32 @@ -69,7 +105,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToArgb32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); /// /// Converts a representing a collection of @@ -78,7 +114,103 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils /// [MethodImpl(InliningOptions.ShortMethod)] public static void ToRgba32(ReadOnlySpan source, Span dest) - => SimdUtils.Shuffle4Channel(source, dest, default); + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + } + + public static class FromRgb24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); + } + + public static class FromBgr24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. 
+ /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); } } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index dc030e07a..04ca8cd65 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -30,7 +30,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk protected Configuration Configuration => Configuration.Default; // [Params(64, 2048)] - [Params(1024)] + [Params(64, 256, 2048)] public int Count { get; set; } [GlobalSetup] @@ -58,7 +58,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk } } - [Benchmark] + [Benchmark(Baseline = true)] public void PixelOperations_Base() { new PixelOperations().FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan()); @@ -91,7 +91,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk SimdUtils.BasicIntrinsics256.NormalizedFloatToByteSaturate(sBytes, dFloats); } - [Benchmark(Baseline = true)] + [Benchmark] public void ExtendedIntrinsic() { Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs new file mode 100644 index 000000000..5da6edc6b --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs @@ -0,0 +1,55 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class FromVector4_Rgb24 : FromVector4 + { + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 64 | 343.2 ns | 305.91 ns | 16.77 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 64 | 320.8 ns | 19.93 ns | 1.09 ns | 0.94 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 64 | 234.3 ns | 17.98 ns | 0.99 ns | 1.00 | 0.00 | 0.0052 | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 64 | 246.0 ns | 82.34 ns | 4.51 ns | 1.05 | 0.02 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 64 | 222.3 ns | 39.46 ns | 2.16 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 64 | 243.4 ns | 33.58 ns | 1.84 ns | 1.09 | 0.01 | - | - | - | - | +// | |
| | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 256 | 824.9 ns | 32.77 ns | 1.80 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 256 | 967.0 ns | 39.09 ns | 2.14 ns | 1.17 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 256 | 756.9 ns | 94.43 ns | 5.18 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 256 | 1,003.3 ns | 3,192.09 ns | 174.97 ns | 1.32 | 0.22 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 256 | 748.6 ns | 248.03 ns | 13.60 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 256 | 437.0 ns | 36.48 ns | 2.00 ns | 0.58 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 2048 | 5,751.6 ns | 704.24 ns | 38.60 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 2048 | 4,391.6 ns | 718.17 ns | 39.37 ns | 0.76 | 0.00 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 2048 | 6,202.0 ns | 1,815.18 ns | 99.50 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 2048 | 4,225.6 ns | 1,004.03 ns | 55.03 ns | 0.68 | 0.01 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 2048 | 6,157.1 ns | 2,516.98 ns | 137.96 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 2048 | 1,822.7 ns | 1,764.43 ns | 96.71 ns | 0.30 | 0.02 | 0.0172 | - | - | 72 B | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs new 
file mode 100644 index 000000000..4af028605 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -0,0 +1,87 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Pad3Shuffle4Channel + { + private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2); + private static readonly XYZWPad3Shuffle4 ControlFast = default; + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count * 4 / 3]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Pad3Shuffle4() + { + SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); + } + + [Benchmark] + public void Pad3Shuffle4FastFallback() + { + SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast); + } + } + + // 2020-10-30 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. 
SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |------------------------- |------------------- |-------------------------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|------:|------:|------:|----------:| + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.63 ns | 0.175 ns | 0.155 ns | 23.65 ns | 0.15 | 0.01 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 96 | 24.84 ns | 0.376 ns | 0.333 ns | 24.74 ns | 1.57 | 0.06 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 41.41 ns | 0.859 ns | 1.204 ns | 41.33 ns | 0.16 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 384 | 40.74 ns | 0.624 ns | 0.584 ns | 40.72 ns | 0.55 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 62.86 ns | 0.332 ns | 0.277 ns | 62.80 ns | 0.12 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 768 | 64.72 ns | 1.306 ns | 1.090 ns | 64.51 ns | 0.59 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 110.05 ns | 0.256 ns | 0.214 ns | 110.04 ns | 0.11 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs new file mode 100644 index 000000000..3667b973e --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs @@ -0,0 +1,64 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle3Channel + { + private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2); + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle3() + { + SimdUtils.Shuffle3(this.source, this.destination, Control); + } + } + + // 2020-11-02 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. 
AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 48.46 ns | 1.034 ns | 2.438 ns | 47.46 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 96 | 32.42 ns | 0.537 ns | 0.476 ns | 32.34 ns | 0.66 | 0.04 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 32.51 ns | 0.373 ns | 0.349 ns | 32.56 ns | 0.66 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 199.04 ns | 1.512 ns | 1.180 ns | 199.17 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 384 | 71.20 ns | 2.654 ns | 7.784 ns | 69.60 ns | 0.41 | 0.02 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 63.23 ns | 0.569 ns | 0.505 ns | 63.21 ns | 0.32 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 391.28 ns | 5.087 ns | 3.972 ns | 391.22 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 768 | 109.12 ns | 2.149 ns | 2.010 ns | 108.66 ns | 0.28 | 0.01 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 106.51 ns | 0.734 ns | 0.613 ns | 106.56 ns | 0.27 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. 
No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs new file mode 100644 index 000000000..9cf24ccd6 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -0,0 +1,95 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle4Slice3Channel + { + private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2); + private static readonly XYZWShuffle4Slice3 ControlFast = default; + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[(int)(this.Count * (3 / 4F))]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Slice3() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); + } + + [Benchmark] + public void Shuffle4Slice3FastFallback() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 
(CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 27.15 ns | 0.556 ns | 0.762 ns | 27.34 ns | 0.41 | 0.03 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 128 | 26.15 ns | 0.113 ns | 0.106 ns | 26.16 ns | 1.01 | 0.02 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. 
AVX | Empty | 256 | 32.61 ns | 0.107 ns | 0.095 ns | 32.62 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 256 | 32.16 ns | 0.111 ns | 0.104 ns | 32.16 ns | 0.61 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 51.03 ns | 0.535 ns | 0.501 ns | 51.18 ns | 0.24 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 512 | 50.33 ns | 0.382 ns | 0.339 ns | 50.41 ns | 0.42 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. 
AVX | Empty | 1024 | 77.13 ns | 1.355 ns | 2.264 ns | 76.19 ns | 0.19 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 1024 | 80.25 ns | 1.647 ns | 2.082 ns | 80.98 ns | 0.35 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 128.41 ns | 0.417 ns | 0.390 ns | 128.24 ns | 0.16 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. 
SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs index 749859eac..db4947001 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, default); + SimdUtils.Shuffle4(this.source, this.destination, default); } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs index 6f5b5001b..86b1f766e 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Config(typeof(Config.HwIntrinsics_SSE_AVX))] public class ShuffleFloat4Channel { - private static readonly byte control = default(WXYZShuffle4).Control; + private static readonly byte Control = default(WXYZShuffle4).Control; private float[] source; private float[] destination; @@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk [Benchmark] public void Shuffle4Channel() { - SimdUtils.Shuffle4Channel(this.source, this.destination, control); + SimdUtils.Shuffle4(this.source, this.destination, Control); } } diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs new file mode 100644 index 000000000..aecd41831 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs @@ -0,0 +1,65 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. 
+ +using BenchmarkDotNet.Attributes; + +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class ToVector4_Rgb24 : ToVector4 + { + [Benchmark(Baseline = true)] + public void PixelOperations_Base() + { + new PixelOperations().ToVector4( + this.Configuration, + this.source.GetSpan(), + this.destination.GetSpan()); + } + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-OIBEDX : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-OPAORC : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-VPSIRL : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 64 | 298.4 ns | 33.63 ns | 1.84 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 64 | 355.5 ns | 908.51 ns | 49.80 ns | 1.19 | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 64 | 220.1 ns | 13.77 ns | 0.75 ns | 1.00 | 0.00 | 0.0055 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 64 | 228.5 ns | 41.41 ns | 2.27 ns | 1.04 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET
Core 3.1 | 64 | 213.6 ns | 12.47 ns | 0.68 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 64 | 217.0 ns | 9.95 ns | 0.55 ns | 1.02 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 256 | 829.0 ns | 242.93 ns | 13.32 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 256 | 448.9 ns | 4.04 ns | 0.22 ns | 0.54 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 256 | 863.0 ns | 1,253.26 ns | 68.70 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 256 | 309.2 ns | 66.16 ns | 3.63 ns | 0.36 | 0.03 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 256 | 737.0 ns | 253.90 ns | 13.92 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 256 | 212.3 ns | 1.07 ns | 0.06 ns | 0.29 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 2048 | 5,625.6 ns | 404.35 ns | 22.16 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 2048 | 1,974.1 ns | 229.84 ns | 12.60 ns | 0.35 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 2048 | 5,467.2 ns | 537.29 ns | 29.45 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 2048 | 1,985.5 ns | 4,714.23 ns | 258.40 ns | 0.36 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 2048 | 5,888.2 ns | 1,622.23 ns | 88.92 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 2048 | 1,165.0 ns | 191.71 ns | 10.51 ns | 0.20 
| 0.00 | - | - | - | - | diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs index 06f61e617..f1bfaa4ad 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Common TestShuffleFloat4Channel( size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control), + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, control), control); } @@ -39,56 +39,51 @@ namespace SixLabors.ImageSharp.Tests.Common static void RunTest(string serialized) { int size = FeatureTestRunner.Deserialize(serialized); - foreach (var item in ArraySizesDivisibleBy4) - { - // These cannot be expressed as a theory as you cannot - // use RemoteExecutor within generic methods nor pass - // IComponentShuffle to the generic utils method. - foreach (var count in item) - { - WXYZShuffle4 wxyz = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wxyz), - wxyz.Control); - - WZYXShuffle4 wzyx = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wzyx), - wzyx.Control); - - YZWXShuffle4 yzwx = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yzwx), - yzwx.Control); - - ZYXWShuffle4 zyxw = default; - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, zyxw), - zyxw.Control); - - var xwyz = new DefaultShuffle4(2, 1, 3, 0); - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, xwyz), - xwyz.Control); - - var yyyy = new DefaultShuffle4(1, 1, 1, 1); - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, yyyy), - yyyy.Control); - - var wwww = new DefaultShuffle4(3, 3, 3, 3); - TestShuffleByte4Channel( - size, - (s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, wwww), - 
wwww.Control); - } - } + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle4 to the generic utils method. + WXYZShuffle4 wxyz = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), + wxyz.Control); + + WZYXShuffle4 wzyx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), + wzyx.Control); + + YZWXShuffle4 yzwx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), + yzwx.Control); + + ZYXWShuffle4 zyxw = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), + zyxw.Control); + + var xwyz = new DefaultShuffle4(2, 1, 3, 0); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4(1, 1, 1, 1); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4(3, 3, 3, 3); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), + wwww.Control); } FeatureTestRunner.RunWithHwIntrinsicsFeature( @@ -97,6 +92,132 @@ namespace SixLabors.ImageSharp.Tests.Common HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); } + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkShuffleByte3Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle3 to the generic utils method. 
+ var zyx = new DefaultShuffle3(0, 1, 2); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), + zyx.Control); + + var xyz = new DefaultShuffle3(2, 1, 0); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), + xyz.Control); + + var yyy = new DefaultShuffle3(1, 1, 1); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), + yyy.Control); + + var zzz = new DefaultShuffle3(2, 2, 2); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), + zzz.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkPad3Shuffle4Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IPad3Shuffle4 to the generic utils method. 
+ XYZWPad3Shuffle4 xyzw = default; + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), + wwww.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffle4Slice3Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle4Slice3 to the generic utils method. 
+ XYZWShuffle4Slice3 xyzw = default; + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), + wwww.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + } + private static void TestShuffleFloat4Channel( int count, Action, Memory> convert, @@ -157,5 +278,122 @@ namespace SixLabors.ImageSharp.Tests.Common Assert.Equal(expected, result); } + + private static void TestShuffleByte3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 3) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } + + private static void TestPad3Shuffle4Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new byte[count * 4 / 3]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < 
expected.Length; i += 4, j += 3) + { + expected[p0 + i] = source[j]; + expected[p1 + i] = source[j + 1]; + expected[p2 + i] = source[j + 2]; + expected[p3 + i] = byte.MaxValue; + } + + Span temp = stackalloc byte[4]; + for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3) + { + temp[0] = source[j]; + temp[1] = source[j + 1]; + temp[2] = source[j + 2]; + temp[3] = byte.MaxValue; + + expected[i] = temp[p0]; + expected[i + 1] = temp[p1]; + expected[i + 2] = temp[p2]; + expected[i + 3] = temp[p3]; + } + + convert(source, result); + + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + + Assert.Equal(expected, result); + } + + private static void TestShuffle4Slice3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new byte[count * 3 / 4]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < expected.Length; i += 3, j += 4) + { + expected[i] = source[p0 + j]; + expected[i + 1] = source[p1 + j]; + expected[i + 2] = source[p2 + j]; + } + + convert(source, result); + + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + + Assert.Equal(expected, result); + } } } diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index bddadff4d..ec09e43e5 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArraySizesDivisibleBy8 = new TheoryData { 0, 8, 16, 1024 }; public static readonly TheoryData ArraySizesDivisibleBy4 = new TheoryData { 0, 4, 8, 28, 1020 }; - + public static readonly TheoryData ArraySizesDivisibleBy3 = new TheoryData { 0, 3, 9, 36, 
957 }; public static readonly TheoryData ArraySizesDivisibleBy32 = new TheoryData { 0, 32, 512 }; public static readonly TheoryData ArbitraryArraySizes =