Browse Source

Merge pull request #1409 from SixLabors/js/Shuffle3Channel

3 <==> 4 Channel Shuffling with Hardware Intrinsics
js/color-alpha-handling
James Jackson-South 5 years ago
committed by GitHub
parent
commit
522a91e57d
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 112
      src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs
  2. 103
      src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
  3. 53
      src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
  4. 101
      src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs
  5. 304
      src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
  6. 156
      src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs
  7. 72
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs
  8. 140
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs
  9. 72
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs
  10. 129
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs
  11. 72
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs
  12. 6
      src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude
  13. 144
      src/ImageSharp/PixelFormats/Utils/PixelConverter.cs
  14. 6
      tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs
  15. 55
      tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs
  16. 87
      tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs
  17. 64
      tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs
  18. 95
      tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs
  19. 2
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs
  20. 4
      tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs
  21. 65
      tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs
  22. 340
      tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs
  23. 2
      tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

112
src/ImageSharp/Common/Helpers/IComponentShuffle.cs → src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs

@ -6,6 +6,9 @@ using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
namespace SixLabors.ImageSharp
{
/// <summary>
@ -28,15 +31,32 @@ namespace SixLabors.ImageSharp
void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
}
internal readonly struct DefaultShuffle4 : IComponentShuffle
/// <inheritdoc/>
internal interface IShuffle4 : IComponentShuffle
{
}
internal readonly struct DefaultShuffle4 : IShuffle4
{
private readonly byte p3;
private readonly byte p2;
private readonly byte p1;
private readonly byte p0;
public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
: this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0))
{
DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
this.p3 = p3;
this.p2 = p2;
this.p1 = p1;
this.p0 = p0;
this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
}
public DefaultShuffle4(byte control) => this.Control = control;
public byte Control { get; }
[MethodImpl(InliningOptions.ShortMethod)]
@ -44,12 +64,11 @@ namespace SixLabors.ImageSharp
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
SimdUtils.Shuffle.InverseMmShuffle(
this.Control,
out int p3,
out int p2,
out int p1,
out int p0);
int p3 = this.p3;
int p2 = this.p2;
int p1 = this.p1;
int p0 = this.p0;
for (int i = 0; i < source.Length; i += 4)
{
@ -61,22 +80,22 @@ namespace SixLabors.ImageSharp
}
}
internal readonly struct WXYZShuffle4 : IComponentShuffle
internal readonly struct WXYZShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
for (int i = 0; i < s.Length; i++)
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
@ -87,19 +106,22 @@ namespace SixLabors.ImageSharp
}
}
internal readonly struct WZYXShuffle4 : IComponentShuffle
internal readonly struct WZYXShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;
for (int i = 0; i < s.Length; i++)
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
@ -110,19 +132,22 @@ namespace SixLabors.ImageSharp
}
}
internal readonly struct YZWXShuffle4 : IComponentShuffle
internal readonly struct YZWXShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;
for (int i = 0; i < s.Length; i++)
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);
@ -133,19 +158,22 @@ namespace SixLabors.ImageSharp
}
}
internal readonly struct ZYXWShuffle4 : IComponentShuffle
internal readonly struct ZYXWShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
}
[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;
for (int i = 0; i < s.Length; i++)
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

103
src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs

@ -0,0 +1,103 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <inheritdoc/>
/// <remarks>
/// Implemented by the shuffles in this file that read 3 bytes per source element and
/// write 4 bytes per destination element.
/// </remarks>
internal interface IPad3Shuffle4 : IComponentShuffle
{
}
/// <summary>
/// General purpose <see cref="IPad3Shuffle4"/> implementation. Every 3-byte source element is
/// padded to 4 bytes with 0xFF and the four bytes are then reordered using the supplied indices.
/// </summary>
internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
{
    private readonly byte p3;
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultPad3Shuffle4"/> struct.
    /// </summary>
    /// <param name="p3">Index (0-3) of the padded source byte written to destination byte 3.</param>
    /// <param name="p2">Index (0-3) of the padded source byte written to destination byte 2.</param>
    /// <param name="p1">Index (0-3) of the padded source byte written to destination byte 1.</param>
    /// <param name="p0">Index (0-3) of the padded source byte written to destination byte 0.</param>
    public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0)
    {
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));

        this.p3 = p3;
        this.p2 = p2;
        this.p1 = p1;
        this.p0 = p0;
        this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
    }

    /// <summary>
    /// Gets the shuffle control byte built from the four indices passed to the constructor.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Scalar implementation: reads 3 bytes per element from <paramref name="source"/> and
    /// writes 4 shuffled bytes per element to <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes; processed in steps of 3.</param>
    /// <param name="dest">The destination bytes; written in steps of 4.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        int p3 = this.p3;
        int p2 = this.p2;
        int p1 = this.p1;
        int p0 = this.p0;

        // Scratch element: bytes 0-2 are refreshed on every iteration,
        // byte 3 is the constant 0xFF padding.
        Span<byte> temp = stackalloc byte[4];
        ref byte t = ref MemoryMarshal.GetReference(temp);
        Unsafe.Add(ref t, 3) = byte.MaxValue;

        for (int i = 0, j = 0; i < source.Length; i += 3, j += 4)
        {
            // Copy exactly three bytes. A single 32-bit load here would read one byte
            // past the end of the source span for the final element, and combining it
            // with a 0xFF000000 mask would only be correct on little-endian hardware.
            ref byte s = ref Unsafe.Add(ref sBase, i);
            t = s;
            Unsafe.Add(ref t, 1) = Unsafe.Add(ref s, 1);
            Unsafe.Add(ref t, 2) = Unsafe.Add(ref s, 2);

            Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0);
            Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1);
            Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2);
            Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3);
        }
    }
}
/// <summary>
/// <see cref="IPad3Shuffle4"/> implementation that keeps the component order and only
/// appends a 0xFF byte to every 3-byte source element.
/// </summary>
internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
{
    /// <summary>
    /// Gets the shuffle control byte, <c>MmShuffle(3, 2, 1, 0)</c>.
    /// </summary>
    public byte Control
    {
        [MethodImpl(InliningOptions.ShortMethod)]
        get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
    }

    /// <summary>
    /// Scalar implementation: copies 3 bytes per element from <paramref name="source"/> and
    /// writes them followed by 0xFF to <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes; consumed in steps of 3.</param>
    /// <param name="dest">The destination bytes; written in steps of 4.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        // An empty span may expose a null reference. Subtracting from it below would wrap
        // around, and the unsigned address comparison would then enter the loop.
        if (source.IsEmpty)
        {
            return;
        }

        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        ref byte sEnd = ref Unsafe.Add(ref sBase, source.Length);
        ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4);

        // Fast path: a 32-bit load is only used while more than 4 source bytes remain,
        // so the extra byte it reads always belongs to the next element.
        while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd))
        {
            Unsafe.As<byte, uint>(ref dBase) = Unsafe.As<byte, uint>(ref sBase) | 0xFF000000;

            sBase = ref Unsafe.Add(ref sBase, 3);
            dBase = ref Unsafe.Add(ref dBase, 4);
        }

        // Tail: copy byte by byte so nothing past the end of the source is read.
        while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd))
        {
            Unsafe.Add(ref dBase, 0) = Unsafe.Add(ref sBase, 0);
            Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1);
            Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2);
            Unsafe.Add(ref dBase, 3) = byte.MaxValue;

            sBase = ref Unsafe.Add(ref sBase, 3);
            dBase = ref Unsafe.Add(ref dBase, 4);
        }
    }
}
}

53
src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs

@ -0,0 +1,53 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <inheritdoc/>
/// <remarks>
/// Implemented by the shuffles in this file that reorder bytes within 3-byte elements;
/// source and destination elements have the same width.
/// </remarks>
internal interface IShuffle3 : IComponentShuffle
{
}
/// <summary>
/// General purpose <see cref="IShuffle3"/> implementation that reorders the bytes of
/// every 3-byte element using three caller supplied indices.
/// </summary>
internal readonly struct DefaultShuffle3 : IShuffle3
{
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle3"/> struct.
    /// </summary>
    /// <param name="p2">Index (0-2) of the source byte written to destination byte 2.</param>
    /// <param name="p1">Index (0-2) of the source byte written to destination byte 1.</param>
    /// <param name="p0">Index (0-2) of the source byte written to destination byte 0.</param>
    public DefaultShuffle3(byte p2, byte p1, byte p0)
    {
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 2, nameof(p2));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 2, nameof(p1));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 2, nameof(p0));

        this.p2 = p2;
        this.p1 = p1;
        this.p0 = p0;

        // The fourth slot of the control is fixed at 3.
        this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0);
    }

    /// <summary>
    /// Gets the shuffle control byte built from the three indices.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Scalar implementation: walks both spans three bytes at a time and writes the
    /// reordered element to <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes.</param>
    /// <param name="dest">The destination bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte src = ref MemoryMarshal.GetReference(source);
        ref byte dst = ref MemoryMarshal.GetReference(dest);

        int first = this.p0;
        int second = this.p1;
        int third = this.p2;

        int length = source.Length;
        int offset = 0;
        while (offset < length)
        {
            ref byte element = ref Unsafe.Add(ref src, offset);
            ref byte target = ref Unsafe.Add(ref dst, offset);

            target = Unsafe.Add(ref element, first);
            Unsafe.Add(ref target, 1) = Unsafe.Add(ref element, second);
            Unsafe.Add(ref target, 2) = Unsafe.Add(ref element, third);

            offset += 3;
        }
    }
}
}

101
src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs

@ -0,0 +1,101 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace SixLabors.ImageSharp
{
/// <inheritdoc/>
/// <remarks>
/// Implemented by the shuffles in this file that read 4 bytes per source element and
/// write 3 bytes per destination element.
/// </remarks>
internal interface IShuffle4Slice3 : IComponentShuffle
{
}
/// <summary>
/// General purpose <see cref="IShuffle4Slice3"/> implementation that picks three bytes out of
/// every 4-byte source element, in the order given by the supplied indices.
/// </summary>
internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3
{
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle4Slice3"/> struct.
    /// </summary>
    /// <param name="p3">Index (0-3) for the fourth slot; only used to build <see cref="Control"/>.</param>
    /// <param name="p2">Index (0-3) of the source byte written to destination byte 2.</param>
    /// <param name="p1">Index (0-3) of the source byte written to destination byte 1.</param>
    /// <param name="p0">Index (0-3) of the source byte written to destination byte 0.</param>
    public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0)
    {
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));

        this.p2 = p2;
        this.p1 = p1;
        this.p0 = p0;
        this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
    }

    /// <summary>
    /// Gets the shuffle control byte built from all four indices.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Scalar implementation: for every 3 bytes of <paramref name="dest"/>, reads the selected
    /// bytes from the matching 4-byte element of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The source bytes; read in steps of 4.</param>
    /// <param name="dest">The destination bytes; its length drives the loop.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte src = ref MemoryMarshal.GetReference(source);
        ref byte dst = ref MemoryMarshal.GetReference(dest);

        int first = this.p0;
        int second = this.p1;
        int third = this.p2;

        int destLength = dest.Length;
        int srcOffset = 0;
        int dstOffset = 0;
        while (dstOffset < destLength)
        {
            ref byte element = ref Unsafe.Add(ref src, srcOffset);
            ref byte target = ref Unsafe.Add(ref dst, dstOffset);

            target = Unsafe.Add(ref element, first);
            Unsafe.Add(ref target, 1) = Unsafe.Add(ref element, second);
            Unsafe.Add(ref target, 2) = Unsafe.Add(ref element, third);

            dstOffset += 3;
            srcOffset += 4;
        }
    }
}
/// <summary>
/// <see cref="IShuffle4Slice3"/> implementation that keeps the component order and copies
/// only the first three bytes of every 4-byte source element.
/// </summary>
internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3
{
    /// <summary>
    /// Gets the shuffle control byte, <c>MmShuffle(3, 2, 1, 0)</c>.
    /// </summary>
    public byte Control
    {
        [MethodImpl(InliningOptions.ShortMethod)]
        get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
    }

    /// <summary>
    /// Scalar implementation: reinterprets the source as 32-bit elements and the destination
    /// as <see cref="Byte3"/> elements, then copies element by element.
    /// </summary>
    /// <param name="source">The source bytes; <c>source.Length / 4</c> elements are processed.</param>
    /// <param name="dest">The destination bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref uint src = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
        ref Byte3 dst = ref Unsafe.As<byte, Byte3>(ref MemoryMarshal.GetReference(dest));

        int elementCount = source.Length / 4;
        int unrolledCount = elementCount - ImageMaths.Modulo4(elementCount);

        // Main loop, unrolled four elements at a time.
        int index = 0;
        for (; index < unrolledCount; index += 4)
        {
            ref uint s = ref Unsafe.Add(ref src, index);
            ref Byte3 d = ref Unsafe.Add(ref dst, index);

            d = Unsafe.As<uint, Byte3>(ref s);
            Unsafe.Add(ref d, 1) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref s, 1));
            Unsafe.Add(ref d, 2) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref s, 2));
            Unsafe.Add(ref d, 3) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref s, 3));
        }

        // Leftover elements, one at a time.
        for (; index < elementCount; index++)
        {
            Unsafe.Add(ref dst, index) = Unsafe.As<uint, Byte3>(ref Unsafe.Add(ref src, index));
        }
    }
}
/// <summary>
/// A field-less struct with an explicit size of 3 bytes. It is used through
/// <c>Unsafe.As</c> so that three bytes can be copied with a single assignment.
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 3)]
internal readonly struct Byte3
{
}
}

304
src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

@ -18,6 +18,10 @@ namespace SixLabors.ImageSharp
public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
private static ReadOnlySpan<byte> ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 };
private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };
/// <summary>
/// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
@ -26,7 +30,7 @@ namespace SixLabors.ImageSharp
/// <param name="dest">The destination span of floats.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4ChannelReduce(
public static void Shuffle4Reduce(
ref ReadOnlySpan<float> source,
ref Span<float> dest,
byte control)
@ -41,7 +45,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0)
{
Shuffle4Channel(
Shuffle4(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount),
control);
@ -53,14 +57,14 @@ namespace SixLabors.ImageSharp
}
/// <summary>
/// Shuffle 8-bit integers in a within 128-bit lanes in <paramref name="source"/>
/// Shuffle 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4ChannelReduce(
public static void Shuffle4Reduce(
ref ReadOnlySpan<byte> source,
ref Span<byte> dest,
byte control)
@ -75,7 +79,7 @@ namespace SixLabors.ImageSharp
if (adjustedCount > 0)
{
Shuffle4Channel(
Shuffle4(
source.Slice(0, adjustedCount),
dest.Slice(0, adjustedCount),
control);
@ -86,8 +90,106 @@ namespace SixLabors.ImageSharp
}
}
/// <summary>
/// Applies the SSSE3 triplet shuffle to the largest leading portion of
/// <paramref name="source"/> that is a whole number of 3-vector blocks, then advances both
/// spans past the processed bytes so the caller only has to handle the remainder.
/// Does nothing when SSSE3 is unavailable.
/// </summary>
/// <param name="source">The source span of bytes; sliced to the unprocessed remainder on return.</param>
/// <param name="dest">The destination span of bytes; sliced to the unprocessed remainder on return.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle3Reduce(
    ref ReadOnlySpan<byte> source,
    ref Span<byte> dest,
    byte control)
{
    if (!Ssse3.IsSupported)
    {
        return;
    }

    int blockSize = Vector128<byte>.Count * 3;
    int vectorizable = source.Length - (source.Length % blockSize);
    if (vectorizable <= 0)
    {
        return;
    }

    ReadOnlySpan<byte> head = source.Slice(0, vectorizable);
    Span<byte> headDest = dest.Slice(0, vectorizable);
    Shuffle3(head, headDest, control);

    source = source.Slice(vectorizable);
    dest = dest.Slice(vectorizable);
}
/// <summary>
/// Applies the SSSE3 pad-then-shuffle to the largest leading portion of
/// <paramref name="source"/> that is a whole number of 3-vector blocks, then advances both
/// spans past the processed bytes (4 destination bytes for every 3 source bytes).
/// Does nothing when SSSE3 is unavailable.
/// </summary>
/// <param name="source">The source span of bytes; sliced to the unprocessed remainder on return.</param>
/// <param name="dest">The destination span of bytes; sliced to the unprocessed remainder on return.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Pad3Shuffle4Reduce(
    ref ReadOnlySpan<byte> source,
    ref Span<byte> dest,
    byte control)
{
    if (!Ssse3.IsSupported)
    {
        return;
    }

    int blockSize = Vector128<byte>.Count * 3;
    int consumed = source.Length - (source.Length % blockSize);
    int produced = consumed * 4 / 3;
    if (consumed <= 0)
    {
        return;
    }

    ReadOnlySpan<byte> head = source.Slice(0, consumed);
    Span<byte> headDest = dest.Slice(0, produced);
    Pad3Shuffle4(head, headDest, control);

    source = source.Slice(consumed);
    dest = dest.Slice(produced);
}
/// <summary>
/// Shuffles then slices 8-bit integers within 128-bit lanes in <paramref name="source"/>
/// using the control and store the results in <paramref name="dest"/>.
/// </summary>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4Channel(
public static void Shuffle4Slice3Reduce(
ref ReadOnlySpan<byte> source,
ref Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
int remainder = source.Length % (Vector128<byte>.Count * 4);
int sourceCount = source.Length - remainder;
int destCount = sourceCount * 3 / 4;
if (sourceCount > 0)
{
Shuffle4Slice3(
source.Slice(0, sourceCount),
dest.Slice(0, destCount),
control);
source = source.Slice(sourceCount);
dest = dest.Slice(destCount);
}
}
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4(
ReadOnlySpan<float> source,
Span<float> dest,
byte control)
@ -165,7 +267,7 @@ namespace SixLabors.ImageSharp
}
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4Channel(
private static void Shuffle4(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
@ -177,7 +279,7 @@ namespace SixLabors.ImageSharp
// We can add static ROS instances if need be in the future.
Span<byte> bytes = stackalloc byte[Vector256<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector256<byte> vcm = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(bytes));
Vector256<byte> vshuffle = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector256<byte> sourceBase =
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(source));
@ -194,17 +296,17 @@ namespace SixLabors.ImageSharp
ref Vector256<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector256<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Avx2.Shuffle(vs0, vcm);
Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vcm);
Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vcm);
Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vcm);
vd0 = Avx2.Shuffle(vs0, vshuffle);
Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle);
Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle);
Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vcm);
Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle);
}
}
}
@ -213,7 +315,7 @@ namespace SixLabors.ImageSharp
// Ssse3
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vcm = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
@ -230,22 +332,186 @@ namespace SixLabors.ImageSharp
ref Vector128<byte> vs0 = ref Unsafe.Add(ref sourceBase, i);
ref Vector128<byte> vd0 = ref Unsafe.Add(ref destBase, i);
vd0 = Ssse3.Shuffle(vs0, vcm);
Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vcm);
Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vcm);
Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vcm);
vd0 = Ssse3.Shuffle(vs0, vshuffle);
Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle);
Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle);
Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle);
}
if (m > 0)
{
for (int i = u; i < n; i++)
{
Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vcm);
Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle);
}
}
}
}
/// <summary>
/// SSSE3 kernel for the 3-byte element shuffle. Each iteration reads three 128-bit vectors,
/// expands them to four vectors of 4-byte elements, shuffles those, and packs the result
/// back into three vectors. Does nothing when SSSE3 is unavailable.
/// </summary>
/// <param name="source">The source bytes. NOTE(review): the loop reads vectors in groups of three, so the length is presumably expected to be a multiple of 3 vectors - confirm against the caller.</param>
/// <param name="dest">The destination bytes.</param>
/// <param name="control">The byte control used to build the shuffle mask.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle3(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
// vmask: byte-shuffle mask that spreads 12 packed bytes into four 4-byte elements;
// the 0x80 entries zero the fourth byte of each element.
ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
Vector128<byte> vmask = Unsafe.As<byte, Vector128<byte>>(ref vmaskBase);
// vmasko: packs the first three bytes of each 4-byte element into the low 12 bytes.
ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
Vector128<byte> vmasko = Unsafe.As<byte, Vector128<byte>>(ref vmaskoBase);
// vmaske: vmasko rotated by 12 bytes, so the packed bytes land in the high 12 bytes instead.
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
// vshuffle: per-element shuffle mask generated from the control byte.
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / Vector128<byte>.Count;
for (int i = 0; i < n; i += 3)
{
ref Vector128<byte> vs = ref Unsafe.Add(ref sourceBase, i);
Vector128<byte> v0 = vs;
Vector128<byte> v1 = Unsafe.Add(ref vs, 1);
Vector128<byte> v2 = Unsafe.Add(ref vs, 2);
// Realign the 48 loaded bytes so that every vector starts with 12 fresh bytes.
// The order matters: v3 and v2 are derived before v1 is overwritten.
Vector128<byte> v3 = Sse2.ShiftRightLogical128BitLane(v2, 4);
v2 = Ssse3.AlignRight(v2, v1, 8);
v1 = Ssse3.AlignRight(v1, v0, 12);
// Expand to 4-byte elements, then apply the requested shuffle.
v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle);
v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle);
v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle);
v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle);
// Pack back to 3-byte elements: v0/v2 into the high 12 bytes, v1/v3 into the low 12 bytes.
v0 = Ssse3.Shuffle(v0, vmaske);
v1 = Ssse3.Shuffle(v1, vmasko);
v2 = Ssse3.Shuffle(v2, vmaske);
v3 = Ssse3.Shuffle(v3, vmasko);
// Stitch the four 12-byte groups into three full vectors.
v0 = Ssse3.AlignRight(v1, v0, 4);
v3 = Ssse3.AlignRight(v3, v2, 12);
v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4);
v2 = Sse2.ShiftRightLogical128BitLane(v2, 4);
v1 = Ssse3.AlignRight(v2, v1, 8);
ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, i);
vd = v0;
Unsafe.Add(ref vd, 1) = v1;
Unsafe.Add(ref vd, 2) = v3;
}
}
}
/// <summary>
/// SSSE3 kernel for the pad-then-shuffle operation. Each iteration reads three 128-bit
/// vectors of 3-byte elements and writes four 128-bit vectors of 4-byte elements whose
/// padding byte is 0xFF before the shuffle is applied. Does nothing when SSSE3 is unavailable.
/// </summary>
/// <param name="source">The source bytes. NOTE(review): the loop reads vectors in groups of three, so the length is presumably expected to be a multiple of 3 vectors - confirm against the caller.</param>
/// <param name="dest">The destination bytes; four vectors are written per three source vectors.</param>
/// <param name="control">The byte control used to build the shuffle mask.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void Pad3Shuffle4(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
// vmask: byte-shuffle mask that spreads 12 packed bytes into four 4-byte elements;
// the 0x80 entries zero the fourth byte of each element.
ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
Vector128<byte> vmask = Unsafe.As<byte, Vector128<byte>>(ref vmaskBase);
// vfill: 0xFF in the top byte of every 32-bit element; OR-ed in to set the padding byte.
Vector128<byte> vfill = Vector128.Create(0xff000000ff000000ul).AsByte();
// vshuffle: per-element shuffle mask generated from the control byte.
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / Vector128<byte>.Count;
// i indexes source vectors (3 per iteration), j indexes destination vectors (4 per iteration).
for (int i = 0, j = 0; i < n; i += 3, j += 4)
{
ref Vector128<byte> v0 = ref Unsafe.Add(ref sourceBase, i);
Vector128<byte> v1 = Unsafe.Add(ref v0, 1);
Vector128<byte> v2 = Unsafe.Add(ref v0, 2);
// Realign the 48 loaded bytes so that every vector starts with 12 fresh bytes.
// The order matters: v3 and v2 are derived before v1 is overwritten.
Vector128<byte> v3 = Sse2.ShiftRightLogical128BitLane(v2, 4);
v2 = Ssse3.AlignRight(v2, v1, 8);
v1 = Ssse3.AlignRight(v1, v0, 12);
ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, j);
// Per vector: expand to 4-byte elements, set the padding byte, then shuffle.
vd = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle);
Unsafe.Add(ref vd, 1) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle);
Unsafe.Add(ref vd, 2) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle);
Unsafe.Add(ref vd, 3) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle);
}
}
}
/// <summary>
/// SSSE3 kernel for the shuffle-then-slice operation. Each iteration reads four 128-bit
/// vectors of 4-byte elements, shuffles them, drops the fourth byte of every element and
/// writes the remaining bytes as three 128-bit vectors. Does nothing when SSSE3 is unavailable.
/// </summary>
/// <param name="source">The source bytes. NOTE(review): the loop reads vectors in groups of four, so the length is presumably expected to be a multiple of 4 vectors - confirm against the caller.</param>
/// <param name="dest">The destination bytes; three vectors are written per four source vectors.</param>
/// <param name="control">The byte control used to build the shuffle mask.</param>
[MethodImpl(InliningOptions.ShortMethod)]
private static void Shuffle4Slice3(
ReadOnlySpan<byte> source,
Span<byte> dest,
byte control)
{
if (Ssse3.IsSupported)
{
// vmasko: packs the first three bytes of each 4-byte element into the low 12 bytes.
ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
Vector128<byte> vmasko = Unsafe.As<byte, Vector128<byte>>(ref vmaskoBase);
// vmaske: vmasko rotated by 12 bytes, so the packed bytes land in the high 12 bytes instead.
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
// vshuffle: per-element shuffle mask generated from the control byte.
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
Shuffle.MmShuffleSpan(ref bytes, control);
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
ref Vector128<byte> sourceBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
ref Vector128<byte> destBase =
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / Vector128<byte>.Count;
// i indexes source vectors (4 per iteration), j indexes destination vectors (3 per iteration).
for (int i = 0, j = 0; i < n; i += 4, j += 3)
{
ref Vector128<byte> vs = ref Unsafe.Add(ref sourceBase, i);
Vector128<byte> v0 = vs;
Vector128<byte> v1 = Unsafe.Add(ref vs, 1);
Vector128<byte> v2 = Unsafe.Add(ref vs, 2);
Vector128<byte> v3 = Unsafe.Add(ref vs, 3);
// Shuffle, then pack: v0/v2 into the high 12 bytes, v1/v3 into the low 12 bytes.
v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske);
v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko);
v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vshuffle), vmaske);
v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko);
// Stitch the four 12-byte groups into three full vectors.
v0 = Ssse3.AlignRight(v1, v0, 4);
v3 = Ssse3.AlignRight(v3, v2, 12);
v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4);
v2 = Sse2.ShiftRightLogical128BitLane(v2, 4);
v1 = Ssse3.AlignRight(v2, v1, 8);
ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, j);
vd = v0;
Unsafe.Add(ref vd, 1) = v1;
Unsafe.Add(ref vd, 2) = v3;
}
}
}
/// <summary>
/// Performs a multiplication and an addition of the <see cref="Vector256{T}"/>.
/// </summary>

156
src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs

@ -18,21 +18,21 @@ namespace SixLabors.ImageSharp
/// <param name="dest">The destination span of floats.</param>
/// <param name="control">The byte control.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Channel(
public static void Shuffle4(
ReadOnlySpan<float> source,
Span<float> dest,
byte control)
{
VerifyShuffleSpanInput(source, dest);
VerifyShuffle4SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, control);
HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
ShuffleRemainder4Channel(source, dest, control);
Shuffle4Remainder(source, dest, control);
}
}
@ -44,16 +44,16 @@ namespace SixLabors.ImageSharp
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Channel<TShuffle>(
public static void Shuffle4<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
TShuffle shuffle)
where TShuffle : struct, IComponentShuffle
where TShuffle : struct, IShuffle4
{
VerifyShuffleSpanInput(source, dest);
VerifyShuffle4SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
HwIntrinsics.Shuffle4ChannelReduce(ref source, ref dest, shuffle.Control);
HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
@ -63,7 +63,88 @@ namespace SixLabors.ImageSharp
}
}
public static void ShuffleRemainder4Channel(
/// <summary>
/// Shuffles 8-bit integer triplets in <paramref name="source"/> using the control of
/// <paramref name="shuffle"/> and stores the results in <paramref name="dest"/>.
/// When runtime intrinsics are compiled in, the hardware accelerated path runs first and
/// the shuffle's fallback handles whatever remains.
/// </summary>
/// <typeparam name="TShuffle">The type of shuffle struct.</typeparam>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle3<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
TShuffle shuffle)
where TShuffle : struct, IShuffle3
{
// Debug-only validation of the span lengths.
VerifyShuffle3SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
// Both spans are passed by reference and come back sliced past the bytes already handled.
HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
shuffle.RunFallbackShuffle(source, dest);
}
}
/// <summary>
/// Pads the 8-bit integer triplets in <paramref name="source"/> to 4 bytes, shuffles them using
/// the control of <paramref name="shuffle"/> and stores the results in <paramref name="dest"/>.
/// When runtime intrinsics are compiled in, the hardware accelerated path runs first and
/// the shuffle's fallback handles whatever remains.
/// </summary>
/// <typeparam name="TShuffle">The type of shuffle struct.</typeparam>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Pad3Shuffle4<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
TShuffle shuffle)
where TShuffle : struct, IPad3Shuffle4
{
// Debug-only validation of the span lengths.
VerifyPad3Shuffle4SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
// Both spans are passed by reference and come back sliced past the bytes already handled.
HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
shuffle.RunFallbackShuffle(source, dest);
}
}
/// <summary>
/// Shuffles the 4-byte elements in <paramref name="source"/> using the control of
/// <paramref name="shuffle"/>, slices each one to 3 bytes and stores the results in
/// <paramref name="dest"/>. When runtime intrinsics are compiled in, the hardware accelerated
/// path runs first and the shuffle's fallback handles whatever remains.
/// </summary>
/// <typeparam name="TShuffle">The type of shuffle struct.</typeparam>
/// <param name="source">The source span of bytes.</param>
/// <param name="dest">The destination span of bytes.</param>
/// <param name="shuffle">The type of shuffle to perform.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void Shuffle4Slice3<TShuffle>(
ReadOnlySpan<byte> source,
Span<byte> dest,
TShuffle shuffle)
where TShuffle : struct, IShuffle4Slice3
{
// Debug-only validation of the span lengths.
VerifyShuffle4Slice3SpanInput(source, dest);
#if SUPPORTS_RUNTIME_INTRINSICS
// Both spans are passed by reference and come back sliced past the bytes already handled.
HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control);
#endif
// Deal with the remainder:
if (source.Length > 0)
{
shuffle.RunFallbackShuffle(source, dest);
}
}
private static void Shuffle4Remainder(
ReadOnlySpan<float> source,
Span<float> dest,
byte control)
@ -82,7 +163,22 @@ namespace SixLabors.ImageSharp
}
[Conditional("DEBUG")]
private static void VerifyShuffleSpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
private static void VerifyShuffle4SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
    where T : struct
{
    int sourceLength = source.Length;

    // Contract 1: input and output hold the same number of elements.
    DebugGuard.IsTrue(sourceLength == dest.Length, nameof(source), "Input spans must be of same length!");

    // Contract 2: that number is a whole multiple of four.
    DebugGuard.IsTrue(sourceLength % 4 == 0, nameof(source), "Input spans must be divisable by 4!");
}
[Conditional("DEBUG")]
private static void VerifyShuffle3SpanInput<T>(ReadOnlySpan<T> source, Span<T> dest)
where T : struct
{
DebugGuard.IsTrue(
@ -90,10 +186,48 @@ namespace SixLabors.ImageSharp
nameof(source),
"Input spans must be of same length!");
DebugGuard.IsTrue(
source.Length % 3 == 0,
nameof(source),
"Input spans must be divisable by 3!");
}
/// <summary>
/// Debug-only validation of the span lengths handed to the pad-3/shuffle-4 operation.
/// </summary>
/// <param name="source">The input span; its length must be a multiple of 3.</param>
/// <param name="dest">The output span; its length must be a multiple of 4.</param>
[Conditional("DEBUG")]
private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan<byte> source, Span<byte> dest)
{
    int sourceLength = source.Length;
    int destLength = dest.Length;

    DebugGuard.IsTrue(sourceLength % 3 == 0, nameof(source), "Input span must be divisable by 3!");

    DebugGuard.IsTrue(destLength % 4 == 0, nameof(dest), "Output span must be divisable by 4!");

    // Three input bytes are consumed for every four output bytes.
    DebugGuard.IsTrue(
        sourceLength == destLength * 3 / 4,
        nameof(source),
        "Input span must be 3/4 the length of the output span!");
}
/// <summary>
/// Debug-only validation of the span lengths handed to the shuffle-4/slice-3 operation.
/// </summary>
/// <param name="source">The input span; its length must be a multiple of 4.</param>
/// <param name="dest">
/// The output span; its length must be a multiple of 3 and at least 3/4 of the input length.
/// </param>
[Conditional("DEBUG")]
private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan<byte> source, Span<byte> dest)
{
    // Exactly one message per guard: the superseded wording that trailed this
    // call left a dangling argument line behind and has been dropped.
    DebugGuard.IsTrue(
        source.Length % 4 == 0,
        nameof(source),
        "Input span must be divisable by 4!");

    DebugGuard.IsTrue(
        dest.Length % 3 == 0,
        nameof(dest),
        "Output span must be divisable by 3!");

    // The message describes the output span, so report `dest` as the offending parameter.
    DebugGuard.IsTrue(
        dest.Length >= source.Length * 3 / 4,
        nameof(dest),
        "Output span must be at least 3/4 the length of the input span!");
}
public static class Shuffle

72
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs

@ -106,23 +106,59 @@ namespace SixLabors.ImageSharp.PixelFormats
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromBgra32.ToArgb32(source, dest);
}
/// <inheritdoc />
public override void ToRgb24(
    Configuration configuration,
    ReadOnlySpan<Argb32> sourcePixels,
    Span<Rgb24> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    // View both pixel buffers as raw bytes and delegate to the byte-level converter.
    PixelConverter.FromArgb32.ToRgb24(
        MemoryMarshal.Cast<Argb32, byte>(sourcePixels),
        MemoryMarshal.Cast<Rgb24, byte>(destinationPixels));
}
/// <inheritdoc />
public override void ToBgr24(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Bgr24> destinationPixels)
public override void FromRgb24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToArgb32(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgr24 dp = ref Unsafe.Add(ref destRef, i);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromArgb32.ToBgr24(source, dest);
}
dp.FromArgb32(sp);
}
/// <inheritdoc />
public override void FromBgr24(
    Configuration configuration,
    ReadOnlySpan<Bgr24> sourcePixels,
    Span<Argb32> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    // View both pixel buffers as raw bytes and delegate to the byte-level converter.
    PixelConverter.FromBgr24.ToArgb32(
        MemoryMarshal.Cast<Bgr24, byte>(sourcePixels),
        MemoryMarshal.Cast<Argb32, byte>(destinationPixels));
}
/// <inheritdoc />
@ -197,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats
}
}
/// <inheritdoc />
public override void ToRgb24(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Rgb24> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    ref Argb32 sourceBase = ref MemoryMarshal.GetReference(sourcePixels);
    ref Rgb24 destBase = ref MemoryMarshal.GetReference(destinationPixels);
    int count = sourcePixels.Length;

    // Convert one pixel at a time, writing each result in place into the destination.
    for (int index = 0; index < count; index++)
    {
        Unsafe.Add(ref destBase, index).FromArgb32(Unsafe.Add(ref sourceBase, index));
    }
}
/// <inheritdoc />
public override void ToRgb48(Configuration configuration, ReadOnlySpan<Argb32> sourcePixels, Span<Rgb48> destinationPixels)
{

140
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs

@ -52,146 +52,182 @@ namespace SixLabors.ImageSharp.PixelFormats
{
Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply));
}
/// <inheritdoc />
public override void ToArgb32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<Argb32> destinationPixels)
public override void ToRgba32(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromBgr24.ToRgba32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Argb32 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromRgba32(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromBgr24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromRgba32.ToBgr24(source, dest);
}
/// <inheritdoc />
public override void ToBgra32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<Bgra32> destinationPixels)
public override void ToArgb32(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromBgr24.ToArgb32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgra32 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromArgb32(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromBgr24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromArgb32.ToBgr24(source, dest);
}
/// <inheritdoc />
public override void ToL8(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<L8> destinationPixels)
public override void ToBgra32(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromBgr24.ToBgra32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref L8 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromBgra32(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromBgr24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromBgra32.ToBgr24(source, dest);
}
/// <inheritdoc />
public override void ToL16(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<L16> destinationPixels)
public override void ToRgb24(
Configuration configuration,
ReadOnlySpan<Bgr24> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgr24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromBgr24.ToRgb24(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref L16 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromRgb24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromBgr24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromRgb24.ToBgr24(source, dest);
}
/// <inheritdoc />
public override void ToLa16(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<La16> destinationPixels)
public override void ToL8(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<L8> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref La16 dp = ref Unsafe.Add(ref destRef, i);
ref L8 dp = ref Unsafe.Add(ref destRef, i);
dp.FromBgr24(sp);
}
}
/// <inheritdoc />
public override void ToLa32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<La32> destinationPixels)
public override void ToL16(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<L16> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref La32 dp = ref Unsafe.Add(ref destRef, i);
ref L16 dp = ref Unsafe.Add(ref destRef, i);
dp.FromBgr24(sp);
}
}
/// <inheritdoc />
public override void ToRgb24(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<Rgb24> destinationPixels)
public override void ToLa16(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<La16> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Rgb24 dp = ref Unsafe.Add(ref destRef, i);
ref La16 dp = ref Unsafe.Add(ref destRef, i);
dp.FromBgr24(sp);
}
}
/// <inheritdoc />
public override void ToRgba32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<Rgba32> destinationPixels)
public override void ToLa32(Configuration configuration, ReadOnlySpan<Bgr24> sourcePixels, Span<La32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Rgba32 dp = ref Unsafe.Add(ref destRef, i);
ref La32 dp = ref Unsafe.Add(ref destRef, i);
dp.FromBgr24(sp);
}

72
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs

@ -106,23 +106,59 @@ namespace SixLabors.ImageSharp.PixelFormats
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromArgb32.ToBgra32(source, dest);
}
/// <inheritdoc />
public override void ToRgb24(
    Configuration configuration,
    ReadOnlySpan<Bgra32> sourcePixels,
    Span<Rgb24> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    // View both pixel buffers as raw bytes and delegate to the byte-level converter.
    PixelConverter.FromBgra32.ToRgb24(
        MemoryMarshal.Cast<Bgra32, byte>(sourcePixels),
        MemoryMarshal.Cast<Rgb24, byte>(destinationPixels));
}
/// <inheritdoc />
public override void ToBgr24(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Bgr24> destinationPixels)
public override void FromRgb24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Bgra32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgra32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToBgra32(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgr24 dp = ref Unsafe.Add(ref destRef, i);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromBgra32.ToBgr24(source, dest);
}
dp.FromBgra32(sp);
}
/// <inheritdoc />
public override void FromBgr24(
    Configuration configuration,
    ReadOnlySpan<Bgr24> sourcePixels,
    Span<Bgra32> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    // View both pixel buffers as raw bytes and delegate to the byte-level converter.
    PixelConverter.FromBgr24.ToBgra32(
        MemoryMarshal.Cast<Bgr24, byte>(sourcePixels),
        MemoryMarshal.Cast<Bgra32, byte>(destinationPixels));
}
/// <inheritdoc />
@ -197,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats
}
}
/// <inheritdoc />
public override void ToRgb24(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Rgb24> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    ref Bgra32 sourceBase = ref MemoryMarshal.GetReference(sourcePixels);
    ref Rgb24 destBase = ref MemoryMarshal.GetReference(destinationPixels);
    int count = sourcePixels.Length;

    // Convert one pixel at a time, writing each result in place into the destination.
    for (int index = 0; index < count; index++)
    {
        Unsafe.Add(ref destBase, index).FromBgra32(Unsafe.Add(ref sourceBase, index));
    }
}
/// <inheritdoc />
public override void ToRgb48(Configuration configuration, ReadOnlySpan<Bgra32> sourcePixels, Span<Rgb48> destinationPixels)
{

129
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs

@ -52,59 +52,114 @@ namespace SixLabors.ImageSharp.PixelFormats
{
Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply));
}
/// <inheritdoc />
public override void ToArgb32(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Argb32> destinationPixels)
public override void ToRgba32(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToRgba32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Argb32 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromRgba32(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromRgb24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromRgba32.ToRgb24(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Bgr24> destinationPixels)
public override void ToArgb32(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Argb32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Argb32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToArgb32(source, dest);
}
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgr24 dp = ref Unsafe.Add(ref destRef, i);
/// <inheritdoc />
public override void FromArgb32(
Configuration configuration,
ReadOnlySpan<Argb32> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
dp.FromRgb24(sp);
}
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Argb32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromArgb32.ToRgb24(source, dest);
}
/// <inheritdoc />
public override void ToBgra32(
    Configuration configuration,
    ReadOnlySpan<Rgb24> sourcePixels,
    Span<Bgra32> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    // View both pixel buffers as raw bytes and delegate to the byte-level converter.
    PixelConverter.FromRgb24.ToBgra32(
        MemoryMarshal.Cast<Rgb24, byte>(sourcePixels),
        MemoryMarshal.Cast<Bgra32, byte>(destinationPixels));
}
/// <inheritdoc />
public override void ToBgra32(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Bgra32> destinationPixels)
public override void FromBgra32(
Configuration configuration,
ReadOnlySpan<Bgra32> sourcePixels,
Span<Rgb24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Bgra32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgb24, byte>(destinationPixels);
PixelConverter.FromBgra32.ToRgb24(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgra32 dp = ref Unsafe.Add(ref destRef, i);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromRgb24.ToBgr24(source, dest);
}
dp.FromRgb24(sp);
}
/// <inheritdoc />
public override void FromBgr24(
    Configuration configuration,
    ReadOnlySpan<Bgr24> sourcePixels,
    Span<Rgb24> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    // View both pixel buffers as raw bytes and delegate to the byte-level converter.
    PixelConverter.FromBgr24.ToRgb24(
        MemoryMarshal.Cast<Bgr24, byte>(sourcePixels),
        MemoryMarshal.Cast<Rgb24, byte>(destinationPixels));
}
/// <inheritdoc />
@ -179,24 +234,6 @@ namespace SixLabors.ImageSharp.PixelFormats
}
}
/// <inheritdoc />
public override void ToRgba32(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Rgba32> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    ref Rgb24 sourceBase = ref MemoryMarshal.GetReference(sourcePixels);
    ref Rgba32 destBase = ref MemoryMarshal.GetReference(destinationPixels);
    int count = sourcePixels.Length;

    // Convert one pixel at a time, writing each result in place into the destination.
    for (int index = 0; index < count; index++)
    {
        Unsafe.Add(ref destBase, index).FromRgb24(Unsafe.Add(ref sourceBase, index));
    }
}
/// <inheritdoc />
public override void ToRgb48(Configuration configuration, ReadOnlySpan<Rgb24> sourcePixels, Span<Rgb48> destinationPixels)
{

72
src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs

@ -95,23 +95,59 @@ namespace SixLabors.ImageSharp.PixelFormats
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromBgra32.ToRgba32(source, dest);
}
/// <inheritdoc />
public override void ToRgb24(
    Configuration configuration,
    ReadOnlySpan<Rgba32> sourcePixels,
    Span<Rgb24> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    // View both pixel buffers as raw bytes and delegate to the byte-level converter.
    PixelConverter.FromRgba32.ToRgb24(
        MemoryMarshal.Cast<Rgba32, byte>(sourcePixels),
        MemoryMarshal.Cast<Rgb24, byte>(destinationPixels));
}
/// <inheritdoc />
public override void ToBgr24(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Bgr24> destinationPixels)
public override void FromRgb24(
Configuration configuration,
ReadOnlySpan<Rgb24> sourcePixels,
Span<Rgba32> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels);
ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgb24, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Rgba32, byte>(destinationPixels);
PixelConverter.FromRgb24.ToRgba32(source, dest);
}
/// <inheritdoc />
public override void ToBgr24(
Configuration configuration,
ReadOnlySpan<Rgba32> sourcePixels,
Span<Bgr24> destinationPixels)
{
Guard.NotNull(configuration, nameof(configuration));
Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));
for (int i = 0; i < sourcePixels.Length; i++)
{
ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i);
ref Bgr24 dp = ref Unsafe.Add(ref destRef, i);
ReadOnlySpan<byte> source = MemoryMarshal.Cast<Rgba32, byte>(sourcePixels);
Span<byte> dest = MemoryMarshal.Cast<Bgr24, byte>(destinationPixels);
PixelConverter.FromRgba32.ToBgr24(source, dest);
}
dp.FromRgba32(sp);
}
/// <inheritdoc />
public override void FromBgr24(
    Configuration configuration,
    ReadOnlySpan<Bgr24> sourcePixels,
    Span<Rgba32> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    // View both pixel buffers as raw bytes and delegate to the byte-level converter.
    PixelConverter.FromBgr24.ToRgba32(
        MemoryMarshal.Cast<Bgr24, byte>(sourcePixels),
        MemoryMarshal.Cast<Rgba32, byte>(destinationPixels));
}
/// <inheritdoc />
@ -186,24 +222,6 @@ namespace SixLabors.ImageSharp.PixelFormats
}
}
/// <inheritdoc />
public override void ToRgb24(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Rgb24> destinationPixels)
{
    Guard.NotNull(configuration, nameof(configuration));
    Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels));

    ref Rgba32 sourceBase = ref MemoryMarshal.GetReference(sourcePixels);
    ref Rgb24 destBase = ref MemoryMarshal.GetReference(destinationPixels);
    int count = sourcePixels.Length;

    // Convert one pixel at a time, writing each result in place into the destination.
    for (int index = 0; index < count; index++)
    {
        Unsafe.Add(ref destBase, index).FromRgba32(Unsafe.Add(ref sourceBase, index));
    }
}
/// <inheritdoc />
public override void ToRgb48(Configuration configuration, ReadOnlySpan<Rgba32> sourcePixels, Span<Rgb48> destinationPixels)
{

6
src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude

@ -17,7 +17,7 @@ using System.Runtime.InteropServices;
<#+
static readonly string[] CommonPixelTypes = { "Argb32", "Bgr24", "Bgra32", "L8", "L16", "La16", "La32", "Rgb24", "Rgba32", "Rgb48", "Rgba64", "Bgra5551" };
static readonly string[] Optimized32BitTypes = { "Rgba32", "Argb32", "Bgra32" };
static readonly string[] OptimizedPixelTypes = { "Rgba32", "Argb32", "Bgra32", "Rgb24", "Bgr24" };
// Types with Rgba32-compatible to/from Vector4 conversion
static readonly string[] Rgba32CompatibleTypes = { "Argb32", "Bgra32", "Rgb24", "Bgr24" };
@ -148,8 +148,8 @@ using System.Runtime.InteropServices;
GenerateRgba32CompatibleVector4ConversionMethods(pixelType, pixelType.EndsWith("32"));
}
var matching32BitTypes = Optimized32BitTypes.Contains(pixelType) ?
Optimized32BitTypes.Where(p => p != pixelType) :
var matching32BitTypes = OptimizedPixelTypes.Contains(pixelType) ?
OptimizedPixelTypes.Where(p => p != pixelType) :
Enumerable.Empty<string>();
foreach (string destPixelType in matching32BitTypes)

144
src/ImageSharp/PixelFormats/Utils/PixelConverter.cs

@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel<WXYZShuffle4>(source, dest, default);
=> SimdUtils.Shuffle4<WXYZShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
@ -36,7 +36,25 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel<ZYXWShuffle4>(source, dest, default);
=> SimdUtils.Shuffle4<ZYXWShuffle4>(source, dest, default);
/// <summary>
/// Takes the bytes of a collection of <see cref="Rgba32"/> pixels held in a
/// <see cref="ReadOnlySpan{Byte}"/> and writes them to a <see cref="Span{Byte}"/>
/// as a collection of <see cref="Rgb24"/> pixels.
/// </summary>
/// <param name="source">The bytes of the source pixels.</param>
/// <param name="dest">The bytes of the destination pixels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
{
    SimdUtils.Shuffle4Slice3(source, dest, default(XYZWShuffle4Slice3));
}
/// <summary>
/// Takes the bytes of a collection of <see cref="Rgba32"/> pixels held in a
/// <see cref="ReadOnlySpan{Byte}"/> and writes them to a <see cref="Span{Byte}"/>
/// as a collection of <see cref="Bgr24"/> pixels.
/// </summary>
/// <param name="source">The bytes of the source pixels.</param>
/// <param name="dest">The bytes of the destination pixels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
{
    // NOTE(review): the four indices presumably pick the source channels that yield
    // B, G, R in the kept bytes; confirm against DefaultShuffle4Slice3.
    var shuffle = new DefaultShuffle4Slice3(3, 0, 1, 2);
    SimdUtils.Shuffle4Slice3(source, dest, shuffle);
}
}
public static class FromArgb32
@ -48,7 +66,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel<YZWXShuffle4>(source, dest, default);
=> SimdUtils.Shuffle4<YZWXShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
@ -57,7 +75,25 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel<WZYXShuffle4>(source, dest, default);
=> SimdUtils.Shuffle4<WZYXShuffle4>(source, dest, default);
/// <summary>
/// Reads the raw bytes of a collection of <see cref="Argb32"/> pixels from a
/// <see cref="ReadOnlySpan{Byte}"/> and writes the raw bytes of the equivalent
/// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/>.
/// </summary>
/// <param name="source">The bytes of the <see cref="Argb32"/> pixels to convert.</param>
/// <param name="dest">The buffer that receives the bytes of the <see cref="Rgb24"/> pixels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
{
    var shuffle = new DefaultShuffle4Slice3(0, 3, 2, 1);
    SimdUtils.Shuffle4Slice3(source, dest, shuffle);
}
/// <summary>
/// Reads the raw bytes of a collection of <see cref="Argb32"/> pixels from a
/// <see cref="ReadOnlySpan{Byte}"/> and writes the raw bytes of the equivalent
/// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/>.
/// </summary>
/// <param name="source">The bytes of the <see cref="Argb32"/> pixels to convert.</param>
/// <param name="dest">The buffer that receives the bytes of the <see cref="Bgr24"/> pixels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
{
    var shuffle = new DefaultShuffle4Slice3(0, 1, 2, 3);
    SimdUtils.Shuffle4Slice3(source, dest, shuffle);
}
}
public static class FromBgra32
@ -69,7 +105,7 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel<WZYXShuffle4>(source, dest, default);
=> SimdUtils.Shuffle4<WZYXShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
@ -78,7 +114,103 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Channel<ZYXWShuffle4>(source, dest, default);
=> SimdUtils.Shuffle4<ZYXWShuffle4>(source, dest, default);
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgra32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Rgb24"/> pixels.
/// </summary>
/// <param name="source">The bytes of the <see cref="Bgra32"/> pixels to convert.</param>
/// <param name="dest">The buffer that receives the bytes of the <see cref="Rgb24"/> pixels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2));
/// <summary>
/// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
/// <see cref="Bgra32"/> pixels to a <see cref="Span{Byte}"/> representing
/// a collection of <see cref="Bgr24"/> pixels.
/// </summary>
/// <param name="source">The bytes of the <see cref="Bgra32"/> pixels to convert.</param>
/// <param name="dest">The buffer that receives the bytes of the <see cref="Bgr24"/> pixels.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
=> SimdUtils.Shuffle4Slice3<XYZWShuffle4Slice3>(source, dest, default);
}
public static class FromRgb24
{
    /// <summary>
    /// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
    /// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/> representing
    /// a collection of <see cref="Rgba32"/> pixels.
    /// </summary>
    /// <param name="source">The bytes of the <see cref="Rgb24"/> pixels to convert.</param>
    /// <param name="dest">The buffer that receives the bytes of the <see cref="Rgba32"/> pixels.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
        => SimdUtils.Pad3Shuffle4<XYZWPad3Shuffle4>(source, dest, default);

    /// <summary>
    /// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
    /// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/> representing
    /// a collection of <see cref="Argb32"/> pixels.
    /// </summary>
    /// <param name="source">The bytes of the <see cref="Rgb24"/> pixels to convert.</param>
    /// <param name="dest">The buffer that receives the bytes of the <see cref="Argb32"/> pixels.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
        => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3));

    /// <summary>
    /// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
    /// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/> representing
    /// a collection of <see cref="Bgra32"/> pixels.
    /// </summary>
    /// <param name="source">The bytes of the <see cref="Rgb24"/> pixels to convert.</param>
    /// <param name="dest">The buffer that receives the bytes of the <see cref="Bgra32"/> pixels.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
        => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2));

    /// <summary>
    /// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
    /// <see cref="Rgb24"/> pixels to a <see cref="Span{Byte}"/> representing
    /// a collection of <see cref="Bgr24"/> pixels.
    /// </summary>
    /// <param name="source">The bytes of the <see cref="Rgb24"/> pixels to convert.</param>
    /// <param name="dest">The buffer that receives the bytes of the <see cref="Bgr24"/> pixels.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void ToBgr24(ReadOnlySpan<byte> source, Span<byte> dest)
        => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2));
}
public static class FromBgr24
{
    /// <summary>
    /// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
    /// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
    /// a collection of <see cref="Argb32"/> pixels.
    /// </summary>
    /// <param name="source">The bytes of the <see cref="Bgr24"/> pixels to convert.</param>
    /// <param name="dest">The buffer that receives the bytes of the <see cref="Argb32"/> pixels.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void ToArgb32(ReadOnlySpan<byte> source, Span<byte> dest)
        => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3));

    /// <summary>
    /// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
    /// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
    /// a collection of <see cref="Rgba32"/> pixels.
    /// </summary>
    /// <param name="source">The bytes of the <see cref="Bgr24"/> pixels to convert.</param>
    /// <param name="dest">The buffer that receives the bytes of the <see cref="Rgba32"/> pixels.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void ToRgba32(ReadOnlySpan<byte> source, Span<byte> dest)
        => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2));

    /// <summary>
    /// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
    /// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
    /// a collection of <see cref="Bgra32"/> pixels.
    /// </summary>
    /// <param name="source">The bytes of the <see cref="Bgr24"/> pixels to convert.</param>
    /// <param name="dest">The buffer that receives the bytes of the <see cref="Bgra32"/> pixels.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void ToBgra32(ReadOnlySpan<byte> source, Span<byte> dest)
        => SimdUtils.Pad3Shuffle4<XYZWPad3Shuffle4>(source, dest, default);

    /// <summary>
    /// Converts a <see cref="ReadOnlySpan{Byte}"/> representing a collection of
    /// <see cref="Bgr24"/> pixels to a <see cref="Span{Byte}"/> representing
    /// a collection of <see cref="Rgb24"/> pixels.
    /// </summary>
    /// <param name="source">The bytes of the <see cref="Bgr24"/> pixels to convert.</param>
    /// <param name="dest">The buffer that receives the bytes of the <see cref="Rgb24"/> pixels.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public static void ToRgb24(ReadOnlySpan<byte> source, Span<byte> dest)
        => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2));
}
}
}

6
tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs

@ -30,7 +30,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
protected Configuration Configuration => Configuration.Default;
// [Params(64, 2048)]
[Params(1024)]
[Params(64, 256, 2048)]
public int Count { get; set; }
[GlobalSetup]
@ -58,7 +58,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
}
}
[Benchmark]
[Benchmark(Baseline = true)]
public void PixelOperations_Base()
{
new PixelOperations<TPixel>().FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan());
@ -91,7 +91,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
SimdUtils.BasicIntrinsics256.NormalizedFloatToByteSaturate(sBytes, dFloats);
}
[Benchmark(Baseline = true)]
[Benchmark]
public void ExtendedIntrinsic()
{
Span<float> sBytes = MemoryMarshal.Cast<Vector4, float>(this.source.GetSpan());

55
tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs

@ -0,0 +1,55 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
/// <summary>
/// Closes the generic <see cref="FromVector4{TPixel}"/> benchmark class over
/// <see cref="Rgb24"/>. No members are added or overridden here; everything
/// that runs is inherited from the base class.
/// </summary>
[Config(typeof(Config.ShortClr))]
public class FromVector4_Rgb24 : FromVector4<Rgb24>
{
}
}
// 2020-11-02
// ##########
//
// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT
// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT
// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// IterationCount=3 LaunchCount=1 WarmupCount=3
//
// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:|
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 64 | 343.2 ns | 305.91 ns | 16.77 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 64 | 320.8 ns | 19.93 ns | 1.09 ns | 0.94 | 0.05 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 64 | 234.3 ns | 17.98 ns | 0.99 ns | 1.00 | 0.00 | 0.0052 | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 64 | 246.0 ns | 82.34 ns | 4.51 ns | 1.05 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 64 | 222.3 ns | 39.46 ns | 2.16 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 64 | 243.4 ns | 33.58 ns | 1.84 ns | 1.09 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 256 | 824.9 ns | 32.77 ns | 1.80 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 256 | 967.0 ns | 39.09 ns | 2.14 ns | 1.17 | 0.01 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 256 | 756.9 ns | 94.43 ns | 5.18 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 256 | 1,003.3 ns | 3,192.09 ns | 174.97 ns | 1.32 | 0.22 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 256 | 748.6 ns | 248.03 ns | 13.60 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 256 | 437.0 ns | 36.48 ns | 2.00 ns | 0.58 | 0.01 | 0.0172 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 2048 | 5,751.6 ns | 704.24 ns | 38.60 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 2048 | 4,391.6 ns | 718.17 ns | 39.37 ns | 0.76 | 0.00 | 0.0153 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 2048 | 6,202.0 ns | 1,815.18 ns | 99.50 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 2048 | 4,225.6 ns | 1,004.03 ns | 55.03 ns | 0.68 | 0.01 | 0.0153 | - | - | 72 B |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 2048 | 6,157.1 ns | 2,516.98 ns | 137.96 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 2048 | 1,822.7 ns | 1,764.43 ns | 96.71 ns | 0.30 | 0.02 | 0.0172 | - | - | 72 B |

87
tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs

@ -0,0 +1,87 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
/// <summary>
/// Benchmarks <c>SimdUtils.Pad3Shuffle4</c> with two shuffle implementations:
/// a <see cref="DefaultPad3Shuffle4"/> built from explicit control values and
/// the parameterless <see cref="XYZWPad3Shuffle4"/>.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Pad3Shuffle4Channel
{
    private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2);
    private static readonly XYZWPad3Shuffle4 ControlFast = default;
    private byte[] source;
    private byte[] destination;

    /// <summary>
    /// Allocates the buffers for the current <see cref="Count"/> and fills the
    /// source with pseudo-random bytes.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        int length = this.Count;
        var input = new byte[length];

        // Seeding with the length keeps the input reproducible per parameter value.
        new Random(length).NextBytes(input);

        this.source = input;

        // Four destination bytes for every three source bytes.
        this.destination = new byte[length * 4 / 3];
    }

    /// <summary>
    /// Gets or sets the number of source bytes processed per invocation.
    /// </summary>
    [Params(96, 384, 768, 1536)]
    public int Count { get; set; }

    /// <summary>
    /// Measures the call made with the <see cref="DefaultPad3Shuffle4"/> control.
    /// </summary>
    [Benchmark]
    public void Pad3Shuffle4()
        => SimdUtils.Pad3Shuffle4(this.source, this.destination, Control);

    /// <summary>
    /// Measures the call made with the <see cref="XYZWPad3Shuffle4"/> control.
    /// </summary>
    [Benchmark]
    public void Pad3Shuffle4FastFallback()
        => SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast);
}
// 2020-10-30
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |------------------------- |------------------- |-------------------------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|------:|------:|------:|----------:|
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.63 ns | 0.175 ns | 0.155 ns | 23.65 ns | 0.15 | 0.01 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 96 | 24.84 ns | 0.376 ns | 0.333 ns | 24.74 ns | 1.57 | 0.06 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 384 | 41.41 ns | 0.859 ns | 1.204 ns | 41.33 ns | 0.16 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 384 | 40.74 ns | 0.624 ns | 0.584 ns | 40.72 ns | 0.55 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 768 | 62.86 ns | 0.332 ns | 0.277 ns | 62.80 ns | 0.12 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 768 | 64.72 ns | 1.306 ns | 1.090 ns | 64.51 ns | 0.59 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 110.05 ns | 0.256 ns | 0.214 ns | 110.04 ns | 0.11 | 0.00 | - | - | - | - |
// | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - |
// | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - |
}

64
tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs

@ -0,0 +1,64 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
/// <summary>
/// Benchmarks <c>SimdUtils.Shuffle3</c> using a <see cref="DefaultShuffle3"/>
/// built from explicit control values.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Shuffle3Channel
{
    private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2);
    private byte[] source;
    private byte[] destination;

    /// <summary>
    /// Allocates equally sized source and destination buffers for the current
    /// <see cref="Count"/> and fills the source with pseudo-random bytes.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        int length = this.Count;
        var input = new byte[length];

        // Seeding with the length keeps the input reproducible per parameter value.
        new Random(length).NextBytes(input);

        this.source = input;
        this.destination = new byte[length];
    }

    /// <summary>
    /// Gets or sets the number of bytes processed per invocation.
    /// </summary>
    [Params(96, 384, 768, 1536)]
    public int Count { get; set; }

    /// <summary>
    /// Measures a single shuffle of the whole source buffer into the destination.
    /// </summary>
    [Benchmark]
    public void Shuffle3()
        => SimdUtils.Shuffle3(this.source, this.destination, Control);
}
// 2020-11-02
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |--------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 48.46 ns | 1.034 ns | 2.438 ns | 47.46 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle3 | 2. AVX | Empty | 96 | 32.42 ns | 0.537 ns | 0.476 ns | 32.34 ns | 0.66 | 0.04 | - | - | - | - |
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 32.51 ns | 0.373 ns | 0.349 ns | 32.56 ns | 0.66 | 0.03 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 199.04 ns | 1.512 ns | 1.180 ns | 199.17 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle3 | 2. AVX | Empty | 384 | 71.20 ns | 2.654 ns | 7.784 ns | 69.60 ns | 0.41 | 0.02 | - | - | - | - |
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 63.23 ns | 0.569 ns | 0.505 ns | 63.21 ns | 0.32 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 391.28 ns | 5.087 ns | 3.972 ns | 391.22 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle3 | 2. AVX | Empty | 768 | 109.12 ns | 2.149 ns | 2.010 ns | 108.66 ns | 0.28 | 0.01 | - | - | - | - |
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 106.51 ns | 0.734 ns | 0.613 ns | 106.56 ns | 0.27 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - |
// | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - |
}

95
tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs

@ -0,0 +1,95 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using System;
using BenchmarkDotNet.Attributes;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
/// <summary>
/// Benchmarks <c>SimdUtils.Shuffle4Slice3</c> with two shuffle implementations:
/// a <see cref="DefaultShuffle4Slice3"/> built from explicit control values and
/// the parameterless <see cref="XYZWShuffle4Slice3"/>.
/// </summary>
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class Shuffle4Slice3Channel
{
    private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2);
    private static readonly XYZWShuffle4Slice3 ControlFast = default;
    private byte[] source;
    private byte[] destination;

    /// <summary>
    /// Allocates the buffers for the current <see cref="Count"/> and fills the
    /// source with pseudo-random bytes.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        this.source = new byte[this.Count];

        // Seeding with the count keeps the input reproducible per parameter value.
        new Random(this.Count).NextBytes(this.source);

        // Three destination bytes for every four source bytes. Integer arithmetic
        // is exact, whereas scaling by a float factor can round for large counts.
        this.destination = new byte[this.Count * 3 / 4];
    }

    /// <summary>
    /// Gets or sets the number of source bytes processed per invocation.
    /// </summary>
    [Params(128, 256, 512, 1024, 2048)]
    public int Count { get; set; }

    /// <summary>
    /// Measures the call made with the <see cref="DefaultShuffle4Slice3"/> control.
    /// </summary>
    [Benchmark]
    public void Shuffle4Slice3()
    {
        SimdUtils.Shuffle4Slice3(this.source, this.destination, Control);
    }

    /// <summary>
    /// Measures the call made with the <see cref="XYZWShuffle4Slice3"/> control.
    /// </summary>
    [Benchmark]
    public void Shuffle4Slice3FastFallback()
    {
        SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast);
    }
}
// 2020-10-29
// ##########
//
// BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// Runtime=.NET Core 3.1
//
// | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:|
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 128 | 27.15 ns | 0.556 ns | 0.762 ns | 27.34 ns | 0.41 | 0.03 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 128 | 26.15 ns | 0.113 ns | 0.106 ns | 26.16 ns | 1.01 | 0.02 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.61 ns | 0.107 ns | 0.095 ns | 32.62 ns | 0.33 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 256 | 32.16 ns | 0.111 ns | 0.104 ns | 32.16 ns | 0.61 | 0.01 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 512 | 51.03 ns | 0.535 ns | 0.501 ns | 51.18 ns | 0.24 | 0.01 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 512 | 50.33 ns | 0.382 ns | 0.339 ns | 50.41 ns | 0.42 | 0.01 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 77.13 ns | 1.355 ns | 2.264 ns | 76.19 ns | 0.19 | 0.01 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 1024 | 80.25 ns | 1.647 ns | 2.082 ns | 80.98 ns | 0.35 | 0.01 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 128.41 ns | 0.417 ns | 0.390 ns | 128.24 ns | 0.16 | 0.00 | - | - | - | - |
// | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | | |
// | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - |
// | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - |
}

2
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs

@ -26,7 +26,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Benchmark]
public void Shuffle4Channel()
{
SimdUtils.Shuffle4Channel<WXYZShuffle4>(this.source, this.destination, default);
SimdUtils.Shuffle4<WXYZShuffle4>(this.source, this.destination, default);
}
}

4
tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs

@ -10,7 +10,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Config(typeof(Config.HwIntrinsics_SSE_AVX))]
public class ShuffleFloat4Channel
{
private static readonly byte control = default(WXYZShuffle4).Control;
private static readonly byte Control = default(WXYZShuffle4).Control;
private float[] source;
private float[] destination;
@ -27,7 +27,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
[Benchmark]
public void Shuffle4Channel()
{
SimdUtils.Shuffle4Channel(this.source, this.destination, control);
SimdUtils.Shuffle4(this.source, this.destination, Control);
}
}

65
tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs

@ -0,0 +1,65 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
using BenchmarkDotNet.Attributes;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk
{
/// <summary>
/// Closes the generic <see cref="ToVector4{TPixel}"/> benchmark class over
/// <see cref="Rgb24"/> and adds a baseline measurement.
/// </summary>
[Config(typeof(Config.ShortClr))]
public class ToVector4_Rgb24 : ToVector4<Rgb24>
{
    /// <summary>
    /// Baseline: calls <c>ToVector4</c> on a freshly constructed
    /// <see cref="PixelOperations{TPixel}"/> for <see cref="Rgb24"/>.
    /// </summary>
    [Benchmark(Baseline = true)]
    public void PixelOperations_Base()
    {
        var operations = new PixelOperations<Rgb24>();
        operations.ToVector4(this.Configuration, this.source.GetSpan(), this.destination.GetSpan());
    }
}
}
// 2020-11-02
// ##########
//
// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1)
// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
// .NET Core SDK=3.1.403
// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT
// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT
// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT
//
// IterationCount=3 LaunchCount=1 WarmupCount=3
//
// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated |
// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:|
// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 64 | 298.4 ns | 33.63 ns | 1.84 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 64 | 355.5 ns | 908.51 ns | 49.80 ns | 1.19 | 0.17 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 64 | 220.1 ns | 13.77 ns | 0.75 ns | 1.00 | 0.00 | 0.0055 | - | - | 24 B |
// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 64 | 228.5 ns | 41.41 ns | 2.27 ns | 1.04 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 64 | 213.6 ns | 12.47 ns | 0.68 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 64 | 217.0 ns | 9.95 ns | 0.55 ns | 1.02 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 256 | 829.0 ns | 242.93 ns | 13.32 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 256 | 448.9 ns | 4.04 ns | 0.22 ns | 0.54 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 256 | 863.0 ns | 1,253.26 ns | 68.70 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B |
// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 256 | 309.2 ns | 66.16 ns | 3.63 ns | 0.36 | 0.03 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 256 | 737.0 ns | 253.90 ns | 13.92 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B |
// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 256 | 212.3 ns | 1.07 ns | 0.06 ns | 0.29 | 0.01 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 2048 | 5,625.6 ns | 404.35 ns | 22.16 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 2048 | 1,974.1 ns | 229.84 ns | 12.60 ns | 0.35 | 0.00 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 2048 | 5,467.2 ns | 537.29 ns | 29.45 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 2048 | 1,985.5 ns | 4,714.23 ns | 258.40 ns | 0.36 | 0.05 | - | - | - | - |
// | | | | | | | | | | | | | |
// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 2048 | 5,888.2 ns | 1,622.23 ns | 88.92 ns | 1.00 | 0.00 | - | - | - | 24 B |
// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 2048 | 1,165.0 ns | 191.71 ns | 10.51 ns | 0.20 | 0.00 | - | - | - | - |

340
tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs

@ -22,7 +22,7 @@ namespace SixLabors.ImageSharp.Tests.Common
TestShuffleFloat4Channel(
size,
(s, d) => SimdUtils.Shuffle4Channel(s.Span, d.Span, control),
(s, d) => SimdUtils.Shuffle4(s.Span, d.Span, control),
control);
}
@ -39,56 +39,51 @@ namespace SixLabors.ImageSharp.Tests.Common
// Runs every byte-based 4-channel shuffle case once for the element count
// that is carried, serialized, in the argument.
static void RunTest(string serialized)
{
    // The element count arrives as a string and has to be deserialized first.
    int size = FeatureTestRunner.Deserialize<int>(serialized);

    // These cannot be expressed as a theory as you cannot
    // use RemoteExecutor within generic methods nor pass
    // IShuffle4 to the generic utils method.
    WXYZShuffle4 wxyz = default;
    TestShuffleByte4Channel(
        size,
        (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz),
        wxyz.Control);

    WZYXShuffle4 wzyx = default;
    TestShuffleByte4Channel(
        size,
        (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx),
        wzyx.Control);

    YZWXShuffle4 yzwx = default;
    TestShuffleByte4Channel(
        size,
        (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx),
        yzwx.Control);

    ZYXWShuffle4 zyxw = default;
    TestShuffleByte4Channel(
        size,
        (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw),
        zyxw.Control);

    // The remaining cases go through the general-purpose DefaultShuffle4,
    // including two controls that broadcast a single index to all channels.
    var xwyz = new DefaultShuffle4(2, 1, 3, 0);
    TestShuffleByte4Channel(
        size,
        (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz),
        xwyz.Control);

    var yyyy = new DefaultShuffle4(1, 1, 1, 1);
    TestShuffleByte4Channel(
        size,
        (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy),
        yyyy.Control);

    var wwww = new DefaultShuffle4(3, 3, 3, 3);
    TestShuffleByte4Channel(
        size,
        (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww),
        wwww.Control);
}
FeatureTestRunner.RunWithHwIntrinsicsFeature(
@ -97,6 +92,132 @@ namespace SixLabors.ImageSharp.Tests.Common
HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE);
}
// Verifies SimdUtils.Shuffle3 on byte buffers of `count` elements by comparing
// its output with the reference computed in TestShuffleByte3Channel.
// `count` is supplied by the ArraySizesDivisibleBy3 theory data.
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy3))]
public void BulkShuffleByte3Channel(int count)
{
    static void RunTest(string serialized)
    {
        int length = FeatureTestRunner.Deserialize<int>(serialized);

        // These cannot be expressed as a theory as you cannot
        // use RemoteExecutor within generic methods nor pass
        // IShuffle3 to the generic utils method.
        var shuffle012 = new DefaultShuffle3(0, 1, 2);
        Action<Memory<byte>, Memory<byte>> run012 =
            (src, dst) => SimdUtils.Shuffle3(src.Span, dst.Span, shuffle012);
        TestShuffleByte3Channel(length, run012, shuffle012.Control);

        var shuffle210 = new DefaultShuffle3(2, 1, 0);
        Action<Memory<byte>, Memory<byte>> run210 =
            (src, dst) => SimdUtils.Shuffle3(src.Span, dst.Span, shuffle210);
        TestShuffleByte3Channel(length, run210, shuffle210.Control);

        // Controls that repeat one index for every destination channel.
        var shuffle111 = new DefaultShuffle3(1, 1, 1);
        Action<Memory<byte>, Memory<byte>> run111 =
            (src, dst) => SimdUtils.Shuffle3(src.Span, dst.Span, shuffle111);
        TestShuffleByte3Channel(length, run111, shuffle111.Control);

        var shuffle222 = new DefaultShuffle3(2, 2, 2);
        Action<Memory<byte>, Memory<byte>> run222 =
            (src, dst) => SimdUtils.Shuffle3(src.Span, dst.Span, shuffle222);
        TestShuffleByte3Channel(length, run222, shuffle222.Control);
    }

    // RunTest is executed through FeatureTestRunner with this flag combination.
    HwIntrinsics features = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE;
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, count, features);
}
// Verifies SimdUtils.Pad3Shuffle4 on byte buffers of `count` source elements by
// comparing its output with the reference computed in TestPad3Shuffle4Channel.
// `count` is supplied by the ArraySizesDivisibleBy3 theory data.
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy3))]
public void BulkPad3Shuffle4Channel(int count)
{
    static void RunTest(string serialized)
    {
        int length = FeatureTestRunner.Deserialize<int>(serialized);

        // These cannot be expressed as a theory as you cannot
        // use RemoteExecutor within generic methods nor pass
        // IPad3Shuffle4 to the generic utils method.
        XYZWPad3Shuffle4 shuffleXyzw = default;
        Action<Memory<byte>, Memory<byte>> runXyzw =
            (src, dst) => SimdUtils.Pad3Shuffle4(src.Span, dst.Span, shuffleXyzw);
        TestPad3Shuffle4Channel(length, runXyzw, shuffleXyzw.Control);

        var shuffle2130 = new DefaultPad3Shuffle4(2, 1, 3, 0);
        Action<Memory<byte>, Memory<byte>> run2130 =
            (src, dst) => SimdUtils.Pad3Shuffle4(src.Span, dst.Span, shuffle2130);
        TestPad3Shuffle4Channel(length, run2130, shuffle2130.Control);

        // Controls that repeat one index for every destination channel.
        var shuffle1111 = new DefaultPad3Shuffle4(1, 1, 1, 1);
        Action<Memory<byte>, Memory<byte>> run1111 =
            (src, dst) => SimdUtils.Pad3Shuffle4(src.Span, dst.Span, shuffle1111);
        TestPad3Shuffle4Channel(length, run1111, shuffle1111.Control);

        var shuffle3333 = new DefaultPad3Shuffle4(3, 3, 3, 3);
        Action<Memory<byte>, Memory<byte>> run3333 =
            (src, dst) => SimdUtils.Pad3Shuffle4(src.Span, dst.Span, shuffle3333);
        TestPad3Shuffle4Channel(length, run3333, shuffle3333.Control);
    }

    // RunTest is executed through FeatureTestRunner with this flag combination.
    HwIntrinsics features = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE;
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, count, features);
}
// Verifies SimdUtils.Shuffle4Slice3 on byte buffers of `count` source elements by
// comparing its output with the reference computed in TestShuffle4Slice3Channel.
// `count` is supplied by the ArraySizesDivisibleBy4 theory data.
[Theory]
[MemberData(nameof(ArraySizesDivisibleBy4))]
public void BulkShuffle4Slice3Channel(int count)
{
    static void RunTest(string serialized)
    {
        int length = FeatureTestRunner.Deserialize<int>(serialized);

        // These cannot be expressed as a theory as you cannot
        // use RemoteExecutor within generic methods nor pass
        // IShuffle4Slice3 to the generic utils method.
        XYZWShuffle4Slice3 shuffleXyzw = default;
        Action<Memory<byte>, Memory<byte>> runXyzw =
            (src, dst) => SimdUtils.Shuffle4Slice3(src.Span, dst.Span, shuffleXyzw);
        TestShuffle4Slice3Channel(length, runXyzw, shuffleXyzw.Control);

        var shuffle2130 = new DefaultShuffle4Slice3(2, 1, 3, 0);
        Action<Memory<byte>, Memory<byte>> run2130 =
            (src, dst) => SimdUtils.Shuffle4Slice3(src.Span, dst.Span, shuffle2130);
        TestShuffle4Slice3Channel(length, run2130, shuffle2130.Control);

        // Controls that repeat one index for every destination channel.
        var shuffle1111 = new DefaultShuffle4Slice3(1, 1, 1, 1);
        Action<Memory<byte>, Memory<byte>> run1111 =
            (src, dst) => SimdUtils.Shuffle4Slice3(src.Span, dst.Span, shuffle1111);
        TestShuffle4Slice3Channel(length, run1111, shuffle1111.Control);

        var shuffle3333 = new DefaultShuffle4Slice3(3, 3, 3, 3);
        Action<Memory<byte>, Memory<byte>> run3333 =
            (src, dst) => SimdUtils.Shuffle4Slice3(src.Span, dst.Span, shuffle3333);
        TestShuffle4Slice3Channel(length, run3333, shuffle3333.Control);
    }

    // NOTE(review): this passes HwIntrinsics.DisableAVX, whereas the other bulk
    // shuffle tests in this file pass HwIntrinsics.DisableAVX2 - confirm that the
    // difference is intentional.
    HwIntrinsics features = HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE;
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunTest, count, features);
}
private static void TestShuffleFloat4Channel(
int count,
Action<Memory<float>, Memory<float>> convert,
@ -157,5 +278,122 @@ namespace SixLabors.ImageSharp.Tests.Common
Assert.Equal(expected, result);
}
// Checks a 3-channel byte shuffle against a scalar reference.
//   count   - length of both the source and the destination buffer.
//   convert - the conversion under test; receives (source, destination).
//   control - shuffle control byte, decoded via SimdUtils.Shuffle.InverseMmShuffle.
private static void TestShuffleByte3Channel(
    int count,
    Action<Memory<byte>, Memory<byte>> convert,
    byte control)
{
    // The input is pseudo-random but reproducible: the generator is seeded with count.
    var input = new byte[count];
    var generator = new Random(count);
    generator.NextBytes(input);

    var actual = new byte[count];
    var expected = new byte[count];

    // The first decoded index is discarded; only three are used per group.
    SimdUtils.Shuffle.InverseMmShuffle(control, out int _, out int idx2, out int idx1, out int idx0);

    // Build the reference output one 3-byte group at a time.
    int offset = 0;
    while (offset < expected.Length)
    {
        expected[offset] = input[idx0 + offset];
        expected[offset + 1] = input[idx1 + offset];
        expected[offset + 2] = input[idx2 + offset];
        offset += 3;
    }

    convert(input, actual);

    Assert.Equal(expected, actual);
}
// Checks a 3-to-4 channel pad-and-shuffle conversion against a scalar reference.
//   count   - number of source bytes; the destination holds count * 4 / 3 bytes.
//   convert - the conversion under test; receives (source, destination).
//   control - shuffle control byte, decoded via SimdUtils.Shuffle.InverseMmShuffle.
private static void TestPad3Shuffle4Channel(
    int count,
    Action<Memory<byte>, Memory<byte>> convert,
    byte control)
{
    // The input is pseudo-random but reproducible: the generator is seeded with count.
    byte[] source = new byte[count];
    new Random(count).NextBytes(source);

    var result = new byte[count * 4 / 3];
    byte[] expected = new byte[result.Length];

    SimdUtils.Shuffle.InverseMmShuffle(
        control,
        out int p3,
        out int p2,
        out int p1,
        out int p0);

    // Reference: widen each 3-byte source group to 4 bytes, using byte.MaxValue
    // as the padding byte, then pick the destination bytes according to the
    // decoded indices.
    Span<byte> temp = stackalloc byte[4];
    for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3)
    {
        temp[0] = source[j];
        temp[1] = source[j + 1];
        temp[2] = source[j + 2];
        temp[3] = byte.MaxValue;

        expected[i] = temp[p0];
        expected[i + 1] = temp[p1];
        expected[i + 2] = temp[p2];
        expected[i + 3] = temp[p3];
    }

    convert(source, result);

    // A single whole-array comparison covers every element.
    Assert.Equal(expected, result);
}
// Checks a 4-to-3 channel shuffle-and-slice conversion against a scalar reference.
//   count   - number of source bytes; the destination holds count * 3 / 4 bytes.
//   convert - the conversion under test; receives (source, destination).
//   control - shuffle control byte, decoded via SimdUtils.Shuffle.InverseMmShuffle.
private static void TestShuffle4Slice3Channel(
    int count,
    Action<Memory<byte>, Memory<byte>> convert,
    byte control)
{
    // The input is pseudo-random but reproducible: the generator is seeded with count.
    byte[] source = new byte[count];
    new Random(count).NextBytes(source);

    var result = new byte[count * 3 / 4];
    byte[] expected = new byte[result.Length];

    // The first decoded index is discarded; only three bytes are kept per group.
    SimdUtils.Shuffle.InverseMmShuffle(
        control,
        out int _,
        out int p2,
        out int p1,
        out int p0);

    // Reference: i walks the 3-byte destination groups, j the 4-byte source groups.
    for (int i = 0, j = 0; i < expected.Length; i += 3, j += 4)
    {
        expected[i] = source[p0 + j];
        expected[i + 1] = source[p1 + j];
        expected[i + 2] = source[p2 + j];
    }

    convert(source, result);

    // A single whole-array comparison covers every element.
    Assert.Equal(expected, result);
}
}
}

2
tests/ImageSharp.Tests/Common/SimdUtilsTests.cs

@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests.Common
// Theory data: element counts that are multiples of 8, including the empty case (0).
public static readonly TheoryData<int> ArraySizesDivisibleBy8 = new TheoryData<int> { 0, 8, 16, 1024 };
// Theory data: element counts that are multiples of 4, including the empty case (0).
public static readonly TheoryData<int> ArraySizesDivisibleBy4 = new TheoryData<int> { 0, 4, 8, 28, 1020 };
// Theory data: element counts that are multiples of 3, including the empty case (0).
public static readonly TheoryData<int> ArraySizesDivisibleBy3 = new TheoryData<int> { 0, 3, 9, 36, 957 };
// Theory data: element counts that are multiples of 32, including the empty case (0).
public static readonly TheoryData<int> ArraySizesDivisibleBy32 = new TheoryData<int> { 0, 32, 512 };
public static readonly TheoryData<int> ArbitraryArraySizes =

Loading…
Cancel
Save