diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 0e093a8347..c8f3997946 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -17,11 +17,11 @@ jobs: - os: ubuntu-latest framework: netcoreapp3.1 runtime: -x64 - codecov: false + codecov: true - os: windows-latest framework: netcoreapp3.1 runtime: -x64 - codecov: true + codecov: false - os: windows-latest framework: netcoreapp2.1 runtime: -x64 diff --git a/.runsettings b/.runsettings new file mode 100644 index 0000000000..ca48342bd6 --- /dev/null +++ b/.runsettings @@ -0,0 +1,7 @@ + + + + + category!=failing + + diff --git a/Directory.Build.props b/Directory.Build.props index 0f9c5bdde2..bb97810a8f 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -15,6 +15,7 @@ $(MSBuildThisFileDirectory)artifacts/ $(SixLaborsProjectCategory)/$(MSBuildProjectName) https://github.com/SixLabors/ImageSharp/ + $(MSBuildThisFileDirectory)/.runsettings @@ -120,6 +121,7 @@ https://api.nuget.org/v3/index.json; https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-eng/nuget/v3/index.json; + https://www.myget.org/F/coverlet-dev/api/v3/index.json; true $(MSBuildThisFileDirectory)shared-infrastructure/SixLabors.snk diff --git a/Directory.Build.targets b/Directory.Build.targets index 4e7ab9e6b7..2a7d25b977 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -18,22 +18,18 @@ - + - + - - + diff --git a/ImageSharp.sln b/ImageSharp.sln index 509dcf96bf..b1d3176ad2 100644 --- a/ImageSharp.sln +++ b/ImageSharp.sln @@ -9,6 +9,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution .gitattributes = .gitattributes .gitignore = .gitignore .gitmodules = .gitmodules + .runsettings = .runsettings ci-build.ps1 = ci-build.ps1 ci-pack.ps1 = ci-pack.ps1 ci-test.ps1 = ci-test.ps1 diff --git a/src/Directory.Build.targets b/src/Directory.Build.targets index d1875262d3..9b8be05b56 100644 --- a/src/Directory.Build.targets +++ b/src/Directory.Build.targets @@ -21,16 +21,25 @@ - + + $([System.IO.Path]::Combine('$(IntermediateOutputPath)','$(TargetFrameworkMoniker).AssemblyAttributes$(DefaultLanguageSourceExtension)')) + + + + + + + + + DependsOnTargets="InitializeSourceRootMappedPaths" + Returns="@(_LocalTopLevelSourceRoot)" + Condition="'$(DeterministicSourcePaths)' == 'true'"> <_LocalTopLevelSourceRoot Include="@(SourceRoot)" Condition="'%(SourceRoot.NestedRoot)' == ''"/> - + false @@ -62,7 +71,7 @@ - + @@ -74,7 +83,7 @@ SkipUnchangedFiles = "true" DestinationFolder="..\..\" /> - + - + diff --git a/src/ImageSharp/Common/Helpers/ImageMaths.cs b/src/ImageSharp/Common/Helpers/ImageMaths.cs index 977432f8bb..d24230fe18 100644 --- a/src/ImageSharp/Common/Helpers/ImageMaths.cs +++ b/src/ImageSharp/Common/Helpers/ImageMaths.cs @@ -132,6 +132,12 @@ namespace SixLabors.ImageSharp return (a / GreatestCommonDivisor(a, b)) * b; } + /// + /// Calculates % 2 + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static int Modulo2(int x) => x & 1; + /// /// Calculates % 4 /// diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs new file mode 100644 index 0000000000..7687a5b95f --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs @@ -0,0 +1,193 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Buffers.Binary; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// The JIT can detect and optimize rotation idioms ROTL (Rotate Left) +// and ROTR (Rotate Right) emitting efficient CPU instructions: +// https://github.com/dotnet/coreclr/pull/1830 +namespace SixLabors.ImageSharp +{ + /// + /// Defines the contract for methods that allow the shuffling of pixel components. + /// Used for shuffling on platforms that do not support Hardware Intrinsics. + /// + internal interface IComponentShuffle + { + /// + /// Gets the shuffle control. + /// + byte Control { get; } + + /// + /// Shuffle 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + void RunFallbackShuffle(ReadOnlySpan source, Span dest); + } + + /// + internal interface IShuffle4 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle4 : IShuffle4 + { + private readonly byte p3; + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0) + { + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + + this.p3 = p3; + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p3 = this.p3; + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + } + + internal readonly struct WXYZShuffle4 : IShuffle4 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTL(8, packed) = [Z Y X W] + Unsafe.Add(ref dBase, i) = (packed << 8) | (packed >> 24); + } + } + } + + internal readonly struct WZYXShuffle4 : IShuffle4 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // REVERSE(packedArgb) = [X Y Z W] + Unsafe.Add(ref dBase, i) = BinaryPrimitives.ReverseEndianness(packed); + } + } + } + + internal readonly struct YZWXShuffle4 : IShuffle4 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // ROTR(8, packedArgb) = [Y Z W X] + Unsafe.Add(ref dBase, i) = (packed >> 8) | (packed << 24); + } + } + } + + internal readonly struct ZYXWShuffle4 : IShuffle4 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref uint dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + int n = source.Length / 4; + + for (int i = 0; i < n; i++) + { + uint packed = Unsafe.Add(ref sBase, i); + + // packed = [W Z Y X] + // tmp1 = [W 0 Y 0] + // tmp2 = [0 Z 0 X] + // tmp3=ROTL(16, tmp2) = [0 X 0 Z] + // tmp1 + tmp3 = [W X Y Z] + uint tmp1 = packed & 0xFF00FF00; + uint tmp2 = packed & 0x00FF00FF; + uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); + + Unsafe.Add(ref dBase, i) = tmp1 + tmp3; + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs new file mode 100644 index 0000000000..0c2b1d5082 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs @@ -0,0 +1,103 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IPad3Shuffle4 : IComponentShuffle + { + } + + internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4 + { + private readonly byte p3; + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0) + { + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + + this.p3 = p3; + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p3 = this.p3; + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + Span temp = stackalloc byte[4]; + ref byte t = ref MemoryMarshal.GetReference(temp); + ref uint tu = ref Unsafe.As(ref t); + + for (int i = 0, j = 0; i < source.Length; i += 3, j += 4) + { + ref var s = ref Unsafe.Add(ref sBase, i); + tu = Unsafe.As(ref s) | 0xFF000000; + + Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0); + Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1); + Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2); + Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3); + } + } + } + + internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + ref byte sEnd = ref Unsafe.Add(ref sBase, source.Length); + ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4); + + while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd)) + { + Unsafe.As(ref dBase) = Unsafe.As(ref sBase) | 0xFF000000; + + sBase = ref Unsafe.Add(ref sBase, 3); + dBase = ref Unsafe.Add(ref dBase, 4); + } + + while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.Add(ref sBase, 0); + Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1); + Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2); + Unsafe.Add(ref dBase, 3) = byte.MaxValue; + + sBase = ref Unsafe.Add(ref sBase, 3); + dBase = ref Unsafe.Add(ref dBase, 4); + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs new file mode 100644 index 0000000000..61e99890e7 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs @@ -0,0 +1,53 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IShuffle3 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle3 : IShuffle3 + { + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle3(byte p2, byte p1, byte p0) + { + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 2, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 2, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 2, nameof(p0)); + + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0; i < source.Length; i += 3) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs new file mode 100644 index 0000000000..86e4174f11 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/Shuffle/IShuffle4Slice3.cs @@ -0,0 +1,101 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + /// + internal interface IShuffle4Slice3 : IComponentShuffle + { + } + + internal readonly struct DefaultShuffle4Slice3 : IShuffle4Slice3 + { + private readonly byte p2; + private readonly byte p1; + private readonly byte p0; + + public DefaultShuffle4Slice3(byte p3, byte p2, byte p1, byte p0) + { + DebugGuard.MustBeBetweenOrEqualTo(p3, 0, 3, nameof(p3)); + DebugGuard.MustBeBetweenOrEqualTo(p2, 0, 3, nameof(p2)); + DebugGuard.MustBeBetweenOrEqualTo(p1, 0, 3, nameof(p1)); + DebugGuard.MustBeBetweenOrEqualTo(p0, 0, 3, nameof(p0)); + + this.p2 = p2; + this.p1 = p1; + this.p0 = p0; + this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0); + } + + public byte Control { get; } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref byte sBase = ref MemoryMarshal.GetReference(source); + ref byte dBase = ref MemoryMarshal.GetReference(dest); + + int p2 = this.p2; + int p1 = this.p1; + int p0 = this.p0; + + for (int i = 0, j = 0; i < dest.Length; i += 3, j += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + j); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + j); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + j); + } + } + } + + internal readonly struct XYZWShuffle4Slice3 : IShuffle4Slice3 + { + public byte Control + { + [MethodImpl(InliningOptions.ShortMethod)] + get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0); + } + + [MethodImpl(InliningOptions.ShortMethod)] + public void RunFallbackShuffle(ReadOnlySpan source, Span dest) + { + ref uint sBase = ref Unsafe.As(ref MemoryMarshal.GetReference(source)); + ref Byte3 dBase = ref Unsafe.As(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / 4; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + ref uint sLoopEnd = ref Unsafe.Add(ref sBase, u); + ref uint sEnd = ref Unsafe.Add(ref sBase, n); + + while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + Unsafe.Add(ref dBase, 1) = Unsafe.As(ref Unsafe.Add(ref sBase, 1)); + Unsafe.Add(ref dBase, 2) = Unsafe.As(ref Unsafe.Add(ref sBase, 2)); + Unsafe.Add(ref dBase, 3) = Unsafe.As(ref Unsafe.Add(ref sBase, 3)); + + sBase = ref Unsafe.Add(ref sBase, 4); + dBase = ref Unsafe.Add(ref dBase, 4); + } + + while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd)) + { + Unsafe.Add(ref dBase, 0) = Unsafe.As(ref Unsafe.Add(ref sBase, 0)); + + sBase = ref Unsafe.Add(ref sBase, 1); + dBase = ref Unsafe.Add(ref dBase, 1); + } + } + } + + [StructLayout(LayoutKind.Explicit, Size = 3)] + internal readonly struct Byte3 + { + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs deleted file mode 100644 index b56c92dab7..0000000000 --- a/src/ImageSharp/Common/Helpers/SimdUtils.Avx2Intrinsics.cs +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -#if SUPPORTS_RUNTIME_INTRINSICS - -using System; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; - -namespace SixLabors.ImageSharp -{ - internal static partial class SimdUtils - { - public static class Avx2Intrinsics - { - private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; - - /// - /// as many elements as possible, slicing them down (keeping the remainder). - /// - [MethodImpl(InliningOptions.ShortMethod)] - internal static void NormalizedFloatToByteSaturateReduce( - ref ReadOnlySpan source, - ref Span dest) - { - DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); - - if (Avx2.IsSupported) - { - int remainder = ImageMaths.ModuloP2(source.Length, Vector.Count); - int adjustedCount = source.Length - remainder; - - if (adjustedCount > 0) - { - NormalizedFloatToByteSaturate( - source.Slice(0, adjustedCount), - dest.Slice(0, adjustedCount)); - - source = source.Slice(adjustedCount); - dest = dest.Slice(adjustedCount); - } - } - } - - /// - /// Implementation of , which is faster on new .NET runtime. - /// - /// - /// Implementation is based on MagicScaler code: - /// https://github.com/saucecontrol/PhotoSauce/blob/a9bd6e5162d2160419f0cf743fd4f536c079170b/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L453-L477 - /// - internal static void NormalizedFloatToByteSaturate( - ReadOnlySpan source, - Span dest) - { - VerifySpanInput(source, dest, Vector256.Count); - - int n = dest.Length / Vector256.Count; - - ref Vector256 sourceBase = - ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - ref Vector256 destBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); - - var maxBytes = Vector256.Create(255f); - ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32); - Vector256 mask = Unsafe.As>(ref maskBase); - - for (int i = 0; i < n; i++) - { - ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); - - Vector256 f0 = s; - Vector256 f1 = Unsafe.Add(ref s, 1); - Vector256 f2 = Unsafe.Add(ref s, 2); - Vector256 f3 = Unsafe.Add(ref s, 3); - - Vector256 w0 = ConvertToInt32(f0, maxBytes); - Vector256 w1 = ConvertToInt32(f1, maxBytes); - Vector256 w2 = ConvertToInt32(f2, maxBytes); - Vector256 w3 = ConvertToInt32(f3, maxBytes); - - Vector256 u0 = Avx2.PackSignedSaturate(w0, w1); - Vector256 u1 = Avx2.PackSignedSaturate(w2, w3); - Vector256 b = Avx2.PackUnsignedSaturate(u0, u1); - b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte(); - - Unsafe.Add(ref destBase, i) = b; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 ConvertToInt32(Vector256 vf, Vector256 scale) - { - vf = Avx.Multiply(vf, scale); - return Avx.ConvertToVector256Int32(vf); - } - } - } -} -#endif diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs new file mode 100644 index 0000000000..2ea7f2c9bd --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -0,0 +1,795 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + public static class HwIntrinsics + { + public static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; + + public static ReadOnlySpan PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 }; + + private static ReadOnlySpan ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 }; + + private static ReadOnlySpan ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 }; + + /// + /// Shuffle single-precision (32-bit) floating-point elements in + /// using the control and store the results in . + /// + /// The source span of floats. + /// The destination span of floats. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Avx.IsSupported || Sse.IsSupported) + { + int remainder = Avx.IsSupported + ? ImageMaths.ModuloP2(source.Length, Vector256.Count) + : ImageMaths.ModuloP2(source.Length, Vector128.Count); + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle4( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Shuffle 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Avx2.IsSupported || Ssse3.IsSupported) + { + int remainder = Avx2.IsSupported + ? ImageMaths.ModuloP2(source.Length, Vector256.Count) + : ImageMaths.ModuloP2(source.Length, Vector128.Count); + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle4( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Shuffles 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 3); + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + Shuffle3( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount), + control); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Pad3Shuffle4Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 3); + + int sourceCount = source.Length - remainder; + int destCount = sourceCount * 4 / 3; + + if (sourceCount > 0) + { + Pad3Shuffle4( + source.Slice(0, sourceCount), + dest.Slice(0, destCount), + control); + + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); + } + } + } + + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Slice3Reduce( + ref ReadOnlySpan source, + ref Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + int remainder = source.Length % (Vector128.Count * 4); + + int sourceCount = source.Length - remainder; + int destCount = sourceCount * 3 / 4; + + if (sourceCount > 0) + { + Shuffle4Slice3( + source.Slice(0, sourceCount), + dest.Slice(0, destCount), + control); + + source = source.Slice(sourceCount); + dest = dest.Slice(destCount); + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Avx.IsSupported) + { + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = dest.Length / Vector256.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + + vd0 = Avx.Permute(vs0, control); + Unsafe.Add(ref vd0, 1) = Avx.Permute(Unsafe.Add(ref vs0, 1), control); + Unsafe.Add(ref vd0, 2) = Avx.Permute(Unsafe.Add(ref vs0, 2), control); + Unsafe.Add(ref vd0, 3) = Avx.Permute(Unsafe.Add(ref vs0, 3), control); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx.Permute(Unsafe.Add(ref sourceBase, i), control); + } + } + } + else + { + // Sse + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = dest.Length / Vector128.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); + ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + + vd0 = Sse.Shuffle(vs0, vs0, control); + + Vector128 vs1 = Unsafe.Add(ref vs0, 1); + Unsafe.Add(ref vd0, 1) = Sse.Shuffle(vs1, vs1, control); + + Vector128 vs2 = Unsafe.Add(ref vs0, 2); + Unsafe.Add(ref vd0, 2) = Sse.Shuffle(vs2, vs2, control); + + Vector128 vs3 = Unsafe.Add(ref vs0, 3); + Unsafe.Add(ref vd0, 3) = Sse.Shuffle(vs3, vs3, control); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Vector128 vs = Unsafe.Add(ref sourceBase, i); + Unsafe.Add(ref destBase, i) = Sse.Shuffle(vs, vs, control); + } + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Avx2.IsSupported) + { + // I've chosen to do this for convenience while we determine what + // shuffle controls to add to the library. + // We can add static ROS instances if need be in the future. + Span bytes = stackalloc byte[Vector256.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector256 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = dest.Length / Vector256.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector256 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector256 vd0 = ref Unsafe.Add(ref destBase, i); + + vd0 = Avx2.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Avx2.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Avx2.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Avx2.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Avx2.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); + } + } + } + else + { + // Ssse3 + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = dest.Length / Vector128.Count; + int m = ImageMaths.Modulo4(n); + int u = n - m; + + for (int i = 0; i < u; i += 4) + { + ref Vector128 vs0 = ref Unsafe.Add(ref sourceBase, i); + ref Vector128 vd0 = ref Unsafe.Add(ref destBase, i); + + vd0 = Ssse3.Shuffle(vs0, vshuffle); + Unsafe.Add(ref vd0, 1) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 1), vshuffle); + Unsafe.Add(ref vd0, 2) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 2), vshuffle); + Unsafe.Add(ref vd0, 3) = Ssse3.Shuffle(Unsafe.Add(ref vs0, 3), vshuffle); + } + + if (m > 0) + { + for (int i = u; i < n; i++) + { + Unsafe.Add(ref destBase, i) = Ssse3.Shuffle(Unsafe.Add(ref sourceBase, i), vshuffle); + } + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0; i < n; i += 3) + { + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); + + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle); + + v0 = Ssse3.Shuffle(v0, vmaske); + v1 = Ssse3.Shuffle(v1, vmasko); + v2 = Ssse3.Shuffle(v2, vmaske); + v3 = Ssse3.Shuffle(v3, vmasko); + + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, i); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16); + Vector128 vmask = Unsafe.As>(ref vmaskBase); + Vector128 vfill = Vector128.Create(0xff000000ff000000ul).AsByte(); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 3, j += 4) + { + ref Vector128 v0 = ref Unsafe.Add(ref sourceBase, i); + Vector128 v1 = Unsafe.Add(ref v0, 1); + Vector128 v2 = Unsafe.Add(ref v0, 2); + Vector128 v3 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v2 = Ssse3.AlignRight(v2, v1, 8); + v1 = Ssse3.AlignRight(v1, v0, 12); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v0, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 1) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v1, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 2) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v2, vmask), vfill), vshuffle); + Unsafe.Add(ref vd, 3) = Ssse3.Shuffle(Sse2.Or(Ssse3.Shuffle(v3, vmask), vfill), vshuffle); + } + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Shuffle4Slice3( + ReadOnlySpan source, + Span dest, + byte control) + { + if (Ssse3.IsSupported) + { + ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16); + Vector128 vmasko = Unsafe.As>(ref vmaskoBase); + Vector128 vmaske = Ssse3.AlignRight(vmasko, vmasko, 12); + + Span bytes = stackalloc byte[Vector128.Count]; + Shuffle.MmShuffleSpan(ref bytes, control); + Vector128 vshuffle = Unsafe.As>(ref MemoryMarshal.GetReference(bytes)); + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + int n = source.Length / Vector128.Count; + + for (int i = 0, j = 0; i < n; i += 4, j += 3) + { + ref Vector128 vs = ref Unsafe.Add(ref sourceBase, i); + + Vector128 v0 = vs; + Vector128 v1 = Unsafe.Add(ref vs, 1); + Vector128 v2 = Unsafe.Add(ref vs, 2); + Vector128 v3 = Unsafe.Add(ref vs, 3); + + v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vshuffle), vmaske); + v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vshuffle), vmasko); + v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vshuffle), vmaske); + v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vshuffle), vmasko); + + v0 = Ssse3.AlignRight(v1, v0, 4); + v3 = Ssse3.AlignRight(v3, v2, 12); + + v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4); + v2 = Sse2.ShiftRightLogical128BitLane(v2, 4); + + v1 = Ssse3.AlignRight(v2, v1, 8); + + ref Vector128 vd = ref Unsafe.Add(ref destBase, j); + + vd = v0; + Unsafe.Add(ref vd, 1) = v1; + Unsafe.Add(ref vd, 2) = v3; + } + } + } + + /// + /// Performs a multiplication and an addition of the . + /// + /// The vector to add to the intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAdd( + in Vector256 va, + in Vector256 vm0, + in Vector256 vm1) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAdd(vm1, vm0, va); + } + else + { + return Avx.Add(Avx.Multiply(vm0, vm1), va); + } + } + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void ByteToNormalizedFloatReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (Avx2.IsSupported || Sse2.IsSupported) + { + int remainder; + if (Avx2.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + ByteToNormalizedFloat(source.Slice(0, adjustedCount), dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Implementation , which is faster on new RyuJIT runtime. + /// + /// + /// Implementation is based on MagicScaler code: + /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L80-L182 + /// + internal static unsafe void ByteToNormalizedFloat( + ReadOnlySpan source, + Span dest) + { + if (Avx2.IsSupported) + { + VerifySpanInput(source, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector256.Create(1 / (float)byte.MaxValue); + + for (int i = 0; i < n; i++) + { + int si = Vector256.Count * i; + Vector256 i0 = Avx2.ConvertToVector256Int32(sourceBase + si); + Vector256 i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256.Count); + Vector256 i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 2)); + Vector256 i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256.Count * 3)); + + Vector256 f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0)); + Vector256 f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1)); + Vector256 f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2)); + Vector256 f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3)); + + ref Vector256 d = ref Unsafe.Add(ref destBase, i * 4); + + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = f3; + } + } + else + { + // Sse + VerifySpanInput(source, dest, Vector128.Count); + + int n = dest.Length / Vector128.Count; + + byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector128.Create(1 / (float)byte.MaxValue); + Vector128 zero = Vector128.Zero; + + for (int i = 0; i < n; i++) + { + int si = Vector128.Count * i; + + Vector128 i0, i1, i2, i3; + if (Sse41.IsSupported) + { + i0 = Sse41.ConvertToVector128Int32(sourceBase + si); + i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128.Count); + i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 2)); + i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128.Count * 3)); + } + else + { + Vector128 b = Sse2.LoadVector128(sourceBase + si); + Vector128 s0 = Sse2.UnpackLow(b, zero).AsInt16(); + Vector128 s1 = Sse2.UnpackHigh(b, zero).AsInt16(); + + i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32(); + i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32(); + i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32(); + i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32(); + } + + Vector128 f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0)); + Vector128 f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1)); + Vector128 f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2)); + Vector128 f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3)); + + ref Vector128 d = ref Unsafe.Add(ref destBase, i * 4); + + d = f0; + Unsafe.Add(ref d, 1) = f1; + Unsafe.Add(ref d, 2) = f2; + Unsafe.Add(ref d, 3) = f3; + } + } + } + + /// + /// as many elements as possible, slicing them down (keeping the remainder). + /// + [MethodImpl(InliningOptions.ShortMethod)] + internal static void NormalizedFloatToByteSaturateReduce( + ref ReadOnlySpan source, + ref Span dest) + { + DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); + + if (Avx2.IsSupported || Sse2.IsSupported) + { + int remainder; + if (Avx2.IsSupported) + { + remainder = ImageMaths.ModuloP2(source.Length, Vector256.Count); + } + else + { + remainder = ImageMaths.ModuloP2(source.Length, Vector128.Count); + } + + int adjustedCount = source.Length - remainder; + + if (adjustedCount > 0) + { + NormalizedFloatToByteSaturate( + source.Slice(0, adjustedCount), + dest.Slice(0, adjustedCount)); + + source = source.Slice(adjustedCount); + dest = dest.Slice(adjustedCount); + } + } + } + + /// + /// Implementation of , which is faster on new .NET runtime. + /// + /// + /// Implementation is based on MagicScaler code: + /// https://github.com/saucecontrol/PhotoSauce/blob/b5811908041200488aa18fdfd17df5fc457415dc/src/MagicScaler/Magic/Processors/ConvertersFloat.cs#L541-L622 + /// + internal static void NormalizedFloatToByteSaturate( + ReadOnlySpan source, + Span dest) + { + if (Avx2.IsSupported) + { + VerifySpanInput(source, dest, Vector256.Count); + + int n = dest.Length / Vector256.Count; + + ref Vector256 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector256 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector256.Create((float)byte.MaxValue); + ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32); + Vector256 mask = Unsafe.As>(ref maskBase); + + for (int i = 0; i < n; i++) + { + ref Vector256 s = ref Unsafe.Add(ref sourceBase, i * 4); + + Vector256 f0 = Avx.Multiply(scale, s); + Vector256 f1 = Avx.Multiply(scale, Unsafe.Add(ref s, 1)); + Vector256 f2 = Avx.Multiply(scale, Unsafe.Add(ref s, 2)); + Vector256 f3 = Avx.Multiply(scale, Unsafe.Add(ref s, 3)); + + Vector256 w0 = Avx.ConvertToVector256Int32(f0); + Vector256 w1 = Avx.ConvertToVector256Int32(f1); + Vector256 w2 = Avx.ConvertToVector256Int32(f2); + Vector256 w3 = Avx.ConvertToVector256Int32(f3); + + Vector256 u0 = Avx2.PackSignedSaturate(w0, w1); + Vector256 u1 = Avx2.PackSignedSaturate(w2, w3); + Vector256 b = Avx2.PackUnsignedSaturate(u0, u1); + b = Avx2.PermuteVar8x32(b.AsInt32(), mask).AsByte(); + + Unsafe.Add(ref destBase, i) = b; + } + } + else + { + // Sse + VerifySpanInput(source, dest, Vector128.Count); + + int n = dest.Length / Vector128.Count; + + ref Vector128 sourceBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + + ref Vector128 destBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(dest)); + + var scale = Vector128.Create((float)byte.MaxValue); + + for (int i = 0; i < n; i++) + { + ref Vector128 s = ref Unsafe.Add(ref sourceBase, i * 4); + + Vector128 f0 = Sse.Multiply(scale, s); + Vector128 f1 = Sse.Multiply(scale, Unsafe.Add(ref s, 1)); + Vector128 f2 = Sse.Multiply(scale, Unsafe.Add(ref s, 2)); + Vector128 f3 = Sse.Multiply(scale, Unsafe.Add(ref s, 3)); + + Vector128 w0 = Sse2.ConvertToVector128Int32(f0); + Vector128 w1 = Sse2.ConvertToVector128Int32(f1); + Vector128 w2 = Sse2.ConvertToVector128Int32(f2); + Vector128 w3 = Sse2.ConvertToVector128Int32(f3); + + Vector128 u0 = Sse2.PackSignedSaturate(w0, w1); + Vector128 u1 = Sse2.PackSignedSaturate(w2, w3); + + Unsafe.Add(ref destBase, i) = Sse2.PackUnsignedSaturate(u0, u1); + } + } + } + } + } +} +#endif diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs new file mode 100644 index 0000000000..07744566a3 --- /dev/null +++ b/src/ImageSharp/Common/Helpers/SimdUtils.Shuffle.cs @@ -0,0 +1,275 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace SixLabors.ImageSharp +{ + internal static partial class SimdUtils + { + /// + /// Shuffle single-precision (32-bit) floating-point elements in + /// using the control and store the results in . + /// + /// The source span of floats. + /// The destination span of floats. + /// The byte control. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4( + ReadOnlySpan source, + Span dest, + byte control) + { + VerifyShuffle4SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + Shuffle4Remainder(source, dest, control); + } + } + + /// + /// Shuffle 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle4 + { + VerifyShuffle4SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + /// + /// Shuffle 8-bit integer triplets within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle3( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle3 + { + VerifyShuffle3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle3Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + /// + /// Pads then shuffles 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Pad3Shuffle4( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IPad3Shuffle4 + { + VerifyPad3Shuffle4SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Pad3Shuffle4Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + /// + /// Shuffles then slices 8-bit integers within 128-bit lanes in + /// using the control and store the results in . + /// + /// The source span of bytes. + /// The destination span of bytes. + /// The type of shuffle to perform. + [MethodImpl(InliningOptions.ShortMethod)] + public static void Shuffle4Slice3( + ReadOnlySpan source, + Span dest, + TShuffle shuffle) + where TShuffle : struct, IShuffle4Slice3 + { + VerifyShuffle4Slice3SpanInput(source, dest); + +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.Shuffle4Slice3Reduce(ref source, ref dest, shuffle.Control); +#endif + + // Deal with the remainder: + if (source.Length > 0) + { + shuffle.RunFallbackShuffle(source, dest); + } + } + + private static void Shuffle4Remainder( + ReadOnlySpan source, + Span dest, + byte control) + { + ref float sBase = ref MemoryMarshal.GetReference(source); + ref float dBase = ref MemoryMarshal.GetReference(dest); + Shuffle.InverseMmShuffle(control, out int p3, out int p2, out int p1, out int p0); + + for (int i = 0; i < source.Length; i += 4) + { + Unsafe.Add(ref dBase, i) = Unsafe.Add(ref sBase, p0 + i); + Unsafe.Add(ref dBase, i + 1) = Unsafe.Add(ref sBase, p1 + i); + Unsafe.Add(ref dBase, i + 2) = Unsafe.Add(ref sBase, p2 + i); + Unsafe.Add(ref dBase, i + 3) = Unsafe.Add(ref sBase, p3 + i); + } + } + + [Conditional("DEBUG")] + private static void VerifyShuffle4SpanInput(ReadOnlySpan source, Span dest) + where T : struct + { + DebugGuard.IsTrue( + source.Length == dest.Length, + nameof(source), + "Input spans must be of same length!"); + + DebugGuard.IsTrue( + source.Length % 4 == 0, + nameof(source), + "Input spans must be divisable by 4!"); + } + + [Conditional("DEBUG")] + private static void VerifyShuffle3SpanInput(ReadOnlySpan source, Span dest) + where T : struct + { + DebugGuard.IsTrue( + source.Length == dest.Length, + nameof(source), + "Input spans must be of same length!"); + + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input spans must be divisable by 3!"); + } + + [Conditional("DEBUG")] + private static void VerifyPad3Shuffle4SpanInput(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue( + source.Length % 3 == 0, + nameof(source), + "Input span must be divisable by 3!"); + + DebugGuard.IsTrue( + dest.Length % 4 == 0, + nameof(dest), + "Output span must be divisable by 4!"); + + DebugGuard.IsTrue( + source.Length == dest.Length * 3 / 4, + nameof(source), + "Input span must be 3/4 the length of the output span!"); + } + + [Conditional("DEBUG")] + private static void VerifyShuffle4Slice3SpanInput(ReadOnlySpan source, Span dest) + { + DebugGuard.IsTrue( + source.Length % 4 == 0, + nameof(source), + "Input span must be divisable by 4!"); + + DebugGuard.IsTrue( + dest.Length % 3 == 0, + nameof(dest), + "Output span must be divisable by 3!"); + + DebugGuard.IsTrue( + dest.Length >= source.Length * 3 / 4, + nameof(source), + "Output span must be at least 3/4 the length of the input span!"); + } + + public static class Shuffle + { + [MethodImpl(InliningOptions.ShortMethod)] + public static byte MmShuffle(byte p3, byte p2, byte p1, byte p0) + => (byte)((p3 << 6) | (p2 << 4) | (p1 << 2) | p0); + + [MethodImpl(InliningOptions.ShortMethod)] + public static void MmShuffleSpan(ref Span span, byte control) + { + InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + ref byte spanBase = ref MemoryMarshal.GetReference(span); + + for (int i = 0; i < span.Length; i += 4) + { + Unsafe.Add(ref spanBase, i) = (byte)(p0 + i); + Unsafe.Add(ref spanBase, i + 1) = (byte)(p1 + i); + Unsafe.Add(ref spanBase, i + 2) = (byte)(p2 + i); + Unsafe.Add(ref spanBase, i + 3) = (byte)(p3 + i); + } + } + + [MethodImpl(InliningOptions.ShortMethod)] + public static void InverseMmShuffle( + byte control, + out int p3, + out int p2, + out int p1, + out int p0) + { + p3 = control >> 6 & 0x3; + p2 = control >> 4 & 0x3; + p1 = control >> 2 & 0x3; + p0 = control >> 0 & 0x3; + } + } + } +} diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.cs b/src/ImageSharp/Common/Helpers/SimdUtils.cs index 7f917648dc..df533cedf1 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.cs @@ -79,8 +79,9 @@ namespace SixLabors.ImageSharp internal static void ByteToNormalizedFloat(ReadOnlySpan source, Span dest) { DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); - -#if SUPPORTS_EXTENDED_INTRINSICS +#if SUPPORTS_RUNTIME_INTRINSICS + HwIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest); +#elif SUPPORTS_EXTENDED_INTRINSICS ExtendedIntrinsics.ByteToNormalizedFloatReduce(ref source, ref dest); #else BasicIntrinsics256.ByteToNormalizedFloatReduce(ref source, ref dest); @@ -110,7 +111,7 @@ namespace SixLabors.ImageSharp DebugGuard.IsTrue(source.Length == dest.Length, nameof(source), "Input spans must be of same length!"); #if SUPPORTS_RUNTIME_INTRINSICS - Avx2Intrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); + HwIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); #elif SUPPORTS_EXTENDED_INTRINSICS ExtendedIntrinsics.NormalizedFloatToByteSaturateReduce(ref source, ref dest); #else diff --git a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs index fccc50755d..f617e9a3ea 100644 --- a/src/ImageSharp/Common/Helpers/Vector4Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector4Utilities.cs @@ -5,6 +5,10 @@ using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace SixLabors.ImageSharp { @@ -13,6 +17,9 @@ namespace SixLabors.ImageSharp /// internal static class Vector4Utilities { + private const int BlendAlphaControl = 0b_10_00_10_00; + private const int ShuffleAlphaControl = 0b_11_11_11_11; + /// /// Restricts a vector between a minimum and a maximum value. /// 5x Faster then . @@ -56,13 +63,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void Premultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) + { + Vector256 source = vectorsBase; + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); + vectorsBase = Avx.Blend(Avx.Multiply(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + Premultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - Premultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } } } @@ -73,13 +106,39 @@ namespace SixLabors.ImageSharp [MethodImpl(InliningOptions.ShortMethod)] public static void UnPremultiply(Span vectors) { - // TODO: This method can be AVX2 optimized using Vector - ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx2.IsSupported && vectors.Length >= 2) + { + ref Vector256 vectorsBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(vectors)); - for (int i = 0; i < vectors.Length; i++) + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 vectorsLast = ref Unsafe.Add(ref vectorsBase, (IntPtr)((uint)vectors.Length / 2u)); + + while (Unsafe.IsAddressLessThan(ref vectorsBase, ref vectorsLast)) + { + Vector256 source = vectorsBase; + Vector256 multiply = Avx.Shuffle(source, source, ShuffleAlphaControl); + vectorsBase = Avx.Blend(Avx.Divide(source, multiply), source, BlendAlphaControl); + vectorsBase = ref Unsafe.Add(ref vectorsBase, 1); + } + + if (ImageMaths.Modulo2(vectors.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. + UnPremultiply(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1))); + } + } + else +#endif { - ref Vector4 v = ref Unsafe.Add(ref baseRef, i); - UnPremultiply(ref v); + ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); + + for (int i = 0; i < vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } } } diff --git a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs index eb29c44050..454440f634 100644 --- a/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs +++ b/src/ImageSharp/Formats/Bmp/BmpEncoderCore.cs @@ -171,7 +171,7 @@ namespace SixLabors.ImageSharp.Formats.Bmp var fileHeader = new BmpFileHeader( type: BmpConstants.TypeMarkers.Bitmap, - fileSize: BmpFileHeader.Size + infoHeaderSize + infoHeader.ImageSize, + fileSize: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize + infoHeader.ImageSize, reserved: 0, offset: BmpFileHeader.Size + infoHeaderSize + colorPaletteSize); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs index f6f5903684..0efefc06b5 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs @@ -10,90 +10,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Transpose the block into the destination block. - /// - /// The destination block - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeInto(ref Block8x8F d) - { - d.V0L.X = V0L.X; - d.V1L.X = V0L.Y; - d.V2L.X = V0L.Z; - d.V3L.X = V0L.W; - d.V4L.X = V0R.X; - d.V5L.X = V0R.Y; - d.V6L.X = V0R.Z; - d.V7L.X = V0R.W; - - d.V0L.Y = V1L.X; - d.V1L.Y = V1L.Y; - d.V2L.Y = V1L.Z; - d.V3L.Y = V1L.W; - d.V4L.Y = V1R.X; - d.V5L.Y = V1R.Y; - d.V6L.Y = V1R.Z; - d.V7L.Y = V1R.W; - - d.V0L.Z = V2L.X; - d.V1L.Z = V2L.Y; - d.V2L.Z = V2L.Z; - d.V3L.Z = V2L.W; - d.V4L.Z = V2R.X; - d.V5L.Z = V2R.Y; - d.V6L.Z = V2R.Z; - d.V7L.Z = V2R.W; - - d.V0L.W = V3L.X; - d.V1L.W = V3L.Y; - d.V2L.W = V3L.Z; - d.V3L.W = V3L.W; - d.V4L.W = V3R.X; - d.V5L.W = V3R.Y; - d.V6L.W = V3R.Z; - d.V7L.W = V3R.W; - - d.V0R.X = V4L.X; - d.V1R.X = V4L.Y; - d.V2R.X = V4L.Z; - d.V3R.X = V4L.W; - d.V4R.X = V4R.X; - d.V5R.X = V4R.Y; - d.V6R.X = V4R.Z; - d.V7R.X = V4R.W; - - d.V0R.Y = V5L.X; - d.V1R.Y = V5L.Y; - d.V2R.Y = V5L.Z; - d.V3R.Y = V5L.W; - d.V4R.Y = V5R.X; - d.V5R.Y = V5R.Y; - d.V6R.Y = V5R.Z; - d.V7R.Y = V5R.W; - - d.V0R.Z = V6L.X; - d.V1R.Z = V6L.Y; - d.V2R.Z = V6L.Z; - d.V3R.Z = V6L.W; - d.V4R.Z = V6R.X; - d.V5R.Z = V6R.Y; - d.V6R.Z = V6R.Z; - d.V7R.Z = V6R.W; - - d.V0R.W = V7L.X; - d.V1R.W = V7L.Y; - d.V2R.W = V7L.Z; - d.V3R.W = V7L.W; - d.V4R.W = V7R.X; - d.V5R.W = V7R.Y; - d.V6R.W = V7R.Z; - d.V7R.W = V7R.W; - } - /// /// Level shift by +maximum/2, clip to [0, maximum] /// - public void NormalizeColorsInplace(float maximum) + public void NormalizeColorsInPlace(float maximum) { var CMin4 = new Vector4(0F); var CMax4 = new Vector4(maximum); @@ -118,10 +38,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } /// - /// AVX2-only variant for executing and in one step. + /// AVX2-only variant for executing and in one step. /// [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInplaceVector8(float maximum) + public void NormalizeColorsAndRoundInPlaceVector8(float maximum) { var off = new Vector(MathF.Ceiling(maximum / 2)); var max = new Vector(maximum); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt index 6ee0540213..e5a62dc075 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.tt @@ -23,42 +23,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components { internal partial struct Block8x8F { - /// - /// Transpose the block into the destination block. - /// - /// The destination block - [MethodImpl(InliningOptions.ShortMethod)] - public void TransposeInto(ref Block8x8F d) - { - <# - PushIndent(" "); - - for (int i = 0; i < 8; i++) - { - char destCoord = coordz[i % 4]; - char destSide = (i / 4) % 2 == 0 ? 'L' : 'R'; - - for (int j = 0; j < 8; j++) - { - if(i > 0 && j == 0){ - WriteLine(""); - } - - char srcCoord = coordz[j % 4]; - char srcSide = (j / 4) % 2 == 0 ? 'L' : 'R'; - - var expression = $"d.V{j}{destSide}.{destCoord} = V{i}{srcSide}.{srcCoord};\r\n"; - Write(expression); - } - } - PopIndent(); - #> - } - /// /// Level shift by +maximum/2, clip to [0, maximum] /// - public void NormalizeColorsInplace(float maximum) + public void NormalizeColorsInPlace(float maximum) { var CMin4 = new Vector4(0F); var CMax4 = new Vector4(maximum); @@ -81,10 +49,10 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components } /// - /// AVX2-only variant for executing and in one step. + /// AVX2-only variant for executing and in one step. /// [MethodImpl(InliningOptions.ShortMethod)] - public void NormalizeColorsAndRoundInplaceVector8(float maximum) + public void NormalizeColorsAndRoundInPlaceVector8(float maximum) { var off = new Vector(MathF.Ceiling(maximum / 2)); var max = new Vector(maximum); diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index b7835d6706..0dbdadbeb4 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -6,6 +6,10 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif using System.Text; // ReSharper disable InconsistentNaming @@ -277,73 +281,156 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// The value to multiply by. [MethodImpl(InliningOptions.ShortMethod)] - public void MultiplyInplace(float value) - { - this.V0L *= value; - this.V0R *= value; - this.V1L *= value; - this.V1R *= value; - this.V2L *= value; - this.V2R *= value; - this.V3L *= value; - this.V3R *= value; - this.V4L *= value; - this.V4R *= value; - this.V5L *= value; - this.V5R *= value; - this.V6L *= value; - this.V6R *= value; - this.V7L *= value; - this.V7R *= value; + public void MultiplyInPlace(float value) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + var valueVec = Vector256.Create(value); + Unsafe.As>(ref this.V0L) = Avx.Multiply(Unsafe.As>(ref this.V0L), valueVec); + Unsafe.As>(ref this.V1L) = Avx.Multiply(Unsafe.As>(ref this.V1L), valueVec); + Unsafe.As>(ref this.V2L) = Avx.Multiply(Unsafe.As>(ref this.V2L), valueVec); + Unsafe.As>(ref this.V3L) = Avx.Multiply(Unsafe.As>(ref this.V3L), valueVec); + Unsafe.As>(ref this.V4L) = Avx.Multiply(Unsafe.As>(ref this.V4L), valueVec); + Unsafe.As>(ref this.V5L) = Avx.Multiply(Unsafe.As>(ref this.V5L), valueVec); + Unsafe.As>(ref this.V6L) = Avx.Multiply(Unsafe.As>(ref this.V6L), valueVec); + Unsafe.As>(ref this.V7L) = Avx.Multiply(Unsafe.As>(ref this.V7L), valueVec); + } + else +#endif + { + var valueVec = new Vector4(value); + this.V0L *= valueVec; + this.V0R *= valueVec; + this.V1L *= valueVec; + this.V1R *= valueVec; + this.V2L *= valueVec; + this.V2R *= valueVec; + this.V3L *= valueVec; + this.V3R *= valueVec; + this.V4L *= valueVec; + this.V4R *= valueVec; + this.V5L *= valueVec; + this.V5R *= valueVec; + this.V6L *= valueVec; + this.V6R *= valueVec; + this.V7L *= valueVec; + this.V7R *= valueVec; + } } /// /// Multiply all elements of the block by the corresponding elements of 'other'. /// [MethodImpl(InliningOptions.ShortMethod)] - public void MultiplyInplace(ref Block8x8F other) - { - this.V0L *= other.V0L; - this.V0R *= other.V0R; - this.V1L *= other.V1L; - this.V1R *= other.V1R; - this.V2L *= other.V2L; - this.V2R *= other.V2R; - this.V3L *= other.V3L; - this.V3R *= other.V3R; - this.V4L *= other.V4L; - this.V4R *= other.V4R; - this.V5L *= other.V5L; - this.V5R *= other.V5R; - this.V6L *= other.V6L; - this.V6R *= other.V6R; - this.V7L *= other.V7L; - this.V7R *= other.V7R; + public unsafe void MultiplyInPlace(ref Block8x8F other) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + Unsafe.As>(ref this.V0L) + = Avx.Multiply( + Unsafe.As>(ref this.V0L), + Unsafe.As>(ref other.V0L)); + + Unsafe.As>(ref this.V1L) + = Avx.Multiply( + Unsafe.As>(ref this.V1L), + Unsafe.As>(ref other.V1L)); + + Unsafe.As>(ref this.V2L) + = Avx.Multiply( + Unsafe.As>(ref this.V2L), + Unsafe.As>(ref other.V2L)); + + Unsafe.As>(ref this.V3L) + = Avx.Multiply( + Unsafe.As>(ref this.V3L), + Unsafe.As>(ref other.V3L)); + + Unsafe.As>(ref this.V4L) + = Avx.Multiply( + Unsafe.As>(ref this.V4L), + Unsafe.As>(ref other.V4L)); + + Unsafe.As>(ref this.V5L) + = Avx.Multiply( + Unsafe.As>(ref this.V5L), + Unsafe.As>(ref other.V5L)); + + Unsafe.As>(ref this.V6L) + = Avx.Multiply( + Unsafe.As>(ref this.V6L), + Unsafe.As>(ref other.V6L)); + + Unsafe.As>(ref this.V7L) + = Avx.Multiply( + Unsafe.As>(ref this.V7L), + Unsafe.As>(ref other.V7L)); + } + else +#endif + { + this.V0L *= other.V0L; + this.V0R *= other.V0R; + this.V1L *= other.V1L; + this.V1R *= other.V1R; + this.V2L *= other.V2L; + this.V2R *= other.V2R; + this.V3L *= other.V3L; + this.V3R *= other.V3R; + this.V4L *= other.V4L; + this.V4R *= other.V4R; + this.V5L *= other.V5L; + this.V5R *= other.V5R; + this.V6L *= other.V6L; + this.V6R *= other.V6R; + this.V7L *= other.V7L; + this.V7R *= other.V7R; + } } /// /// Adds a vector to all elements of the block. /// - /// The added vector + /// The added vector. [MethodImpl(InliningOptions.ShortMethod)] - public void AddToAllInplace(Vector4 diff) - { - this.V0L += diff; - this.V0R += diff; - this.V1L += diff; - this.V1R += diff; - this.V2L += diff; - this.V2R += diff; - this.V3L += diff; - this.V3R += diff; - this.V4L += diff; - this.V4R += diff; - this.V5L += diff; - this.V5R += diff; - this.V6L += diff; - this.V6R += diff; - this.V7L += diff; - this.V7R += diff; + public void AddInPlace(float value) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + var valueVec = Vector256.Create(value); + Unsafe.As>(ref this.V0L) = Avx.Add(Unsafe.As>(ref this.V0L), valueVec); + Unsafe.As>(ref this.V1L) = Avx.Add(Unsafe.As>(ref this.V1L), valueVec); + Unsafe.As>(ref this.V2L) = Avx.Add(Unsafe.As>(ref this.V2L), valueVec); + Unsafe.As>(ref this.V3L) = Avx.Add(Unsafe.As>(ref this.V3L), valueVec); + Unsafe.As>(ref this.V4L) = Avx.Add(Unsafe.As>(ref this.V4L), valueVec); + Unsafe.As>(ref this.V5L) = Avx.Add(Unsafe.As>(ref this.V5L), valueVec); + Unsafe.As>(ref this.V6L) = Avx.Add(Unsafe.As>(ref this.V6L), valueVec); + Unsafe.As>(ref this.V7L) = Avx.Add(Unsafe.As>(ref this.V7L), valueVec); + } + else +#endif + { + var valueVec = new Vector4(value); + this.V0L += valueVec; + this.V0R += valueVec; + this.V1L += valueVec; + this.V1R += valueVec; + this.V2L += valueVec; + this.V2R += valueVec; + this.V3L += valueVec; + this.V3R += valueVec; + this.V4L += valueVec; + this.V4R += valueVec; + this.V5L += valueVec; + this.V5R += valueVec; + this.V6L += valueVec; + this.V6R += valueVec; + this.V7L += valueVec; + this.V7R += valueVec; + } } /// @@ -464,23 +551,23 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Level shift by +maximum/2, clip to [0..maximum], and round all the values in the block. /// - public void NormalizeColorsAndRoundInplace(float maximum) + public void NormalizeColorsAndRoundInPlace(float maximum) { if (SimdUtils.HasVector8) { - this.NormalizeColorsAndRoundInplaceVector8(maximum); + this.NormalizeColorsAndRoundInPlaceVector8(maximum); } else { - this.NormalizeColorsInplace(maximum); - this.RoundInplace(); + this.NormalizeColorsInPlace(maximum); + this.RoundInPlace(); } } /// /// Rounds all values in the block. /// - public void RoundInplace() + public void RoundInPlace() { for (int i = 0; i < Size; i++) { @@ -596,5 +683,157 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components DebugGuard.MustBeLessThan(idx, Size, nameof(idx)); DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx)); } + + /// + /// Transpose the block into the destination block. + /// + /// The destination block + [MethodImpl(InliningOptions.ShortMethod)] + public void TransposeInto(ref Block8x8F d) + { +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536 + Vector256 r0 = Avx.InsertVector128( + Unsafe.As>(ref this.V0L).ToVector256(), + Unsafe.As>(ref this.V4L), + 1); + + Vector256 r1 = Avx.InsertVector128( + Unsafe.As>(ref this.V1L).ToVector256(), + Unsafe.As>(ref this.V5L), + 1); + + Vector256 r2 = Avx.InsertVector128( + Unsafe.As>(ref this.V2L).ToVector256(), + Unsafe.As>(ref this.V6L), + 1); + + Vector256 r3 = Avx.InsertVector128( + Unsafe.As>(ref this.V3L).ToVector256(), + Unsafe.As>(ref this.V7L), + 1); + + Vector256 r4 = Avx.InsertVector128( + Unsafe.As>(ref this.V0R).ToVector256(), + Unsafe.As>(ref this.V4R), + 1); + + Vector256 r5 = Avx.InsertVector128( + Unsafe.As>(ref this.V1R).ToVector256(), + Unsafe.As>(ref this.V5R), + 1); + + Vector256 r6 = Avx.InsertVector128( + Unsafe.As>(ref this.V2R).ToVector256(), + Unsafe.As>(ref this.V6R), + 1); + + Vector256 r7 = Avx.InsertVector128( + Unsafe.As>(ref this.V3R).ToVector256(), + Unsafe.As>(ref this.V7R), + 1); + + Vector256 t0 = Avx.UnpackLow(r0, r1); + Vector256 t2 = Avx.UnpackLow(r2, r3); + Vector256 v = Avx.Shuffle(t0, t2, 0x4E); + Unsafe.As>(ref d.V0L) = Avx.Blend(t0, v, 0xCC); + Unsafe.As>(ref d.V1L) = Avx.Blend(t2, v, 0x33); + + Vector256 t4 = Avx.UnpackLow(r4, r5); + Vector256 t6 = Avx.UnpackLow(r6, r7); + v = Avx.Shuffle(t4, t6, 0x4E); + Unsafe.As>(ref d.V4L) = Avx.Blend(t4, v, 0xCC); + Unsafe.As>(ref d.V5L) = Avx.Blend(t6, v, 0x33); + + Vector256 t1 = Avx.UnpackHigh(r0, r1); + Vector256 t3 = Avx.UnpackHigh(r2, r3); + v = Avx.Shuffle(t1, t3, 0x4E); + Unsafe.As>(ref d.V2L) = Avx.Blend(t1, v, 0xCC); + Unsafe.As>(ref d.V3L) = Avx.Blend(t3, v, 0x33); + + Vector256 t5 = Avx.UnpackHigh(r4, r5); + Vector256 t7 = Avx.UnpackHigh(r6, r7); + v = Avx.Shuffle(t5, t7, 0x4E); + Unsafe.As>(ref d.V6L) = Avx.Blend(t5, v, 0xCC); + Unsafe.As>(ref d.V7L) = Avx.Blend(t7, v, 0x33); + } + else +#endif + { + d.V0L.X = this.V0L.X; + d.V1L.X = this.V0L.Y; + d.V2L.X = this.V0L.Z; + d.V3L.X = this.V0L.W; + d.V4L.X = this.V0R.X; + d.V5L.X = this.V0R.Y; + d.V6L.X = this.V0R.Z; + d.V7L.X = this.V0R.W; + + d.V0L.Y = this.V1L.X; + d.V1L.Y = this.V1L.Y; + d.V2L.Y = this.V1L.Z; + d.V3L.Y = this.V1L.W; + d.V4L.Y = this.V1R.X; + d.V5L.Y = this.V1R.Y; + d.V6L.Y = this.V1R.Z; + d.V7L.Y = this.V1R.W; + + d.V0L.Z = this.V2L.X; + d.V1L.Z = this.V2L.Y; + d.V2L.Z = this.V2L.Z; + d.V3L.Z = this.V2L.W; + d.V4L.Z = this.V2R.X; + d.V5L.Z = this.V2R.Y; + d.V6L.Z = this.V2R.Z; + d.V7L.Z = this.V2R.W; + + d.V0L.W = this.V3L.X; + d.V1L.W = this.V3L.Y; + d.V2L.W = this.V3L.Z; + d.V3L.W = this.V3L.W; + d.V4L.W = this.V3R.X; + d.V5L.W = this.V3R.Y; + d.V6L.W = this.V3R.Z; + d.V7L.W = this.V3R.W; + + d.V0R.X = this.V4L.X; + d.V1R.X = this.V4L.Y; + d.V2R.X = this.V4L.Z; + d.V3R.X = this.V4L.W; + d.V4R.X = this.V4R.X; + d.V5R.X = this.V4R.Y; + d.V6R.X = this.V4R.Z; + d.V7R.X = this.V4R.W; + + d.V0R.Y = this.V5L.X; + d.V1R.Y = this.V5L.Y; + d.V2R.Y = this.V5L.Z; + d.V3R.Y = this.V5L.W; + d.V4R.Y = this.V5R.X; + d.V5R.Y = this.V5R.Y; + d.V6R.Y = this.V5R.Z; + d.V7R.Y = this.V5R.W; + + d.V0R.Z = this.V6L.X; + d.V1R.Z = this.V6L.Y; + d.V2R.Z = this.V6L.Z; + d.V3R.Z = this.V6L.W; + d.V4R.Z = this.V6R.X; + d.V5R.Z = this.V6R.Y; + d.V6R.Z = this.V6R.Z; + d.V7R.Z = this.V6R.W; + + d.V0R.W = this.V7L.X; + d.V1R.W = this.V7L.Y; + d.V2R.W = this.V7L.Z; + d.V3R.W = this.V7L.W; + d.V4R.W = this.V7R.X; + d.V5R.W = this.V7R.Y; + d.V6R.W = this.V7R.Z; + d.V7R.W = this.V7R.W; + } + } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs index c4d1408a2e..1319b56ee0 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.FromYCbCrSimdAvx2.cs @@ -1,11 +1,15 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; - +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static SixLabors.ImageSharp.SimdUtils; +#endif using SixLabors.ImageSharp.Tuples; // ReSharper disable ImpureMethodCallOnReadonlyValueField @@ -47,6 +51,73 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters "JpegColorConverter.FromYCbCrSimd256 can be used only on architecture having 256 byte floating point SIMD registers!"); } +#if SUPPORTS_RUNTIME_INTRINSICS + ref Vector256 yBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); + ref Vector256 cbBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component1)); + ref Vector256 crBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component2)); + + ref Vector256 resultBase = + ref Unsafe.As>(ref MemoryMarshal.GetReference(result)); + + // Used for the color conversion + var chromaOffset = Vector256.Create(-halfValue); + var scale = Vector256.Create(1 / maxValue); + var rCrMult = Vector256.Create(1.402F); + var gCbMult = Vector256.Create(-0.344136F); + var gCrMult = Vector256.Create(-0.714136F); + var bCbMult = Vector256.Create(1.772F); + + // Used for packing. + var va = Vector256.Create(1F); + ref byte control = ref MemoryMarshal.GetReference(HwIntrinsics.PermuteMaskEvenOdd8x32); + Vector256 vcontrol = Unsafe.As>(ref control); + + // Walking 8 elements at one step: + int n = result.Length / 8; + for (int i = 0; i < n; i++) + { + // y = yVals[i]; + // cb = cbVals[i] - 128F; + // cr = crVals[i] - 128F; + Vector256 y = Unsafe.Add(ref yBase, i); + Vector256 cb = Avx.Add(Unsafe.Add(ref cbBase, i), chromaOffset); + Vector256 cr = Avx.Add(Unsafe.Add(ref crBase, i), chromaOffset); + + y = Avx2.PermuteVar8x32(y, vcontrol); + cb = Avx2.PermuteVar8x32(cb, vcontrol); + cr = Avx2.PermuteVar8x32(cr, vcontrol); + + // r = y + (1.402F * cr); + // g = y - (0.344136F * cb) - (0.714136F * cr); + // b = y + (1.772F * cb); + // Adding & multiplying 8 elements at one time: + Vector256 r = HwIntrinsics.MultiplyAdd(y, cr, rCrMult); + Vector256 g = HwIntrinsics.MultiplyAdd(HwIntrinsics.MultiplyAdd(y, cb, gCbMult), cr, gCrMult); + Vector256 b = HwIntrinsics.MultiplyAdd(y, cb, bCbMult); + + // TODO: We should be savving to RGBA not Vector4 + r = Avx.Multiply(Avx.RoundToNearestInteger(r), scale); + g = Avx.Multiply(Avx.RoundToNearestInteger(g), scale); + b = Avx.Multiply(Avx.RoundToNearestInteger(b), scale); + + Vector256 vte = Avx.UnpackLow(r, b); + Vector256 vto = Avx.UnpackLow(g, va); + + ref Vector256 destination = ref Unsafe.Add(ref resultBase, i * 4); + + destination = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 1) = Avx.UnpackHigh(vte, vto); + + vte = Avx.UnpackHigh(r, b); + vto = Avx.UnpackHigh(g, va); + + Unsafe.Add(ref destination, 2) = Avx.UnpackLow(vte, vto); + Unsafe.Add(ref destination, 3) = Avx.UnpackHigh(vte, vto); + } +#else ref Vector yBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(values.Component0)); ref Vector cbBase = @@ -104,6 +175,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder.ColorConverters ref Vector4Octet destination = ref Unsafe.Add(ref resultBase, i); destination.Pack(ref rr, ref gg, ref bb); } +#endif } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs index f68bca0412..7c780700c9 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/ColorConverters/JpegColorConverter.cs @@ -4,7 +4,6 @@ using System; using System.Collections.Generic; using System.Numerics; - using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.Tuples; diff --git a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs index 40683e25a9..e0311dafef 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Decoder/JpegBlockPostProcessor.cs @@ -81,14 +81,14 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components.Decoder b.LoadFrom(ref sourceBlock); // Dequantize: - b.MultiplyInplace(ref this.DequantiazationTable); + b.MultiplyInPlace(ref this.DequantiazationTable); FastFloatingPointDCT.TransformIDCT(ref b, ref this.WorkspaceBlock1, ref this.WorkspaceBlock2); // To conform better to libjpeg we actually NEED TO loose precision here. // This is because they store blocks as Int16 between all the operations. // To be "more accurate", we need to emulate this by rounding! - this.WorkspaceBlock1.NormalizeColorsAndRoundInplace(maximumValue); + this.WorkspaceBlock1.NormalizeColorsAndRoundInPlace(maximumValue); this.WorkspaceBlock1.ScaledCopyTo( ref destAreaOrigin, diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs index ee06f2bdeb..a6d0622dd8 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System.Numerics; @@ -50,8 +50,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// Temporary block provided by the caller public static void TransformIDCT(ref Block8x8F src, ref Block8x8F dest, ref Block8x8F temp) { - // TODO: Transpose is a bottleneck now. We need full AVX support to optimize it: - // https://github.com/dotnet/corefx/issues/22940 src.TransposeInto(ref temp); IDCT8x4_LeftPart(ref temp, ref dest); @@ -63,7 +61,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components IDCT8x4_RightPart(ref temp, ref dest); // TODO: What if we leave the blocks in a scaled-by-x8 state until final color packing? - dest.MultiplyInplace(C_0_125); + dest.MultiplyInPlace(C_0_125); } /// @@ -326,7 +324,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components src.TransposeInto(ref temp); if (offsetSourceByNeg128) { - temp.AddToAllInplace(new Vector4(-128)); + temp.AddInPlace(-128F); } FDCT8x4_LeftPart(ref temp, ref dest); @@ -337,7 +335,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components FDCT8x4_LeftPart(ref temp, ref dest); FDCT8x4_RightPart(ref temp, ref dest); - dest.MultiplyInplace(C_0_125); + dest.MultiplyInPlace(C_0_125); } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs b/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs index b0311f0887..9342e09dfe 100644 --- a/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs +++ b/src/ImageSharp/Formats/Png/PngEncoderOptionsHelpers.cs @@ -35,6 +35,15 @@ namespace SixLabors.ImageSharp.Formats.Png options.ColorType ??= pngMetadata.ColorType ?? SuggestColorType(); options.BitDepth ??= pngMetadata.BitDepth ?? SuggestBitDepth(); + // Ensure bit depth and color type are a supported combination. + // Bit8 is the only bit depth supported by all color types. + byte bits = (byte)options.BitDepth; + byte[] validBitDepths = PngConstants.ColorTypes[options.ColorType.Value]; + if (Array.IndexOf(validBitDepths, bits) == -1) + { + options.BitDepth = PngBitDepth.Bit8; + } + options.InterlaceMethod ??= pngMetadata.InterlaceMethod; use16Bit = options.BitDepth == PngBitDepth.Bit16; @@ -44,12 +53,6 @@ namespace SixLabors.ImageSharp.Formats.Png { options.ChunkFilter = PngChunkFilter.ExcludeAll; } - - // Ensure we are not allowing impossible combinations. - if (!PngConstants.ColorTypes.ContainsKey(options.ColorType.Value)) - { - throw new NotSupportedException("Color type is not supported or not valid."); - } } /// @@ -68,15 +71,10 @@ namespace SixLabors.ImageSharp.Formats.Png return null; } - byte bits = (byte)options.BitDepth; - if (Array.IndexOf(PngConstants.ColorTypes[options.ColorType.Value], bits) == -1) - { - throw new NotSupportedException("Bit depth is not supported or not valid."); - } - // Use the metadata to determine what quantization depth to use if no quantizer has been set. if (options.Quantizer is null) { + byte bits = (byte)options.BitDepth; var maxColors = ImageMaths.GetColorCountForBitDepth(bits); options.Quantizer = new WuQuantizer(new QuantizerOptions { MaxColors = maxColors }); } diff --git a/src/ImageSharp/Image{TPixel}.cs b/src/ImageSharp/Image{TPixel}.cs index 255193c8ea..83ecc37530 100644 --- a/src/ImageSharp/Image{TPixel}.cs +++ b/src/ImageSharp/Image{TPixel}.cs @@ -201,14 +201,14 @@ namespace SixLabors.ImageSharp public bool TryGetSinglePixelSpan(out Span span) { IMemoryGroup mg = this.GetPixelMemoryGroup(); - if (mg.Count > 1) + if (mg.Count == 1) { - span = default; - return false; + span = mg[0].Span; + return true; } - span = mg.Single().Span; - return true; + span = default; + return false; } /// diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs index 0b1292b641..d30616997c 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Argb32.PixelOperations.Generated.cs @@ -53,84 +53,112 @@ namespace SixLabors.ImageSharp.PixelFormats Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale)); } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgba32(source, dest); } /// - public override void FromRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToArgb32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgra32(source, dest); } /// - public override void FromBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToArgb32(source, dest); + } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); + } - dp.FromArgb32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); } /// @@ -205,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Argb32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Argb32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromArgb32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs index b73bb8b831..50d4942ecb 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgr24.PixelOperations.Generated.cs @@ -52,146 +52,182 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); } - /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgr24(source, dest); } - /// - public override void ToL8(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L8 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); } - /// - public override void ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref L16 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromBgr24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); } /// - public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToL8(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L8 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La16 dp = ref Unsafe.Add(ref destRef, i); + ref L8 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToL16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref L16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref La32 dp = ref Unsafe.Add(ref destRef, i); + ref L16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa16(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref La16 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); + ref La16 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToLa32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); ref Bgr24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ref La32 destRef = ref MemoryMarshal.GetReference(destinationPixels); for (int i = 0; i < sourcePixels.Length; i++) { ref Bgr24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); + ref La32 dp = ref Unsafe.Add(ref destRef, i); dp.FromBgr24(sp); } diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs index 5bdd10404d..b38e5f19d6 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Bgra32.PixelOperations.Generated.cs @@ -53,84 +53,112 @@ namespace SixLabors.ImageSharp.PixelFormats Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale)); } /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgba32(source, dest); } /// - public override void FromRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgra32(source, dest); } /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToArgb32(source, dest); } /// - public override void FromArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToBgra32(source, dest); + } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgra32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToBgr24(source, dest); + } - dp.FromBgra32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToBgra32(source, dest); } /// @@ -205,24 +233,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Bgra32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Bgra32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromBgra32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs index 332683fc7f..9a4173892e 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgb24.PixelOperations.Generated.cs @@ -52,59 +52,114 @@ namespace SixLabors.ImageSharp.PixelFormats { Vector4Converters.RgbaCompatible.ToVector4(configuration, this, sourcePixels, destVectors, modifiers.Remove(PixelConversionModifiers.Scale | PixelConversionModifiers.Premultiply)); } - /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Argb32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Argb32 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromRgba32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToArgb32(source, dest); + } - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + /// + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - dp.FromRgb24(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgb24(source, dest); + } + /// + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgra32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgra32 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgb24(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgra32 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToBgr24(source, dest); + } - dp.FromRgb24(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgb24(source, dest); } /// @@ -179,24 +234,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgba32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgb24 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgba32 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgb24 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgba32 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgb24(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs index b05c62f1f7..5b60ec10e3 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/Rgba32.PixelOperations.Generated.cs @@ -42,84 +42,112 @@ namespace SixLabors.ImageSharp.PixelFormats } /// - public override void ToArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToArgb32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToArgb32(source, dest); } /// - public override void FromArgb32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromArgb32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromArgb32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromArgb32.ToRgba32(source, dest); } /// - public override void ToBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void ToBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromRgba32.ToBgra32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgra32(source, dest); } /// - public override void FromBgra32(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromBgra32( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(destinationPixels)); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgra32.ToRgba32(source, dest); + } + /// + public override void ToRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.FromBgra32.ToRgba32(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToRgb24(source, dest); } /// - public override void ToBgr24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) + public override void FromRgb24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Bgr24 destRef = ref MemoryMarshal.GetReference(destinationPixels); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgb24.ToRgba32(source, dest); + } + /// + public override void ToBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Bgr24 dp = ref Unsafe.Add(ref destRef, i); + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromRgba32.ToBgr24(source, dest); + } - dp.FromRgba32(sp); - } + /// + public override void FromBgr24( + Configuration configuration, + ReadOnlySpan sourcePixels, + Span destinationPixels) + { + Guard.NotNull(configuration, nameof(configuration)); + Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); + + ReadOnlySpan source = MemoryMarshal.Cast(sourcePixels); + Span dest = MemoryMarshal.Cast(destinationPixels); + PixelConverter.FromBgr24.ToRgba32(source, dest); } /// @@ -194,24 +222,6 @@ namespace SixLabors.ImageSharp.PixelFormats } } - /// - public override void ToRgb24(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) - { - Guard.NotNull(configuration, nameof(configuration)); - Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - - ref Rgba32 sourceRef = ref MemoryMarshal.GetReference(sourcePixels); - ref Rgb24 destRef = ref MemoryMarshal.GetReference(destinationPixels); - - for (int i = 0; i < sourcePixels.Length; i++) - { - ref Rgba32 sp = ref Unsafe.Add(ref sourceRef, i); - ref Rgb24 dp = ref Unsafe.Add(ref destRef, i); - - dp.FromRgba32(sp); - } - } - /// public override void ToRgb48(Configuration configuration, ReadOnlySpan sourcePixels, Span destinationPixels) { diff --git a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude index 5d56731ba6..b728b01152 100644 --- a/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude +++ b/src/ImageSharp/PixelFormats/PixelImplementations/Generated/_Common.ttinclude @@ -17,7 +17,7 @@ using System.Runtime.InteropServices; <#+ static readonly string[] CommonPixelTypes = { "Argb32", "Bgr24", "Bgra32", "L8", "L16", "La16", "La32", "Rgb24", "Rgba32", "Rgb48", "Rgba64", "Bgra5551" }; - static readonly string[] Optimized32BitTypes = { "Rgba32", "Argb32", "Bgra32" }; + static readonly string[] OptimizedPixelTypes = { "Rgba32", "Argb32", "Bgra32", "Rgb24", "Bgr24" }; // Types with Rgba32-combatible to/from Vector4 conversion static readonly string[] Rgba32CompatibleTypes = { "Argb32", "Bgra32", "Rgb24", "Bgr24" }; @@ -88,35 +88,31 @@ using System.Runtime.InteropServices; { #> /// - public override void To<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=thisPixelType#>> sourcePixels, Span<<#=otherPixelType#>> destinationPixels) + public override void To<#=otherPixelType#>( + Configuration configuration, + ReadOnlySpan<<#=thisPixelType#>> sourcePixels, + Span<<#=otherPixelType#>> destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As<<#=thisPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As<<#=otherPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(sourcePixels); + Span dest = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(destinationPixels); + PixelConverter.From<#=thisPixelType#>.To<#=otherPixelType#>(source, dest); } /// - public override void From<#=otherPixelType#>(Configuration configuration, ReadOnlySpan<<#=otherPixelType#>> sourcePixels, Span<<#=thisPixelType#>> destinationPixels) + public override void From<#=otherPixelType#>( + Configuration configuration, + ReadOnlySpan<<#=otherPixelType#>> sourcePixels, + Span<<#=thisPixelType#>> destinationPixels) { Guard.NotNull(configuration, nameof(configuration)); Guard.DestinationShouldNotBeTooShort(sourcePixels, destinationPixels, nameof(destinationPixels)); - ref uint sourceRef = ref Unsafe.As<<#=otherPixelType#>,uint>(ref MemoryMarshal.GetReference(sourcePixels)); - ref uint destRef = ref Unsafe.As<<#=thisPixelType#>, uint>(ref MemoryMarshal.GetReference(destinationPixels)); - - for (int i = 0; i < sourcePixels.Length; i++) - { - uint sp = Unsafe.Add(ref sourceRef, i); - Unsafe.Add(ref destRef, i) = PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(sp); - } + ReadOnlySpan source = MemoryMarshal.Cast<<#=otherPixelType#>, byte>(sourcePixels); + Span dest = MemoryMarshal.Cast<<#=thisPixelType#>, byte>(destinationPixels); + PixelConverter.From<#=otherPixelType#>.To<#=thisPixelType#>(source, dest); } <#+ } @@ -152,8 +148,8 @@ using System.Runtime.InteropServices; GenerateRgba32CompatibleVector4ConversionMethods(pixelType, pixelType.EndsWith("32")); } - var matching32BitTypes = Optimized32BitTypes.Contains(pixelType) ? - Optimized32BitTypes.Where(p => p != pixelType) : + var matching32BitTypes = OptimizedPixelTypes.Contains(pixelType) ? + OptimizedPixelTypes.Where(p => p != pixelType) : Enumerable.Empty(); foreach (string destPixelType in matching32BitTypes) diff --git a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs index 8142640848..7215fa860b 100644 --- a/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs +++ b/src/ImageSharp/PixelFormats/Utils/PixelConverter.cs @@ -1,7 +1,7 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. -using System.Buffers.Binary; +using System; using System.Runtime.CompilerServices; namespace SixLabors.ImageSharp.PixelFormats.Utils @@ -21,88 +21,196 @@ namespace SixLabors.ImageSharp.PixelFormats.Utils public static class FromRgba32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToArgb32(uint packedRgba) - { - // packedRgba = [aa bb gg rr] - // ROTL(8, packedRgba) = [bb gg rr aa] - return (packedRgba << 8) | (packedRgba >> 24); - } + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToBgra32(uint packedRgba) - { - // packedRgba = [aa bb gg rr] - // tmp1 = [aa 00 gg 00] - // tmp2 = [00 bb 00 rr] - // tmp3=ROTL(16, tmp2) = [00 rr 00 bb] - // tmp1 + tmp3 = [aa rr gg bb] - uint tmp1 = packedRgba & 0xFF00FF00; - uint tmp2 = packedRgba & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - return tmp1 + tmp3; - } + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); } public static class FromArgb32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToRgba32(uint packedArgb) - { - // packedArgb = [bb gg rr aa] - // ROTR(8, packedArgb) = [aa bb gg rr] - return (packedArgb >> 8) | (packedArgb << 24); - } + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 3, 2, 1)); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToBgra32(uint packedArgb) - { - // packedArgb = [bb gg rr aa] - // REVERSE(packedArgb) = [aa rr gg bb] - return BinaryPrimitives.ReverseEndianness(packedArgb); - } + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(0, 1, 2, 3)); } public static class FromBgra32 { /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, new DefaultShuffle4Slice3(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle4Slice3(source, dest, default); + } + + public static class FromRgb24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(2, 1, 0, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToBgr24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); + } + + public static class FromBgr24 + { + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToArgb32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(0, 1, 2, 3)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. + /// + [MethodImpl(InliningOptions.ShortMethod)] + public static void ToRgba32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, new DefaultPad3Shuffle4(3, 0, 1, 2)); + + /// + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToArgb32(uint packedBgra) - { - // packedBgra = [aa rr gg bb] - // REVERSE(packedBgra) = [bb gg rr aa] - return BinaryPrimitives.ReverseEndianness(packedBgra); - } + public static void ToBgra32(ReadOnlySpan source, Span dest) + => SimdUtils.Pad3Shuffle4(source, dest, default); /// - /// Converts a packed to . + /// Converts a representing a collection of + /// pixels to a representing + /// a collection of pixels. /// [MethodImpl(InliningOptions.ShortMethod)] - public static uint ToRgba32(uint packedBgra) - { - // packedRgba = [aa rr gg bb] - // tmp1 = [aa 00 gg 00] - // tmp2 = [00 rr 00 bb] - // tmp3=ROTL(16, tmp2) = [00 bb 00 rr] - // tmp1 + tmp3 = [aa bb gg rr] - uint tmp1 = packedBgra & 0xFF00FF00; - uint tmp2 = packedBgra & 0x00FF00FF; - uint tmp3 = (tmp2 << 16) | (tmp2 >> 16); - return tmp1 + tmp3; - } + public static void ToRgb24(ReadOnlySpan source, Span dest) + => SimdUtils.Shuffle3(source, dest, new DefaultShuffle3(0, 1, 2)); } } -} \ No newline at end of file +} diff --git a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs index 19514c4b6f..274376671b 100644 --- a/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Normalization/GlobalHistogramEqualizationProcessor{TPixel}.cs @@ -106,15 +106,23 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization } /// +#if NETSTANDARD2_0 + // https://github.com/SixLabors/ImageSharp/issues/1204 + [MethodImpl(MethodImplOptions.NoOptimization)] +#else [MethodImpl(InliningOptions.ShortMethod)] +#endif public void Invoke(int y) { ref int histogramBase = ref MemoryMarshal.GetReference(this.histogramBuffer.GetSpan()); ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + int levels = this.luminanceLevels; for (int x = 0; x < this.bounds.Width; x++) { - int luminance = GetLuminance(Unsafe.Add(ref pixelBase, x), this.luminanceLevels); + // TODO: We should bulk convert here. + var vector = Unsafe.Add(ref pixelBase, x).ToVector4(); + int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); Unsafe.Add(ref histogramBase, luminance)++; } } @@ -147,18 +155,27 @@ namespace SixLabors.ImageSharp.Processing.Processors.Normalization } /// +#if NETSTANDARD2_0 + // https://github.com/SixLabors/ImageSharp/issues/1204 + [MethodImpl(MethodImplOptions.NoOptimization)] +#else [MethodImpl(InliningOptions.ShortMethod)] +#endif public void Invoke(int y) { ref int cdfBase = ref MemoryMarshal.GetReference(this.cdfBuffer.GetSpan()); ref TPixel pixelBase = ref MemoryMarshal.GetReference(this.source.GetPixelRowSpan(y)); + int levels = this.luminanceLevels; + float noOfPixelsMinusCdfMin = this.numberOfPixelsMinusCdfMin; for (int x = 0; x < this.bounds.Width; x++) { + // TODO: We should bulk convert here. ref TPixel pixel = ref Unsafe.Add(ref pixelBase, x); - int luminance = GetLuminance(pixel, this.luminanceLevels); - float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / this.numberOfPixelsMinusCdfMin; - pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, pixel.ToVector4().W)); + var vector = pixel.ToVector4(); + int luminance = ImageMaths.GetBT709Luminance(ref vector, levels); + float luminanceEqualized = Unsafe.Add(ref cdfBase, luminance) / noOfPixelsMinusCdfMin; + pixel.FromVector4(new Vector4(luminanceEqualized, luminanceEqualized, luminanceEqualized, vector.W)); } } } diff --git a/tests/Directory.Build.targets b/tests/Directory.Build.targets index e9e93a855f..1f699c9dd1 100644 --- a/tests/Directory.Build.targets +++ b/tests/Directory.Build.targets @@ -26,18 +26,19 @@ - + - - - - - + + + + + + - + - + diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs new file mode 100644 index 0000000000..61fb2745b9 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_AddInPlace.cs @@ -0,0 +1,21 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_AddInPlace + { + [Benchmark] + public float AddInplace() + { + float f = 42F; + Block8x8F b = default; + b.AddInPlace(f); + return f; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs new file mode 100644 index 0000000000..0d1e67112f --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceBlock.cs @@ -0,0 +1,37 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_MultiplyInPlaceBlock + { + private static readonly Block8x8F Source = Create8x8FloatData(); + + [Benchmark] + public void MultiplyInPlaceBlock() + { + Block8x8F dest = default; + Source.MultiplyInPlace(ref dest); + } + + private static Block8x8F Create8x8FloatData() + { + var result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[(i * 8) + j] = (i * 10) + j; + } + } + + var source = default(Block8x8F); + source.LoadFrom(result); + return source; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs new file mode 100644 index 0000000000..31a6ca713f --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_MultiplyInPlaceScalar.cs @@ -0,0 +1,21 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_MultiplyInPlaceScalar + { + [Benchmark] + public float MultiplyInPlaceScalar() + { + float f = 42F; + Block8x8F b = default; + b.MultiplyInPlace(f); + return f; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs new file mode 100644 index 0000000000..1d103cd1a0 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs @@ -0,0 +1,37 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Formats.Jpeg.Components; + +namespace SixLabors.ImageSharp.Benchmarks.Codecs.Jpeg.BlockOperations +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Block8x8F_Transpose + { + private static readonly Block8x8F Source = Create8x8FloatData(); + + [Benchmark] + public void TransposeInto() + { + var dest = default(Block8x8F); + Source.TransposeInto(ref dest); + } + + private static Block8x8F Create8x8FloatData() + { + var result = new float[64]; + for (int i = 0; i < 8; i++) + { + for (int j = 0; j < 8; j++) + { + result[(i * 8) + j] = (i * 10) + j; + } + } + + var source = default(Block8x8F); + source.LoadFrom(result); + return source; + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs index da15da24c7..04ca8cd652 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4.cs @@ -13,15 +13,13 @@ using System.Runtime.Intrinsics.X86; #endif using BenchmarkDotNet.Attributes; -using BenchmarkDotNet.Environments; -using BenchmarkDotNet.Jobs; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; // ReSharper disable InconsistentNaming namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.ShortCore31))] public abstract class FromVector4 where TPixel : unmanaged, IPixel { @@ -32,7 +30,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk protected Configuration Configuration => Configuration.Default; // [Params(64, 2048)] - [Params(1024)] + [Params(64, 256, 2048)] public int Count { get; set; } [GlobalSetup] @@ -60,7 +58,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk } } - [Benchmark] + [Benchmark(Baseline = true)] public void PixelOperations_Base() { new PixelOperations().FromVector4Destructive(this.Configuration, this.source.GetSpan(), this.destination.GetSpan()); @@ -93,7 +91,7 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk SimdUtils.BasicIntrinsics256.NormalizedFloatToByteSaturate(sBytes, dFloats); } - [Benchmark(Baseline = true)] + [Benchmark] public void ExtendedIntrinsic() { Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); @@ -104,12 +102,12 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk #if SUPPORTS_RUNTIME_INTRINSICS [Benchmark] - public void UseAvx2() + public void UseHwIntrinsics() { Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); - SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats); + SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(sBytes, dFloats); } private static ReadOnlySpan PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 }; diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs new file mode 100644 index 0000000000..5da6edc6b5 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/FromVector4_Rgb24.cs @@ -0,0 +1,55 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class FromVector4_Rgb24 : FromVector4 + { + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 64 | 343.2 ns | 305.91 ns | 16.77 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 64 | 320.8 ns | 19.93 ns | 1.09 ns | 0.94 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 64 | 234.3 ns | 17.98 ns | 0.99 ns | 1.00 | 0.00 | 0.0052 | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 64 | 246.0 ns | 82.34 ns | 4.51 ns | 1.05 | 0.02 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 64 | 222.3 ns | 39.46 ns | 2.16 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 64 | 243.4 ns | 33.58 ns | 1.84 ns | 1.09 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 256 | 824.9 ns | 32.77 ns | 1.80 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 256 | 967.0 ns | 39.09 ns | 2.14 ns | 1.17 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 256 | 756.9 ns | 94.43 ns | 5.18 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 256 | 1,003.3 ns | 3,192.09 ns | 174.97 ns | 1.32 | 0.22 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 256 | 748.6 ns | 248.03 ns | 13.60 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 256 | 437.0 ns | 36.48 ns | 2.00 ns | 0.58 | 0.01 | 0.0172 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-XYEQXL | .NET 4.7.2 | 2048 | 5,751.6 ns | 704.24 ns | 38.60 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-XYEQXL | .NET 4.7.2 | 2048 | 4,391.6 ns | 718.17 ns | 39.37 ns | 0.76 | 0.00 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-HSXNJV | .NET Core 2.1 | 2048 | 6,202.0 ns | 1,815.18 ns | 99.50 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-HSXNJV | .NET Core 2.1 | 2048 | 4,225.6 ns | 1,004.03 ns | 55.03 ns | 0.68 | 0.01 | 0.0153 | - | - | 72 B | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-YUREJO | .NET Core 3.1 | 2048 | 6,157.1 ns | 2,516.98 ns | 137.96 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-YUREJO | .NET Core 3.1 | 2048 | 1,822.7 ns | 1,764.43 ns | 96.71 ns | 0.30 | 0.02 | 0.0172 | - | - | 72 B | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs new file mode 100644 index 0000000000..4af0286054 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Pad3Shuffle4Channel.cs @@ -0,0 +1,87 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Pad3Shuffle4Channel + { + private static readonly DefaultPad3Shuffle4 Control = new DefaultPad3Shuffle4(1, 0, 3, 2); + private static readonly XYZWPad3Shuffle4 ControlFast = default; + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count * 4 / 3]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Pad3Shuffle4() + { + SimdUtils.Pad3Shuffle4(this.source, this.destination, Control); + } + + [Benchmark] + public void Pad3Shuffle4FastFallback() + { + SimdUtils.Pad3Shuffle4(this.source, this.destination, ControlFast); + } + } + + // 2020-10-30 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |------------------------- |------------------- |-------------------------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:|------:|------:|------:|----------:| + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 120.64 ns | 7.190 ns | 21.200 ns | 114.26 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 96 | 23.63 ns | 0.175 ns | 0.155 ns | 23.65 ns | 0.15 | 0.01 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 25.25 ns | 0.356 ns | 0.298 ns | 25.27 ns | 0.17 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 14.80 ns | 0.358 ns | 1.032 ns | 14.64 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 96 | 24.84 ns | 0.376 ns | 0.333 ns | 24.74 ns | 1.57 | 0.06 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 96 | 24.58 ns | 0.471 ns | 0.704 ns | 24.38 ns | 1.60 | 0.09 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 258.92 ns | 4.873 ns | 4.069 ns | 257.95 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 384 | 41.41 ns | 0.859 ns | 1.204 ns | 41.33 ns | 0.16 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 40.74 ns | 0.848 ns | 0.793 ns | 40.48 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 74.50 ns | 0.490 ns | 0.383 ns | 74.49 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 384 | 40.74 ns | 0.624 ns | 0.584 ns | 40.72 ns | 0.55 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 384 | 38.28 ns | 0.534 ns | 0.417 ns | 38.22 ns | 0.51 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 503.91 ns | 6.466 ns | 6.048 ns | 501.58 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 768 | 62.86 ns | 0.332 ns | 0.277 ns | 62.80 ns | 0.12 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 64.59 ns | 0.469 ns | 0.415 ns | 64.62 ns | 0.13 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 110.51 ns | 0.592 ns | 0.554 ns | 110.33 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 768 | 64.72 ns | 1.306 ns | 1.090 ns | 64.51 ns | 0.59 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 768 | 62.11 ns | 0.816 ns | 0.682 ns | 61.98 ns | 0.56 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 1,005.84 ns | 13.176 ns | 12.325 ns | 1,004.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 2. AVX | Empty | 1536 | 110.05 ns | 0.256 ns | 0.214 ns | 110.04 ns | 0.11 | 0.00 | - | - | - | - | + // | Pad3Shuffle4 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.545 ns | 0.483 ns | 110.09 ns | 0.11 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Pad3Shuffle4FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 220.37 ns | 1.601 ns | 1.419 ns | 220.13 ns | 1.00 | 0.00 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 2. AVX | Empty | 1536 | 111.54 ns | 2.173 ns | 2.901 ns | 111.27 ns | 0.51 | 0.01 | - | - | - | - | + // | Pad3Shuffle4FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 110.23 ns | 0.456 ns | 0.427 ns | 110.25 ns | 0.50 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs new file mode 100644 index 0000000000..2a886c6879 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/PremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class PremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void PremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + Premultiply(ref v); + } + } + + [Benchmark] + public void Premultiply() + { + Vector4Utilities.Premultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void Premultiply(ref Vector4 source) + { + float w = source.W; + source *= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + } + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = GetRandomFloat(rnd, minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs new file mode 100644 index 0000000000..3667b973ef --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle3Channel.cs @@ -0,0 +1,64 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle3Channel + { + private static readonly DefaultShuffle3 Control = new DefaultShuffle3(1, 0, 2); + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(96, 384, 768, 1536)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle3() + { + SimdUtils.Shuffle3(this.source, this.destination, Control); + } + } + + // 2020-11-02 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 96 | 48.46 ns | 1.034 ns | 2.438 ns | 47.46 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 96 | 32.42 ns | 0.537 ns | 0.476 ns | 32.34 ns | 0.66 | 0.04 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 96 | 32.51 ns | 0.373 ns | 0.349 ns | 32.56 ns | 0.66 | 0.03 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 384 | 199.04 ns | 1.512 ns | 1.180 ns | 199.17 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 384 | 71.20 ns | 2.654 ns | 7.784 ns | 69.60 ns | 0.41 | 0.02 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 384 | 63.23 ns | 0.569 ns | 0.505 ns | 63.21 ns | 0.32 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 768 | 391.28 ns | 5.087 ns | 3.972 ns | 391.22 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 768 | 109.12 ns | 2.149 ns | 2.010 ns | 108.66 ns | 0.28 | 0.01 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 768 | 106.51 ns | 0.734 ns | 0.613 ns | 106.56 ns | 0.27 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1536 | 773.70 ns | 5.516 ns | 4.890 ns | 772.96 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle3 | 2. AVX | Empty | 1536 | 190.41 ns | 1.090 ns | 0.851 ns | 190.38 ns | 0.25 | 0.00 | - | - | - | - | + // | Shuffle3 | 3. SSE | COMPlus_EnableAVX=0 | 1536 | 190.94 ns | 0.985 ns | 0.769 ns | 190.85 ns | 0.25 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs new file mode 100644 index 0000000000..9cf24ccd69 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/Shuffle4Slice3Channel.cs @@ -0,0 +1,95 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class Shuffle4Slice3Channel + { + private static readonly DefaultShuffle4Slice3 Control = new DefaultShuffle4Slice3(1, 0, 3, 2); + private static readonly XYZWShuffle4Slice3 ControlFast = default; + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[(int)(this.Count * (3 / 4F))]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Slice3() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, Control); + } + + [Benchmark] + public void Shuffle4Slice3FastFallback() + { + SimdUtils.Shuffle4Slice3(this.source, this.destination, ControlFast); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |--------------------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|----------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 56.44 ns | 2.843 ns | 8.382 ns | 56.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 128 | 27.15 ns | 0.556 ns | 0.762 ns | 27.34 ns | 0.41 | 0.03 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 128 | 26.36 ns | 0.321 ns | 0.268 ns | 26.26 ns | 0.38 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 25.85 ns | 0.494 ns | 0.462 ns | 25.84 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 128 | 26.15 ns | 0.113 ns | 0.106 ns | 26.16 ns | 1.01 | 0.02 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 128 | 25.57 ns | 0.078 ns | 0.061 ns | 25.56 ns | 0.99 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 97.47 ns | 0.327 ns | 0.289 ns | 97.35 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 256 | 32.61 ns | 0.107 ns | 0.095 ns | 32.62 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.21 ns | 0.169 ns | 0.150 ns | 33.15 ns | 0.34 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 52.34 ns | 0.779 ns | 0.729 ns | 51.94 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 256 | 32.16 ns | 0.111 ns | 0.104 ns | 32.16 ns | 0.61 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 256 | 33.61 ns | 0.342 ns | 0.319 ns | 33.62 ns | 0.64 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 210.74 ns | 3.825 ns | 5.956 ns | 207.70 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 512 | 51.03 ns | 0.535 ns | 0.501 ns | 51.18 ns | 0.24 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 512 | 66.60 ns | 1.313 ns | 1.613 ns | 65.93 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 119.12 ns | 1.905 ns | 1.689 ns | 118.52 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 512 | 50.33 ns | 0.382 ns | 0.339 ns | 50.41 ns | 0.42 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 512 | 49.25 ns | 0.555 ns | 0.492 ns | 49.26 ns | 0.41 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 423.55 ns | 4.891 ns | 4.336 ns | 423.27 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 1024 | 77.13 ns | 1.355 ns | 2.264 ns | 76.19 ns | 0.19 | 0.01 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 79.39 ns | 0.103 ns | 0.086 ns | 79.37 ns | 0.19 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 226.57 ns | 2.930 ns | 2.598 ns | 226.10 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 1024 | 80.25 ns | 1.647 ns | 2.082 ns | 80.98 ns | 0.35 | 0.01 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 84.99 ns | 1.234 ns | 1.155 ns | 85.60 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3 | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 794.96 ns | 1.735 ns | 1.538 ns | 795.15 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 2. AVX | Empty | 2048 | 128.41 ns | 0.417 ns | 0.390 ns | 128.24 ns | 0.16 | 0.00 | - | - | - | - | + // | Shuffle4Slice3 | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 127.24 ns | 0.294 ns | 0.229 ns | 127.23 ns | 0.16 | 0.00 | - | - | - | - | + // | | | | | | | | | | | | | | | + // | Shuffle4Slice3FastFallback | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 382.97 ns | 1.064 ns | 0.831 ns | 382.87 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 2. AVX | Empty | 2048 | 126.93 ns | 0.382 ns | 0.339 ns | 126.94 ns | 0.33 | 0.00 | - | - | - | - | + // | Shuffle4Slice3FastFallback | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 149.36 ns | 1.875 ns | 1.754 ns | 149.33 ns | 0.39 | 0.00 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs new file mode 100644 index 0000000000..db49470011 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleByte4Channel.cs @@ -0,0 +1,67 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class ShuffleByte4Channel + { + private byte[] source; + private byte[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new byte[this.Count]; + new Random(this.Count).NextBytes(this.source); + this.destination = new byte[this.Count]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4(this.source, this.destination, default); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |----------:|---------:|---------:|------:|--------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 17.39 ns | 0.187 ns | 0.175 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 21.72 ns | 0.299 ns | 0.279 ns | 1.25 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 18.10 ns | 0.346 ns | 0.289 ns | 1.04 | 0.02 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 35.51 ns | 0.711 ns | 0.790 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 23.90 ns | 0.508 ns | 0.820 ns | 0.69 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 20.40 ns | 0.133 ns | 0.111 ns | 0.57 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 73.39 ns | 0.310 ns | 0.259 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 26.10 ns | 0.418 ns | 0.391 ns | 0.36 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 27.59 ns | 0.556 ns | 0.571 ns | 0.38 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 150.64 ns | 2.903 ns | 2.716 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 38.67 ns | 0.801 ns | 1.889 ns | 0.24 | 0.02 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 47.13 ns | 0.948 ns | 1.054 ns | 0.31 | 0.01 | - | - | - | - | + // | | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 315.29 ns | 5.206 ns | 6.583 ns | 1.00 | 0.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 57.37 ns | 1.152 ns | 1.078 ns | 0.18 | 0.01 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 65.75 ns | 1.198 ns | 1.600 ns | 0.21 | 0.01 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs new file mode 100644 index 0000000000..86b1f766e1 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ShuffleFloat4Channel.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using BenchmarkDotNet.Attributes; +using SixLabors.ImageSharp.Tests; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.HwIntrinsics_SSE_AVX))] + public class ShuffleFloat4Channel + { + private static readonly byte Control = default(WXYZShuffle4).Control; + private float[] source; + private float[] destination; + + [GlobalSetup] + public void Setup() + { + this.source = new Random(this.Count).GenerateRandomFloatArray(this.Count, 0, 256); + this.destination = new float[this.Count]; + } + + [Params(128, 256, 512, 1024, 2048)] + public int Count { get; set; } + + [Benchmark] + public void Shuffle4Channel() + { + SimdUtils.Shuffle4(this.source, this.destination, Control); + } + } + + // 2020-10-29 + // ########## + // + // BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.572 (2004/?/20H1) + // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores + // .NET Core SDK=3.1.403 + // [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 1. No HwIntrinsics : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 2. AVX : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // 3. SSE : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT + // + // Runtime=.NET Core 3.1 + // + // | Method | Job | EnvironmentVariables | Count | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated | + // |---------------- |------------------- |-------------------------------------------------- |------ |-----------:|----------:|----------:|------:|------:|------:|------:|----------:| + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 128 | 63.647 ns | 0.5475 ns | 0.4853 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 128 | 9.818 ns | 0.1457 ns | 0.1292 ns | 0.15 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 128 | 15.267 ns | 0.1005 ns | 0.0940 ns | 0.24 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 256 | 125.586 ns | 1.9312 ns | 1.8064 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 256 | 15.878 ns | 0.1983 ns | 0.1758 ns | 0.13 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 256 | 29.170 ns | 0.2925 ns | 0.2442 ns | 0.23 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 512 | 263.859 ns | 2.6660 ns | 2.3634 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 512 | 29.452 ns | 0.3334 ns | 0.3118 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 512 | 52.912 ns | 0.1932 ns | 0.1713 ns | 0.20 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 1024 | 495.717 ns | 1.9850 ns | 1.8567 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 1024 | 53.757 ns | 0.3212 ns | 0.2847 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 1024 | 107.815 ns | 1.6201 ns | 1.3528 ns | 0.22 | - | - | - | - | + // | | | | | | | | | | | | | + // | Shuffle4Channel | 1. No HwIntrinsics | COMPlus_EnableHWIntrinsic=0,COMPlus_FeatureSIMD=0 | 2048 | 980.134 ns | 3.7407 ns | 3.1237 ns | 1.00 | - | - | - | - | + // | Shuffle4Channel | 2. AVX | Empty | 2048 | 105.120 ns | 0.6140 ns | 0.5443 ns | 0.11 | - | - | - | - | + // | Shuffle4Channel | 3. SSE | COMPlus_EnableAVX=0 | 2048 | 216.473 ns | 2.3268 ns | 2.0627 ns | 0.22 | - | - | - | - | +} diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs new file mode 100644 index 0000000000..aecd418316 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgb24.cs @@ -0,0 +1,65 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using BenchmarkDotNet.Attributes; + +using SixLabors.ImageSharp.Memory; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortClr))] + public class ToVector4_Rgb24 : ToVector4 + { + [Benchmark(Baseline = true)] + public void PixelOperations_Base() + { + new PixelOperations().ToVector4( + this.Configuration, + this.source.GetSpan(), + this.destination.GetSpan()); + } + } +} + +// 2020-11-02 +// ########## +// +// BenchmarkDotNet = v0.12.1, OS = Windows 10.0.19041.572(2004 /?/ 20H1) +// Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores +// .NET Core SDK=3.1.403 +// [Host] : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// Job-XYEQXL : .NET Framework 4.8 (4.8.4250.0), X64 RyuJIT +// Job-HSXNJV : .NET Core 2.1.23 (CoreCLR 4.6.29321.03, CoreFX 4.6.29321.01), X64 RyuJIT +// Job-YUREJO : .NET Core 3.1.9 (CoreCLR 4.700.20.47201, CoreFX 4.700.20.47203), X64 RyuJIT +// +// IterationCount=3 LaunchCount=1 WarmupCount=3 +// +// | Method | Job | Runtime | Count | Mean | Error | StdDev | Ratio | RatioSD | Gen 0 | Gen 1 | Gen 2 | Allocated | +// |---------------------------- |----------- |-------------- |------ |-----------:|------------:|----------:|------:|--------:|-------:|------:|------:|----------:| +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 64 | 298.4 ns | 33.63 ns | 1.84 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 64 | 355.5 ns | 908.51 ns | 49.80 ns | 1.19 | 0.17 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 64 | 220.1 ns | 13.77 ns | 0.75 ns | 1.00 | 0.00 | 0.0055 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 64 | 228.5 ns | 41.41 ns | 2.27 ns | 1.04 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 64 | 213.6 ns | 12.47 ns | 0.68 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 64 | 217.0 ns | 9.95 ns | 0.55 ns | 1.02 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 256 | 829.0 ns | 242.93 ns | 13.32 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 256 | 448.9 ns | 4.04 ns | 0.22 ns | 0.54 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 256 | 863.0 ns | 1,253.26 ns | 68.70 ns | 1.00 | 0.00 | 0.0048 | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 256 | 309.2 ns | 66.16 ns | 3.63 ns | 0.36 | 0.03 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 256 | 737.0 ns | 253.90 ns | 13.92 ns | 1.00 | 0.00 | 0.0057 | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 256 | 212.3 ns | 1.07 ns | 0.06 ns | 0.29 | 0.01 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OIBEDX | .NET 4.7.2 | 2048 | 5,625.6 ns | 404.35 ns | 22.16 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OIBEDX | .NET 4.7.2 | 2048 | 1,974.1 ns | 229.84 ns | 12.60 ns | 0.35 | 0.00 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-OPAORC | .NET Core 2.1 | 2048 | 5,467.2 ns | 537.29 ns | 29.45 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-OPAORC | .NET Core 2.1 | 2048 | 1,985.5 ns | 4,714.23 ns | 258.40 ns | 0.36 | 0.05 | - | - | - | - | +// | | | | | | | | | | | | | | +// | PixelOperations_Base | Job-VPSIRL | .NET Core 3.1 | 2048 | 5,888.2 ns | 1,622.23 ns | 88.92 ns | 1.00 | 0.00 | - | - | - | 24 B | +// | PixelOperations_Specialized | Job-VPSIRL | .NET Core 3.1 | 2048 | 1,165.0 ns | 191.71 ns | 10.51 ns | 0.20 | 0.00 | - | - | - | - | diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs index 145bf9889b..9ae3b073d4 100644 --- a/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/ToVector4_Rgba32.cs @@ -13,7 +13,7 @@ using SixLabors.ImageSharp.PixelFormats; namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk { - [Config(typeof(Config.ShortClr))] + [Config(typeof(Config.ShortCore31))] public class ToVector4_Rgba32 : ToVector4 { [Benchmark] @@ -52,6 +52,17 @@ namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(sBytes, dFloats); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Benchmark] + public void HwIntrinsics() + { + Span sBytes = MemoryMarshal.Cast(this.source.GetSpan()); + Span dFloats = MemoryMarshal.Cast(this.destination.GetSpan()); + + SimdUtils.HwIntrinsics.ByteToNormalizedFloat(sBytes, dFloats); + } +#endif + // [Benchmark] public void ExtendedIntrinsics_BulkConvertByteToNormalizedFloat_2Loops() { diff --git a/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs new file mode 100644 index 0000000000..1312c767be --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Color/Bulk/UnPremultiplyVector4.cs @@ -0,0 +1,68 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using BenchmarkDotNet.Attributes; + +namespace SixLabors.ImageSharp.Benchmarks.ColorSpaces.Bulk +{ + [Config(typeof(Config.ShortCore31))] + public class UnPremultiplyVector4 + { + private static readonly Vector4[] Vectors = CreateVectors(); + + [Benchmark(Baseline = true)] + public void UnPremultiplyBaseline() + { + ref Vector4 baseRef = ref MemoryMarshal.GetReference(Vectors); + + for (int i = 0; i < Vectors.Length; i++) + { + ref Vector4 v = ref Unsafe.Add(ref baseRef, i); + UnPremultiply(ref v); + } + } + + [Benchmark] + public void UnPremultiply() + { + Vector4Utilities.UnPremultiply(Vectors); + } + + [MethodImpl(InliningOptions.ShortMethod)] + private static void UnPremultiply(ref Vector4 source) + { + float w = source.W; + source /= w; + source.W = w; + } + + private static Vector4[] CreateVectors() + { + var rnd = new Random(42); + return GenerateRandomVectorArray(rnd, 2048, 0, 1); + } + + private static Vector4[] GenerateRandomVectorArray(Random rnd, int length, float minVal, float maxVal) + { + var values = new Vector4[length]; + + for (int i = 0; i < length; i++) + { + ref Vector4 v = ref values[i]; + v.X = GetRandomFloat(rnd, minVal, maxVal); + v.Y = GetRandomFloat(rnd, minVal, maxVal); + v.Z = GetRandomFloat(rnd, minVal, maxVal); + v.W = GetRandomFloat(rnd, minVal, maxVal); + } + + return values; + } + + private static float GetRandomFloat(Random rnd, float minVal, float maxVal) + => ((float)rnd.NextDouble() * (maxVal - minVal)) + minVal; + } +} diff --git a/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs new file mode 100644 index 0000000000..5ceb4c8a00 --- /dev/null +++ b/tests/ImageSharp.Benchmarks/Config.HwIntrinsics.cs @@ -0,0 +1,84 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using BenchmarkDotNet.Environments; +using BenchmarkDotNet.Jobs; + +namespace SixLabors.ImageSharp.Benchmarks +{ + public partial class Config + { + private const string On = "1"; + private const string Off = "0"; + + // See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861 + // * EnableHWIntrinsic + // * EnableSSE + // * EnableSSE2 + // * EnableAES + // * EnablePCLMULQDQ + // * EnableSSE3 + // * EnableSSSE3 + // * EnableSSE41 + // * EnableSSE42 + // * EnablePOPCNT + // * EnableAVX + // * EnableFMA + // * EnableAVX2 + // * EnableBMI1 + // * EnableBMI2 + // * EnableLZCNT + // + // `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things + // like `LZCNT`, `BMI1`, or `BMI2` + // `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3` + private const string EnableAES = "COMPlus_EnableAES"; + private const string EnableAVX = "COMPlus_EnableAVX"; + private const string EnableAVX2 = "COMPlus_EnableAVX2"; + private const string EnableBMI1 = "COMPlus_EnableBMI1"; + private const string EnableBMI2 = "COMPlus_EnableBMI2"; + private const string EnableFMA = "COMPlus_EnableFMA"; + private const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; + private const string EnableLZCNT = "COMPlus_EnableLZCNT"; + private const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; + private const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; + private const string EnableSSE = "COMPlus_EnableSSE"; + private const string EnableSSE2 = "COMPlus_EnableSSE2"; + private const string EnableSSE3 = "COMPlus_EnableSSE3"; + private const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; + private const string EnableSSE41 = "COMPlus_EnableSSE41"; + private const string EnableSSE42 = "COMPlus_EnableSSE42"; + private const string EnableSSSE3 = "COMPlus_EnableSSSE3"; + private const string FeatureSIMD = "COMPlus_FeatureSIMD"; + + public class HwIntrinsics_SSE_AVX : Config + { + public HwIntrinsics_SSE_AVX() + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables( + new EnvironmentVariable(EnableHWIntrinsic, Off), + new EnvironmentVariable(FeatureSIMD, Off)) + .WithId("1. No HwIntrinsics").AsBaseline()); + +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithId("2. AVX")); + } + + if (Sse.IsSupported) + { + this.AddJob(Job.Default.WithRuntime(CoreRuntime.Core31) + .WithEnvironmentVariables(new EnvironmentVariable(EnableAVX, Off)) + .WithId("3. SSE")); + } +#endif + } + } + } +} diff --git a/tests/ImageSharp.Benchmarks/Config.cs b/tests/ImageSharp.Benchmarks/Config.cs index f9240779b9..53271f522d 100644 --- a/tests/ImageSharp.Benchmarks/Config.cs +++ b/tests/ImageSharp.Benchmarks/Config.cs @@ -12,7 +12,7 @@ using BenchmarkDotNet.Jobs; namespace SixLabors.ImageSharp.Benchmarks { - public class Config : ManualConfig + public partial class Config : ManualConfig { public Config() { diff --git a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs index 7d6c2efedf..a933f890fc 100644 --- a/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs +++ b/tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_ConvertFromRgba32.cs @@ -168,49 +168,27 @@ namespace SixLabors.ImageSharp.Benchmarks.General.PixelConversion [Benchmark] public void PixelConverter_Rgba32_ToArgb32() { - ref uint sBase = ref Unsafe.As(ref this.PermutedRunnerRgbaToArgb.Source[0]); - ref uint dBase = ref Unsafe.As(ref this.PermutedRunnerRgbaToArgb.Dest[0]); + Span source = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Source); + Span dest = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Dest); - for (int i = 0; i < this.Count; i++) - { - uint s = Unsafe.Add(ref sBase, i); - Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s); - } - } - - [Benchmark] - public void PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer() - { - Span source = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Source); - Span dest = MemoryMarshal.Cast(this.PermutedRunnerRgbaToArgb.Dest); - source.CopyTo(dest); - - ref uint dBase = ref MemoryMarshal.GetReference(dest); - - for (int i = 0; i < this.Count; i++) - { - uint s = Unsafe.Add(ref dBase, i); - Unsafe.Add(ref dBase, i) = PixelConverter.FromRgba32.ToArgb32(s); - } + PixelConverter.FromRgba32.ToArgb32(source, dest); } /* RESULTS: - Method | Count | Mean | Error | StdDev | Scaled | ScaledSD | - ---------------------------------------------------------- |------ |-----------:|-----------:|-----------:|-------:|---------:| - ByRef | 256 | 328.7 ns | 6.6141 ns | 6.1868 ns | 1.00 | 0.00 | - ByVal | 256 | 322.0 ns | 4.3541 ns | 4.0728 ns | 0.98 | 0.02 | - FromBytes | 256 | 321.5 ns | 3.3499 ns | 3.1335 ns | 0.98 | 0.02 | - InlineShuffle | 256 | 330.7 ns | 4.2525 ns | 3.9778 ns | 1.01 | 0.02 | - PixelConverter_Rgba32_ToArgb32 | 256 | 167.4 ns | 0.6357 ns | 0.5309 ns | 0.51 | 0.01 | - PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 256 | 196.6 ns | 0.8929 ns | 0.7915 ns | 0.60 | 0.01 | - | | | | | | | - ByRef | 2048 | 2,534.4 ns | 8.2947 ns | 6.9265 ns | 1.00 | 0.00 | - ByVal | 2048 | 2,638.5 ns | 52.6843 ns | 70.3320 ns | 1.04 | 0.03 | - FromBytes | 2048 | 2,517.2 ns | 40.8055 ns | 38.1695 ns | 0.99 | 0.01 | - InlineShuffle | 2048 | 2,546.5 ns | 21.2506 ns | 19.8778 ns | 1.00 | 0.01 | - PixelConverter_Rgba32_ToArgb32 | 2048 | 1,265.7 ns | 5.1397 ns | 4.5562 ns | 0.50 | 0.00 | - PixelConverter_Rgba32_ToArgb32_CopyThenWorkOnSingleBuffer | 2048 | 1,410.3 ns | 11.1939 ns | 9.9231 ns | 0.56 | 0.00 | - */ + | Method | Count | Mean | Error | StdDev | Median | Ratio | RatioSD | + |------------------------------- |------ |------------:|----------:|----------:|------------:|------:|--------:| + | ByRef | 256 | 288.84 ns | 19.601 ns | 52.319 ns | 268.10 ns | 1.00 | 0.00 | + | ByVal | 256 | 267.97 ns | 1.831 ns | 1.713 ns | 267.85 ns | 0.77 | 0.18 | + | FromBytes | 256 | 266.81 ns | 2.427 ns | 2.270 ns | 266.47 ns | 0.76 | 0.18 | + | InlineShuffle | 256 | 291.41 ns | 5.820 ns | 5.444 ns | 290.17 ns | 0.83 | 0.19 | + | PixelConverter_Rgba32_ToArgb32 | 256 | 38.62 ns | 0.431 ns | 0.403 ns | 38.68 ns | 0.11 | 0.03 | + | | | | | | | | | + | ByRef | 2048 | 2,197.69 ns | 15.826 ns | 14.804 ns | 2,197.25 ns | 1.00 | 0.00 | + | ByVal | 2048 | 2,226.81 ns | 44.266 ns | 62.054 ns | 2,197.17 ns | 1.03 | 0.04 | + | FromBytes | 2048 | 2,181.35 ns | 18.033 ns | 16.868 ns | 2,185.97 ns | 0.99 | 0.01 | + | InlineShuffle | 2048 | 2,233.10 ns | 27.673 ns | 24.531 ns | 2,229.78 ns | 1.02 | 0.01 | + | PixelConverter_Rgba32_ToArgb32 | 2048 | 139.90 ns | 2.152 ns | 3.825 ns | 138.70 ns | 0.06 | 0.00 | + */ } } diff --git a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj index eaab162ff2..4784a219b2 100644 --- a/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj +++ b/tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj @@ -17,6 +17,7 @@ + diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs new file mode 100644 index 0000000000..f1bfaa4ad4 --- /dev/null +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.Shuffle.cs @@ -0,0 +1,399 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using SixLabors.ImageSharp.Tests.TestUtilities; +using Xunit; + +namespace SixLabors.ImageSharp.Tests.Common +{ + public partial class SimdUtilsTests + { + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleFloat4Channel(int count) + { + static void RunTest(string serialized) + { + // No need to test multiple shuffle controls as the + // pipeline is always the same. + int size = FeatureTestRunner.Deserialize(serialized); + byte control = default(WZYXShuffle4).Control; + + TestShuffleFloat4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, control), + control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffleByte4Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle4 to the generic utils method. + WXYZShuffle4 wxyz = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wxyz), + wxyz.Control); + + WZYXShuffle4 wzyx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wzyx), + wzyx.Control); + + YZWXShuffle4 yzwx = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yzwx), + yzwx.Control); + + ZYXWShuffle4 zyxw = default; + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, zyxw), + zyxw.Control); + + var xwyz = new DefaultShuffle4(2, 1, 3, 0); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4(1, 1, 1, 1); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4(3, 3, 3, 3); + TestShuffleByte4Channel( + size, + (s, d) => SimdUtils.Shuffle4(s.Span, d.Span, wwww), + wwww.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkShuffleByte3Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle3 to the generic utils method. + var zyx = new DefaultShuffle3(0, 1, 2); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zyx), + zyx.Control); + + var xyz = new DefaultShuffle3(2, 1, 0); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, xyz), + xyz.Control); + + var yyy = new DefaultShuffle3(1, 1, 1); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, yyy), + yyy.Control); + + var zzz = new DefaultShuffle3(2, 2, 2); + TestShuffleByte3Channel( + size, + (s, d) => SimdUtils.Shuffle3(s.Span, d.Span, zzz), + zzz.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy3))] + public void BulkPad3Shuffle4Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IPad3Shuffle4 to the generic utils method. + XYZWPad3Shuffle4 xyzw = default; + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultPad3Shuffle4(2, 1, 3, 0); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultPad3Shuffle4(1, 1, 1, 1); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultPad3Shuffle4(3, 3, 3, 3); + TestPad3Shuffle4Channel( + size, + (s, d) => SimdUtils.Pad3Shuffle4(s.Span, d.Span, wwww), + wwww.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE); + } + + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy4))] + public void BulkShuffle4Slice3Channel(int count) + { + static void RunTest(string serialized) + { + int size = FeatureTestRunner.Deserialize(serialized); + + // These cannot be expressed as a theory as you cannot + // use RemoteExecutor within generic methods nor pass + // IShuffle4Slice3 to the generic utils method. + XYZWShuffle4Slice3 xyzw = default; + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xyzw), + xyzw.Control); + + var xwyz = new DefaultShuffle4Slice3(2, 1, 3, 0); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, xwyz), + xwyz.Control); + + var yyyy = new DefaultShuffle4Slice3(1, 1, 1, 1); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, yyyy), + yyyy.Control); + + var wwww = new DefaultShuffle4Slice3(3, 3, 3, 3); + TestShuffle4Slice3Channel( + size, + (s, d) => SimdUtils.Shuffle4Slice3(s.Span, d.Span, wwww), + wwww.Control); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX | HwIntrinsics.DisableSSE); + } + + private static void TestShuffleFloat4Channel( + int count, + Action, Memory> convert, + byte control) + { + float[] source = new Random(count).GenerateRandomFloatArray(count, 0, 256); + var result = new float[count]; + + float[] expected = new float[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 4) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + expected[i + 3] = source[p3 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result, new ApproximateFloatComparer(1e-5F)); + } + + private static void TestShuffleByte4Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 4) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + expected[i + 3] = source[p3 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } + + private static void TestShuffleByte3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + var result = new byte[count]; + + byte[] expected = new byte[count]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0; i < expected.Length; i += 3) + { + expected[i] = source[p0 + i]; + expected[i + 1] = source[p1 + i]; + expected[i + 2] = source[p2 + i]; + } + + convert(source, result); + + Assert.Equal(expected, result); + } + + private static void TestPad3Shuffle4Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new byte[count * 4 / 3]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int p3, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3) + { + expected[p0 + i] = source[j]; + expected[p1 + i] = source[j + 1]; + expected[p2 + i] = source[j + 2]; + expected[p3 + i] = byte.MaxValue; + } + + Span temp = stackalloc byte[4]; + for (int i = 0, j = 0; i < expected.Length; i += 4, j += 3) + { + temp[0] = source[j]; + temp[1] = source[j + 1]; + temp[2] = source[j + 2]; + temp[3] = byte.MaxValue; + + expected[i] = temp[p0]; + expected[i + 1] = temp[p1]; + expected[i + 2] = temp[p2]; + expected[i + 3] = temp[p3]; + } + + convert(source, result); + + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + + Assert.Equal(expected, result); + } + + private static void TestShuffle4Slice3Channel( + int count, + Action, Memory> convert, + byte control) + { + byte[] source = new byte[count]; + new Random(count).NextBytes(source); + + var result = new byte[count * 3 / 4]; + + byte[] expected = new byte[result.Length]; + + SimdUtils.Shuffle.InverseMmShuffle( + control, + out int _, + out int p2, + out int p1, + out int p0); + + for (int i = 0, j = 0; i < expected.Length; i += 3, j += 4) + { + expected[i] = source[p0 + j]; + expected[i + 1] = source[p1 + j]; + expected[i + 2] = source[p2 + j]; + } + + convert(source, result); + + for (int i = 0; i < expected.Length; i++) + { + Assert.Equal(expected[i], result[i]); + } + + Assert.Equal(expected, result); + } + } +} diff --git a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs index 6dce489353..ec09e43e57 100644 --- a/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs +++ b/tests/ImageSharp.Tests/Common/SimdUtilsTests.cs @@ -7,13 +7,13 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using SixLabors.ImageSharp.Common.Tuples; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; namespace SixLabors.ImageSharp.Tests.Common { - public class SimdUtilsTests + public partial class SimdUtilsTests { private ITestOutputHelper Output { get; } @@ -163,7 +163,7 @@ namespace SixLabors.ImageSharp.Tests.Common public static readonly TheoryData ArraySizesDivisibleBy8 = new TheoryData { 0, 8, 16, 1024 }; public static readonly TheoryData ArraySizesDivisibleBy4 = new TheoryData { 0, 4, 8, 28, 1020 }; - + public static readonly TheoryData ArraySizesDivisibleBy3 = new TheoryData { 0, 3, 9, 36, 957 }; public static readonly TheoryData ArraySizesDivisibleBy32 = new TheoryData { 0, 32, 512 }; public static readonly TheoryData ArbitraryArraySizes = @@ -204,6 +204,25 @@ namespace SixLabors.ImageSharp.Tests.Common (s, d) => SimdUtils.ExtendedIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); } +#if SUPPORTS_RUNTIME_INTRINSICS + [Theory] + [MemberData(nameof(ArraySizesDivisibleBy32))] + public void HwIntrinsics_BulkConvertByteToNormalizedFloat(int count) + { + static void RunTest(string serialized) + { + TestImpl_BulkConvertByteToNormalizedFloat( + FeatureTestRunner.Deserialize(serialized), + (s, d) => SimdUtils.HwIntrinsics.ByteToNormalizedFloat(s.Span, d.Span)); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + count, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2 | HwIntrinsics.DisableSSE41); + } +#endif + [Theory] [MemberData(nameof(ArbitraryArraySizes))] public void BulkConvertByteToNormalizedFloat(int count) @@ -281,16 +300,19 @@ namespace SixLabors.ImageSharp.Tests.Common [Theory] [MemberData(nameof(ArraySizesDivisibleBy32))] - public void Avx2_BulkConvertNormalizedFloatToByteClampOverflows(int count) + public void HwIntrinsics_BulkConvertNormalizedFloatToByteClampOverflows(int count) { - if (!System.Runtime.Intrinsics.X86.Avx2.IsSupported) + static void RunTest(string serialized) { - return; + TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + FeatureTestRunner.Deserialize(serialized), + (s, d) => SimdUtils.HwIntrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); } - TestImpl_BulkConvertNormalizedFloatToByteClampOverflows( + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, count, - (s, d) => SimdUtils.Avx2Intrinsics.NormalizedFloatToByteSaturate(s.Span, d.Span)); + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX2); } #endif diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs index 3f767620a6..f98fa3c7f3 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpDecoderTests.cs @@ -39,22 +39,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp }; [Theory] - [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, false)] - [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32, true)] - public void BmpDecoder_CanDecode_MiscellaneousBitmaps(TestImageProvider provider, bool enforceDiscontiguousBuffers) + [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)] + public void BmpDecoder_CanDecode_MiscellaneousBitmaps(TestImageProvider provider) where TPixel : unmanaged, IPixel + { + using Image image = provider.GetImage(BmpDecoder); + image.DebugSave(provider); + + if (TestEnvironment.IsWindows) + { + image.CompareToOriginal(provider); + } + } + + [Theory] + [WithFileCollection(nameof(MiscBmpFiles), PixelTypes.Rgba32)] + public void BmpDecoder_CanDecode_MiscellaneousBitmaps_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); - if (!string.IsNullOrEmpty(nonContiguousBuffersStr)) - { - provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - } + provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(BmpDecoder); - image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); + using Image image = provider.GetImage(BmpDecoder); + image.DebugSave(provider, nonContiguousBuffersStr); if (TestEnvironment.IsWindows) { @@ -66,7 +76,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp RemoteExecutor.Invoke( RunTest, providerDump, - enforceDiscontiguousBuffers ? "Disco" : string.Empty) + "Disco") .Dispose(); } @@ -348,7 +358,9 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp using (Image image = provider.GetImage(BmpDecoder)) { image.DebugSave(provider); - image.CompareToOriginal(provider); + + // Do not validate. Reference files will fail validation. + image.CompareToOriginal(provider, new MagickReferenceDecoder(false)); } } diff --git a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs index b05486e356..83b67a01af 100644 --- a/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Bmp/BmpEncoderTests.cs @@ -10,7 +10,7 @@ using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; using SixLabors.ImageSharp.Processing.Processors.Quantization; using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison; - +using SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs; using Xunit; using Xunit.Abstractions; @@ -200,10 +200,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp Quantizer = new WuQuantizer() }; string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false); + + // Use the default decoder to test our encoded image. This verifies the content. + // We do not verify the reference image though as some are invalid. IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile); using (var referenceImage = Image.Load(actualOutputFile, referenceDecoder)) { - referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false); + referenceImage.CompareToReferenceOutput( + ImageComparer.TolerantPercentage(0.01f), + provider, + extension: "bmp", + appendPixelTypeToFileName: false, + decoder: new MagickReferenceDecoder(false)); } } } @@ -226,10 +234,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Bmp Quantizer = new OctreeQuantizer() }; string actualOutputFile = provider.Utility.SaveTestOutputFile(image, "bmp", encoder, appendPixelTypeToFileName: false); + + // Use the default decoder to test our encoded image. This verifies the content. + // We do not verify the reference image though as some are invalid. IImageDecoder referenceDecoder = TestEnvironment.GetReferenceDecoder(actualOutputFile); using (var referenceImage = Image.Load(actualOutputFile, referenceDecoder)) { - referenceImage.CompareToReferenceOutput(ImageComparer.TolerantPercentage(0.01f), provider, extension: "bmp", appendPixelTypeToFileName: false); + referenceImage.CompareToReferenceOutput( + ImageComparer.TolerantPercentage(0.01f), + provider, + extension: "bmp", + appendPixelTypeToFileName: false, + decoder: new MagickReferenceDecoder(false)); } } } diff --git a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs index 63aae5c559..eb2643b8cd 100644 --- a/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Gif/GifDecoderTests.cs @@ -198,17 +198,18 @@ namespace SixLabors.ImageSharp.Tests.Formats.Gif [Theory] [WithFile(TestImages.Gif.Giphy, PixelTypes.Rgba32)] [WithFile(TestImages.Gif.Kumin, PixelTypes.Rgba32)] - public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void GifDecoder_CanDecode_WithLimitedAllocatorBufferCapacity( + TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider + = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(GifDecoder); - image.DebugSave(provider); + using Image image = provider.GetImage(GifDecoder); + image.DebugSave(provider, nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs index 722521f98d..927d7c2528 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs @@ -8,7 +8,7 @@ using System.Diagnostics; using SixLabors.ImageSharp.Formats.Jpeg.Components; using SixLabors.ImageSharp.Tests.Formats.Jpg.Utils; - +using SixLabors.ImageSharp.Tests.TestUtilities; using Xunit; using Xunit.Abstractions; @@ -45,20 +45,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => + { + var block = default(Block8x8F); + + for (int i = 0; i < Block8x8F.Size; i++) { - var block = default(Block8x8F); - - for (int i = 0; i < Block8x8F.Size; i++) - { - block[i] = i; - } - - sum = 0; - for (int i = 0; i < Block8x8F.Size; i++) - { - sum += block[i]; - } - }); + block[i] = i; + } + + sum = 0; + for (int i = 0; i < Block8x8F.Size; i++) + { + sum += block[i]; + } + }); Assert.Equal(sum, 64f * 63f * 0.5f); } @@ -70,20 +70,20 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => + { + // Block8x8F block = new Block8x8F(); + float[] block = new float[64]; + for (int i = 0; i < Block8x8F.Size; i++) { - // Block8x8F block = new Block8x8F(); - float[] block = new float[64]; - for (int i = 0; i < Block8x8F.Size; i++) - { - block[i] = i; - } - - sum = 0; - for (int i = 0; i < Block8x8F.Size; i++) - { - sum += block[i]; - } - }); + block[i] = i; + } + + sum = 0; + for (int i = 0; i < Block8x8F.Size; i++) + { + sum += block[i]; + } + }); Assert.Equal(sum, 64f * 63f * 0.5f); } @@ -101,11 +101,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var b = default(Block8x8F); - b.LoadFrom(data); - b.ScaledCopyTo(mirror); - }); + { + var b = default(Block8x8F); + b.LoadFrom(data); + b.ScaledCopyTo(mirror); + }); Assert.Equal(data, mirror); @@ -126,11 +126,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var b = default(Block8x8F); - Block8x8F.LoadFrom(&b, data); - Block8x8F.ScaledCopyTo(&b, mirror); - }); + { + var b = default(Block8x8F); + Block8x8F.LoadFrom(&b, data); + Block8x8F.ScaledCopyTo(&b, mirror); + }); Assert.Equal(data, mirror); @@ -151,11 +151,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.Measure( Times, () => - { - var v = default(Block8x8F); - v.LoadFrom(data); - v.ScaledCopyTo(mirror); - }); + { + var v = default(Block8x8F); + v.LoadFrom(data); + v.ScaledCopyTo(mirror); + }); Assert.Equal(data, mirror); @@ -165,19 +165,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Fact] public void TransposeInto() { - float[] expected = Create8x8FloatData(); - ReferenceImplementations.Transpose8x8(expected); + static void RunTest() + { + float[] expected = Create8x8FloatData(); + ReferenceImplementations.Transpose8x8(expected); - var source = default(Block8x8F); - source.LoadFrom(Create8x8FloatData()); + var source = default(Block8x8F); + source.LoadFrom(Create8x8FloatData()); - var dest = default(Block8x8F); - source.TransposeInto(ref dest); + var dest = default(Block8x8F); + source.TransposeInto(ref dest); - float[] actual = new float[64]; - dest.ScaledCopyTo(actual); + float[] actual = new float[64]; + dest.ScaledCopyTo(actual); - Assert.Equal(expected, actual); + Assert.Equal(expected, actual); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } private class BufferHolder @@ -228,7 +235,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg this.PrintLinearData(input); Block8x8F dest = block; - dest.NormalizeColorsInplace(255); + dest.NormalizeColorsInPlace(255); float[] array = new float[64]; dest.ScaledCopyTo(array); @@ -253,11 +260,11 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg Block8x8F source = CreateRandomFloatBlock(-200, 200, seed); Block8x8F expected = source; - expected.NormalizeColorsInplace(255); - expected.RoundInplace(); + expected.NormalizeColorsInPlace(255); + expected.RoundInPlace(); Block8x8F actual = source; - actual.NormalizeColorsAndRoundInplaceVector8(255); + actual.NormalizeColorsAndRoundInPlaceVector8(255); this.Output.WriteLine(expected.ToString()); this.Output.WriteLine(actual.ToString()); @@ -318,12 +325,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [InlineData(1)] [InlineData(2)] [InlineData(3)] - public void RoundInplaceSlow(int seed) + public void RoundInPlaceSlow(int seed) { Block8x8F s = CreateRandomFloatBlock(-500, 500, seed); Block8x8F d = s; - d.RoundInplace(); + d.RoundInPlace(); this.Output.WriteLine(s.ToString()); this.Output.WriteLine(d.ToString()); @@ -338,19 +345,26 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg } [Fact] - public void MultiplyInplace_ByOtherBlock() + public void MultiplyInPlace_ByOtherBlock() { - Block8x8F original = CreateRandomFloatBlock(-500, 500, 42); - Block8x8F m = CreateRandomFloatBlock(-500, 500, 42); + static void RunTest() + { + Block8x8F original = CreateRandomFloatBlock(-500, 500, 42); + Block8x8F m = CreateRandomFloatBlock(-500, 500, 42); - Block8x8F actual = original; + Block8x8F actual = original; - actual.MultiplyInplace(ref m); + actual.MultiplyInPlace(ref m); - for (int i = 0; i < Block8x8F.Size; i++) - { - Assert.Equal(original[i] * m[i], actual[i]); + for (int i = 0; i < Block8x8F.Size; i++) + { + Assert.Equal(original[i] * m[i], actual[i]); + } } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } [Theory] @@ -390,23 +404,51 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg ReferenceImplementations.DequantizeBlock(&expected, &qt, unzig.Data); - actual.MultiplyInplace(ref zigQt); + actual.MultiplyInPlace(ref zigQt); this.CompareBlocks(expected, actual, 0); } [Fact] - public void MultiplyInplace_ByScalar() + public void AddToAllInPlace() { - Block8x8F original = CreateRandomFloatBlock(-500, 500); + static void RunTest() + { + Block8x8F original = CreateRandomFloatBlock(-500, 500); - Block8x8F actual = original; - actual.MultiplyInplace(42f); + Block8x8F actual = original; + actual.AddInPlace(42f); - for (int i = 0; i < 64; i++) + for (int i = 0; i < 64; i++) + { + Assert.Equal(original[i] + 42f, actual[i]); + } + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); + } + + [Fact] + public void MultiplyInPlace_ByScalar() + { + static void RunTest() { - Assert.Equal(original[i] * 42f, actual[i]); + Block8x8F original = CreateRandomFloatBlock(-500, 500); + + Block8x8F actual = original; + actual.MultiplyInPlace(42f); + + for (int i = 0; i < 64; i++) + { + Assert.Equal(original[i] * 42f, actual[i]); + } } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX); } [Fact] diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs index e29d8f158b..cc23a45fcb 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.Progressive.cs @@ -14,22 +14,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg public const string DecodeProgressiveJpegOutputName = "DecodeProgressiveJpeg"; [Theory] - [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32, false)] - [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32, true)] - public void DecodeProgressiveJpeg(TestImageProvider provider, bool enforceDiscontiguousBuffers) + [WithFileCollection(nameof(ProgressiveTestJpegs), PixelTypes.Rgba32)] + public void DecodeProgressiveJpeg(TestImageProvider provider) where TPixel : unmanaged, IPixel + { + using Image image = provider.GetImage(JpegDecoder); + image.DebugSave(provider); + + provider.Utility.TestName = DecodeProgressiveJpegOutputName; + image.CompareToReferenceOutput( + GetImageComparer(provider), + provider, + appendPixelTypeToFileName: false); + } + + [Theory] + [WithFile(TestImages.Jpeg.Progressive.Progress, PixelTypes.Rgba32)] + public void DecodeProgressiveJpeg_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = + BasicSerializer.Deserialize>(providerDump); - if (!string.IsNullOrEmpty(nonContiguousBuffersStr)) - { - provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); - } + provider.LimitAllocatorBufferCapacity().InBytesSqrt(200); - using Image image = provider.GetImage(JpegDecoder); + using Image image = provider.GetImage(JpegDecoder); image.DebugSave(provider, nonContiguousBuffersStr); provider.Utility.TestName = DecodeProgressiveJpegOutputName; @@ -44,8 +54,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg RemoteExecutor.Invoke( RunTest, providerDump, - enforceDiscontiguousBuffers ? "Disco" : string.Empty) - .Dispose(); + "Disco").Dispose(); } } } diff --git a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs index 78218aec90..1825252335 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/JpegDecoderTests.cs @@ -129,10 +129,10 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg [Theory] [InlineData(TestImages.Jpeg.Baseline.Jpeg420Small, 0)] [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 1)] - [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 10)] + [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 15)] [InlineData(TestImages.Jpeg.Issues.ExifGetString750Transform, 30)] [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 1)] - [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 10)] + [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 15)] [InlineData(TestImages.Jpeg.Issues.BadRstProgressive518, 30)] public async Task Decode_IsCancellable(string fileName, int cancellationDelayMs) { @@ -141,17 +141,32 @@ namespace SixLabors.ImageSharp.Tests.Formats.Jpg TestEnvironment.InputImagesDirectoryFullPath, fileName); - var cts = new CancellationTokenSource(); - if (cancellationDelayMs == 0) - { - cts.Cancel(); - } - else + const int NumberOfRuns = 5; + + for (int i = 0; i < NumberOfRuns; i++) { - cts.CancelAfter(cancellationDelayMs); + var cts = new CancellationTokenSource(); + if (cancellationDelayMs == 0) + { + cts.Cancel(); + } + else + { + cts.CancelAfter(cancellationDelayMs); + } + + try + { + using var image = await Image.LoadAsync(hugeFile, cts.Token); + } + catch (TaskCanceledException) + { + // Succesfully observed a cancellation + return; + } } - await Assert.ThrowsAsync(() => Image.LoadAsync(hugeFile, cts.Token)); + throw new Exception($"No cancellation happened out of {NumberOfRuns} runs!"); } [Theory(Skip = "Identify is too fast, doesn't work reliably.")] diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 5b6adfe1af..2164975df0 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -404,16 +404,15 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png [Theory] [WithFile(TestImages.Png.Splash, PixelTypes.Rgba32)] [WithFile(TestImages.Png.Bike, PixelTypes.Rgba32)] - public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void PngDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(PngDecoder); + using Image image = provider.GetImage(PngDecoder); image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); image.CompareToOriginal(provider); } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs index b9f5f16fa5..b4670cb5d4 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngEncoderTests.cs @@ -2,13 +2,8 @@ // Licensed under the Apache License, Version 2.0. // ReSharper disable InconsistentNaming -using System.Diagnostics; using System.IO; using System.Linq; -#if SUPPORTS_RUNTIME_INTRINSICS -using System.Runtime.Intrinsics.X86; -#endif -using Microsoft.DotNet.RemoteExecutor; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Formats.Png; using SixLabors.ImageSharp.Metadata; @@ -16,7 +11,6 @@ using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing.Processors.Quantization; using SixLabors.ImageSharp.Tests.TestUtilities; using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison; - using Xunit; namespace SixLabors.ImageSharp.Tests.Formats.Png @@ -536,16 +530,12 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png [Theory] [WithTestPatternImages(100, 100, PixelTypes.Rgba32)] - public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void EncodeWorksWithoutSsse3Intrinsics(TestImageProvider provider) { - static void RunTest(string providerDump) + static void RunTest(string serialized) { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); -#if SUPPORTS_RUNTIME_INTRINSICS - Assert.False(Ssse3.IsSupported); -#endif + TestImageProvider provider = + FeatureTestRunner.DeserializeForXunit>(serialized); foreach (PngInterlaceMode interlaceMode in InterlaceMode) { @@ -560,19 +550,21 @@ namespace SixLabors.ImageSharp.Tests.Formats.Png } } - string providerDump = BasicSerializer.Serialize(provider); + FeatureTestRunner.RunWithHwIntrinsicsFeature( + RunTest, + HwIntrinsics.DisableSSSE3, + provider); + } - var processStartInfo = new ProcessStartInfo(); - processStartInfo.Environment[TestEnvironment.Features.EnableSSE3] = TestEnvironment.Features.Off; + [Fact] + public void EncodeFixesInvalidOptions() + { + // https://github.com/SixLabors/ImageSharp/issues/935 + using var ms = new MemoryStream(); + var testFile = TestFile.Create(TestImages.Png.Issue935); + using Image image = testFile.CreateRgba32Image(new PngDecoder()); - RemoteExecutor.Invoke( - RunTest, - providerDump, - new RemoteInvokeOptions - { - StartInfo = processStartInfo - }) - .Dispose(); + image.Save(ms, new PngEncoder { ColorType = PngColorType.RgbWithAlpha }); } private static void TestPngEncoderCore( diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs index 5fb15541ec..edb43aa126 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaDecoderTests.cs @@ -747,16 +747,15 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga [Theory] [WithFile(Bit24BottomLeft, PixelTypes.Rgba32)] [WithFile(Bit32BottomLeft, PixelTypes.Rgba32)] - public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) - where TPixel : unmanaged, IPixel + public void TgaDecoder_CanDecode_WithLimitedAllocatorBufferCapacity(TestImageProvider provider) { static void RunTest(string providerDump, string nonContiguousBuffersStr) { - TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); + TestImageProvider provider = BasicSerializer.Deserialize>(providerDump); provider.LimitAllocatorBufferCapacity().InPixelsSqrt(100); - using Image image = provider.GetImage(TgaDecoder); + using Image image = provider.GetImage(TgaDecoder); image.DebugSave(provider, testOutputDetails: nonContiguousBuffersStr); if (TestEnvironment.IsWindows) diff --git a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs index 0f76d99317..58ed31e610 100644 --- a/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs +++ b/tests/ImageSharp.Tests/Formats/Tga/TgaTestUtils.cs @@ -18,7 +18,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Image image, bool useExactComparer = true, float compareTolerance = 0.01f) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { string path = TestImageProvider.GetFilePathOrNull(provider); if (path == null) @@ -39,7 +39,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga } public static Image DecodeWithMagick(Configuration configuration, FileInfo fileInfo) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { using (var magickImage = new MagickImage(fileInfo)) { @@ -48,7 +48,7 @@ namespace SixLabors.ImageSharp.Tests.Formats.Tga Assert.True(result.TryGetSinglePixelSpan(out Span resultPixels)); - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { byte[] data = pixels.ToByteArray(PixelMapping.RGBA); diff --git a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs index 27689f6813..7d16623877 100644 --- a/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/ImageMathsTests.cs @@ -10,6 +10,21 @@ namespace SixLabors.ImageSharp.Tests.Helpers { public class ImageMathsTests { + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + [InlineData(3)] + [InlineData(4)] + [InlineData(100)] + [InlineData(123)] + [InlineData(53436353)] + public void Modulo2(int x) + { + int actual = ImageMaths.Modulo2(x); + Assert.Equal(x % 2, actual); + } + [Theory] [InlineData(0)] [InlineData(1)] diff --git a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs index c3b8e79ee2..2bb43c440b 100644 --- a/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs +++ b/tests/ImageSharp.Tests/Helpers/Vector4UtilsTests.cs @@ -17,6 +17,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void Premultiply_VectorSpan(int length) { var rnd = new Random(42); @@ -36,6 +37,7 @@ namespace SixLabors.ImageSharp.Tests.Helpers [InlineData(0)] [InlineData(1)] [InlineData(30)] + [InlineData(63)] public void UnPremultiply_VectorSpan(int length) { var rnd = new Random(42); diff --git a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj index ba849ab251..07ade97d5d 100644 --- a/tests/ImageSharp.Tests/ImageSharp.Tests.csproj +++ b/tests/ImageSharp.Tests/ImageSharp.Tests.csproj @@ -20,6 +20,7 @@ + diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs index 6fda9dbbad..9d0d09a983 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.ReferenceImplementations.cs @@ -13,34 +13,49 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats { public static class ReferenceImplementations { - public static Rgba32 MakeRgba32(byte r, byte g, byte b, byte a) + public static byte[] MakeRgba32ByteArray(byte r, byte g, byte b, byte a) { - Rgba32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = r; + buffer[i + 1] = g; + buffer[i + 2] = b; + buffer[i + 3] = a; + } + + return buffer; } - public static Argb32 MakeArgb32(byte r, byte g, byte b, byte a) + public static byte[] MakeArgb32ByteArray(byte r, byte g, byte b, byte a) { - Argb32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = a; + buffer[i + 1] = r; + buffer[i + 2] = g; + buffer[i + 3] = b; + } + + return buffer; } - public static Bgra32 MakeBgra32(byte r, byte g, byte b, byte a) + public static byte[] MakeBgra32ByteArray(byte r, byte g, byte b, byte a) { - Bgra32 d = default; - d.R = r; - d.G = g; - d.B = b; - d.A = a; - return d; + var buffer = new byte[256]; + + for (int i = 0; i < buffer.Length; i += 4) + { + buffer[i] = b; + buffer[i + 1] = g; + buffer[i + 2] = r; + buffer[i + 3] = a; + } + + return buffer; } internal static void To( @@ -83,8 +98,7 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats if (typeof(TDestinationPixel) == typeof(L8)) { - ref L8 l8Ref = ref MemoryMarshal.GetReference( - MemoryMarshal.Cast(destinationPixels)); + ref L8 l8Ref = ref MemoryMarshal.GetReference(MemoryMarshal.Cast(destinationPixels)); for (int i = 0; i < count; i++) { ref TSourcePixel sp = ref Unsafe.Add(ref sourceRef, i); diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs index 3de6804dcf..6eed875f38 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelConverterTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.PixelFormats.Utils; @@ -33,30 +34,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToArgb32(byte r, byte g, byte b, byte a) { - Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromRgba32.ToArgb32(s.PackedValue); + PixelConverter.FromRgba32.ToArgb32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToBgra32(byte r, byte g, byte b, byte a) { - Rgba32 s = ReferenceImplementations.MakeRgba32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromRgba32.ToBgra32(s.PackedValue); + PixelConverter.FromRgba32.ToBgra32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } @@ -66,30 +65,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToRgba32(byte r, byte g, byte b, byte a) { - Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromArgb32.ToRgba32(s.PackedValue); + PixelConverter.FromArgb32.ToRgba32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToBgra32(byte r, byte g, byte b, byte a) { - Argb32 s = ReferenceImplementations.MakeArgb32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromArgb32.ToBgra32(s.PackedValue); + PixelConverter.FromArgb32.ToBgra32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeBgra32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } @@ -99,30 +96,28 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats [MemberData(nameof(RgbaData))] public void ToArgb32(byte r, byte g, byte b, byte a) { - Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromBgra32.ToArgb32(s.PackedValue); + PixelConverter.FromBgra32.ToArgb32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeArgb32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeArgb32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } [Theory] [MemberData(nameof(RgbaData))] public void ToRgba32(byte r, byte g, byte b, byte a) { - Bgra32 s = ReferenceImplementations.MakeBgra32(r, g, b, a); + byte[] source = ReferenceImplementations.MakeBgra32ByteArray(r, g, b, a); + var actual = new byte[source.Length]; - // Act: - uint actualPacked = PixelConverter.FromBgra32.ToRgba32(s.PackedValue); + PixelConverter.FromBgra32.ToRgba32(source, actual); - // Assert: - uint expectedPacked = ReferenceImplementations.MakeRgba32(r, g, b, a).PackedValue; + byte[] expected = ReferenceImplementations.MakeRgba32ByteArray(r, g, b, a); - Assert.Equal(expectedPacked, actualPacked); + Assert.Equal(expected, actual); } } } diff --git a/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs b/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs index 50b8782e47..6c48cf843d 100644 --- a/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs +++ b/tests/ImageSharp.Tests/Processing/Processors/Convolution/BokehBlurTest.cs @@ -138,21 +138,10 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor(TestImageProvider provider, BokehBlurInfo value) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump, string infoDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - BokehBlurInfo value = BasicSerializer.Deserialize(infoDump); - - provider.RunValidatingProcessorTest( - x => x.BokehBlur(value.Radius, value.Components, value.Gamma), - testOutputDetails: value.ToString(), - appendPixelTypeToFileName: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider), BasicSerializer.Serialize(value)) - .Dispose(); + provider.RunValidatingProcessorTest( + x => x.BokehBlur(value.Radius, value.Components, value.Gamma), + testOutputDetails: value.ToString(), + appendPixelTypeToFileName: false); } [Theory] @@ -164,18 +153,9 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor_WorksWithAllPixelTypes(TestImageProvider provider) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - provider.RunValidatingProcessorTest( + provider.RunValidatingProcessorTest( x => x.BokehBlur(8, 2, 3), appendSourceFileOrDescription: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider)) - .Dispose(); } [Theory] @@ -183,26 +163,15 @@ namespace SixLabors.ImageSharp.Tests.Processing.Processors.Convolution public void BokehBlurFilterProcessor_Bounded(TestImageProvider provider, BokehBlurInfo value) where TPixel : unmanaged, IPixel { - static void RunTest(string providerDump, string infoDump) - { - TestImageProvider provider = - BasicSerializer.Deserialize>(providerDump); - BokehBlurInfo value = BasicSerializer.Deserialize(infoDump); - - provider.RunValidatingProcessorTest( - x => - { - Size size = x.GetCurrentSize(); - var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2); - x.BokehBlur(value.Radius, value.Components, value.Gamma, bounds); - }, - testOutputDetails: value.ToString(), - appendPixelTypeToFileName: false); - } - - RemoteExecutor - .Invoke(RunTest, BasicSerializer.Serialize(provider), BasicSerializer.Serialize(value)) - .Dispose(); + provider.RunValidatingProcessorTest( + x => + { + Size size = x.GetCurrentSize(); + var bounds = new Rectangle(10, 10, size.Width / 2, size.Height / 2); + x.BokehBlur(value.Radius, value.Components, value.Gamma, bounds); + }, + testOutputDetails: value.ToString(), + appendPixelTypeToFileName: false); } [Theory] diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs index fd5296c375..dce36bb0fb 100644 --- a/tests/ImageSharp.Tests/TestImages.cs +++ b/tests/ImageSharp.Tests/TestImages.cs @@ -107,6 +107,9 @@ namespace SixLabors.ImageSharp.Tests public const string Issue1177_1 = "Png/issues/Issue_1177_1.png"; public const string Issue1177_2 = "Png/issues/Issue_1177_2.png"; + // Issue 935: https://github.com/SixLabors/ImageSharp/issues/935 + public const string Issue935 = "Png/issues/Issue_935.png"; + public static class Bad { public const string MissingDataChunk = "Png/xdtn0g01.png"; diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs new file mode 100644 index 0000000000..4720ea78ac --- /dev/null +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -0,0 +1,319 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using Microsoft.DotNet.RemoteExecutor; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities +{ + /// + /// Allows the testing against specific feature sets. + /// + public static class FeatureTestRunner + { + private static readonly char[] SplitChars = new[] { ',', ' ' }; + + /// + /// Allows the deserialization of parameters passed to the feature test. + /// + /// + /// This is required because does not allow + /// marshalling of fields so we cannot pass a wrapped + /// allowing automatic deserialization. + /// + /// + /// + /// The type to deserialize to. + /// The string value to deserialize. + /// The value. + public static T DeserializeForXunit(string value) + where T : IXunitSerializable + => BasicSerializer.Deserialize(value); + + /// + /// Allows the deserialization of types implementing + /// passed to the feature test. + /// + /// The string value to deserialize. + /// The value. + public static T Deserialize(string value) + where T : IConvertible + => (T)Convert.ChangeType(value, typeof(T)); + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// + /// The test action to run. + /// The parameter passed will be a string representing the currently testing . + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics) + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + intrinsic.Key.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(intrinsic.Key.ToString()); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(serializable)); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The intrinsics features. + /// The value to pass as a parameter to the test action. + public static void RunWithHwIntrinsicsFeature( + Action action, + HwIntrinsics intrinsics, + T serializable) + where T : IXunitSerializable + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + BasicSerializer.Serialize(serializable), + intrinsic.Key.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(BasicSerializer.Serialize(serializable), intrinsic.Key.ToString()); + } + } + } + + /// + /// Runs the given test within an environment + /// where the given features. + /// + /// The test action to run. + /// The value to pass as a parameter to the test action. + /// The intrinsics features. + public static void RunWithHwIntrinsicsFeature( + Action action, + T serializable, + HwIntrinsics intrinsics) + where T : IConvertible + { + if (!RemoteExecutor.IsSupported) + { + return; + } + + foreach (KeyValuePair intrinsic in intrinsics.ToFeatureKeyValueCollection()) + { + var processStartInfo = new ProcessStartInfo(); + if (intrinsic.Key != HwIntrinsics.AllowAll) + { + processStartInfo.Environment[$"COMPlus_{intrinsic.Value}"] = "0"; + + RemoteExecutor.Invoke( + action, + serializable.ToString(), + new RemoteInvokeOptions + { + StartInfo = processStartInfo + }) + .Dispose(); + } + else + { + // Since we are running using the default architecture there is no + // point creating the overhead of running the action in a separate process. + action(serializable.ToString()); + } + } + } + + internal static Dictionary ToFeatureKeyValueCollection(this HwIntrinsics intrinsics) + { + // Loop through and translate the given values into COMPlus equivaluents + var features = new Dictionary(); + foreach (string intrinsic in intrinsics.ToString("G").Split(SplitChars, StringSplitOptions.RemoveEmptyEntries)) + { + var key = (HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic); + switch (intrinsic) + { + case nameof(HwIntrinsics.DisableSIMD): + features.Add(key, "FeatureSIMD"); + break; + + case nameof(HwIntrinsics.AllowAll): + + // Not a COMPlus value. We filter in calling method. + features.Add(key, nameof(HwIntrinsics.AllowAll)); + break; + + default: + features.Add(key, intrinsic.Replace("Disable", "Enable")); + break; + } + } + + return features; + } + } + + /// + /// See + /// + /// ends up impacting all SIMD support(including System.Numerics) + /// but not things like , , and . + /// + /// + [Flags] +#pragma warning disable RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute). + public enum HwIntrinsics +#pragma warning restore RCS1135 // Declare enum member with zero value (when enum has FlagsAttribute). + { + // Use flags so we can pass multiple values without using params. + // Don't base on 0 or use inverse for All as that doesn't translate to string values. + DisableSIMD = 1 << 0, + DisableHWIntrinsic = 1 << 1, + DisableSSE = 1 << 2, + DisableSSE2 = 1 << 3, + DisableAES = 1 << 4, + DisablePCLMULQDQ = 1 << 5, + DisableSSE3 = 1 << 6, + DisableSSSE3 = 1 << 7, + DisableSSE41 = 1 << 8, + DisableSSE42 = 1 << 9, + DisablePOPCNT = 1 << 10, + DisableAVX = 1 << 11, + DisableFMA = 1 << 12, + DisableAVX2 = 1 << 13, + DisableBMI1 = 1 << 14, + DisableBMI2 = 1 << 15, + DisableLZCNT = 1 << 16, + AllowAll = 1 << 17 + } +} diff --git a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs index de8278a33e..d20e7d2a7e 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ReferenceCodecs/MagickReferenceDecoder.cs @@ -7,6 +7,7 @@ using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; using ImageMagick; +using ImageMagick.Formats.Bmp; using SixLabors.ImageSharp.Formats; using SixLabors.ImageSharp.Memory; using SixLabors.ImageSharp.PixelFormats; @@ -15,10 +16,22 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs { public class MagickReferenceDecoder : IImageDecoder { + private readonly bool validate; + + public MagickReferenceDecoder() + : this(true) + { + } + + public MagickReferenceDecoder(bool validate) + { + this.validate = validate; + } + public static MagickReferenceDecoder Instance { get; } = new MagickReferenceDecoder(); private static void FromRgba32Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -33,7 +46,7 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } private static void FromRgba64Bytes(Configuration configuration, Span rgbaBytes, IMemoryGroup destinationGroup) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { foreach (Memory m in destinationGroup) { @@ -48,17 +61,25 @@ namespace SixLabors.ImageSharp.Tests.TestUtilities.ReferenceCodecs } public Task> DecodeAsync(Configuration configuration, Stream stream, CancellationToken cancellationToken) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel => Task.FromResult(this.Decode(configuration, stream)); public Image Decode(Configuration configuration, Stream stream) - where TPixel : unmanaged, IPixel + where TPixel : unmanaged, ImageSharp.PixelFormats.IPixel { - using var magickImage = new MagickImage(stream); + var bmpReadDefines = new BmpReadDefines + { + IgnoreFileSize = !this.validate + }; + + var settings = new MagickReadSettings(); + settings.SetDefines(bmpReadDefines); + + using var magickImage = new MagickImage(stream, settings); var result = new Image(configuration, magickImage.Width, magickImage.Height); MemoryGroup resultPixels = result.GetRootFramePixelBuffer().FastMemoryGroup; - using (IPixelCollection pixels = magickImage.GetPixelsUnsafe()) + using (IUnsafePixelCollection pixels = magickImage.GetPixelsUnsafe()) { if (magickImage.Depth == 8) { diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs deleted file mode 100644 index 3568c1e5dc..0000000000 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.Features.cs +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) Six Labors. -// Licensed under the Apache License, Version 2.0. - -namespace SixLabors.ImageSharp.Tests -{ - public static partial class TestEnvironment - { - internal static class Features - { - public const string On = "1"; - public const string Off = "0"; - - // See https://github.com/SixLabors/ImageSharp/pull/1229#discussion_r440477861 - // * EnableHWIntrinsic - // * EnableSSE - // * EnableSSE2 - // * EnableAES - // * EnablePCLMULQDQ - // * EnableSSE3 - // * EnableSSSE3 - // * EnableSSE41 - // * EnableSSE42 - // * EnablePOPCNT - // * EnableAVX - // * EnableFMA - // * EnableAVX2 - // * EnableBMI1 - // * EnableBMI2 - // * EnableLZCNT - // - // `FeatureSIMD` ends up impacting all SIMD support(including `System.Numerics`) but not things - // like `LZCNT`, `BMI1`, or `BMI2` - // `EnableSSE3_4` is a legacy switch that exists for compat and is basically the same as `EnableSSE3` - public const string EnableAES = "COMPlus_EnableAES"; - public const string EnableAVX = "COMPlus_EnableAVX"; - public const string EnableAVX2 = "COMPlus_EnableAVX2"; - public const string EnableBMI1 = "COMPlus_EnableBMI1"; - public const string EnableBMI2 = "COMPlus_EnableBMI2"; - public const string EnableFMA = "COMPlus_EnableFMA"; - public const string EnableHWIntrinsic = "COMPlus_EnableHWIntrinsic"; - public const string EnableLZCNT = "COMPlus_EnableLZCNT"; - public const string EnablePCLMULQDQ = "COMPlus_EnablePCLMULQDQ"; - public const string EnablePOPCNT = "COMPlus_EnablePOPCNT"; - public const string EnableSSE = "COMPlus_EnableSSE"; - public const string EnableSSE2 = "COMPlus_EnableSSE2"; - public const string EnableSSE3 = "COMPlus_EnableSSE3"; - public const string EnableSSE3_4 = "COMPlus_EnableSSE3_4"; - public const string EnableSSE41 = "COMPlus_EnableSSE41"; - public const string EnableSSE42 = "COMPlus_EnableSSE42"; - public const string EnableSSSE3 = "COMPlus_EnableSSSE3"; - public const string FeatureSIMD = "COMPlus_FeatureSIMD"; - } - } -} diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs index 1375b5763e..48728faf0e 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs @@ -170,7 +170,10 @@ namespace SixLabors.ImageSharp.Tests } string testProjectConfigPath = TestAssemblyFile.FullName + ".config"; - File.Copy(testProjectConfigPath, remoteExecutorConfigPath); + if (File.Exists(testProjectConfigPath)) + { + File.Copy(testProjectConfigPath, remoteExecutorConfigPath); + } if (Is64BitProcess) { diff --git a/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs new file mode 100644 index 0000000000..4cbbefe686 --- /dev/null +++ b/tests/ImageSharp.Tests/TestUtilities/Tests/FeatureTestRunnerTests.cs @@ -0,0 +1,296 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; +#if SUPPORTS_RUNTIME_INTRINSICS +using System.Runtime.Intrinsics.X86; +#endif +using Xunit; +using Xunit.Abstractions; + +namespace SixLabors.ImageSharp.Tests.TestUtilities.Tests +{ + public class FeatureTestRunnerTests + { + public static TheoryData Intrinsics => + new TheoryData + { + { HwIntrinsics.DisableAES | HwIntrinsics.AllowAll, new string[] { "EnableAES", "AllowAll" } }, + { HwIntrinsics.DisableSIMD | HwIntrinsics.DisableHWIntrinsic, new string[] { "FeatureSIMD", "EnableHWIntrinsic" } }, + { HwIntrinsics.DisableSSE42 | HwIntrinsics.DisableAVX, new string[] { "EnableSSE42", "EnableAVX" } } + }; + + [Theory] + [MemberData(nameof(Intrinsics))] + public void ToFeatureCollectionReturnsExpectedResult(HwIntrinsics expectedItrinsics, string[] expectedValues) + { + Dictionary features = expectedItrinsics.ToFeatureKeyValueCollection(); + HwIntrinsics[] keys = features.Keys.ToArray(); + + HwIntrinsics actualIntrinsics = keys[0]; + for (int i = 1; i < keys.Length; i++) + { + actualIntrinsics |= keys[i]; + } + + Assert.Equal(expectedItrinsics, actualIntrinsics); + + IEnumerable actualValues = features.Select(x => x.Value); + Assert.Equal(expectedValues, actualValues); + } + + [Fact] + public void AllowsAllHwIntrinsicFeatures() + { + if (!Vector.IsHardwareAccelerated) + { + return; + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.True(Vector.IsHardwareAccelerated), + HwIntrinsics.AllowAll); + } + + [Fact] + public void CanLimitHwIntrinsicSIMDFeatures() + { + FeatureTestRunner.RunWithHwIntrinsicsFeature( + () => Assert.False(Vector.IsHardwareAccelerated), + HwIntrinsics.DisableSIMD); + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [Fact] + public void CanLimitHwIntrinsicBaseFeatures() + { + static void AssertDisabled() + { + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + AssertDisabled, + HwIntrinsics.DisableHWIntrinsic); + } +#endif + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithIntrinsicsParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string intrinsic) + { + Assert.NotNull(intrinsic); + + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic); + } + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableParam() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.DeserializeForXunit(serializable)); + +#if SUPPORTS_RUNTIME_INTRINSICS + Assert.False(Sse.IsSupported); +#endif + } + + FeatureTestRunner.RunWithHwIntrinsicsFeature( + AssertHwIntrinsicsFeatureDisabled, + HwIntrinsics.DisableSSE, + new FakeSerializable()); + } + + [Fact] + public void CanLimitHwIntrinsicFeaturesWithSerializableAndIntrinsicsParams() + { + static void AssertHwIntrinsicsFeatureDisabled(string serializable, string intrinsic) + { + Assert.NotNull(serializable); + Assert.NotNull(FeatureTestRunner.DeserializeForXunit(serializable)); + + switch ((HwIntrinsics)Enum.Parse(typeof(HwIntrinsics), intrinsic)) + { + case HwIntrinsics.DisableSIMD: + Assert.False(Vector.IsHardwareAccelerated, nameof(Vector.IsHardwareAccelerated)); + break; +#if SUPPORTS_RUNTIME_INTRINSICS + case HwIntrinsics.DisableHWIntrinsic: + Assert.False(Sse.IsSupported); + Assert.False(Sse2.IsSupported); + Assert.False(Aes.IsSupported); + Assert.False(Pclmulqdq.IsSupported); + Assert.False(Sse3.IsSupported); + Assert.False(Ssse3.IsSupported); + Assert.False(Sse41.IsSupported); + Assert.False(Sse42.IsSupported); + Assert.False(Popcnt.IsSupported); + Assert.False(Avx.IsSupported); + Assert.False(Fma.IsSupported); + Assert.False(Avx2.IsSupported); + Assert.False(Bmi1.IsSupported); + Assert.False(Bmi2.IsSupported); + Assert.False(Lzcnt.IsSupported); + break; + case HwIntrinsics.DisableSSE: + Assert.False(Sse.IsSupported); + break; + case HwIntrinsics.DisableSSE2: + Assert.False(Sse2.IsSupported); + break; + case HwIntrinsics.DisableAES: + Assert.False(Aes.IsSupported); + break; + case HwIntrinsics.DisablePCLMULQDQ: + Assert.False(Pclmulqdq.IsSupported); + break; + case HwIntrinsics.DisableSSE3: + Assert.False(Sse3.IsSupported); + break; + case HwIntrinsics.DisableSSSE3: + Assert.False(Ssse3.IsSupported); + break; + case HwIntrinsics.DisableSSE41: + Assert.False(Sse41.IsSupported); + break; + case HwIntrinsics.DisableSSE42: + Assert.False(Sse42.IsSupported); + break; + case HwIntrinsics.DisablePOPCNT: + Assert.False(Popcnt.IsSupported); + break; + case HwIntrinsics.DisableAVX: + Assert.False(Avx.IsSupported); + break; + case HwIntrinsics.DisableFMA: + Assert.False(Fma.IsSupported); + break; + case HwIntrinsics.DisableAVX2: + Assert.False(Avx2.IsSupported); + break; + case HwIntrinsics.DisableBMI1: + Assert.False(Bmi1.IsSupported); + break; + case HwIntrinsics.DisableBMI2: + Assert.False(Bmi2.IsSupported); + break; + case HwIntrinsics.DisableLZCNT: + Assert.False(Lzcnt.IsSupported); + break; +#endif + } + } + + foreach (HwIntrinsics intrinsic in (HwIntrinsics[])Enum.GetValues(typeof(HwIntrinsics))) + { + FeatureTestRunner.RunWithHwIntrinsicsFeature(AssertHwIntrinsicsFeatureDisabled, intrinsic, new FakeSerializable()); + } + } + + public class FakeSerializable : IXunitSerializable + { + public void Deserialize(IXunitSerializationInfo info) + { + } + + public void Serialize(IXunitSerializationInfo info) + { + } + } + } +} diff --git a/tests/Images/Input/Png/issues/Issue_935.png b/tests/Images/Input/Png/issues/Issue_935.png new file mode 100644 index 0000000000..9f9e84dc3c --- /dev/null +++ b/tests/Images/Input/Png/issues/Issue_935.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9c5cdacc9bedf481c883828de5bfb7902e2bec038fff08830171cf7075e4f9 +size 870 diff --git a/tests/coverlet.runsettings b/tests/coverlet.runsettings index ee408a5f04..cffce3540b 100644 --- a/tests/coverlet.runsettings +++ b/tests/coverlet.runsettings @@ -1,5 +1,9 @@ + + + category!=failing +